DAGCombiner.cpp
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
27 #include "llvm/ADT/SmallBitVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
57 #include "llvm/Support/CommandLine.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
60 #include "llvm/Support/ErrorHandling.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of load sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
119  CombineLevel Level;
120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140  /// This records all nodes attempted to be added to the worklist since we
141  /// last considered a new worklist entry. Because we do not add duplicate
142  /// nodes to the worklist, this can differ from the tail of the worklist.
143  SmallSetVector<SDNode *, 32> PruningList;
144 
145  /// Set of nodes which have been combined (at least once).
146  ///
147  /// This is used to allow us to reliably add any operands of a DAG node
148  /// which have not yet been combined to the worklist.
149  SmallPtrSet<SDNode *, 32> CombinedNodes;
150 
151  // AA - Used for DAG load/store alias analysis.
152  AliasAnalysis *AA;
153 
154  /// When an instruction is simplified, add all users of the instruction to
155  /// the work lists because they might get more simplified now.
156  void AddUsersToWorklist(SDNode *N) {
157  for (SDNode *Node : N->uses())
158  AddToWorklist(Node);
159  }
160 
161  // Prune potentially dangling nodes. This is called after
162  // any visit to a node, but should also be called during a visit after any
163  // failed combine which may have created a DAG node.
164  void clearAddedDanglingWorklistEntries() {
165  // Check any nodes added to the worklist to see if they are prunable.
166  while (!PruningList.empty()) {
167  auto *N = PruningList.pop_back_val();
168  if (N->use_empty())
169  recursivelyDeleteUnusedNodes(N);
170  }
171  }
172 
173  SDNode *getNextWorklistEntry() {
174  // Before we do any work, remove nodes that are not in use.
175  clearAddedDanglingWorklistEntries();
176  SDNode *N = nullptr;
177  // The Worklist holds the SDNodes in order, but it may contain null
178  // entries.
179  while (!N && !Worklist.empty()) {
180  N = Worklist.pop_back_val();
181  }
182 
183  if (N) {
184  bool GoodWorklistEntry = WorklistMap.erase(N);
185  (void)GoodWorklistEntry;
186  assert(GoodWorklistEntry &&
187  "Found a worklist entry without a corresponding map entry!");
188  }
189  return N;
190  }
191 
192  /// Call the node-specific routine that folds each particular type of node.
193  SDValue visit(SDNode *N);
194 
195  public:
196  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
197  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
198  OptLevel(OL), AA(AA) {
199  ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
200 
201  MaximumLegalStoreInBits = 0;
202  for (MVT VT : MVT::all_valuetypes())
203  if (EVT(VT).isSimple() && VT != MVT::Other &&
204  TLI.isTypeLegal(EVT(VT)) &&
205  VT.getSizeInBits() >= MaximumLegalStoreInBits)
206  MaximumLegalStoreInBits = VT.getSizeInBits();
207  }
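  // Illustrative example (not from the source): on a target whose widest
  // legal simple type is v4i32, the loop above leaves MaximumLegalStoreInBits
  // at 128.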
208 
209  void ConsiderForPruning(SDNode *N) {
210  // Mark this for potential pruning.
211  PruningList.insert(N);
212  }
213 
214  /// Add to the worklist making sure its instance is at the back (next to be
215  /// processed).
216  void AddToWorklist(SDNode *N) {
218  "Deleted Node added to Worklist");
219 
220  // Skip handle nodes as they can't usefully be combined and confuse the
221  // zero-use deletion strategy.
222  if (N->getOpcode() == ISD::HANDLENODE)
223  return;
224 
225  ConsiderForPruning(N);
226 
227  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
228  Worklist.push_back(N);
229  }
230 
231  /// Remove all instances of N from the worklist.
232  void removeFromWorklist(SDNode *N) {
233  CombinedNodes.erase(N);
234  PruningList.remove(N);
235 
236  auto It = WorklistMap.find(N);
237  if (It == WorklistMap.end())
238  return; // Not in the worklist.
239 
240  // Null out the entry rather than erasing it to avoid a linear operation.
241  Worklist[It->second] = nullptr;
242  WorklistMap.erase(It);
243  }
244 
245  void deleteAndRecombine(SDNode *N);
246  bool recursivelyDeleteUnusedNodes(SDNode *N);
247 
248  /// Replaces all uses of the results of one DAG node with new values.
249  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
250  bool AddTo = true);
251 
252  /// Replaces all uses of the results of one DAG node with new values.
253  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
254  return CombineTo(N, &Res, 1, AddTo);
255  }
256 
257  /// Replaces all uses of the results of one DAG node with new values.
258  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
259  bool AddTo = true) {
260  SDValue To[] = { Res0, Res1 };
261  return CombineTo(N, To, 2, AddTo);
262  }
263 
264  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
265 
266  private:
267  unsigned MaximumLegalStoreInBits;
268 
269  /// Check the specified integer node value to see if it can be simplified or
270  /// if things it uses can be simplified by bit propagation.
271  /// If so, return true.
272  bool SimplifyDemandedBits(SDValue Op) {
273  unsigned BitWidth = Op.getScalarValueSizeInBits();
274  APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
275  return SimplifyDemandedBits(Op, DemandedBits);
276  }
277 
278  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
279  EVT VT = Op.getValueType();
280  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
281  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
282  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
283  }
284 
285  /// Check the specified vector node value to see if it can be simplified or
286  /// if things it uses can be simplified as it only uses some of the
287  /// elements. If so, return true.
288  bool SimplifyDemandedVectorElts(SDValue Op) {
289  unsigned NumElts = Op.getValueType().getVectorNumElements();
290  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
291  return SimplifyDemandedVectorElts(Op, DemandedElts);
292  }
293 
294  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
295  const APInt &DemandedElts);
296  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
297  bool AssumeSingleUse = false);
298 
299  bool CombineToPreIndexedLoadStore(SDNode *N);
300  bool CombineToPostIndexedLoadStore(SDNode *N);
301  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
302  bool SliceUpLoad(SDNode *N);
303 
304  // Scalars have size 0 to distinguish from singleton vectors.
305  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
306  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
307  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
308 
309  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
310  /// load.
311  ///
312  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
313  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
314  /// \param EltNo index of the vector element to load.
315  /// \param OriginalLoad load that EVE came from to be replaced.
316  /// \returns EVE on success, SDValue() on failure.
317  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
318  SDValue EltNo,
319  LoadSDNode *OriginalLoad);
320  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
321  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
322  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
323  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
324  SDValue PromoteIntBinOp(SDValue Op);
325  SDValue PromoteIntShiftOp(SDValue Op);
326  SDValue PromoteExtend(SDValue Op);
327  bool PromoteLoad(SDValue Op);
328 
329  /// Call the node-specific routine that knows how to fold each
330  /// particular type of node. If that doesn't do anything, try the
331  /// target-specific DAG combines.
332  SDValue combine(SDNode *N);
333 
334  // Visitation implementation - Implement dag node combining for different
335  // node types. The semantics are as follows:
336  // Return Value:
337  // SDValue.getNode() == 0 - No change was made
338  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
339  // otherwise - N should be replaced by the returned Operand.
340  //
341  SDValue visitTokenFactor(SDNode *N);
342  SDValue visitMERGE_VALUES(SDNode *N);
343  SDValue visitADD(SDNode *N);
344  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
345  SDValue visitSUB(SDNode *N);
346  SDValue visitADDSAT(SDNode *N);
347  SDValue visitSUBSAT(SDNode *N);
348  SDValue visitADDC(SDNode *N);
349  SDValue visitADDO(SDNode *N);
350  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
351  SDValue visitSUBC(SDNode *N);
352  SDValue visitSUBO(SDNode *N);
353  SDValue visitADDE(SDNode *N);
354  SDValue visitADDCARRY(SDNode *N);
355  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
356  SDValue visitSUBE(SDNode *N);
357  SDValue visitSUBCARRY(SDNode *N);
358  SDValue visitMUL(SDNode *N);
359  SDValue useDivRem(SDNode *N);
360  SDValue visitSDIV(SDNode *N);
361  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
362  SDValue visitUDIV(SDNode *N);
363  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
364  SDValue visitREM(SDNode *N);
365  SDValue visitMULHU(SDNode *N);
366  SDValue visitMULHS(SDNode *N);
367  SDValue visitSMUL_LOHI(SDNode *N);
368  SDValue visitUMUL_LOHI(SDNode *N);
369  SDValue visitMULO(SDNode *N);
370  SDValue visitIMINMAX(SDNode *N);
371  SDValue visitAND(SDNode *N);
372  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
373  SDValue visitOR(SDNode *N);
374  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
375  SDValue visitXOR(SDNode *N);
376  SDValue SimplifyVBinOp(SDNode *N);
377  SDValue visitSHL(SDNode *N);
378  SDValue visitSRA(SDNode *N);
379  SDValue visitSRL(SDNode *N);
380  SDValue visitFunnelShift(SDNode *N);
381  SDValue visitRotate(SDNode *N);
382  SDValue visitABS(SDNode *N);
383  SDValue visitBSWAP(SDNode *N);
384  SDValue visitBITREVERSE(SDNode *N);
385  SDValue visitCTLZ(SDNode *N);
386  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
387  SDValue visitCTTZ(SDNode *N);
388  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
389  SDValue visitCTPOP(SDNode *N);
390  SDValue visitSELECT(SDNode *N);
391  SDValue visitVSELECT(SDNode *N);
392  SDValue visitSELECT_CC(SDNode *N);
393  SDValue visitSETCC(SDNode *N);
394  SDValue visitSETCCCARRY(SDNode *N);
395  SDValue visitSIGN_EXTEND(SDNode *N);
396  SDValue visitZERO_EXTEND(SDNode *N);
397  SDValue visitANY_EXTEND(SDNode *N);
398  SDValue visitAssertExt(SDNode *N);
399  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
400  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
401  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
402  SDValue visitTRUNCATE(SDNode *N);
403  SDValue visitBITCAST(SDNode *N);
404  SDValue visitBUILD_PAIR(SDNode *N);
405  SDValue visitFADD(SDNode *N);
406  SDValue visitFSUB(SDNode *N);
407  SDValue visitFMUL(SDNode *N);
408  SDValue visitFMA(SDNode *N);
409  SDValue visitFDIV(SDNode *N);
410  SDValue visitFREM(SDNode *N);
411  SDValue visitFSQRT(SDNode *N);
412  SDValue visitFCOPYSIGN(SDNode *N);
413  SDValue visitFPOW(SDNode *N);
414  SDValue visitSINT_TO_FP(SDNode *N);
415  SDValue visitUINT_TO_FP(SDNode *N);
416  SDValue visitFP_TO_SINT(SDNode *N);
417  SDValue visitFP_TO_UINT(SDNode *N);
418  SDValue visitFP_ROUND(SDNode *N);
419  SDValue visitFP_ROUND_INREG(SDNode *N);
420  SDValue visitFP_EXTEND(SDNode *N);
421  SDValue visitFNEG(SDNode *N);
422  SDValue visitFABS(SDNode *N);
423  SDValue visitFCEIL(SDNode *N);
424  SDValue visitFTRUNC(SDNode *N);
425  SDValue visitFFLOOR(SDNode *N);
426  SDValue visitFMINNUM(SDNode *N);
427  SDValue visitFMAXNUM(SDNode *N);
428  SDValue visitFMINIMUM(SDNode *N);
429  SDValue visitFMAXIMUM(SDNode *N);
430  SDValue visitBRCOND(SDNode *N);
431  SDValue visitBR_CC(SDNode *N);
432  SDValue visitLOAD(SDNode *N);
433 
434  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
435  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
436 
437  SDValue visitSTORE(SDNode *N);
438  SDValue visitLIFETIME_END(SDNode *N);
439  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
440  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
441  SDValue visitBUILD_VECTOR(SDNode *N);
442  SDValue visitCONCAT_VECTORS(SDNode *N);
443  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
444  SDValue visitVECTOR_SHUFFLE(SDNode *N);
445  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
446  SDValue visitINSERT_SUBVECTOR(SDNode *N);
447  SDValue visitMLOAD(SDNode *N);
448  SDValue visitMSTORE(SDNode *N);
449  SDValue visitMGATHER(SDNode *N);
450  SDValue visitMSCATTER(SDNode *N);
451  SDValue visitFP_TO_FP16(SDNode *N);
452  SDValue visitFP16_TO_FP(SDNode *N);
453  SDValue visitVECREDUCE(SDNode *N);
454 
455  SDValue visitFADDForFMACombine(SDNode *N);
456  SDValue visitFSUBForFMACombine(SDNode *N);
457  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
458 
459  SDValue XformToShuffleWithZero(SDNode *N);
460  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
461  SDValue N1, SDNodeFlags Flags);
462 
463  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
464 
465  SDValue foldSelectOfConstants(SDNode *N);
466  SDValue foldVSelectOfConstants(SDNode *N);
467  SDValue foldBinOpIntoSelect(SDNode *BO);
468  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
469  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
470  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
471  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
472  SDValue N2, SDValue N3, ISD::CondCode CC,
473  bool NotExtCompare = false);
474  SDValue convertSelectOfFPConstantsToLoadOffset(
475  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
476  ISD::CondCode CC);
477  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
478  SDValue N2, SDValue N3, ISD::CondCode CC);
479  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
480  const SDLoc &DL);
481  SDValue unfoldMaskedMerge(SDNode *N);
482  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
483  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
484  const SDLoc &DL, bool foldBooleans);
485  SDValue rebuildSetCC(SDValue N);
486 
487  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
488  SDValue &CC) const;
489  bool isOneUseSetCC(SDValue N) const;
490 
491  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
492  unsigned HiOp);
493  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
494  SDValue CombineExtLoad(SDNode *N);
495  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
496  SDValue combineRepeatedFPDivisors(SDNode *N);
497  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
498  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
499  SDValue BuildSDIV(SDNode *N);
500  SDValue BuildSDIVPow2(SDNode *N);
501  SDValue BuildUDIV(SDNode *N);
502  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
503  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
504  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
505  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
506  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
507  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
508  SDNodeFlags Flags, bool Reciprocal);
509  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
510  SDNodeFlags Flags, bool Reciprocal);
511  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
512  bool DemandHighBits = true);
513  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
514  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
515  SDValue InnerPos, SDValue InnerNeg,
516  unsigned PosOpcode, unsigned NegOpcode,
517  const SDLoc &DL);
518  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
519  SDValue MatchLoadCombine(SDNode *N);
520  SDValue ReduceLoadWidth(SDNode *N);
521  SDValue ReduceLoadOpStoreWidth(SDNode *N);
522  SDValue splitMergedValStore(StoreSDNode *ST);
523  SDValue TransformFPLoadStorePair(SDNode *N);
524  SDValue convertBuildVecZextToZext(SDNode *N);
525  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
526  SDValue reduceBuildVecToShuffle(SDNode *N);
527  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
528  ArrayRef<int> VectorMask, SDValue VecIn1,
529  SDValue VecIn2, unsigned LeftIdx,
530  bool DidSplitVec);
531  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
532 
533  /// Walk up chain skipping non-aliasing memory nodes,
534  /// looking for aliasing nodes and adding them to the Aliases vector.
535  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
536  SmallVectorImpl<SDValue> &Aliases);
537 
538  /// Return true if there is any possibility that the two addresses overlap.
539  bool isAlias(SDNode *Op0, SDNode *Op1) const;
540 
541  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
542  /// chain (aliasing node.)
543  SDValue FindBetterChain(SDNode *N, SDValue Chain);
544 
545  /// Try to replace a store and any possibly adjacent stores on
546  /// consecutive chains with better chains. Return true only if St is
547  /// replaced.
548  ///
549  /// Notice that other chains may still be replaced even if the function
550  /// returns false.
551  bool findBetterNeighborChains(StoreSDNode *St);
552 
553  // Helper for findBetterNeighborChains. Walks up the store chain and adds
554  // additional chained stores that do not overlap and can be parallelized.
555  bool parallelizeChainedStores(StoreSDNode *St);
556 
557  /// Holds a pointer to an LSBaseSDNode as well as information on where it
558  /// is located in a sequence of memory operations connected by a chain.
559  struct MemOpLink {
560  // Ptr to the mem node.
561  LSBaseSDNode *MemNode;
562 
563  // Offset from the base ptr.
564  int64_t OffsetFromBase;
565 
566  MemOpLink(LSBaseSDNode *N, int64_t Offset)
567  : MemNode(N), OffsetFromBase(Offset) {}
568  };
569 
570  /// This is a helper function for visitMUL to check the profitability
571  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
572  /// MulNode is the original multiply, AddNode is (add x, c1),
573  /// and ConstNode is c2.
574  bool isMulAddWithConstProfitable(SDNode *MulNode,
575  SDValue &AddNode,
576  SDValue &ConstNode);
577 
578  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
579  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
580  /// the type of the loaded value to be extended.
581  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
582  EVT LoadResultTy, EVT &ExtVT);
583 
584  /// Helper function to calculate whether the given Load/Store can have its
585  /// width reduced to ExtVT.
586  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
587  EVT &MemVT, unsigned ShAmt = 0);
588 
589  /// Used by BackwardsPropagateMask to find suitable loads.
590  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
591  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
592  ConstantSDNode *Mask, SDNode *&NodeToMask);
593  /// Attempt to propagate a given AND node back to load leaves so that they
594  /// can be combined into narrow loads.
595  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
596 
597  /// Helper function for MergeConsecutiveStores which merges the
598  /// component store chains.
599  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
600  unsigned NumStores);
601 
602  /// This is a helper function for MergeConsecutiveStores. When the
603  /// source elements of the consecutive stores are all constants or
604  /// all extracted vector elements, try to merge them into one
605  /// larger store introducing bitcasts if necessary. \return True
606  /// if a merged store was created.
607  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
608  EVT MemVT, unsigned NumStores,
609  bool IsConstantSrc, bool UseVector,
610  bool UseTrunc);
611 
612  /// This is a helper function for MergeConsecutiveStores. Stores
613  /// that potentially may be merged with St are placed in
614  /// StoreNodes. RootNode is a chain predecessor to all store
615  /// candidates.
616  void getStoreMergeCandidates(StoreSDNode *St,
617  SmallVectorImpl<MemOpLink> &StoreNodes,
618  SDNode *&Root);
619 
620  /// Helper function for MergeConsecutiveStores. Checks if
621  /// candidate stores have indirect dependency through their
622  /// operands. RootNode is the predecessor to all stores calculated
623  /// by getStoreMergeCandidates and is used to prune the dependency check.
624  /// \return True if safe to merge.
625  bool checkMergeStoreCandidatesForDependencies(
626  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
627  SDNode *RootNode);
628 
629  /// Merge consecutive store operations into a wide store.
630  /// This optimization uses wide integers or vectors when possible.
631  /// \return True if any consecutive stores were merged into a wider
632  /// store.
633  bool MergeConsecutiveStores(StoreSDNode *St);
634 
635  /// Try to transform a truncation where C is a constant:
636  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
637  ///
638  /// \p N needs to be a truncation and its first operand an AND. Other
639  /// requirements are checked by the function (e.g. that the trunc is
640  /// single-use); if they are not met, an empty SDValue is returned.
641  SDValue distributeTruncateThroughAnd(SDNode *N);
642 
643  /// Helper function to determine whether the target supports operation
644  /// given by \p Opcode for type \p VT, that is, whether the operation
645  /// is legal or custom before legalizing operations, and whether it is
646  /// legal (but not custom) after legalization.
647  bool hasOperation(unsigned Opcode, EVT VT) {
648  if (LegalOperations)
649  return TLI.isOperationLegal(Opcode, VT);
650  return TLI.isOperationLegalOrCustom(Opcode, VT);
651  }
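  // Illustrative note: before operation legalization, an opcode the target
  // only custom-lowers still counts as available here; after legalization,
  // only natively legal operations do, since custom lowering can no longer run.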
652 
653  public:
654  /// Runs the dag combiner on all nodes in the work list
655  void Run(CombineLevel AtLevel);
656 
657  SelectionDAG &getDAG() const { return DAG; }
658 
659  /// Returns a type large enough to hold any valid shift amount - before type
660  /// legalization these can be huge.
661  EVT getShiftAmountTy(EVT LHSTy) {
662  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
663  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
664  }
665 
666  /// This method returns true if we are running before type legalization or
667  /// if the specified VT is legal.
668  bool isTypeLegal(const EVT &VT) {
669  if (!LegalTypes) return true;
670  return TLI.isTypeLegal(VT);
671  }
672 
673  /// Convenience wrapper around TargetLowering::getSetCCResultType
674  EVT getSetCCResultType(EVT VT) const {
675  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
676  }
677 
678  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
679  SDValue OrigLoad, SDValue ExtLoad,
680  ISD::NodeType ExtType);
681  };
682 
683 /// This class is a DAGUpdateListener that removes any deleted
684 /// nodes from the worklist.
685 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
686  DAGCombiner &DC;
687 
688 public:
689  explicit WorklistRemover(DAGCombiner &dc)
690  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
691 
692  void NodeDeleted(SDNode *N, SDNode *E) override {
693  DC.removeFromWorklist(N);
694  }
695 };
696 
697 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
698  DAGCombiner &DC;
699 
700 public:
701  explicit WorklistInserter(DAGCombiner &dc)
702  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
703 
704  // FIXME: Ideally we could add N to the worklist, but this causes exponential
705  // compile time costs in large DAGs, e.g. Halide.
706  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
707 };
708 
709 } // end anonymous namespace
710 
711 //===----------------------------------------------------------------------===//
712 // TargetLowering::DAGCombinerInfo implementation
713 //===----------------------------------------------------------------------===//
714 
715 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
716  ((DAGCombiner*)DC)->AddToWorklist(N);
717 }
718 
719 SDValue TargetLowering::DAGCombinerInfo::
720 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
721  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
722 }
723 
724 SDValue TargetLowering::DAGCombinerInfo::
725 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
726  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
727 }
728 
729 SDValue TargetLowering::DAGCombinerInfo::
730 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
731  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
732 }
733 
734 void TargetLowering::DAGCombinerInfo::
735 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
736  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
737 }
738 
739 //===----------------------------------------------------------------------===//
740 // Helper Functions
741 //===----------------------------------------------------------------------===//
742 
743 void DAGCombiner::deleteAndRecombine(SDNode *N) {
744  removeFromWorklist(N);
745 
746  // If the operands of this node are only used by the node, they will now be
747  // dead. Make sure to re-visit them and recursively delete dead nodes.
748  for (const SDValue &Op : N->ops())
749  // For an operand generating multiple values, one of the values may
750  // become dead allowing further simplification (e.g. split index
751  // arithmetic from an indexed load).
752  if (Op->hasOneUse() || Op->getNumValues() > 1)
753  AddToWorklist(Op.getNode());
754 
755  DAG.DeleteNode(N);
756 }
757 
758 /// Return 1 if we can compute the negated form of the specified expression for
759 /// the same cost as the expression itself, or 2 if we can compute the negated
760 /// form more cheaply than the expression itself; returns 0 otherwise.
761 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
762  const TargetLowering &TLI,
763  const TargetOptions *Options,
764  bool ForCodeSize,
765  unsigned Depth = 0) {
766  // fneg is removable even if it has multiple uses.
767  if (Op.getOpcode() == ISD::FNEG) return 2;
768 
769  // Don't allow anything with multiple uses unless we know it is free.
770  EVT VT = Op.getValueType();
771  const SDNodeFlags Flags = Op->getFlags();
772  if (!Op.hasOneUse())
773  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
774  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
775  return 0;
776 
777  // Don't recurse exponentially.
778  if (Depth > 6) return 0;
779 
780  switch (Op.getOpcode()) {
781  default: return false;
782  case ISD::ConstantFP: {
783  if (!LegalOperations)
784  return 1;
785 
786  // Don't invert constant FP values after legalization unless the target says
787  // the negated constant is legal.
788  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
789  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
790  ForCodeSize);
791  }
792  case ISD::FADD:
793  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
794  return 0;
795 
796  // After operation legalization, it might not be legal to create new FSUBs.
797  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
798  return 0;
799 
800  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
801  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
802  Options, ForCodeSize, Depth + 1))
803  return V;
804  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
805  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
806  ForCodeSize, Depth + 1);
807  case ISD::FSUB:
808  // We can't turn -(A-B) into B-A when we honor signed zeros.
809  if (!Options->NoSignedZerosFPMath &&
810  !Flags.hasNoSignedZeros())
811  return 0;
812 
813  // fold (fneg (fsub A, B)) -> (fsub B, A)
814  return 1;
815 
816  case ISD::FMUL:
817  case ISD::FDIV:
818  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
819  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
820  Options, ForCodeSize, Depth + 1))
821  return V;
822 
823  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
824  ForCodeSize, Depth + 1);
825 
826  case ISD::FP_EXTEND:
827  case ISD::FP_ROUND:
828  case ISD::FSIN:
829  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
830  ForCodeSize, Depth + 1);
831  }
832 }
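// Illustrative note on the return values above: an existing FNEG is reported
// as 2 (negating it is strictly cheaper, since the fneg simply disappears),
// while a foldable ConstantFP or an FSUB whose operands can be swapped is
// reported as 1 (the negated form costs the same as the original expression).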
833 
834 /// If isNegatibleForFree returns true, return the newly negated expression.
835 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
836  bool LegalOperations, bool ForCodeSize,
837  unsigned Depth = 0) {
838  const TargetOptions &Options = DAG.getTarget().Options;
839  // fneg is removable even if it has multiple uses.
840  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
841 
842  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
843 
844  const SDNodeFlags Flags = Op.getNode()->getFlags();
845 
846  switch (Op.getOpcode()) {
847  default: llvm_unreachable("Unknown code");
848  case ISD::ConstantFP: {
849  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
850  V.changeSign();
851  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
852  }
853  case ISD::FADD:
854  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
855 
856  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
857  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
858  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
859  Depth+1))
860  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
861  GetNegatedExpression(Op.getOperand(0), DAG,
862  LegalOperations, ForCodeSize,
863  Depth+1),
864  Op.getOperand(1), Flags);
865  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
866  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
867  GetNegatedExpression(Op.getOperand(1), DAG,
868  LegalOperations, ForCodeSize,
869  Depth+1),
870  Op.getOperand(0), Flags);
871  case ISD::FSUB:
872  // fold (fneg (fsub 0, B)) -> B
873  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
874  if (N0CFP->isZero())
875  return Op.getOperand(1);
876 
877  // fold (fneg (fsub A, B)) -> (fsub B, A)
878  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
879  Op.getOperand(1), Op.getOperand(0), Flags);
880 
881  case ISD::FMUL:
882  case ISD::FDIV:
883  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
884  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
885  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
886  Depth+1))
887  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
888  GetNegatedExpression(Op.getOperand(0), DAG,
889  LegalOperations, ForCodeSize,
890  Depth+1),
891  Op.getOperand(1), Flags);
892 
893  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
894  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
895  Op.getOperand(0),
896  GetNegatedExpression(Op.getOperand(1), DAG,
897  LegalOperations, ForCodeSize,
898  Depth+1), Flags);
899 
900  case ISD::FP_EXTEND:
901  case ISD::FSIN:
902  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
903  GetNegatedExpression(Op.getOperand(0), DAG,
904  LegalOperations, ForCodeSize,
905  Depth+1));
906  case ISD::FP_ROUND:
907  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
908  GetNegatedExpression(Op.getOperand(0), DAG,
909  LegalOperations, ForCodeSize,
910  Depth+1),
911  Op.getOperand(1));
912  }
913 }
914 
915 // APInts must be the same size for most operations; this helper
916 // function zero extends the shorter of the pair so that they match.
917 // We provide an Offset so that we can create bitwidths that won't overflow.
918 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
919  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
920  LHS = LHS.zextOrSelf(Bits);
921  RHS = RHS.zextOrSelf(Bits);
922 }
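// Illustrative example: with a 24-bit LHS, a 16-bit RHS and Offset == 1, both
// APInts are zero extended to 25 bits, leaving one bit of headroom so a
// subsequent combine on the pair cannot overflow.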
923 
924 // Return true if this node is a setcc, or is a select_cc
925 // that selects between the target values used for true and false, making it
926 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
927 // the appropriate nodes based on the type of node we are checking. This
928 // simplifies life a bit for the callers.
929 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
930  SDValue &CC) const {
931  if (N.getOpcode() == ISD::SETCC) {
932  LHS = N.getOperand(0);
933  RHS = N.getOperand(1);
934  CC = N.getOperand(2);
935  return true;
936  }
937 
938  if (N.getOpcode() != ISD::SELECT_CC ||
939  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
940  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
941  return false;
942 
943  if (TLI.getBooleanContents(N.getValueType()) ==
944  TargetLowering::UndefinedBooleanContent)
945  return false;
946 
947  LHS = N.getOperand(0);
948  RHS = N.getOperand(1);
949  CC = N.getOperand(4);
950  return true;
951 }
952 
953 /// Return true if this is a SetCC-equivalent operation with only one use.
954 /// If this is true, it allows the users to invert the operation for free when
955 /// it is profitable to do so.
956 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
957  SDValue N0, N1, N2;
958  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
959  return true;
960  return false;
961 }
962 
963 // Returns the SDNode if it is a constant float BuildVector
964 // or constant float.
965 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
966  if (isa<ConstantFPSDNode>(N))
967  return N.getNode();
968  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
969  return N.getNode();
970  return nullptr;
971 }
972 
973 // Determines if it is a constant integer or a build vector of constant
974 // integers (and undefs).
975 // Do not permit build vector implicit truncation.
976 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
977  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
978  return !(Const->isOpaque() && NoOpaques);
979  if (N.getOpcode() != ISD::BUILD_VECTOR)
980  return false;
981  unsigned BitWidth = N.getScalarValueSizeInBits();
982  for (const SDValue &Op : N->op_values()) {
983  if (Op.isUndef())
984  continue;
985  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
986  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
987  (Const->isOpaque() && NoOpaques))
988  return false;
989  }
990  return true;
991 }
992 
993 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
994 // undefs.
995 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
996  if (V.getOpcode() != ISD::BUILD_VECTOR)
997  return false;
998  return isConstantOrConstantVector(V, NoOpaques) ||
999  ISD::isBuildVectorOfConstantSDNodes(V.getNode());
1000 }
1001 
1002 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1003  SDValue N1, SDNodeFlags Flags) {
1004  // Don't reassociate reductions.
1005  if (Flags.hasVectorReduction())
1006  return SDValue();
1007 
1008  EVT VT = N0.getValueType();
1009  if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
1010  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1011  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1012  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
1013  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
1014  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1015  return SDValue();
1016  }
1017  if (N0.hasOneUse()) {
1018  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
1019  // use
1020  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1021  if (!OpNode.getNode())
1022  return SDValue();
1023  AddToWorklist(OpNode.getNode());
1024  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1025  }
1026  }
1027  }
1028 
1029  if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
1030  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
1031  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1032  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
1033  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
1034  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
1035  return SDValue();
1036  }
1037  if (N1.hasOneUse()) {
1038  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
1039  // use
1040  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
1041  if (!OpNode.getNode())
1042  return SDValue();
1043  AddToWorklist(OpNode.getNode());
1044  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
1045  }
1046  }
1047  }
1048 
1049  return SDValue();
1050 }
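// Illustrative example for Opc == ISD::ADD: (add (add x, 1), 2) takes the
// constant-folding path above and becomes (add x, 3), while
// (add (add x, c1), y) with a single-use inner add is rotated to
// (add (add x, y), c1) so the constant stays outermost.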
1051 
1052 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1053  bool AddTo) {
1054  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1055  ++NodesCombined;
1056  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1057  To[0].getNode()->dump(&DAG);
1058  dbgs() << " and " << NumTo - 1 << " other values\n");
1059  for (unsigned i = 0, e = NumTo; i != e; ++i)
1060  assert((!To[i].getNode() ||
1061  N->getValueType(i) == To[i].getValueType()) &&
1062  "Cannot combine value to value of different type!");
1063 
1064  WorklistRemover DeadNodes(*this);
1065  DAG.ReplaceAllUsesWith(N, To);
1066  if (AddTo) {
1067  // Push the new nodes and any users onto the worklist
1068  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1069  if (To[i].getNode()) {
1070  AddToWorklist(To[i].getNode());
1071  AddUsersToWorklist(To[i].getNode());
1072  }
1073  }
1074  }
1075 
1076  // Finally, if the node is now dead, remove it from the graph. The node
1077  // may not be dead if the replacement process recursively simplified to
1078  // something else needing this node.
1079  if (N->use_empty())
1080  deleteAndRecombine(N);
1081  return SDValue(N, 0);
1082 }
1083 
1084 void DAGCombiner::
1085 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1086  // Replace all uses. If any nodes become isomorphic to other nodes and
1087  // are deleted, make sure to remove them from our worklist.
1088  WorklistRemover DeadNodes(*this);
1089  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1090 
1091  // Push the new node and any (possibly new) users onto the worklist.
1092  AddToWorklist(TLO.New.getNode());
1093  AddUsersToWorklist(TLO.New.getNode());
1094 
1095  // Finally, if the node is now dead, remove it from the graph. The node
1096  // may not be dead if the replacement process recursively simplified to
1097  // something else needing this node.
1098  if (TLO.Old.getNode()->use_empty())
1099  deleteAndRecombine(TLO.Old.getNode());
1100 }
1101 
1102 /// Check the specified integer node value to see if it can be simplified or if
1103 /// things it uses can be simplified by bit propagation. If so, return true.
1104 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1105  const APInt &DemandedElts) {
1106  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1107  KnownBits Known;
1108  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1109  return false;
1110 
1111  // Revisit the node.
1112  AddToWorklist(Op.getNode());
1113 
1114  // Replace the old value with the new one.
1115  ++NodesCombined;
1116  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1117  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1118  dbgs() << '\n');
1119 
1120  CommitTargetLoweringOpt(TLO);
1121  return true;
1122 }
1123 
1124 /// Check the specified vector node value to see if it can be simplified or
1125 /// if things it uses can be simplified as it only uses some of the elements.
1126 /// If so, return true.
1127 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1128  const APInt &DemandedElts,
1129  bool AssumeSingleUse) {
1130  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1131  APInt KnownUndef, KnownZero;
1132  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1133  TLO, 0, AssumeSingleUse))
1134  return false;
1135 
1136  // Revisit the node.
1137  AddToWorklist(Op.getNode());
1138 
1139  // Replace the old value with the new one.
1140  ++NodesCombined;
1141  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1142  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1143  dbgs() << '\n');
1144 
1145  CommitTargetLoweringOpt(TLO);
1146  return true;
1147 }
1148 
1149 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1150  SDLoc DL(Load);
1151  EVT VT = Load->getValueType(0);
1152  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1153 
1154  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1155  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1156  WorklistRemover DeadNodes(*this);
1157  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1158  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1159  deleteAndRecombine(Load);
1160  AddToWorklist(Trunc.getNode());
1161 }
1162 
1163 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1164  Replace = false;
1165  SDLoc DL(Op);
1166  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1167  LoadSDNode *LD = cast<LoadSDNode>(Op);
1168  EVT MemVT = LD->getMemoryVT();
1169  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1170  : LD->getExtensionType();
1171  Replace = true;
1172  return DAG.getExtLoad(ExtType, DL, PVT,
1173  LD->getChain(), LD->getBasePtr(),
1174  MemVT, LD->getMemOperand());
1175  }
1176 
1177  unsigned Opc = Op.getOpcode();
1178  switch (Opc) {
1179  default: break;
1180  case ISD::AssertSext:
1181  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1182  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1183  break;
1184  case ISD::AssertZext:
1185  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1186  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1187  break;
1188  case ISD::Constant: {
1189  unsigned ExtOpc =
1190  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1191  return DAG.getNode(ExtOpc, DL, PVT, Op);
1192  }
1193  }
1194 
1195  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1196  return SDValue();
1197  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1198 }
1199 
1200 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1201  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1202  return SDValue();
1203  EVT OldVT = Op.getValueType();
1204  SDLoc DL(Op);
1205  bool Replace = false;
1206  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1207  if (!NewOp.getNode())
1208  return SDValue();
1209  AddToWorklist(NewOp.getNode());
1210 
1211  if (Replace)
1212  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1213  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1214  DAG.getValueType(OldVT));
1215 }
1216 
1217 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1218  EVT OldVT = Op.getValueType();
1219  SDLoc DL(Op);
1220  bool Replace = false;
1221  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1222  if (!NewOp.getNode())
1223  return SDValue();
1224  AddToWorklist(NewOp.getNode());
1225 
1226  if (Replace)
1227  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1228  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1229 }
1230 
1231 /// Promote the specified integer binary operation if the target indicates it is
1232 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1233 /// i32 since i16 instructions are longer.
1234 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1235  if (!LegalOperations)
1236  return SDValue();
1237 
1238  EVT VT = Op.getValueType();
1239  if (VT.isVector() || !VT.isInteger())
1240  return SDValue();
1241 
1242  // If operation type is 'undesirable', e.g. i16 on x86, consider
1243  // promoting it.
1244  unsigned Opc = Op.getOpcode();
1245  if (TLI.isTypeDesirableForOp(Opc, VT))
1246  return SDValue();
1247 
1248  EVT PVT = VT;
1249  // Consult target whether it is a good idea to promote this operation and
1250  // what's the right type to promote it to.
1251  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1252  assert(PVT != VT && "Don't know what type to promote to!");
1253 
1254  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1255 
1256  bool Replace0 = false;
1257  SDValue N0 = Op.getOperand(0);
1258  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1259 
1260  bool Replace1 = false;
1261  SDValue N1 = Op.getOperand(1);
1262  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1263  SDLoc DL(Op);
1264 
1265  SDValue RV =
1266  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1267 
1268  // We are always replacing N0/N1's use in N and only need
1269  // additional replacements if there are additional uses.
1270  Replace0 &= !N0->hasOneUse();
1271  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1272 
1273  // Combine Op here so it is preserved past replacements.
1274  CombineTo(Op.getNode(), RV);
1275 
1276  // If operands have a use ordering, make sure we deal with
1277  // predecessor first.
1278  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1279  std::swap(N0, N1);
1280  std::swap(NN0, NN1);
1281  }
1282 
1283  if (Replace0) {
1284  AddToWorklist(NN0.getNode());
1285  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1286  }
1287  if (Replace1) {
1288  AddToWorklist(NN1.getNode());
1289  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1290  }
1291  return Op;
1292  }
1293  return SDValue();
1294 }
1295 
1296 /// Promote the specified integer shift operation if the target indicates it is
1297 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1298 /// i32 since i16 instructions are longer.
1299 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1300  if (!LegalOperations)
1301  return SDValue();
1302 
1303  EVT VT = Op.getValueType();
1304  if (VT.isVector() || !VT.isInteger())
1305  return SDValue();
1306 
1307  // If operation type is 'undesirable', e.g. i16 on x86, consider
1308  // promoting it.
1309  unsigned Opc = Op.getOpcode();
1310  if (TLI.isTypeDesirableForOp(Opc, VT))
1311  return SDValue();
1312 
1313  EVT PVT = VT;
1314  // Consult target whether it is a good idea to promote this operation and
1315  // what's the right type to promote it to.
1316  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1317  assert(PVT != VT && "Don't know what type to promote to!");
1318 
1319  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1320 
1321  bool Replace = false;
1322  SDValue N0 = Op.getOperand(0);
1323  SDValue N1 = Op.getOperand(1);
1324  if (Opc == ISD::SRA)
1325  N0 = SExtPromoteOperand(N0, PVT);
1326  else if (Opc == ISD::SRL)
1327  N0 = ZExtPromoteOperand(N0, PVT);
1328  else
1329  N0 = PromoteOperand(N0, PVT, Replace);
1330 
1331  if (!N0.getNode())
1332  return SDValue();
1333 
1334  SDLoc DL(Op);
1335  SDValue RV =
1336  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1337 
1338  AddToWorklist(N0.getNode());
1339  if (Replace)
1340  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1341 
1342  // Deal with Op being deleted.
1343  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1344  return RV;
1345  }
1346  return SDValue();
1347 }
1348 
1349 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1350  if (!LegalOperations)
1351  return SDValue();
1352 
1353  EVT VT = Op.getValueType();
1354  if (VT.isVector() || !VT.isInteger())
1355  return SDValue();
1356 
1357  // If operation type is 'undesirable', e.g. i16 on x86, consider
1358  // promoting it.
1359  unsigned Opc = Op.getOpcode();
1360  if (TLI.isTypeDesirableForOp(Opc, VT))
1361  return SDValue();
1362 
1363  EVT PVT = VT;
1364  // Consult target whether it is a good idea to promote this operation and
1365  // what's the right type to promote it to.
1366  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1367  assert(PVT != VT && "Don't know what type to promote to!");
1368  // fold (aext (aext x)) -> (aext x)
1369  // fold (aext (zext x)) -> (zext x)
1370  // fold (aext (sext x)) -> (sext x)
1371  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1372  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1373  }
1374  return SDValue();
1375 }
1376 
1377 bool DAGCombiner::PromoteLoad(SDValue Op) {
1378  if (!LegalOperations)
1379  return false;
1380 
1381  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1382  return false;
1383 
1384  EVT VT = Op.getValueType();
1385  if (VT.isVector() || !VT.isInteger())
1386  return false;
1387 
1388  // If operation type is 'undesirable', e.g. i16 on x86, consider
1389  // promoting it.
1390  unsigned Opc = Op.getOpcode();
1391  if (TLI.isTypeDesirableForOp(Opc, VT))
1392  return false;
1393 
1394  EVT PVT = VT;
1395  // Consult target whether it is a good idea to promote this operation and
1396  // what's the right type to promote it to.
1397  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1398  assert(PVT != VT && "Don't know what type to promote to!");
1399 
1400  SDLoc DL(Op);
1401  SDNode *N = Op.getNode();
1402  LoadSDNode *LD = cast<LoadSDNode>(N);
1403  EVT MemVT = LD->getMemoryVT();
1404  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD
1405  : LD->getExtensionType();
1406  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1407  LD->getChain(), LD->getBasePtr(),
1408  MemVT, LD->getMemOperand());
1409  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1410 
1411  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1412  Result.getNode()->dump(&DAG); dbgs() << '\n');
1413  WorklistRemover DeadNodes(*this);
1414  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1415  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1416  deleteAndRecombine(N);
1417  AddToWorklist(Result.getNode());
1418  return true;
1419  }
1420  return false;
1421 }
1422 
1423 /// Recursively delete a node which has no uses and any operands for
1424 /// which it is the only use.
1425 ///
1426 /// Note that this both deletes the nodes and removes them from the worklist.
1427 /// It also adds any nodes who have had a user deleted to the worklist as they
1428 /// may now have only one use and subject to other combines.
1429 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1430  if (!N->use_empty())
1431  return false;
1432 
1433  SmallSetVector<SDNode *, 16> Nodes;
1434  Nodes.insert(N);
1435  do {
1436  N = Nodes.pop_back_val();
1437  if (!N)
1438  continue;
1439 
1440  if (N->use_empty()) {
1441  for (const SDValue &ChildN : N->op_values())
1442  Nodes.insert(ChildN.getNode());
1443 
1444  removeFromWorklist(N);
1445  DAG.DeleteNode(N);
1446  } else {
1447  AddToWorklist(N);
1448  }
1449  } while (!Nodes.empty());
1450  return true;
1451 }
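// Note: the loop above walks dead operands transitively; an operand that still
// has other users is not deleted but is pushed back onto the worklist, since
// losing a user may expose new single-use combines.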
1452 
1453 //===----------------------------------------------------------------------===//
1454 // Main DAG Combiner implementation
1455 //===----------------------------------------------------------------------===//
1456 
1457 void DAGCombiner::Run(CombineLevel AtLevel) {
1458  // Set the instance variables so that the various visit routines may use them.
1459  Level = AtLevel;
1460  LegalOperations = Level >= AfterLegalizeVectorOps;
1461  LegalTypes = Level >= AfterLegalizeTypes;
1462 
1463  WorklistInserter AddNodes(*this);
1464 
1465  // Add all the dag nodes to the worklist.
1466  for (SDNode &Node : DAG.allnodes())
1467  AddToWorklist(&Node);
1468 
1469  // Create a dummy node (which is not added to allnodes), that adds a reference
1470  // to the root node, preventing it from being deleted, and tracking any
1471  // changes of the root.
1472  HandleSDNode Dummy(DAG.getRoot());
1473 
1474  // While we have a valid worklist entry node, try to combine it.
1475  while (SDNode *N = getNextWorklistEntry()) {
1476  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1477  // N is deleted from the DAG, since they too may now be dead or may have a
1478  // reduced number of uses, allowing other xforms.
1479  if (recursivelyDeleteUnusedNodes(N))
1480  continue;
1481 
1482  WorklistRemover DeadNodes(*this);
1483 
1484  // If this combine is running after legalizing the DAG, re-legalize any
1485  // nodes pulled off the worklist.
1486  if (Level == AfterLegalizeDAG) {
1487  SmallSetVector<SDNode *, 16> UpdatedNodes;
1488  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1489 
1490  for (SDNode *LN : UpdatedNodes) {
1491  AddToWorklist(LN);
1492  AddUsersToWorklist(LN);
1493  }
1494  if (!NIsValid)
1495  continue;
1496  }
1497 
1498  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1499 
1500  // Add any operands of the new node which have not yet been combined to the
1501  // worklist as well. Because the worklist uniques things already, this
1502  // won't repeatedly process the same operand.
1503  CombinedNodes.insert(N);
1504  for (const SDValue &ChildN : N->op_values())
1505  if (!CombinedNodes.count(ChildN.getNode()))
1506  AddToWorklist(ChildN.getNode());
1507 
1508  SDValue RV = combine(N);
1509 
1510  if (!RV.getNode())
1511  continue;
1512 
1513  ++NodesCombined;
1514 
1515  // If we get back the same node we passed in, rather than a new node or
1516  // zero, we know that the node must have defined multiple values and
1517  // CombineTo was used. Since CombineTo takes care of the worklist
1518  // mechanics for us, we have no work to do in this case.
1519  if (RV.getNode() == N)
1520  continue;
1521 
1522  assert(N->getOpcode() != ISD::DELETED_NODE &&
1523  RV.getOpcode() != ISD::DELETED_NODE &&
1524  "Node was deleted but visit returned new node!");
1525 
1526  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1527 
1528  if (N->getNumValues() == RV.getNode()->getNumValues())
1529  DAG.ReplaceAllUsesWith(N, RV.getNode());
1530  else {
1531  assert(N->getValueType(0) == RV.getValueType() &&
1532  N->getNumValues() == 1 && "Type mismatch");
1533  DAG.ReplaceAllUsesWith(N, &RV);
1534  }
1535 
1536  // Push the new node and any users onto the worklist
1537  AddToWorklist(RV.getNode());
1538  AddUsersToWorklist(RV.getNode());
1539 
1540  // Finally, if the node is now dead, remove it from the graph. The node
1541  // may not be dead if the replacement process recursively simplified to
1542  // something else needing this node. This will also take care of adding any
1543  // operands which have lost a user to the worklist.
1544  recursivelyDeleteUnusedNodes(N);
1545  }
1546 
1547  // If the root changed (e.g. it was a dead load), update the root.
1548  DAG.setRoot(Dummy.getValue());
1549  DAG.RemoveDeadNodes();
1550 }
1551 
1552 SDValue DAGCombiner::visit(SDNode *N) {
1553  switch (N->getOpcode()) {
1554  default: break;
1555  case ISD::TokenFactor: return visitTokenFactor(N);
1556  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1557  case ISD::ADD: return visitADD(N);
1558  case ISD::SUB: return visitSUB(N);
1559  case ISD::SADDSAT:
1560  case ISD::UADDSAT: return visitADDSAT(N);
1561  case ISD::SSUBSAT:
1562  case ISD::USUBSAT: return visitSUBSAT(N);
1563  case ISD::ADDC: return visitADDC(N);
1564  case ISD::SADDO:
1565  case ISD::UADDO: return visitADDO(N);
1566  case ISD::SUBC: return visitSUBC(N);
1567  case ISD::SSUBO:
1568  case ISD::USUBO: return visitSUBO(N);
1569  case ISD::ADDE: return visitADDE(N);
1570  case ISD::ADDCARRY: return visitADDCARRY(N);
1571  case ISD::SUBE: return visitSUBE(N);
1572  case ISD::SUBCARRY: return visitSUBCARRY(N);
1573  case ISD::MUL: return visitMUL(N);
1574  case ISD::SDIV: return visitSDIV(N);
1575  case ISD::UDIV: return visitUDIV(N);
1576  case ISD::SREM:
1577  case ISD::UREM: return visitREM(N);
1578  case ISD::MULHU: return visitMULHU(N);
1579  case ISD::MULHS: return visitMULHS(N);
1580  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1581  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1582  case ISD::SMULO:
1583  case ISD::UMULO: return visitMULO(N);
1584  case ISD::SMIN:
1585  case ISD::SMAX:
1586  case ISD::UMIN:
1587  case ISD::UMAX: return visitIMINMAX(N);
1588  case ISD::AND: return visitAND(N);
1589  case ISD::OR: return visitOR(N);
1590  case ISD::XOR: return visitXOR(N);
1591  case ISD::SHL: return visitSHL(N);
1592  case ISD::SRA: return visitSRA(N);
1593  case ISD::SRL: return visitSRL(N);
1594  case ISD::ROTR:
1595  case ISD::ROTL: return visitRotate(N);
1596  case ISD::FSHL:
1597  case ISD::FSHR: return visitFunnelShift(N);
1598  case ISD::ABS: return visitABS(N);
1599  case ISD::BSWAP: return visitBSWAP(N);
1600  case ISD::BITREVERSE: return visitBITREVERSE(N);
1601  case ISD::CTLZ: return visitCTLZ(N);
1602  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1603  case ISD::CTTZ: return visitCTTZ(N);
1604  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1605  case ISD::CTPOP: return visitCTPOP(N);
1606  case ISD::SELECT: return visitSELECT(N);
1607  case ISD::VSELECT: return visitVSELECT(N);
1608  case ISD::SELECT_CC: return visitSELECT_CC(N);
1609  case ISD::SETCC: return visitSETCC(N);
1610  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1611  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1612  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1613  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1614  case ISD::AssertSext:
1615  case ISD::AssertZext: return visitAssertExt(N);
1616  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1617  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1618  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1619  case ISD::TRUNCATE: return visitTRUNCATE(N);
1620  case ISD::BITCAST: return visitBITCAST(N);
1621  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1622  case ISD::FADD: return visitFADD(N);
1623  case ISD::FSUB: return visitFSUB(N);
1624  case ISD::FMUL: return visitFMUL(N);
1625  case ISD::FMA: return visitFMA(N);
1626  case ISD::FDIV: return visitFDIV(N);
1627  case ISD::FREM: return visitFREM(N);
1628  case ISD::FSQRT: return visitFSQRT(N);
1629  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1630  case ISD::FPOW: return visitFPOW(N);
1631  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1632  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1633  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1634  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1635  case ISD::FP_ROUND: return visitFP_ROUND(N);
1636  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1637  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1638  case ISD::FNEG: return visitFNEG(N);
1639  case ISD::FABS: return visitFABS(N);
1640  case ISD::FFLOOR: return visitFFLOOR(N);
1641  case ISD::FMINNUM: return visitFMINNUM(N);
1642  case ISD::FMAXNUM: return visitFMAXNUM(N);
1643  case ISD::FMINIMUM: return visitFMINIMUM(N);
1644  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1645  case ISD::FCEIL: return visitFCEIL(N);
1646  case ISD::FTRUNC: return visitFTRUNC(N);
1647  case ISD::BRCOND: return visitBRCOND(N);
1648  case ISD::BR_CC: return visitBR_CC(N);
1649  case ISD::LOAD: return visitLOAD(N);
1650  case ISD::STORE: return visitSTORE(N);
1651  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1652  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1653  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1654  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1655  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1656  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1657  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1658  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1659  case ISD::MGATHER: return visitMGATHER(N);
1660  case ISD::MLOAD: return visitMLOAD(N);
1661  case ISD::MSCATTER: return visitMSCATTER(N);
1662  case ISD::MSTORE: return visitMSTORE(N);
1663  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1664  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1665  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1666  case ISD::VECREDUCE_FADD:
1667  case ISD::VECREDUCE_FMUL:
1668  case ISD::VECREDUCE_ADD:
1669  case ISD::VECREDUCE_MUL:
1670  case ISD::VECREDUCE_AND:
1671  case ISD::VECREDUCE_OR:
1672  case ISD::VECREDUCE_XOR:
1673  case ISD::VECREDUCE_SMAX:
1674  case ISD::VECREDUCE_SMIN:
1675  case ISD::VECREDUCE_UMAX:
1676  case ISD::VECREDUCE_UMIN:
1677  case ISD::VECREDUCE_FMAX:
1678  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1679  }
1680  return SDValue();
1681 }
1682 
1683 SDValue DAGCombiner::combine(SDNode *N) {
1684  SDValue RV = visit(N);
1685 
1686  // If nothing happened, try a target-specific DAG combine.
1687  if (!RV.getNode()) {
1688  assert(N->getOpcode() != ISD::DELETED_NODE &&
1689  "Node was deleted but visit returned NULL!");
1690 
1691  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1692  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1693 
1694  // Expose the DAG combiner to the target combiner impls.
1695  TargetLowering::DAGCombinerInfo
1696  DagCombineInfo(DAG, Level, false, this);
1697 
1698  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1699  }
1700  }
1701 
1702  // If nothing happened still, try promoting the operation.
1703  if (!RV.getNode()) {
1704  switch (N->getOpcode()) {
1705  default: break;
1706  case ISD::ADD:
1707  case ISD::SUB:
1708  case ISD::MUL:
1709  case ISD::AND:
1710  case ISD::OR:
1711  case ISD::XOR:
1712  RV = PromoteIntBinOp(SDValue(N, 0));
1713  break;
1714  case ISD::SHL:
1715  case ISD::SRA:
1716  case ISD::SRL:
1717  RV = PromoteIntShiftOp(SDValue(N, 0));
1718  break;
1719  case ISD::SIGN_EXTEND:
1720  case ISD::ZERO_EXTEND:
1721  case ISD::ANY_EXTEND:
1722  RV = PromoteExtend(SDValue(N, 0));
1723  break;
1724  case ISD::LOAD:
1725  if (PromoteLoad(SDValue(N, 0)))
1726  RV = SDValue(N, 0);
1727  break;
1728  }
1729  }
1730 
1731  // If N is a commutative binary node, try to eliminate it if the commuted
1732  // version is already present in the DAG.
1733  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1734  N->getNumValues() == 1) {
1735  SDValue N0 = N->getOperand(0);
1736  SDValue N1 = N->getOperand(1);
1737 
1738  // Constant operands are canonicalized to RHS.
1739  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1740  SDValue Ops[] = {N1, N0};
1741  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1742  N->getFlags());
1743  if (CSENode)
1744  return SDValue(CSENode, 0);
1745  }
1746  }
1747 
1748  return RV;
1749 }
1750 
1751 /// Given a node, return its input chain if it has one, otherwise return a null
1752 /// sd operand.
1753 static SDValue getInputChainForNode(SDNode *N) {
1754  if (unsigned NumOps = N->getNumOperands()) {
1755  if (N->getOperand(0).getValueType() == MVT::Other)
1756  return N->getOperand(0);
1757  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1758  return N->getOperand(NumOps-1);
1759  for (unsigned i = 1; i < NumOps-1; ++i)
1760  if (N->getOperand(i).getValueType() == MVT::Other)
1761  return N->getOperand(i);
1762  }
1763  return SDValue();
1764 }
1765 
1766 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1767  // If N has two operands, where one has an input chain equal to the other,
1768  // the 'other' chain is redundant.
1769  if (N->getNumOperands() == 2) {
1770  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1771  return N->getOperand(0);
1772  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1773  return N->getOperand(1);
1774  }
1775 
1776  // Don't simplify token factors if optnone.
1777  if (OptLevel == CodeGenOpt::None)
1778  return SDValue();
1779 
1780  // If the sole user is a token factor, we should make sure we have a
1781  // chance to merge them together. This prevents TF chains from inhibiting
1782  // optimizations.
1783  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1784  AddToWorklist(*(N->use_begin()));
1785 
1786  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1787  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1788  SmallPtrSet<SDNode*, 16> SeenOps;
1789  bool Changed = false; // If we should replace this token factor.
1790 
1791  // Start out with this token factor.
1792  TFs.push_back(N);
1793 
1794  // Iterate through token factors. The TFs list grows as new token factors are
1795  // encountered.
1796  for (unsigned i = 0; i < TFs.size(); ++i) {
1797  SDNode *TF = TFs[i];
1798 
1799  // Check each of the operands.
1800  for (const SDValue &Op : TF->op_values()) {
1801  switch (Op.getOpcode()) {
1802  case ISD::EntryToken:
1803  // Entry tokens don't need to be added to the list. They are
1804  // redundant.
1805  Changed = true;
1806  break;
1807 
1808  case ISD::TokenFactor:
1809  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1810  // Queue up for processing.
1811  TFs.push_back(Op.getNode());
1812  // Clean up in case the token factor is removed.
1813  AddToWorklist(Op.getNode());
1814  Changed = true;
1815  break;
1816  }
1817  LLVM_FALLTHROUGH;
1818 
1819  default:
1820  // Only add if it isn't already in the list.
1821  if (SeenOps.insert(Op.getNode()).second)
1822  Ops.push_back(Op);
1823  else
1824  Changed = true;
1825  break;
1826  }
1827  }
1828  }
1829 
1830  // Remove Nodes that are chained to another node in the list. Do so
1831  // by walking up chains breadth-first, stopping when we've seen
1832  // another operand. In general we must climb to the EntryNode, but we can exit
1833  // early if we find all remaining work is associated with just one operand as
1834  // no further pruning is possible.
1835 
1836  // List of nodes to search through and original Ops from which they originate.
1837  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1838  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1839  SmallPtrSet<SDNode *, 16> SeenChains;
1840  bool DidPruneOps = false;
1841 
1842  unsigned NumLeftToConsider = 0;
1843  for (const SDValue &Op : Ops) {
1844  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1845  OpWorkCount.push_back(1);
1846  }
1847 
1848  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1849  // If this is an Op, we can remove the op from the list. Re-mark any
1850  // search associated with it as from the current OpNumber.
1851  if (SeenOps.count(Op) != 0) {
1852  Changed = true;
1853  DidPruneOps = true;
1854  unsigned OrigOpNumber = 0;
1855  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1856  OrigOpNumber++;
1857  assert((OrigOpNumber != Ops.size()) &&
1858  "expected to find TokenFactor Operand");
1859  // Re-mark worklist from OrigOpNumber to OpNumber
1860  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1861  if (Worklist[i].second == OrigOpNumber) {
1862  Worklist[i].second = OpNumber;
1863  }
1864  }
1865  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1866  OpWorkCount[OrigOpNumber] = 0;
1867  NumLeftToConsider--;
1868  }
1869  // Add if it's a new chain
1870  if (SeenChains.insert(Op).second) {
1871  OpWorkCount[OpNumber]++;
1872  Worklist.push_back(std::make_pair(Op, OpNumber));
1873  }
1874  };
1875 
1876  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1877  // We need to consider at least 2 Ops to prune.
1878  if (NumLeftToConsider <= 1)
1879  break;
1880  auto CurNode = Worklist[i].first;
1881  auto CurOpNumber = Worklist[i].second;
1882  assert((OpWorkCount[CurOpNumber] > 0) &&
1883  "Node should not appear in worklist");
1884  switch (CurNode->getOpcode()) {
1885  case ISD::EntryToken:
1886  // Hitting EntryToken is the only way for the search to terminate without
1887  // hitting another operand's search.
1888  // Prevent us from marking this operand
1889  // considered.
1890  NumLeftToConsider++;
1891  break;
1892  case ISD::TokenFactor:
1893  for (const SDValue &Op : CurNode->op_values())
1894  AddToWorklist(i, Op.getNode(), CurOpNumber);
1895  break;
1896  case ISD::LIFETIME_START:
1897  case ISD::LIFETIME_END:
1898  case ISD::CopyFromReg:
1899  case ISD::CopyToReg:
1900  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1901  break;
1902  default:
1903  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1904  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1905  break;
1906  }
1907  OpWorkCount[CurOpNumber]--;
1908  if (OpWorkCount[CurOpNumber] == 0)
1909  NumLeftToConsider--;
1910  }
1911 
1912  // If we've changed things around then replace token factor.
1913  if (Changed) {
1914  SDValue Result;
1915  if (Ops.empty()) {
1916  // The entry token is the only possible outcome.
1917  Result = DAG.getEntryNode();
1918  } else {
1919  if (DidPruneOps) {
1920  SmallVector<SDValue, 8> PrunedOps;
1921  //
1922  for (const SDValue &Op : Ops) {
1923  if (SeenChains.count(Op.getNode()) == 0)
1924  PrunedOps.push_back(Op);
1925  }
1926  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1927  } else {
1928  Result = DAG.getTokenFactor(SDLoc(N), Ops);
1929  }
1930  }
1931  return Result;
1932  }
1933  return SDValue();
1934 }
1935 
1936 /// MERGE_VALUES can always be eliminated.
1937 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1938  WorklistRemover DeadNodes(*this);
1939  // Replacing results may cause a different MERGE_VALUES to suddenly
1940  // be CSE'd with N, and carry its uses with it. Iterate until no
1941  // uses remain, to ensure that the node can be safely deleted.
1942  // First add the users of this node to the work list so that they
1943  // can be tried again once they have new operands.
1944  AddUsersToWorklist(N);
1945  do {
1946  // Do as a single replacement to avoid rewalking use lists.
1947  SmallVector<SDValue, 8> Ops;
1948  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1949  Ops.push_back(N->getOperand(i));
1950  DAG.ReplaceAllUsesWith(N, Ops.data());
1951  } while (!N->use_empty());
1952  deleteAndRecombine(N);
1953  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1954 }
1955 
1956 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1957 /// ConstantSDNode pointer; else return nullptr.
1958 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1959  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1960  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1961 }
1962 
1963 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1964  assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1965 
1966  // Don't do this unless the old select is going away. We want to eliminate the
1967  // binary operator, not replace a binop with a select.
1968  // TODO: Handle ISD::SELECT_CC.
1969  unsigned SelOpNo = 0;
1970  SDValue Sel = BO->getOperand(0);
1971  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1972  SelOpNo = 1;
1973  Sel = BO->getOperand(1);
1974  }
1975 
1976  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1977  return SDValue();
1978 
1979  SDValue CT = Sel.getOperand(1);
1980  if (!isConstantOrConstantVector(CT, true) &&
1981  !isConstantFPBuildVectorOrConstantFP(CT))
1982  return SDValue();
1983 
1984  SDValue CF = Sel.getOperand(2);
1985  if (!isConstantOrConstantVector(CF, true) &&
1986  !isConstantFPBuildVectorOrConstantFP(CF))
1987  return SDValue();
1988 
1989  // Bail out if any constants are opaque because we can't constant fold those.
1990  // The exception is "and" and "or" with either 0 or -1 in which case we can
1991  // propagate non constant operands into select. I.e.:
1992  // and (select Cond, 0, -1), X --> select Cond, 0, X
1993  // or X, (select Cond, -1, 0) --> select Cond, -1, X
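      // (This works because x & 0 == 0, x & -1 == x, x | -1 == -1, and
      // x | 0 == x, so the non-constant operand can flow straight into the
      // surviving select arm.)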
1994  auto BinOpcode = BO->getOpcode();
1995  bool CanFoldNonConst =
1996  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1997  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1998  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1999 
2000  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2001  if (!CanFoldNonConst &&
2002  !isConstantOrConstantVector(CBO, true) &&
2003  !isConstantFPBuildVectorOrConstantFP(CBO))
2004  return SDValue();
2005 
2006  EVT VT = Sel.getValueType();
2007 
2008  // In the case of a shift, the value and the shift amount may have different
2009  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS type.
2010  // Bail out if we have swapped operands and the value types do not match.
2011  // NB: x86 is fine if the operands are not swapped and the shift amount VT is
2012  // no bigger than the shifted value. TODO: it is possible to check for a shift
2013  // operation, correct the VTs and still perform the optimization on x86 if needed.
2014  if (SelOpNo && VT != CBO.getValueType())
2015  return SDValue();
2016 
2017  // We have a select-of-constants followed by a binary operator with a
2018  // constant. Eliminate the binop by pulling the constant math into the select.
2019  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2020  SDLoc DL(Sel);
2021  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2022  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2023  if (!CanFoldNonConst && !NewCT.isUndef() &&
2024  !isConstantOrConstantVector(NewCT, true) &&
2025  !isConstantFPBuildVectorOrConstantFP(NewCT))
2026  return SDValue();
2027 
2028  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2029  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2030  if (!CanFoldNonConst && !NewCF.isUndef() &&
2031  !isConstantOrConstantVector(NewCF, true) &&
2032  !isConstantFPBuildVectorOrConstantFP(NewCF))
2033  return SDValue();
2034 
2035  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2036 }
2037 
2038 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2039  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2040  "Expecting add or sub");
2041 
2042  // Match a constant operand and a zext operand for the math instruction:
2043  // add Z, C
2044  // sub C, Z
2045  bool IsAdd = N->getOpcode() == ISD::ADD;
2046  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2047  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2048  auto *CN = dyn_cast<ConstantSDNode>(C);
2049  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2050  return SDValue();
2051 
2052  // Match the zext operand as a setcc of a boolean.
2053  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2054  Z.getOperand(0).getValueType() != MVT::i1)
2055  return SDValue();
2056 
2057  // Match the compare as: setcc (X & 1), 0, eq.
2058  SDValue SetCC = Z.getOperand(0);
2059  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2060  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2061  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2062  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2063  return SDValue();
2064 
2065  // We are adding/subtracting a constant and an inverted low bit. Turn that
2066  // into a subtract/add of the low bit with incremented/decremented constant:
2067  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2068  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
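      // Worked example: zext i1 (seteq (X & 1), 0) is 1 - (X & 1), so adding
      // it to C gives C + 1 - (X & 1), i.e. sub C+1, (zext (X & 1)).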
2069  EVT VT = C.getValueType();
2070  SDLoc DL(N);
2071  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2072  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2073  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2074  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2075 }
2076 
2077 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2078 /// a shift and add with a different constant.
2079 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2080  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2081  "Expecting add or sub");
2082 
2083  // We need a constant operand for the add/sub, and the other operand is a
2084  // logical shift right: add (srl), C or sub C, (srl).
2085  bool IsAdd = N->getOpcode() == ISD::ADD;
2086  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2087  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2088  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2089  if (!C || ShiftOp.getOpcode() != ISD::SRL)
2090  return SDValue();
2091 
2092  // The shift must be of a 'not' value.
2093  SDValue Not = ShiftOp.getOperand(0);
2094  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2095  return SDValue();
2096 
2097  // The shift must be moving the sign bit to the least-significant-bit.
2098  EVT VT = ShiftOp.getValueType();
2099  SDValue ShAmt = ShiftOp.getOperand(1);
2100  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2101  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2102  return SDValue();
2103 
2104  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2105  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2106  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
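      // Worked example for i32: (not X) >>u 31 equals 1 - (X >>u 31), so the
      // add form is C + 1 - (X >>u 31); since (X >>s 31) == -(X >>u 31), that
      // is (sra X, 31) + (C + 1). The sub form follows the same reasoning.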
2107  SDLoc DL(N);
2108  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2109  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2110  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2111  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2112 }
2113 
2114 SDValue DAGCombiner::visitADD(SDNode *N) {
2115  SDValue N0 = N->getOperand(0);
2116  SDValue N1 = N->getOperand(1);
2117  EVT VT = N0.getValueType();
2118  SDLoc DL(N);
2119 
2120  // fold vector ops
2121  if (VT.isVector()) {
2122  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2123  return FoldedVOp;
2124 
2125  // fold (add x, 0) -> x, vector edition
2126  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2127  return N0;
2128  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2129  return N1;
2130  }
2131 
2132  // fold (add x, undef) -> undef
2133  if (N0.isUndef())
2134  return N0;
2135 
2136  if (N1.isUndef())
2137  return N1;
2138 
2139  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2140  // canonicalize constant to RHS
2141  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2142  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2143  // fold (add c1, c2) -> c1+c2
2144  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2145  N1.getNode());
2146  }
2147 
2148  // fold (add x, 0) -> x
2149  if (isNullConstant(N1))
2150  return N0;
2151 
2152  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2153  // fold ((c1-A)+c2) -> (c1+c2)-A
2154  if (N0.getOpcode() == ISD::SUB &&
2155  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2156  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2157  return DAG.getNode(ISD::SUB, DL, VT,
2158  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2159  N0.getOperand(1));
2160  }
2161 
2162  // add (sext i1 X), 1 -> zext (not i1 X)
2163  // We don't transform this pattern:
2164  // add (zext i1 X), -1 -> sext (not i1 X)
2165  // because most (?) targets generate better code for the zext form.
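      // (sext i1 X is 0 or -1, so adding 1 yields 1 or 0, which is exactly
      // zext (not i1 X).)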
2166  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2167  isOneOrOneSplat(N1)) {
2168  SDValue X = N0.getOperand(0);
2169  if ((!LegalOperations ||
2170  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2171  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2172  X.getScalarValueSizeInBits() == 1) {
2173  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2174  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2175  }
2176  }
2177 
2178  // Undo the add -> or combine to merge constant offsets from a frame index.
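      // e.g. (or FrameIndex, 4) + 16, where the low bits of FrameIndex are
      // known zero, becomes FrameIndex + (16 + 4) so both offsets can fold
      // into the addressing mode.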
2179  if (N0.getOpcode() == ISD::OR &&
2180  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2181  isa<ConstantSDNode>(N0.getOperand(1)) &&
2182  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2183  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2184  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2185  }
2186  }
2187 
2188  if (SDValue NewSel = foldBinOpIntoSelect(N))
2189  return NewSel;
2190 
2191  // reassociate add
2192  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2193  return RADD;
2194 
2195  // fold ((0-A) + B) -> B-A
2196  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2197  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2198 
2199  // fold (A + (0-B)) -> A-B
2200  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2201  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2202 
2203  // fold (A+(B-A)) -> B
2204  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2205  return N1.getOperand(0);
2206 
2207  // fold ((B-A)+A) -> B
2208  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2209  return N0.getOperand(0);
2210 
2211  // fold ((A-B)+(C-A)) -> (C-B)
2212  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2213  N0.getOperand(0) == N1.getOperand(1))
2214  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2215  N0.getOperand(1));
2216 
2217  // fold ((A-B)+(B-C)) -> (A-C)
2218  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2219  N0.getOperand(1) == N1.getOperand(0))
2220  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2221  N1.getOperand(1));
2222 
2223  // fold (A+(B-(A+C))) to (B-C)
2224  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2225  N0 == N1.getOperand(1).getOperand(0))
2226  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2227  N1.getOperand(1).getOperand(1));
2228 
2229  // fold (A+(B-(C+A))) to (B-C)
2230  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2231  N0 == N1.getOperand(1).getOperand(1))
2232  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2233  N1.getOperand(1).getOperand(0));
2234 
2235  // fold (A+((B-A)+or-C)) to (B+or-C)
2236  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2237  N1.getOperand(0).getOpcode() == ISD::SUB &&
2238  N0 == N1.getOperand(0).getOperand(1))
2239  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2240  N1.getOperand(1));
2241 
2242  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2243  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2244  SDValue N00 = N0.getOperand(0);
2245  SDValue N01 = N0.getOperand(1);
2246  SDValue N10 = N1.getOperand(0);
2247  SDValue N11 = N1.getOperand(1);
2248 
2249  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2250  return DAG.getNode(ISD::SUB, DL, VT,
2251  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2252  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2253  }
2254 
2255  // fold (add (umax X, C), -C) --> (usubsat X, C)
2256  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2257  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2258  return (!Max && !Op) ||
2259  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2260  };
2261  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2262  /*AllowUndefs*/ true))
2263  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2264  N0.getOperand(1));
2265  }
2266 
2267  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2268  return V;
2269 
2270  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2271  return V;
2272 
2273  if (SimplifyDemandedBits(SDValue(N, 0)))
2274  return SDValue(N, 0);
2275 
2276  // fold (a+b) -> (a|b) iff a and b share no bits.
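      // e.g. (X & ~3) + 2 --> (X & ~3) | 2, since no bit position can carry.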
2277  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2278  DAG.haveNoCommonBitsSet(N0, N1))
2279  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2280 
2281  if (isOneOrOneSplat(N1)) {
2282  // fold (add (xor a, -1), 1) -> (sub 0, a)
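      // ((xor a, -1) is ~a == -a - 1 in two's complement, so ~a + 1 == 0 - a.)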
2283  if (isBitwiseNot(N0))
2284  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2285  N0.getOperand(0));
2286 
2287  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2288  if (N0.getOpcode() == ISD::ADD ||
2289  N0.getOpcode() == ISD::UADDO ||
2290  N0.getOpcode() == ISD::SADDO) {
2291  SDValue A, Xor;
2292 
2293  if (isBitwiseNot(N0.getOperand(0))) {
2294  A = N0.getOperand(1);
2295  Xor = N0.getOperand(0);
2296  } else if (isBitwiseNot(N0.getOperand(1))) {
2297  A = N0.getOperand(0);
2298  Xor = N0.getOperand(1);
2299  }
2300 
2301  if (Xor)
2302  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2303  }
2304  }
2305 
2306  if (SDValue Combined = visitADDLike(N0, N1, N))
2307  return Combined;
2308 
2309  if (SDValue Combined = visitADDLike(N1, N0, N))
2310  return Combined;
2311 
2312  return SDValue();
2313 }
2314 
2315 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2316  unsigned Opcode = N->getOpcode();
2317  SDValue N0 = N->getOperand(0);
2318  SDValue N1 = N->getOperand(1);
2319  EVT VT = N0.getValueType();
2320  SDLoc DL(N);
2321 
2322  // fold vector ops
2323  if (VT.isVector()) {
2324  // TODO SimplifyVBinOp
2325 
2326  // fold (add_sat x, 0) -> x, vector edition
2327  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2328  return N0;
2329  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2330  return N1;
2331  }
2332 
2333  // fold (add_sat x, undef) -> -1
2334  if (N0.isUndef() || N1.isUndef())
2335  return DAG.getAllOnesConstant(DL, VT);
2336 
2337  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2338  // canonicalize constant to RHS
2339  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2340  return DAG.getNode(Opcode, DL, VT, N1, N0);
2341  // fold (add_sat c1, c2) -> c3
2342  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2343  N1.getNode());
2344  }
2345 
2346  // fold (add_sat x, 0) -> x
2347  if (isNullConstant(N1))
2348  return N0;
2349 
2350  // If it cannot overflow, transform into an add.
2351  if (Opcode == ISD::UADDSAT)
2352  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2353  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2354 
2355  return SDValue();
2356 }
2357 
2358 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2359  bool Masked = false;
2360 
2361  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2362  while (true) {
2363  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2364  V = V.getOperand(0);
2365  continue;
2366  }
2367 
2368  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2369  Masked = true;
2370  V = V.getOperand(0);
2371  continue;
2372  }
2373 
2374  break;
2375  }
2376 
2377  // If this is not a carry, return.
2378  if (V.getResNo() != 1)
2379  return SDValue();
2380 
2381  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2382  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2383  return SDValue();
2384 
2385  EVT VT = V.getNode()->getValueType(0);
2386  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2387  return SDValue();
2388 
2389  // If the result is masked, then no matter what kind of bool it is we can
2390  // return. If it isn't, then we need to make sure the bool type is either 0 or
2391  // 1 and not other values.
2392  if (Masked ||
2393  TLI.getBooleanContents(V.getValueType()) ==
2394  TargetLowering::ZeroOrOneBooleanContent)
2395  return V;
2396 
2397  return SDValue();
2398 }
2399 
2400 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2401 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2402 /// the opcode and bypass the mask operation.
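     /// For example, if the mask source Y is known to be 0 or -1, then
     /// (Y & 1) == -Y, so N0 + (Y & 1) == N0 - Y and N0 - (Y & 1) == N0 + Y.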
2403 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2404  SelectionDAG &DAG, const SDLoc &DL) {
2405  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2406  return SDValue();
2407 
2408  EVT VT = N0.getValueType();
2409  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2410  return SDValue();
2411 
2412  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2413  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2414  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2415 }
2416 
2417 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2418  EVT VT = N0.getValueType();
2419  SDLoc DL(LocReference);
2420 
2421  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2422  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2423  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2424  return DAG.getNode(ISD::SUB, DL, VT, N0,
2425  DAG.getNode(ISD::SHL, DL, VT,
2426  N1.getOperand(0).getOperand(1),
2427  N1.getOperand(1)));
2428 
2429  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2430  return V;
2431 
2432  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2433  // rather than 'add 0/-1' (the zext should get folded).
2434  // add (sext i1 Y), X --> sub X, (zext i1 Y)
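      // ((sext i1 Y) == -(zext i1 Y), so X + (sext i1 Y) == X - (zext i1 Y).)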
2435  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2436  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2437  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2438  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2439  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2440  }
2441 
2442  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2443  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2444  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2445  if (TN->getVT() == MVT::i1) {
2446  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2447  DAG.getConstant(1, DL, VT));
2448  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2449  }
2450  }
2451 
2452  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2453  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2454  N1.getResNo() == 0)
2455  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2456  N0, N1.getOperand(0), N1.getOperand(2));
2457 
2458  // (add X, Carry) -> (addcarry X, 0, Carry)
2459  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2460  if (SDValue Carry = getAsCarry(TLI, N1))
2461  return DAG.getNode(ISD::ADDCARRY, DL,
2462  DAG.getVTList(VT, Carry.getValueType()), N0,
2463  DAG.getConstant(0, DL, VT), Carry);
2464 
2465  return SDValue();
2466 }
2467 
2468 SDValue DAGCombiner::visitADDC(SDNode *N) {
2469  SDValue N0 = N->getOperand(0);
2470  SDValue N1 = N->getOperand(1);
2471  EVT VT = N0.getValueType();
2472  SDLoc DL(N);
2473 
2474  // If the flag result is dead, turn this into an ADD.
2475  if (!N->hasAnyUseOfValue(1))
2476  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2477  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2478 
2479  // canonicalize constant to RHS.
2480  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2481  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2482  if (N0C && !N1C)
2483  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2484 
2485  // fold (addc x, 0) -> x + no carry out
2486  if (isNullConstant(N1))
2487  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2488  DL, MVT::Glue));
2489 
2490  // If it cannot overflow, transform into an add.
2491  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2492  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2493  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2494 
2495  return SDValue();
2496 }
2497 
2498 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2499  SelectionDAG &DAG, const TargetLowering &TLI) {
2500  EVT VT = V.getValueType();
2501 
2502  SDValue Cst;
2503  switch (TLI.getBooleanContents(VT)) {
2504  case TargetLowering::ZeroOrOneBooleanContent:
2505  case TargetLowering::UndefinedBooleanContent:
2506  Cst = DAG.getConstant(1, DL, VT);
2507  break;
2508  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2509  Cst = DAG.getConstant(-1, DL, VT);
2510  break;
2511  }
2512 
2513  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2514 }
2515 
2516 static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
2517  if (V.getOpcode() != ISD::XOR)
2518  return SDValue();
2519 
2520  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2521  if (!Const)
2522  return SDValue();
2523 
2524  EVT VT = V.getValueType();
2525 
2526  bool IsFlip = false;
2527  switch(TLI.getBooleanContents(VT)) {
2528  case TargetLowering::ZeroOrOneBooleanContent:
2529  IsFlip = Const->isOne();
2530  break;
2531  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2532  IsFlip = Const->isAllOnesValue();
2533  break;
2534  case TargetLowering::UndefinedBooleanContent:
2535  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2536  break;
2537  }
2538 
2539  if (IsFlip)
2540  return V.getOperand(0);
2541  return SDValue();
2542 }
2543 
2544 SDValue DAGCombiner::visitADDO(SDNode *N) {
2545  SDValue N0 = N->getOperand(0);
2546  SDValue N1 = N->getOperand(1);
2547  EVT VT = N0.getValueType();
2548  bool IsSigned = (ISD::SADDO == N->getOpcode());
2549 
2550  EVT CarryVT = N->getValueType(1);
2551  SDLoc DL(N);
2552 
2553  // If the flag result is dead, turn this into an ADD.
2554  if (!N->hasAnyUseOfValue(1))
2555  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2556  DAG.getUNDEF(CarryVT));
2557 
2558  // canonicalize constant to RHS.
2559  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2560  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2561  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2562 
2563  // fold (addo x, 0) -> x + no carry out
2564  if (isNullOrNullSplat(N1))
2565  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2566 
2567  if (!IsSigned) {
2568  // If it cannot overflow, transform into an add.
2569  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2570  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2571  DAG.getConstant(0, DL, CarryVT));
2572 
2573  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2574  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2575  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2576  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2577  return CombineTo(N, Sub,
2578  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2579  }
2580 
2581  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2582  return Combined;
2583 
2584  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2585  return Combined;
2586  }
2587 
2588  return SDValue();
2589 }
2590 
2591 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2592  EVT VT = N0.getValueType();
2593  if (VT.isVector())
2594  return SDValue();
2595 
2596  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2597  // If Y + 1 cannot overflow.
2598  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2599  SDValue Y = N1.getOperand(0);
2600  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2601  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2602  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2603  N1.getOperand(2));
2604  }
2605 
2606  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2607  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2608  if (SDValue Carry = getAsCarry(TLI, N1))
2609  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2610  DAG.getConstant(0, SDLoc(N), VT), Carry);
2611 
2612  return SDValue();
2613 }
2614 
2615 SDValue DAGCombiner::visitADDE(SDNode *N) {
2616  SDValue N0 = N->getOperand(0);
2617  SDValue N1 = N->getOperand(1);
2618  SDValue CarryIn = N->getOperand(2);
2619 
2620  // canonicalize constant to RHS
2621  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2622  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2623  if (N0C && !N1C)
2624  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2625  N1, N0, CarryIn);
2626 
2627  // fold (adde x, y, false) -> (addc x, y)
2628  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2629  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2630 
2631  return SDValue();
2632 }
2633 
2634 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2635  SDValue N0 = N->getOperand(0);
2636  SDValue N1 = N->getOperand(1);
2637  SDValue CarryIn = N->getOperand(2);
2638  SDLoc DL(N);
2639 
2640  // canonicalize constant to RHS
2641  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2642  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2643  if (N0C && !N1C)
2644  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2645 
2646  // fold (addcarry x, y, false) -> (uaddo x, y)
2647  if (isNullConstant(CarryIn)) {
2648  if (!LegalOperations ||
2649  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2650  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2651  }
2652 
2653  EVT CarryVT = CarryIn.getValueType();
2654 
2655  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
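      // With both addends zero, the sum is just the incoming carry bit, so
      // the result is the carry normalized to {0, 1} and the carry-out is 0.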
2656  if (isNullConstant(N0) && isNullConstant(N1)) {
2657  EVT VT = N0.getValueType();
2658  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2659  AddToWorklist(CarryExt.getNode());
2660  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2661  DAG.getConstant(1, DL, VT)),
2662  DAG.getConstant(0, DL, CarryVT));
2663  }
2664 
2665  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2666  if (isBitwiseNot(N0) && isNullConstant(N1)) {
2667  if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
2668  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2669  DAG.getConstant(0, DL, N0.getValueType()),
2670  N0.getOperand(0), B);
2671  return CombineTo(N, Sub,
2672  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2673  }
2674  }
2675 
2676  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2677  return Combined;
2678 
2679  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2680  return Combined;
2681 
2682  return SDValue();
2683 }
2684 
2685 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2686  SDNode *N) {
2687  // Iff the flag result is dead:
2688  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2689  if ((N0.getOpcode() == ISD::ADD ||
2690  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2691  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2692  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2693  N0.getOperand(0), N0.getOperand(1), CarryIn);
2694 
2695  /**
2696  * When one of the addcarry arguments is itself a carry, we may be facing
2697  * a diamond carry propagation, in which case we try to transform the DAG
2698  * to ensure linear carry propagation if that is possible.
2699  *
2700  * We are trying to get:
2701  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2702  */
2703  if (auto Y = getAsCarry(TLI, N1)) {
2704  /**
2705  * (uaddo A, B)
2706  * / \
2707  * Carry Sum
2708  * | \
2709  * | (addcarry *, 0, Z)
2710  * | /
2711  * \ Carry
2712  * | /
2713  * (addcarry X, *, *)
2714  */
2715  if (Y.getOpcode() == ISD::UADDO &&
2716  CarryIn.getResNo() == 1 &&
2717  CarryIn.getOpcode() == ISD::ADDCARRY &&
2718  isNullConstant(CarryIn.getOperand(1)) &&
2719  CarryIn.getOperand(0) == Y.getValue(0)) {
2720  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2721  Y.getOperand(0), Y.getOperand(1),
2722  CarryIn.getOperand(2));
2723  AddToWorklist(NewY.getNode());
2724  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2725  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2726  NewY.getValue(1));
2727  }
2728  }
2729 
2730  return SDValue();
2731 }
2732 
2733 // Since it may not be valid to emit a fold to zero for vector initializers,
2734 // check if we can before folding.
2735 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2736  SelectionDAG &DAG, bool LegalOperations) {
2737  if (!VT.isVector())
2738  return DAG.getConstant(0, DL, VT);
2739  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2740  return DAG.getConstant(0, DL, VT);
2741  return SDValue();
2742 }
2743 
2744 SDValue DAGCombiner::visitSUB(SDNode *N) {
2745  SDValue N0 = N->getOperand(0);
2746  SDValue N1 = N->getOperand(1);
2747  EVT VT = N0.getValueType();
2748  SDLoc DL(N);
2749 
2750  // fold vector ops
2751  if (VT.isVector()) {
2752  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2753  return FoldedVOp;
2754 
2755  // fold (sub x, 0) -> x, vector edition
2756  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2757  return N0;
2758  }
2759 
2760  // fold (sub x, x) -> 0
2761  // FIXME: Refactor this and xor and other similar operations together.
2762  if (N0 == N1)
2763  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2764  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2765  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2766  // fold (sub c1, c2) -> c1-c2
2767  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2768  N1.getNode());
2769  }
2770 
2771  if (SDValue NewSel = foldBinOpIntoSelect(N))
2772  return NewSel;
2773 
2774  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2775 
2776  // fold (sub x, c) -> (add x, -c)
2777  if (N1C) {
2778  return DAG.getNode(ISD::ADD, DL, VT, N0,
2779  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2780  }
2781 
2782  if (isNullOrNullSplat(N0)) {
2783  unsigned BitWidth = VT.getScalarSizeInBits();
2784  // Right-shifting everything out but the sign bit followed by negation is
2785  // the same as flipping arithmetic/logical shift type without the negation:
2786  // -(X >>u 31) -> (X >>s 31)
2787  // -(X >>s 31) -> (X >>u 31)
2788  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2789  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2790  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2791  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2792  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2793  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2794  }
2795  }
2796 
2797  // 0 - X --> 0 if the sub is NUW.
2798  if (N->getFlags().hasNoUnsignedWrap())
2799  return N0;
2800 
2801  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2802  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2803  // N1 must be 0 because negating the minimum signed value is undefined.
2804  if (N->getFlags().hasNoSignedWrap())
2805  return N0;
2806 
2807  // 0 - X --> X if X is 0 or the minimum signed value.
2808  return N1;
2809  }
2810  }
2811 
2812  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
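      // (-1 - x == ~x in two's complement, since ~x == -x - 1.)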
2813  if (isAllOnesOrAllOnesSplat(N0))
2814  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2815 
2816  // fold (A - (0-B)) -> A+B
2817  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2818  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2819 
2820  // fold A-(A-B) -> B
2821  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2822  return N1.getOperand(1);
2823 
2824  // fold (A+B)-A -> B
2825  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2826  return N0.getOperand(1);
2827 
2828  // fold (A+B)-B -> A
2829  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2830  return N0.getOperand(0);
2831 
2832  // fold C2-(A+C1) -> (C2-C1)-A
2833  if (N1.getOpcode() == ISD::ADD) {
2834  SDValue N11 = N1.getOperand(1);
2835  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2836  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2837  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2838  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2839  }
2840  }
2841 
2842  // fold ((A+(B+or-C))-B) -> A+or-C
2843  if (N0.getOpcode() == ISD::ADD &&
2844  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2845  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2846  N0.getOperand(1).getOperand(0) == N1)
2847  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2848  N0.getOperand(1).getOperand(1));
2849 
2850  // fold ((A+(C+B))-B) -> A+C
2851  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2852  N0.getOperand(1).getOperand(1) == N1)
2853  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2854  N0.getOperand(1).getOperand(0));
2855 
2856  // fold ((A-(B-C))-C) -> A-B
2857  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2858  N0.getOperand(1).getOperand(1) == N1)
2859  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2860  N0.getOperand(1).getOperand(0));
2861 
2862  // fold (A-(B-C)) -> A+(C-B)
2863  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2864  return DAG.getNode(ISD::ADD, DL, VT, N0,
2865  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2866  N1.getOperand(0)));
2867 
2868  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2869  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2870  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2871  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2872  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2873  N1.getOperand(0).getOperand(1),
2874  N1.getOperand(1));
2875  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2876  }
2877  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2878  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2879  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2880  N1.getOperand(0),
2881  N1.getOperand(1).getOperand(1));
2882  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2883  }
2884  }
2885 
2886  // If either operand of a sub is undef, the result is undef
2887  if (N0.isUndef())
2888  return N0;
2889  if (N1.isUndef())
2890  return N1;
2891 
2892  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2893  return V;
2894 
2895  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2896  return V;
2897 
2898  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2899  return V;
2900 
2901  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
2902  // rather than 'sub 0/1' (the sext should get folded).
2903  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
2904  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
2905  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
2906  TLI.getBooleanContents(VT) ==
2907  TargetLowering::ZeroOrNegativeOneBooleanContent) {
2908  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
2909  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
2910  }
2911 
2912  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
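      // Y is 0 for non-negative X and -1 for negative X, so (X ^ Y) - Y is
      // the classic branchless absolute-value idiom.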
2913  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2914  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2915  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2916  SDValue S0 = N1.getOperand(0);
2917  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2918  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2919  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2920  if (C->getAPIntValue() == (OpSizeInBits - 1))
2921  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2922  }
2923  }
2924  }
2925 
2926  // If the relocation model supports it, consider symbol offsets.
2927  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2928  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2929  // fold (sub Sym, c) -> Sym-c
2930  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2931  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2932  GA->getOffset() -
2933  (uint64_t)N1C->getSExtValue());
2934  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2935  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2936  if (GA->getGlobal() == GB->getGlobal())
2937  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2938  DL, VT);
2939  }
2940 
2941  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2942  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2943  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2944  if (TN->getVT() == MVT::i1) {
2945  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2946  DAG.getConstant(1, DL, VT));
2947  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2948  }
2949  }
2950 
2951  // Prefer an add for more folding potential and possibly better codegen:
2952  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
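      // (lshr N10, width-1) is 0 or 1 while (ashr N10, width-1) is 0 or -1;
      // each is the negation of the other, so subtracting one is the same as
      // adding the other.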
2953  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2954  SDValue ShAmt = N1.getOperand(1);
2955  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2956  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2957  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2958  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2959  }
2960  }
2961 
2962  return SDValue();
2963 }
2964 
2965 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2966  SDValue N0 = N->getOperand(0);
2967  SDValue N1 = N->getOperand(1);
2968  EVT VT = N0.getValueType();
2969  SDLoc DL(N);
2970 
2971  // fold vector ops
2972  if (VT.isVector()) {
2973  // TODO SimplifyVBinOp
2974 
2975  // fold (sub_sat x, 0) -> x, vector edition
2976  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2977  return N0;
2978  }
2979 
2980  // fold (sub_sat x, undef) -> 0
2981  if (N0.isUndef() || N1.isUndef())
2982  return DAG.getConstant(0, DL, VT);
2983 
2984  // fold (sub_sat x, x) -> 0
2985  if (N0 == N1)
2986  return DAG.getConstant(0, DL, VT);
2987 
2988  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2989  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2990  // fold (sub_sat c1, c2) -> c3
2991  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
2992  N1.getNode());
2993  }
2994 
2995  // fold (sub_sat x, 0) -> x
2996  if (isNullConstant(N1))
2997  return N0;
2998 
2999  return SDValue();
3000 }
3001 
3002 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3003  SDValue N0 = N->getOperand(0);
3004  SDValue N1 = N->getOperand(1);
3005  EVT VT = N0.getValueType();
3006  SDLoc DL(N);
3007 
3008  // If the flag result is dead, turn this into an SUB.
3009  if (!N->hasAnyUseOfValue(1))
3010  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3011  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3012 
3013  // fold (subc x, x) -> 0 + no borrow
3014  if (N0 == N1)
3015  return CombineTo(N, DAG.getConstant(0, DL, VT),
3016  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3017 
3018  // fold (subc x, 0) -> x + no borrow
3019  if (isNullConstant(N1))
3020  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3021 
3022  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3023  if (isAllOnesConstant(N0))
3024  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3025  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3026 
3027  return SDValue();
3028 }
3029 
3030 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3031  SDValue N0 = N->getOperand(0);
3032  SDValue N1 = N->getOperand(1);
3033  EVT VT = N0.getValueType();
3034  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3035 
3036  EVT CarryVT = N->getValueType(1);
3037  SDLoc DL(N);
3038 
3039  // If the flag result is dead, turn this into an SUB.
3040  if (!N->hasAnyUseOfValue(1))
3041  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3042  DAG.getUNDEF(CarryVT));
3043 
3044  // fold (subo x, x) -> 0 + no borrow
3045  if (N0 == N1)
3046  return CombineTo(N, DAG.getConstant(0, DL, VT),
3047  DAG.getConstant(0, DL, CarryVT));
3048 
3049  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3050 
3051  // fold (subo x, c) -> (addo x, -c)
3052  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3053  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3054  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3055  }
3056 
3057  // fold (subo x, 0) -> x + no borrow
3058  if (isNullOrNullSplat(N1))
3059  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3060 
3061  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3062  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3063  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3064  DAG.getConstant(0, DL, CarryVT));
3065 
3066  return SDValue();
3067 }
3068 
3069 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3070  SDValue N0 = N->getOperand(0);
3071  SDValue N1 = N->getOperand(1);
3072  SDValue CarryIn = N->getOperand(2);
3073 
3074  // fold (sube x, y, false) -> (subc x, y)
3075  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3076  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3077 
3078  return SDValue();
3079 }
3080 
3081 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3082  SDValue N0 = N->getOperand(0);
3083  SDValue N1 = N->getOperand(1);
3084  SDValue CarryIn = N->getOperand(2);
3085 
3086  // fold (subcarry x, y, false) -> (usubo x, y)
3087  if (isNullConstant(CarryIn)) {
3088  if (!LegalOperations ||
3089  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3090  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3091  }
3092 
3093  return SDValue();
3094 }
3095 
3096 SDValue DAGCombiner::visitMUL(SDNode *N) {
3097  SDValue N0 = N->getOperand(0);
3098  SDValue N1 = N->getOperand(1);
3099  EVT VT = N0.getValueType();
3100 
3101  // fold (mul x, undef) -> 0
3102  if (N0.isUndef() || N1.isUndef())
3103  return DAG.getConstant(0, SDLoc(N), VT);
3104 
3105  bool N0IsConst = false;
3106  bool N1IsConst = false;
3107  bool N1IsOpaqueConst = false;
3108  bool N0IsOpaqueConst = false;
3109  APInt ConstValue0, ConstValue1;
3110  // fold vector ops
3111  if (VT.isVector()) {
3112  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3113  return FoldedVOp;
3114 
3115  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3116  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3117  assert((!N0IsConst ||
3118  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3119  "Splat APInt should be element width");
3120  assert((!N1IsConst ||
3121  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3122  "Splat APInt should be element width");
3123  } else {
3124  N0IsConst = isa<ConstantSDNode>(N0);
3125  if (N0IsConst) {
3126  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3127  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3128  }
3129  N1IsConst = isa<ConstantSDNode>(N1);
3130  if (N1IsConst) {
3131  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3132  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3133  }
3134  }
3135 
3136  // fold (mul c1, c2) -> c1*c2
3137  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3138  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3139  N0.getNode(), N1.getNode());
3140 
3141  // canonicalize constant to RHS (vector doesn't have to splat)
3142  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3143  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3144  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3145  // fold (mul x, 0) -> 0
3146  if (N1IsConst && ConstValue1.isNullValue())
3147  return N1;
3148  // fold (mul x, 1) -> x
3149  if (N1IsConst && ConstValue1.isOneValue())
3150  return N0;
3151 
3152  if (SDValue NewSel = foldBinOpIntoSelect(N))
3153  return NewSel;
3154 
3155  // fold (mul x, -1) -> 0-x
3156  if (N1IsConst && ConstValue1.isAllOnesValue()) {
3157  SDLoc DL(N);
3158  return DAG.getNode(ISD::SUB, DL, VT,
3159  DAG.getConstant(0, DL, VT), N0);
3160  }
3161  // fold (mul x, (1 << c)) -> x << c
3162  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3163  DAG.isKnownToBeAPowerOfTwo(N1) &&
3164  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3165  SDLoc DL(N);
3166  SDValue LogBase2 = BuildLogBase2(N1, DL);
3167  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3168  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3169  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3170  }
3171  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3172  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3173  unsigned Log2Val = (-ConstValue1).logBase2();
3174  SDLoc DL(N);
3175  // FIXME: If the input is something that is easily negated (e.g. a
3176  // single-use add), we should put the negate there.
3177  return DAG.getNode(ISD::SUB, DL, VT,
3178  DAG.getConstant(0, DL, VT),
3179  DAG.getNode(ISD::SHL, DL, VT, N0,
3180  DAG.getConstant(Log2Val, DL,
3181  getShiftAmountTy(N0.getValueType()))));
3182  }
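// For example (illustrative): for i32, (mul x, -8) becomes
// (sub 0, (shl x, 3)), i.e. 0 - (x << 3), since -(-8) = 8 = 1 << 3.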
3183 
3184  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3185  // mul x, (2^N + 1) --> add (shl x, N), x
3186  // mul x, (2^N - 1) --> sub (shl x, N), x
3187  // Examples: x * 33 --> (x << 5) + x
3188  // x * 15 --> (x << 4) - x
3189  // x * -33 --> -((x << 5) + x)
3190  // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3191  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3192  // TODO: We could handle more general decomposition of any constant by
3193  // having the target set a limit on number of ops and making a
3194  // callback to determine that sequence (similar to sqrt expansion).
3195  unsigned MathOp = ISD::DELETED_NODE;
3196  APInt MulC = ConstValue1.abs();
3197  if ((MulC - 1).isPowerOf2())
3198  MathOp = ISD::ADD;
3199  else if ((MulC + 1).isPowerOf2())
3200  MathOp = ISD::SUB;
3201 
3202  if (MathOp != ISD::DELETED_NODE) {
3203  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3204  : (MulC + 1).logBase2();
3205  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3206  "Not expecting multiply-by-constant that could have simplified");
3207  SDLoc DL(N);
3208  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3209  DAG.getConstant(ShAmt, DL, VT));
3210  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3211  if (ConstValue1.isNegative())
3212  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3213  return R;
3214  }
3215  }
3216 
3217  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3218  if (N0.getOpcode() == ISD::SHL &&
3219  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3220  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3221  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3222  if (isConstantOrConstantVector(C3))
3223  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3224  }
3225 
3226  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3227  // use.
3228  {
3229  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3230 
3231  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3232  if (N0.getOpcode() == ISD::SHL &&
3233  isConstantOrConstantVector(N0.getOperand(1)) &&
3234  N0.getNode()->hasOneUse()) {
3235  Sh = N0; Y = N1;
3236  } else if (N1.getOpcode() == ISD::SHL &&
3237  isConstantOrConstantVector(N1.getOperand(1)) &&
3238  N1.getNode()->hasOneUse()) {
3239  Sh = N1; Y = N0;
3240  }
3241 
3242  if (Sh.getNode()) {
3243  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3244  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3245  }
3246  }
3247 
3248  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3249  if (isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3250  N0.getOpcode() == ISD::ADD &&
3251  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true) &&
3252  isMulAddWithConstProfitable(N, N0, N1))
3253  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3254  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3255  N0.getOperand(0), N1),
3256  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3257  N0.getOperand(1), N1));
3258 
3259  // reassociate mul
3260  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3261  return RMUL;
3262 
3263  return SDValue();
3264 }
3265 
3266 /// Return true if divmod libcall is available.
3267 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3268  const TargetLowering &TLI) {
3269  RTLIB::Libcall LC;
3270  EVT NodeType = Node->getValueType(0);
3271  if (!NodeType.isSimple())
3272  return false;
3273  switch (NodeType.getSimpleVT().SimpleTy) {
3274  default: return false; // No libcall for vector types.
3275  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3276  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3277  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3278  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3279  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3280  }
3281 
3282  return TLI.getLibcallName(LC) != nullptr;
3283 }
3284 
3285 /// Issue divrem if both quotient and remainder are needed.
3286 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3287  if (Node->use_empty())
3288  return SDValue(); // This is a dead node, leave it alone.
3289 
3290  unsigned Opcode = Node->getOpcode();
3291  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3292  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3293 
3294  // DivMod lib calls can still work on non-legal types if using lib-calls.
3295  EVT VT = Node->getValueType(0);
3296  if (VT.isVector() || !VT.isInteger())
3297  return SDValue();
3298 
3299  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3300  return SDValue();
3301 
3302  // If DIVREM is going to get expanded into a libcall,
3303  // but there is no libcall available, then don't combine.
3304  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3305  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3306  return SDValue();
3307 
3308  // If div is legal, it's better to do the normal expansion
3309  unsigned OtherOpcode = 0;
3310  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3311  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3312  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3313  return SDValue();
3314  } else {
3315  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3316  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3317  return SDValue();
3318  }
3319 
3320  SDValue Op0 = Node->getOperand(0);
3321  SDValue Op1 = Node->getOperand(1);
3322  SDValue combined;
3323  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3324  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3325  SDNode *User = *UI;
3326  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3327  User->use_empty())
3328  continue;
3329  // Convert the other matching node(s), too;
3330  // otherwise, the DIVREM may get target-legalized into something
3331  // target-specific that we won't be able to recognize.
3332  unsigned UserOpc = User->getOpcode();
3333  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3334  User->getOperand(0) == Op0 &&
3335  User->getOperand(1) == Op1) {
3336  if (!combined) {
3337  if (UserOpc == OtherOpcode) {
3338  SDVTList VTs = DAG.getVTList(VT, VT);
3339  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3340  } else if (UserOpc == DivRemOpc) {
3341  combined = SDValue(User, 0);
3342  } else {
3343  assert(UserOpc == Opcode);
3344  continue;
3345  }
3346  }
3347  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3348  CombineTo(User, combined);
3349  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3350  CombineTo(User, combined.getValue(1));
3351  }
3352  }
3353  return combined;
3354 }
3355 
3356 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3357  SDValue N0 = N->getOperand(0);
3358  SDValue N1 = N->getOperand(1);
3359  EVT VT = N->getValueType(0);
3360  SDLoc DL(N);
3361 
3362  unsigned Opc = N->getOpcode();
3363  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3364  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3365 
3366  // X / undef -> undef
3367  // X % undef -> undef
3368  // X / 0 -> undef
3369  // X % 0 -> undef
3370  // NOTE: This includes vectors where any divisor element is zero/undef.
3371  if (DAG.isUndef(Opc, {N0, N1}))
3372  return DAG.getUNDEF(VT);
3373 
3374  // undef / X -> 0
3375  // undef % X -> 0
3376  if (N0.isUndef())
3377  return DAG.getConstant(0, DL, VT);
3378 
3379  // 0 / X -> 0
3380  // 0 % X -> 0
3381  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3382  if (N0C && N0C->isNullValue())
3383  return N0;
3384 
3385  // X / X -> 1
3386  // X % X -> 0
3387  if (N0 == N1)
3388  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3389 
3390  // X / 1 -> X
3391  // X % 1 -> 0
3392  // If this is a boolean op (single-bit element type), we can't have
3393  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3394  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3395  // it's a 1.
3396  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3397  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3398 
3399  return SDValue();
3400 }
3401 
3402 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3403  SDValue N0 = N->getOperand(0);
3404  SDValue N1 = N->getOperand(1);
3405  EVT VT = N->getValueType(0);
3406  EVT CCVT = getSetCCResultType(VT);
3407 
3408  // fold vector ops
3409  if (VT.isVector())
3410  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3411  return FoldedVOp;
3412 
3413  SDLoc DL(N);
3414 
3415  // fold (sdiv c1, c2) -> c1/c2
3416  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3417  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3418  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3419  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3420  // fold (sdiv X, -1) -> 0-X
3421  if (N1C && N1C->isAllOnesValue())
3422  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3423  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3424  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3425  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3426  DAG.getConstant(1, DL, VT),
3427  DAG.getConstant(0, DL, VT));
3428 
3429  if (SDValue V = simplifyDivRem(N, DAG))
3430  return V;
3431 
3432  if (SDValue NewSel = foldBinOpIntoSelect(N))
3433  return NewSel;
3434 
3435  // If we know the sign bits of both operands are zero, strength reduce to a
3436  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3437  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3438  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3439 
3440  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3441  // If the corresponding remainder node exists, update its users with
3442  // (Dividend - (Quotient * Divisor)).
3443  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3444  { N0, N1 })) {
3445  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3446  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3447  AddToWorklist(Mul.getNode());
3448  AddToWorklist(Sub.getNode());
3449  CombineTo(RemNode, Sub);
3450  }
3451  return V;
3452  }
3453 
3454  // sdiv, srem -> sdivrem
3455  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3456  // true. Otherwise, we break the simplification logic in visitREM().
3457  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3458  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3459  if (SDValue DivRem = useDivRem(N))
3460  return DivRem;
3461 
3462  return SDValue();
3463 }
3464 
3465 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3466  SDLoc DL(N);
3467  EVT VT = N->getValueType(0);
3468  EVT CCVT = getSetCCResultType(VT);
3469  unsigned BitWidth = VT.getScalarSizeInBits();
3470 
3471  // Helper for determining whether a value is a power-of-2 constant scalar or a
3472  // vector of such elements.
3473  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3474  if (C->isNullValue() || C->isOpaque())
3475  return false;
3476  if (C->getAPIntValue().isPowerOf2())
3477  return true;
3478  if ((-C->getAPIntValue()).isPowerOf2())
3479  return true;
3480  return false;
3481  };
3482 
3483  // fold (sdiv X, pow2) -> simple ops after legalize
3484  // FIXME: We check for the exact bit here because the generic lowering gives
3485  // better results in that case. The target-specific lowering should learn how
3486  // to handle exact sdivs efficiently.
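// Illustrative expansion (i32, divisor 8) of the sequence built below:
//   Sign   = sra x, 31          ; 0 or -1
//   Srl    = srl Sign, 32 - 3   ; 0 or 7 (rounding bias for negative x)
//   Add    = add x, Srl
//   Result = sra Add, 3
// Divisors of 1 / -1 are handled by the selects below, and a negative
// divisor negates the result at the end.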
3487  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3488  // Target-specific implementation of sdiv x, pow2.
3489  if (SDValue Res = BuildSDIVPow2(N))
3490  return Res;
3491 
3492  // Create constants that are functions of the shift amount value.
3493  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3494  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3495  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3496  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3497  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3498  if (!isConstantOrConstantVector(Inexact))
3499  return SDValue();
3500 
3501  // Splat the sign bit into the register
3502  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3503  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3504  AddToWorklist(Sign.getNode());
3505 
3506  // Add (N0 < 0) ? abs2 - 1 : 0;
3507  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3508  AddToWorklist(Srl.getNode());
3509  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3510  AddToWorklist(Add.getNode());
3511  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3512  AddToWorklist(Sra.getNode());
3513 
3514  // Special case: (sdiv X, 1) -> X
3515  // Special Case: (sdiv X, -1) -> 0-X
3516  SDValue One = DAG.getConstant(1, DL, VT);
3517  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3518  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3519  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3520  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3521  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3522 
3523  // If dividing by a positive value, we're done. Otherwise, the result must
3524  // be negated.
3525  SDValue Zero = DAG.getConstant(0, DL, VT);
3526  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3527 
3528  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3529  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3530  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3531  return Res;
3532  }
3533 
3534  // If integer divide is expensive and we satisfy the requirements, emit an
3535  // alternate sequence. Targets may check function attributes for size/speed
3536  // trade-offs.
3537  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3538  if (isConstantOrConstantVector(N1) &&
3539  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3540  if (SDValue Op = BuildSDIV(N))
3541  return Op;
3542 
3543  return SDValue();
3544 }
3545 
3546 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3547  SDValue N0 = N->getOperand(0);
3548  SDValue N1 = N->getOperand(1);
3549  EVT VT = N->getValueType(0);
3550  EVT CCVT = getSetCCResultType(VT);
3551 
3552  // fold vector ops
3553  if (VT.isVector())
3554  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3555  return FoldedVOp;
3556 
3557  SDLoc DL(N);
3558 
3559  // fold (udiv c1, c2) -> c1/c2
3560  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3561  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3562  if (N0C && N1C)
3563  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3564  N0C, N1C))
3565  return Folded;
3566  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3567  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3568  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3569  DAG.getConstant(1, DL, VT),
3570  DAG.getConstant(0, DL, VT));
3571 
3572  if (SDValue V = simplifyDivRem(N, DAG))
3573  return V;
3574 
3575  if (SDValue NewSel = foldBinOpIntoSelect(N))
3576  return NewSel;
3577 
3578  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3579  // If the corresponding remainder node exists, update its users with
3580  // (Dividend - (Quotient * Divisor)).
3581  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3582  { N0, N1 })) {
3583  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3584  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3585  AddToWorklist(Mul.getNode());
3586  AddToWorklist(Sub.getNode());
3587  CombineTo(RemNode, Sub);
3588  }
3589  return V;
3590  }
3591 
3592  // udiv, urem -> udivrem
3593  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3594  // true. Otherwise, we break the simplification logic in visitREM().
3595  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3596  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3597  if (SDValue DivRem = useDivRem(N))
3598  return DivRem;
3599 
3600  return SDValue();
3601 }
3602 
3603 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3604  SDLoc DL(N);
3605  EVT VT = N->getValueType(0);
3606 
3607  // fold (udiv x, (1 << c)) -> x >>u c
3608  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3609  DAG.isKnownToBeAPowerOfTwo(N1)) {
3610  SDValue LogBase2 = BuildLogBase2(N1, DL);
3611  AddToWorklist(LogBase2.getNode());
3612 
3613  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3614  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3615  AddToWorklist(Trunc.getNode());
3616  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3617  }
3618 
3619  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3620  if (N1.getOpcode() == ISD::SHL) {
3621  SDValue N10 = N1.getOperand(0);
3622  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3623  DAG.isKnownToBeAPowerOfTwo(N10)) {
3624  SDValue LogBase2 = BuildLogBase2(N10, DL);
3625  AddToWorklist(LogBase2.getNode());
3626 
3627  EVT ADDVT = N1.getOperand(1).getValueType();
3628  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3629  AddToWorklist(Trunc.getNode());
3630  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3631  AddToWorklist(Add.getNode());
3632  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3633  }
3634  }
3635 
3636  // fold (udiv x, c) -> alternate
3637  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3638  if (isConstantOrConstantVector(N1) &&
3639  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3640  if (SDValue Op = BuildUDIV(N))
3641  return Op;
3642 
3643  return SDValue();
3644 }
3645 
3646 // handles ISD::SREM and ISD::UREM
3647 SDValue DAGCombiner::visitREM(SDNode *N) {
3648  unsigned Opcode = N->getOpcode();
3649  SDValue N0 = N->getOperand(0);
3650  SDValue N1 = N->getOperand(1);
3651  EVT VT = N->getValueType(0);
3652  EVT CCVT = getSetCCResultType(VT);
3653 
3654  bool isSigned = (Opcode == ISD::SREM);
3655  SDLoc DL(N);
3656 
3657  // fold (rem c1, c2) -> c1%c2
3658  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3659  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3660  if (N0C && N1C)
3661  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3662  return Folded;
3663  // fold (urem X, -1) -> select(X == -1, 0, x)
3664  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3665  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3666  DAG.getConstant(0, DL, VT), N0);
3667 
3668  if (SDValue V = simplifyDivRem(N, DAG))
3669  return V;
3670 
3671  if (SDValue NewSel = foldBinOpIntoSelect(N))
3672  return NewSel;
3673 
3674  if (isSigned) {
3675  // If we know the sign bits of both operands are zero, strength reduce to a
3676  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3677  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3678  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3679  } else {
3680  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3681  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3682  // fold (urem x, pow2) -> (and x, pow2-1)
3683  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3684  AddToWorklist(Add.getNode());
3685  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3686  }
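// e.g. (urem x, 8) --> (and x, 7)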
3687  if (N1.getOpcode() == ISD::SHL &&
3688  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3689  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3690  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3691  AddToWorklist(Add.getNode());
3692  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3693  }
3694  }
3695 
3696  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3697 
3698  // If X/C can be simplified by the division-by-constant logic, lower
3699  // X%C to the equivalent of X-X/C*C.
3700  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3701  // speculative DIV must not cause a DIVREM conversion. We guard against this
3702  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3703  // combine will not return a DIVREM. Regardless, checking cheapness here
3704  // makes sense since the simplification results in fatter code.
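// For example (illustrative): (urem x, 10) is rewritten as x - (x/10)*10,
// where the x/10 is expected to be strength-reduced by visitUDIVLike
// (typically into a multiply-by-magic-constant sequence).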
3705  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3706  SDValue OptimizedDiv =
3707  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3708  if (OptimizedDiv.getNode()) {
3709  // If the equivalent Div node also exists, update its users.
3710  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3711  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3712  { N0, N1 }))
3713  CombineTo(DivNode, OptimizedDiv);
3714  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3715  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3716  AddToWorklist(OptimizedDiv.getNode());
3717  AddToWorklist(Mul.getNode());
3718  return Sub;
3719  }
3720  }
3721 
3722  // sdiv, srem -> sdivrem
3723  if (SDValue DivRem = useDivRem(N))
3724  return DivRem.getValue(1);
3725 
3726  return SDValue();
3727 }
3728 
3729 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3730  SDValue N0 = N->getOperand(0);
3731  SDValue N1 = N->getOperand(1);
3732  EVT VT = N->getValueType(0);
3733  SDLoc DL(N);
3734 
3735  if (VT.isVector()) {
3736  // fold (mulhs x, 0) -> 0
3737  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3738  return N1;
3739  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3740  return N0;
3741  }
3742 
3743  // fold (mulhs x, 0) -> 0
3744  if (isNullConstant(N1))
3745  return N1;
3746  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3747  if (isOneConstant(N1))
3748  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3749  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3750  getShiftAmountTy(N0.getValueType())));
3751 
3752  // fold (mulhs x, undef) -> 0
3753  if (N0.isUndef() || N1.isUndef())
3754  return DAG.getConstant(0, DL, VT);
3755 
3756  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3757  // plus a shift.
3758  if (VT.isSimple() && !VT.isVector()) {
3759  MVT Simple = VT.getSimpleVT();
3760  unsigned SimpleSize = Simple.getSizeInBits();
3761  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3762  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3763  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3764  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3765  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3766  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3767  DAG.getConstant(SimpleSize, DL,
3768  getShiftAmountTy(N1.getValueType())));
3769  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3770  }
3771  }
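// For example (illustrative), for i16 with a legal i32 multiply:
//   (mulhs x, y) --> (trunc (srl (mul (sext x), (sext y)), 16))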
3772 
3773  return SDValue();
3774 }
3775 
3776 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3777  SDValue N0 = N->getOperand(0);
3778  SDValue N1 = N->getOperand(1);
3779  EVT VT = N->getValueType(0);
3780  SDLoc DL(N);
3781 
3782  if (VT.isVector()) {
3783  // fold (mulhu x, 0) -> 0
3784  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3785  return N1;
3786  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3787  return N0;
3788  }
3789 
3790  // fold (mulhu x, 0) -> 0
3791  if (isNullConstant(N1))
3792  return N1;
3793  // fold (mulhu x, 1) -> 0
3794  if (isOneConstant(N1))
3795  return DAG.getConstant(0, DL, N0.getValueType());
3796  // fold (mulhu x, undef) -> 0
3797  if (N0.isUndef() || N1.isUndef())
3798  return DAG.getConstant(0, DL, VT);
3799 
3800  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
3801  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3802  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3803  SDLoc DL(N);
3804  unsigned NumEltBits = VT.getScalarSizeInBits();
3805  SDValue LogBase2 = BuildLogBase2(N1, DL);
3806  SDValue SRLAmt = DAG.getNode(
3807  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3808  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3809  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3810  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3811  }
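// e.g. for i32: (mulhu x, 16) --> (srl x, 28), since the high 32 bits of
// the 64-bit product x * 16 are x >> 28.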
3812 
3813  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3814  // plus a shift.
3815  if (VT.isSimple() && !VT.isVector()) {
3816  MVT Simple = VT.getSimpleVT();
3817  unsigned SimpleSize = Simple.getSizeInBits();
3818  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3819  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3820  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3821  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3822  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3823  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3824  DAG.getConstant(SimpleSize, DL,
3825  getShiftAmountTy(N1.getValueType())));
3826  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3827  }
3828  }
3829 
3830  return SDValue();
3831 }
3832 
3833 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3834 /// give the opcodes for the two computations that are being performed. Return
3835 /// true if a simplification was made.
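// For example, visitSMUL_LOHI below calls this with LoOp = ISD::MUL and
// HiOp = ISD::MULHS, so an SMUL_LOHI whose high result is unused becomes a
// plain MUL.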
3836 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3837  unsigned HiOp) {
3838  // If the high half is not needed, just compute the low half.
3839  bool HiExists = N->hasAnyUseOfValue(1);
3840  if (!HiExists && (!LegalOperations ||
3841  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3842  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3843  return CombineTo(N, Res, Res);
3844  }
3845 
3846  // If the low half is not needed, just compute the high half.
3847  bool LoExists = N->hasAnyUseOfValue(0);
3848  if (!LoExists && (!LegalOperations ||
3849  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3850  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3851  return CombineTo(N, Res, Res);
3852  }
3853 
3854  // If both halves are used, return as it is.
3855  if (LoExists && HiExists)
3856  return SDValue();
3857 
3858  // If the two computed results can be simplified separately, separate them.
3859  if (LoExists) {
3860  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3861  AddToWorklist(Lo.getNode());
3862  SDValue LoOpt = combine(Lo.getNode());
3863  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3864  (!LegalOperations ||
3865  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3866  return CombineTo(N, LoOpt, LoOpt);
3867  }
3868 
3869  if (HiExists) {
3870  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3871  AddToWorklist(Hi.getNode());
3872  SDValue HiOpt = combine(Hi.getNode());
3873  if (HiOpt.getNode() && HiOpt != Hi &&
3874  (!LegalOperations ||
3875  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3876  return CombineTo(N, HiOpt, HiOpt);
3877  }
3878 
3879  return SDValue();
3880 }
3881 
3882 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3883  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3884  return Res;
3885 
3886  EVT VT = N->getValueType(0);
3887  SDLoc DL(N);
3888 
3889  // If the type twice as wide is legal, transform the multiply to a wider
3890  // multiply plus a shift.
3891  if (VT.isSimple() && !VT.isVector()) {
3892  MVT Simple = VT.getSimpleVT();
3893  unsigned SimpleSize = Simple.getSizeInBits();
3894  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3895  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3896  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3897  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3898  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3899  // Compute the high part as N1.
3900  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3901  DAG.getConstant(SimpleSize, DL,
3902  getShiftAmountTy(Lo.getValueType())));
3903  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3904  // Compute the low part as N0.
3905  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3906  return CombineTo(N, Lo, Hi);
3907  }
3908  }
3909 
3910  return SDValue();
3911 }
3912 
3913 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3914  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3915  return Res;
3916 
3917  EVT VT = N->getValueType(0);
3918  SDLoc DL(N);
3919 
3920  // If the type twice as wide is legal, transform the multiply to a wider
3921  // multiply plus a shift.
3922  if (VT.isSimple() && !VT.isVector()) {
3923  MVT Simple = VT.getSimpleVT();
3924  unsigned SimpleSize = Simple.getSizeInBits();
3925  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3926  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3927  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3928  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3929  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3930  // Compute the high part as N1.
3931  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3932  DAG.getConstant(SimpleSize, DL,
3933  getShiftAmountTy(Lo.getValueType())));
3934  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3935  // Compute the low part as N0.
3936  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3937  return CombineTo(N, Lo, Hi);
3938  }
3939  }
3940 
3941  return SDValue();
3942 }
3943 
3944 SDValue DAGCombiner::visitMULO(SDNode *N) {
3945  bool IsSigned = (ISD::SMULO == N->getOpcode());
3946 
3947  // (mulo x, 2) -> (addo x, x)
3948  if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
3949  if (C2->getAPIntValue() == 2)
3950  return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
3951  N->getVTList(), N->getOperand(0), N->getOperand(0));
3952 
3953  return SDValue();
3954 }
3955 
3956 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3957  SDValue N0 = N->getOperand(0);
3958  SDValue N1 = N->getOperand(1);
3959  EVT VT = N0.getValueType();
3960 
3961  // fold vector ops
3962  if (VT.isVector())
3963  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3964  return FoldedVOp;
3965 
3966  // fold operation with constant operands.
3967  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3968  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3969  if (N0C && N1C)
3970  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3971 
3972  // canonicalize constant to RHS
3973  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3974  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3975  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3976 
3977  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3978  // Only do this if the current op isn't legal and the flipped is.
3979  unsigned Opcode = N->getOpcode();
3980  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3981  if (!TLI.isOperationLegal(Opcode, VT) &&
3982  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3983  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3984  unsigned AltOpcode;
3985  switch (Opcode) {
3986  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3987  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3988  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3989  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3990  default: llvm_unreachable("Unknown MINMAX opcode");
3991  }
3992  if (TLI.isOperationLegal(AltOpcode, VT))
3993  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3994  }
3995 
3996  return SDValue();
3997 }
3998 
3999 /// If this is a bitwise logic instruction and both operands have the same
4000 /// opcode, try to sink the other opcode after the logic instruction.
4001 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4002  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4003  EVT VT = N0.getValueType();
4004  unsigned LogicOpcode = N->getOpcode();
4005  unsigned HandOpcode = N0.getOpcode();
4006  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4007  LogicOpcode == ISD::XOR) && "Expected logic opcode");
4008  assert(HandOpcode == N1.getOpcode() && "Bad input!");
4009 
4010  // Bail early if none of these transforms apply.
4011  if (N0.getNumOperands() == 0)
4012  return SDValue();
4013 
4014  // FIXME: We should check number of uses of the operands to not increase
4015  // the instruction count for all transforms.
4016 
4017  // Handle size-changing casts.
4018  SDValue X = N0.getOperand(0);
4019  SDValue Y = N1.getOperand(0);
4020  EVT XVT = X.getValueType();
4021  SDLoc DL(N);
4022  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4023  HandOpcode == ISD::SIGN_EXTEND) {
4024  // If both operands have other uses, this transform would create extra
4025  // instructions without eliminating anything.
4026  if (!N0.hasOneUse() && !N1.hasOneUse())
4027  return SDValue();
4028  // We need matching integer source types.
4029  if (XVT != Y.getValueType())
4030  return SDValue();
4031  // Don't create an illegal op during or after legalization. Don't ever
4032  // create an unsupported vector op.
4033  if ((VT.isVector() || LegalOperations) &&
4034  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4035  return SDValue();
4036  // Avoid infinite looping with PromoteIntBinOp.
4037  // TODO: Should we apply desirable/legal constraints to all opcodes?
4038  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4039  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4040  return SDValue();
4041  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4042  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4043  return DAG.getNode(HandOpcode, DL, VT, Logic);
4044  }
4045 
4046  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4047  if (HandOpcode == ISD::TRUNCATE) {
4048  // If both operands have other uses, this transform would create extra
4049  // instructions without eliminating anything.
4050  if (!N0.hasOneUse() && !N1.hasOneUse())
4051  return SDValue();
4052  // We need matching source types.
4053  if (XVT != Y.getValueType())
4054  return SDValue();
4055  // Don't create an illegal op during or after legalization.
4056  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4057  return SDValue();
4058  // Be extra careful sinking truncate. If it's free, there's no benefit in
4059  // widening a binop. Also, don't create a logic op on an illegal type.
4060  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4061  return SDValue();
4062  if (!TLI.isTypeLegal(XVT))
4063  return SDValue();
4064  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4065  return DAG.getNode(HandOpcode, DL, VT, Logic);
4066  }
4067 
4068  // For binops SHL/SRL/SRA/AND:
4069  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4070  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4071  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4072  N0.getOperand(1) == N1.getOperand(1)) {
4073  // If either operand has other uses, this transform is not an improvement.
4074  if (!N0.hasOneUse() || !N1.hasOneUse())
4075  return SDValue();
4076  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4077  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4078  }
4079 
4080  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4081  if (HandOpcode == ISD::BSWAP) {
4082  // If either operand has other uses, this transform is not an improvement.
4083  if (!N0.hasOneUse() || !N1.hasOneUse())
4084  return SDValue();
4085  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4086  return DAG.getNode(HandOpcode, DL, VT, Logic);
4087  }
4088 
4089  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4090  // Only perform this optimization up until type legalization, before
4091  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4092  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4093  // we don't want to undo this promotion.
4094  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4095  // on scalars.
4096  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4097  Level <= AfterLegalizeTypes) {
4098  // Input types must be integer and the same.
4099  if (XVT.isInteger() && XVT == Y.getValueType()) {
4100  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4101  return DAG.getNode(HandOpcode, DL, VT, Logic);
4102  }
4103  }
4104 
4105  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4106  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4107  // If both shuffles use the same mask, and both shuffle within a single
4108  // vector, then it is worthwhile to move the swizzle after the operation.
4109  // The type-legalizer generates this pattern when loading illegal
4110  // vector types from memory. In many cases this allows additional shuffle
4111  // optimizations.
4112  // There are other cases where moving the shuffle after the xor/and/or
4113  // is profitable even if shuffles don't perform a swizzle.
4114  // If both shuffles use the same mask, and both shuffles have the same first
4115  // or second operand, then it might still be profitable to move the shuffle
4116  // after the xor/and/or operation.
4117  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4118  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4119  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4120  assert(X.getValueType() == Y.getValueType() &&
4121  "Inputs to shuffles are not the same type");
4122 
4123  // Check that both shuffles use the same mask. The masks are known to be of
4124  // the same length because the result vector type is the same.
4125  // Check also that shuffles have only one use to avoid introducing extra
4126  // instructions.
4127  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4128  !SVN0->getMask().equals(SVN1->getMask()))
4129  return SDValue();
4130 
4131  // Don't try to fold this node if it requires introducing a
4132  // build vector of all zeros that might be illegal at this stage.
4133  SDValue ShOp = N0.getOperand(1);
4134  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4135  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4136 
4137  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4138  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4139  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4140  N0.getOperand(0), N1.getOperand(0));
4141  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4142  }
4143 
4144  // Don't try to fold this node if it requires introducing a
4145  // build vector of all zeros that might be illegal at this stage.
4146  ShOp = N0.getOperand(0);
4147  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4148  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4149 
4150  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4151  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4152  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4153  N1.getOperand(1));
4154  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4155  }
4156  }
4157 
4158  return SDValue();
4159 }
4160 
4161 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4162 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4163  const SDLoc &DL) {
4164  SDValue LL, LR, RL, RR, N0CC, N1CC;
4165  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4166  !isSetCCEquivalent(N1, RL, RR, N1CC))
4167  return SDValue();
4168 
4169  assert(N0.getValueType() == N1.getValueType() &&
4170  "Unexpected operand types for bitwise logic op");
4171  assert(LL.getValueType() == LR.getValueType() &&
4172  RL.getValueType() == RR.getValueType() &&
4173  "Unexpected operand types for setcc");
4174 
4175  // If we're here post-legalization or the logic op type is not i1, the logic
4176  // op type must match a setcc result type. Also, all folds require new
4177  // operations on the left and right operands, so those types must match.
4178  EVT VT = N0.getValueType();
4179  EVT OpVT = LL.getValueType();
4180  if (LegalOperations || VT.getScalarType() != MVT::i1)
4181  if (VT != getSetCCResultType(OpVT))
4182  return SDValue();
4183  if (OpVT != RL.getValueType())
4184  return SDValue();
4185 
4186  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4187  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4188  bool IsInteger = OpVT.isInteger();
4189  if (LR == RR && CC0 == CC1 && IsInteger) {
4190  bool IsZero = isNullOrNullSplat(LR);
4191  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4192 
4193  // All bits clear?
4194  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4195  // All sign bits clear?
4196  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4197  // Any bits set?
4198  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4199  // Any sign bits set?
4200  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4201 
4202  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4203  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4204  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4205  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4206  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4207  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4208  AddToWorklist(Or.getNode());
4209  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4210  }
4211 
4212  // All bits set?
4213  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4214  // All sign bits set?
4215  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4216  // Any bits clear?
4217  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4218  // Any sign bits clear?
4219  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4220 
4221  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4222  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4223  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4224  // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
4225  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4226  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4227  AddToWorklist(And.getNode());
4228  return DAG.getSetCC(DL, VT, And, LR, CC1);
4229  }
4230  }
4231 
4232  // TODO: What is the 'or' equivalent of this fold?
4233  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4234  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4235  IsInteger && CC0 == ISD::SETNE &&
4236  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4237  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4238  SDValue One = DAG.getConstant(1, DL, OpVT);
4239  SDValue Two = DAG.getConstant(2, DL, OpVT);
4240  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4241  AddToWorklist(Add.getNode());
4242  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4243  }
4244 
4245  // Try more general transforms if the predicates match and the only user of
4246  // the compares is the 'and' or 'or'.
4247  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4248  N0.hasOneUse() && N1.hasOneUse()) {
4249  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4250  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4251  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4252  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4253  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4254  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4255  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4256  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4257  }
4258 
4259  // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4260  if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4261  // Match a shared variable operand and 2 non-opaque constant operands.
4262  ConstantSDNode *C0 = isConstOrConstSplat(LR);
4263  ConstantSDNode *C1 = isConstOrConstSplat(RR);
4264  if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4265  // Canonicalize larger constant as C0.
4266  if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4267  std::swap(C0, C1);
4268 
4269  // The difference of the constants must be a single bit.
4270  const APInt &C0Val = C0->getAPIntValue();
4271  const APInt &C1Val = C1->getAPIntValue();
4272  if ((C0Val - C1Val).isPowerOf2()) {
4273  // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4274  // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
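// e.g. (and (setne X, 0), (setne X, 1)) --> (setne (and (add X, 0), ~1), 0),
// i.e. (setne (and X, ~1), 0) once the no-op add is folded away.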
4275  SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4276  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4277  SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4278  SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4279  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4280  return DAG.getSetCC(DL, VT, And, Zero, CC0);
4281  }
4282  }
4283  }
4284  }
4285 
4286  // Canonicalize equivalent operands to LL == RL.
4287  if (LL == RR && LR == RL) {
4288  CC1 = ISD::getSetCCSwappedOperands(CC1);
4289  std::swap(RL, RR);
4290  }
4291 
4292  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4293  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4294  if (LL == RL && LR == RR) {
4295  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4296  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4297  if (NewCC != ISD::SETCC_INVALID &&
4298  (!LegalOperations ||
4299  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4300  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4301  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4302  }
4303 
4304  return SDValue();
4305 }
4306 
4307 /// This contains all DAGCombine rules which reduce two values combined by
4308 /// an And operation to a single value. This makes them reusable in the context
4309 /// of visitSELECT(). Rules involving constants are not included as
4310 /// visitSELECT() already handles those cases.
4311 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4312  EVT VT = N1.getValueType();
4313  SDLoc DL(N);
4314 
4315  // fold (and x, undef) -> 0
4316  if (N0.isUndef() || N1.isUndef())
4317  return DAG.getConstant(0, DL, VT);
4318 
4319  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4320  return V;
4321 
4322  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4323  VT.getSizeInBits() <= 64) {
4324  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4325  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4326  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4327  // immediate for an add, but it is legal if its top c2 bits are set,
4328  // transform the ADD so the immediate doesn't need to be materialized
4329  // in a register.
4330  APInt ADDC = ADDI->getAPIntValue();
4331  APInt SRLC = SRLI->getAPIntValue();
4332  if (ADDC.getMinSignedBits() <= 64 &&
4333  SRLC.ult(VT.getSizeInBits()) &&
4334  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4335  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4336  SRLC.getZExtValue());
4337  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4338  ADDC |= Mask;
4339  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4340  SDLoc DL0(N0);
4341  SDValue NewAdd =
4342  DAG.getNode(ISD::ADD, DL0, VT,
4343  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4344  CombineTo(N0.getNode(), NewAdd);
4345  // Return N so it doesn't get rechecked!
4346  return SDValue(N, 0);
4347  }
4348  }
4349  }
4350  }
4351  }
4352  }
4353 
4354  // Reduce bit extract of low half of an integer to the narrower type.
4355  // (and (srl i64:x, K), KMask) ->
4356  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4357  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4358  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4359  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4360  unsigned Size = VT.getSizeInBits();
4361  const APInt &AndMask = CAnd->getAPIntValue();
4362  unsigned ShiftBits = CShift->getZExtValue();
4363 
4364  // Bail out, this node will probably disappear anyway.
4365  if (ShiftBits == 0)
4366  return SDValue();
4367 
4368  unsigned MaskBits = AndMask.countTrailingOnes();
4369  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4370 
4371  if (AndMask.isMask() &&
4372  // Required bits must not span the two halves of the integer and
4373  // must fit in the half size type.
4374  (ShiftBits + MaskBits <= Size / 2) &&
4375  TLI.isNarrowingProfitable(VT, HalfVT) &&
4376  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4377  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4378  TLI.isTruncateFree(VT, HalfVT) &&
4379  TLI.isZExtFree(HalfVT, VT)) {
4380  // The isNarrowingProfitable is to avoid regressions on PPC and
4381  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4382  // on downstream users of this. Those patterns could probably be
4383  // extended to handle extensions mixed in.
4384 
4385  SDValue SL(N0);
4386  assert(MaskBits <= Size);
4387 
4388  // Extracting the highest bit of the low half.
4389  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4390  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4391  N0.getOperand(0));
4392 
4393  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4394  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4395  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4396  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4397  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4398  }
4399  }
4400  }
4401  }
4402 
4403  return SDValue();
4404 }
4405 
4406 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4407  EVT LoadResultTy, EVT &ExtVT) {
4408  if (!AndC->getAPIntValue().isMask())
4409  return false;
4410 
4411  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4412 
4413  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4414  EVT LoadedVT = LoadN->getMemoryVT();
4415 
4416  if (ExtVT == LoadedVT &&
4417  (!LegalOperations ||
4418  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4419  // ZEXTLOAD will match without needing to change the size of the value being
4420  // loaded.
4421  return true;
4422  }
4423 
4424  // Do not change the width of a volatile load.
4425  if (LoadN->isVolatile())
4426  return false;
4427 
4428  // Do not generate loads of non-round integer types since these can
4429  // be expensive (and would be wrong if the type is not byte sized).
4430  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4431  return false;
4432 
4433  if (LegalOperations &&
4434  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4435  return false;
4436 
4437  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4438  return false;
4439 
4440  return true;
4441 }
4442 
4443 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4444  ISD::LoadExtType ExtType, EVT &MemVT,
4445  unsigned ShAmt) {
4446  if (!LDST)
4447  return false;
4448  // Only allow byte offsets.
4449  if (ShAmt % 8)
4450  return false;
4451 
4452  // Do not generate loads of non-round integer types since these can
4453  // be expensive (and would be wrong if the type is not byte sized).
4454  if (!MemVT.isRound())
4455  return false;
4456 
4457  // Don't change the width of a volatile load.
4458  if (LDST->isVolatile())
4459  return false;
4460 
4461  // Verify that we are actually reducing a load width here.
4462  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4463  return false;
4464 
4465  // Ensure that this isn't going to produce an unsupported unaligned access.
4466  if (ShAmt &&
4467  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4468  LDST->getAddressSpace(), ShAmt / 8))
4469  return false;
4470 
4471  // It's not possible to generate a constant of extended or untyped type.
4472  EVT PtrType = LDST->getBasePtr().getValueType();
4473  if (PtrType == MVT::Untyped || PtrType.isExtended())
4474  return false;
4475 
4476  if (isa<LoadSDNode>(LDST)) {
4477  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4478  // Don't transform one with multiple uses, this would require adding a new
4479  // load.
4480  if (!SDValue(Load, 0).hasOneUse())
4481  return false;
4482 
4483  if (LegalOperations &&
4484  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4485  return false;
4486 
4487  // For the transform to be legal, the load must produce only two values
4488  // (the value loaded and the chain). Don't transform a pre-increment
4489  // load, for example, which produces an extra value. Otherwise the
4490  // transformation is not equivalent, and the downstream logic to replace
4491  // uses gets things wrong.
4492  if (Load->getNumValues() > 2)
4493  return false;
4494 
4495  // If the load that we're shrinking is an extload and we're not just
4496  // discarding the extension we can't simply shrink the load. Bail.
4497  // TODO: It would be possible to merge the extensions in some cases.
4498  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4499  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4500  return false;
4501 
4502  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4503  return false;
4504  } else {
4505  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4506  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4507  // Can't write outside the original store
4508  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4509  return false;
4510 
4511  if (LegalOperations &&
4512  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4513  return false;
4514  }
4515  return true;
4516 }
4517 
4518 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4519  SmallVectorImpl<LoadSDNode*> &Loads,
4520  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4521  ConstantSDNode *Mask,
4522  SDNode *&NodeToMask) {
4523  // Recursively search for the operands, looking for loads which can be
4524  // narrowed.
4525  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4526  SDValue Op = N->getOperand(i);
4527 
4528  if (Op.getValueType().isVector())
4529  return false;
4530 
4531  // Some constants may need fixing up later if they are too large.
4532  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4533  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4534  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4535  NodesWithConsts.insert(N);
4536  continue;
4537  }
4538 
4539  if (!Op.hasOneUse())
4540  return false;
4541 
4542  switch(Op.getOpcode()) {
4543  case ISD::LOAD: {
4544  auto *Load = cast<LoadSDNode>(Op);
4545  EVT ExtVT;
4546  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4547  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4548 
4549  // ZEXTLOAD is already small enough.
4550  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4551  ExtVT.bitsGE(Load->getMemoryVT()))
4552  continue;
4553 
4554  // Use LE to convert equal sized loads to zext.
4555  if (ExtVT.bitsLE(Load->getMemoryVT()))
4556  Loads.push_back(Load);
4557 
4558  continue;
4559  }
4560  return false;
4561  }
4562  case ISD::ZERO_EXTEND:
4563  case ISD::AssertZext: {
4564  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4565  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4566  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4567  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4568  Op.getOperand(0).getValueType();
4569 
4570  // We can accept extending nodes if the mask is wider or an equal
4571  // width to the original type.
4572  if (ExtVT.bitsGE(VT))
4573  continue;
4574  break;
4575  }
4576  case ISD::OR:
4577  case ISD::XOR:
4578  case ISD::AND:
4579  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4580  NodeToMask))
4581  return false;
4582  continue;
4583  }
4584 
4585  // Allow one node which will be masked along with any loads found.
4586  if (NodeToMask)
4587  return false;
4588 
4589  // Also ensure that the node to be masked only produces one data result.
4590  NodeToMask = Op.getNode();
4591  if (NodeToMask->getNumValues() > 1) {
4592  bool HasValue = false;
4593  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4594  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4595  if (VT != MVT::Glue && VT != MVT::Other) {
4596  if (HasValue) {
4597  NodeToMask = nullptr;
4598  return false;
4599  }
4600  HasValue = true;
4601  }
4602  }
4603  assert(HasValue && "Node to be masked has no data result?");
4604  }
4605  }
4606  return true;
4607 }
4608 
4609 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4610  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4611  if (!Mask)
4612  return false;
4613 
4614  if (!Mask->getAPIntValue().isMask())
4615  return false;
4616 
4617  // No need to do anything if the and directly uses a load.
4618  if (isa<LoadSDNode>(N->getOperand(0)))
4619  return false;
4620 
4621  SmallVector<LoadSDNode*, 8> Loads;
4622  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4623  SDNode *FixupNode = nullptr;
4624  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4625  if (Loads.size() == 0)
4626  return false;
4627 
4628  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4629  SDValue MaskOp = N->getOperand(1);
4630 
4631  // If it exists, fixup the single node we allow in the tree that needs
4632  // masking.
4633  if (FixupNode) {
4634  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4635  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4636  FixupNode->getValueType(0),
4637  SDValue(FixupNode, 0), MaskOp);
4638  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4639  if (And.getOpcode() == ISD::AND)
4640  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4641  }
4642 
4643  // Narrow any constants that need it.
4644  for (auto *LogicN : NodesWithConsts) {
4645  SDValue Op0 = LogicN->getOperand(0);
4646  SDValue Op1 = LogicN->getOperand(1);
4647 
4648  if (isa<ConstantSDNode>(Op0))
4649  std::swap(Op0, Op1);
4650 
4651  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4652  Op1, MaskOp);
4653 
4654  DAG.UpdateNodeOperands(LogicN, Op0, And);
4655  }
4656 
4657  // Create narrow loads.
4658  for (auto *Load : Loads) {
4659  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4660  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4661  SDValue(Load, 0), MaskOp);
4662  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4663  if (And.getOpcode() == ISD::AND)
4664  And = SDValue(
4665  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4666  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4667  assert(NewLoad &&
4668  "Shouldn't be masking the load if it can't be narrowed");
4669  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4670  }
4671  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4672  return true;
4673  }
4674  return false;
4675 }
4676 
4677 // Unfold
4678 // x & (-1 'logical shift' y)
4679 // To
4680 // (x 'opposite logical shift' y) 'logical shift' y
4681 // if it is better for performance.
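// e.g. (and x, (srl -1, y)) --> (srl (shl x, y), y)
//      (and x, (shl -1, y)) --> (shl (srl x, y), y)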
4682 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4683  assert(N->getOpcode() == ISD::AND);
4684 
4685  SDValue N0 = N->getOperand(0);
4686  SDValue N1 = N->getOperand(1);
4687 
4688  // Do we actually prefer shifts over a mask?
4689  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4690  return SDValue();
4691 
4692  // Try to match (-1 '[outer] logical shift' y)
4693  unsigned OuterShift;
4694  unsigned InnerShift; // The opposite direction to the OuterShift.
4695  SDValue Y; // Shift amount.
4696  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4697  if (!M.hasOneUse())
4698  return false;
4699  OuterShift = M->getOpcode();
4700  if (OuterShift == ISD::SHL)
4701  InnerShift = ISD::SRL;
4702  else if (OuterShift == ISD::SRL)
4703  InnerShift = ISD::SHL;
4704  else
4705  return false;
4706  if (!isAllOnesConstant(M->getOperand(0)))
4707  return false;
4708  Y = M->getOperand(1);
4709  return true;
4710  };
4711 
4712  SDValue X;
4713  if (matchMask(N1))
4714  X = N0;
4715  else if (matchMask(N0))
4716  X = N1;
4717  else
4718  return SDValue();
4719 
4720  SDLoc DL(N);
4721  EVT VT = N->getValueType(0);
4722 
4723  // tmp = x 'opposite logical shift' y
4724  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4725  // ret = tmp 'logical shift' y
4726  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4727 
4728  return T1;
4729 }
4730 
4731 SDValue DAGCombiner::visitAND(SDNode *N) {
4732  SDValue N0 = N->getOperand(0);
4733  SDValue N1 = N->getOperand(1);
4734  EVT VT = N1.getValueType();
4735 
4736  // x & x --> x
4737  if (N0 == N1)
4738  return N0;
4739 
4740  // fold vector ops
4741  if (VT.isVector()) {
4742  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4743  return FoldedVOp;
4744 
4745  // fold (and x, 0) -> 0, vector edition
4746  if (ISD::isBuildVectorAllZeros(N0.getNode()))
4747  // do not return N0, because undef node may exist in N0
4748  return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4749  SDLoc(N), N0.getValueType());
4750  if (ISD::isBuildVectorAllZeros(N1.getNode()))
4751  // do not return N1, because undef node may exist in N1
4752  return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4753  SDLoc(N), N1.getValueType());
4754 
4755  // fold (and x, -1) -> x, vector edition
4756  if (ISD::isBuildVectorAllOnes(N0.getNode()))
4757  return N1;
4758  if (ISD::isBuildVectorAllOnes(N1.getNode()))
4759  return N0;
4760  }
4761 
4762  // fold (and c1, c2) -> c1&c2
4763  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4764  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4765  if (N0C && N1C && !N1C->isOpaque())
4766  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4767  // canonicalize constant to RHS
4768  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4769  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4770  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4771  // fold (and x, -1) -> x
4772  if (isAllOnesConstant(N1))
4773  return N0;
4774  // if (and x, c) is known to be zero, return 0
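 // e.g. (and (srl x, 24), 0xFF00) on i32 is known to be zero, because the
 // shift leaves only the low 8 bits possibly set.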
4775  unsigned BitWidth = VT.getScalarSizeInBits();
4776  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4777  APInt::getAllOnesValue(BitWidth)))
4778  return DAG.getConstant(0, SDLoc(N), VT);
4779 
4780  if (SDValue NewSel = foldBinOpIntoSelect(N))
4781  return NewSel;
4782 
4783  // reassociate and
4784  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4785  return RAND;
4786 
4787  // Try to convert a constant mask AND into a shuffle clear mask.
4788  if (VT.isVector())
4789  if (SDValue Shuffle = XformToShuffleWithZero(N))
4790  return Shuffle;
4791 
4792  // fold (and (or x, C), D) -> D if (C & D) == D
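 // e.g. (and (or x, 0xFF00), 0x0F00) --> 0x0F00, since the 'or' already
 // guarantees every bit of D is set.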
4793  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4794  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4795  };
4796  if (N0.getOpcode() == ISD::OR &&
4797  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4798  return N1;
4799  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
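 // e.g. (and (any_extend x:i8 to i32), 0xFF) --> (zero_extend x:i8 to i32),
 // because the mask only clears bits above the original 8-bit value.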
4800  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4801  SDValue N0Op0 = N0.getOperand(0);
4802  APInt Mask = ~N1C->getAPIntValue();
4803  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4804  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4805  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4806  N0.getValueType(), N0Op0);
4807 
4808  // Replace uses of the AND with uses of the Zero extend node.
4809  CombineTo(N, Zext);
4810 
4811  // We actually want to replace all uses of the any_extend with the
4812  // zero_extend, to avoid duplicating things. This will later cause this
4813  // AND to be folded.
4814  CombineTo(N0.getNode(), Zext);
4815  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4816  }
4817  }
4818  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4819  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4820  // already be zero by virtue of the width of the base type of the load.
4821  //
4822  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4823  // more cases.
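 // e.g. (and (load (any_ext i8 V)), 0xFF) -> (load (zero_ext i8 V)); once the
 // load zero-extends, the mask clears no bits and becomes redundant.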
4824  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4825  N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4826  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4827  N0.getOperand(0).getResNo() == 0) ||
4828  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4829  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4830  N0 : N0.getOperand(0) );
4831 
4832  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4833  // This can be a pure constant or a vector splat, in which case we treat the
4834  // vector as a scalar and use the splat value.
4835  APInt Constant(1, 0);
4836  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4837  Constant = C->getAPIntValue();
4838  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4839  APInt SplatValue, SplatUndef;
4840  unsigned SplatBitSize;
4841  bool HasAnyUndefs;
4842  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4843  SplatBitSize, HasAnyUndefs);
4844  if (IsSplat) {
4845  // Undef bits can contribute to a possible optimisation if set, so
4846  // set them.
4847  SplatValue |= SplatUndef;
4848 
4849  // The splat value may be something like "0x00FFFFFF", which means 0 for
4850  // the first vector value and FF for the rest, repeating. We need a mask
4851  // that will apply equally to all members of the vector, so AND all the
4852  // lanes of the constant together.
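 // e.g. with 8-bit lanes and SplatValue 0x00FFFFFF, the lanes are 0xFF, 0xFF
 // and 0x00, so the combined lane mask computed below is 0x00.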
4853  EVT VT = Vector->getValueType(0);
4854  unsigned BitWidth = VT.getScalarSizeInBits();
4855 
4856  // If the splat value has been compressed to a bitlength lower
4857  // than the size of the vector lane, we need to re-expand it to
4858  // the lane size.
4859  if (BitWidth > SplatBitSize)
4860  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4861  SplatBitSize < BitWidth;
4862  SplatBitSize = SplatBitSize * 2)
4863  SplatValue |= SplatValue.shl(SplatBitSize);
4864 
4865  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4866  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4867  if (SplatBitSize % BitWidth == 0) {
4868  Constant = APInt::getAllOnesValue(BitWidth);
4869  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4870  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4871  }
4872  }
4873  }
4874 
4875  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4876  // actually legal and isn't going to get expanded, else this is a false
4877  // optimisation.
4878  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4879  Load->getValueType(0),
4880  Load->getMemoryVT());
4881 
4882  // Resize the constant to the same size as the original memory access before
4883  // extension. If it is still the AllOnesValue then this AND is completely
4884  // unneeded.
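 // e.g. a 0xFF mask resized to an i8 memory type is the i8 all-ones value, so
 // the AND adds nothing once the load zero-extends.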
4885  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4886 
4887  bool B;
4888  switch (Load->