LLVM  6.0.0svn
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 #include <vector>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
81 STATISTIC(NodesCombined , "Number of dag nodes combined");
82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86 STATISTIC(SlicedLoads, "Number of load sliced");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// \brief Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// \brief Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140 
141  /// \brief Set of nodes which have been combined (at least once).
142  ///
143  /// This is used to allow us to reliably add any operands of a DAG node
144  /// which have not yet been combined to the worklist.
145  SmallPtrSet<SDNode *, 32> CombinedNodes;
146 
147  // AA - Used for DAG load/store alias analysis.
148  AliasAnalysis *AA;
149 
150  /// When an instruction is simplified, add all users of the instruction to
151  /// the work lists because they might get more simplified now.
152  void AddUsersToWorklist(SDNode *N) {
153  for (SDNode *Node : N->uses())
154  AddToWorklist(Node);
155  }
156 
157  /// Call the node-specific routine that folds each particular type of node.
158  SDValue visit(SDNode *N);
159 
160  public:
161  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163  OptLevel(OL), AA(AA) {
164  ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
165 
166  MaximumLegalStoreInBits = 0;
167  for (MVT VT : MVT::all_valuetypes())
168  if (EVT(VT).isSimple() && VT != MVT::Other &&
169  TLI.isTypeLegal(EVT(VT)) &&
170  VT.getSizeInBits() >= MaximumLegalStoreInBits)
171  MaximumLegalStoreInBits = VT.getSizeInBits();
172  }
173 
174  /// Add to the worklist making sure its instance is at the back (next to be
175  /// processed.)
176  void AddToWorklist(SDNode *N) {
178  "Deleted Node added to Worklist");
179 
180  // Skip handle nodes as they can't usefully be combined and confuse the
181  // zero-use deletion strategy.
182  if (N->getOpcode() == ISD::HANDLENODE)
183  return;
184 
185  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186  Worklist.push_back(N);
187  }
188 
189  /// Remove all instances of N from the worklist.
190  void removeFromWorklist(SDNode *N) {
191  CombinedNodes.erase(N);
192 
193  auto It = WorklistMap.find(N);
194  if (It == WorklistMap.end())
195  return; // Not in the worklist.
196 
197  // Null out the entry rather than erasing it to avoid a linear operation.
198  Worklist[It->second] = nullptr;
199  WorklistMap.erase(It);
200  }
201 
202  void deleteAndRecombine(SDNode *N);
203  bool recursivelyDeleteUnusedNodes(SDNode *N);
204 
205  /// Replaces all uses of the results of one DAG node with new values.
206  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207  bool AddTo = true);
208 
209  /// Replaces all uses of the results of one DAG node with new values.
210  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211  return CombineTo(N, &Res, 1, AddTo);
212  }
213 
214  /// Replaces all uses of the results of one DAG node with new values.
215  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216  bool AddTo = true) {
217  SDValue To[] = { Res0, Res1 };
218  return CombineTo(N, To, 2, AddTo);
219  }
220 
221  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222 
223  private:
224  unsigned MaximumLegalStoreInBits;
225 
226  /// Check the specified integer node value to see if it can be simplified or
227  /// if things it uses can be simplified by bit propagation.
228  /// If so, return true.
229  bool SimplifyDemandedBits(SDValue Op) {
230  unsigned BitWidth = Op.getScalarValueSizeInBits();
231  APInt Demanded = APInt::getAllOnesValue(BitWidth);
232  return SimplifyDemandedBits(Op, Demanded);
233  }
234 
235  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
236 
237  bool CombineToPreIndexedLoadStore(SDNode *N);
238  bool CombineToPostIndexedLoadStore(SDNode *N);
239  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
240  bool SliceUpLoad(SDNode *N);
241 
242  /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
243  /// load.
244  ///
245  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
246  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
247  /// \param EltNo index of the vector element to load.
248  /// \param OriginalLoad load that EVE came from to be replaced.
249  /// \returns EVE on success SDValue() on failure.
250  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
251  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
252  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
253  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
254  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
255  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
256  SDValue PromoteIntBinOp(SDValue Op);
257  SDValue PromoteIntShiftOp(SDValue Op);
258  SDValue PromoteExtend(SDValue Op);
259  bool PromoteLoad(SDValue Op);
260 
261  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
262  SDValue ExtLoad, const SDLoc &DL,
263  ISD::NodeType ExtType);
264 
265  /// Call the node-specific routine that knows how to fold each
266  /// particular type of node. If that doesn't do anything, try the
267  /// target-specific DAG combines.
268  SDValue combine(SDNode *N);
269 
270  // Visitation implementation - Implement dag node combining for different
271  // node types. The semantics are as follows:
272  // Return Value:
273  // SDValue.getNode() == 0 - No change was made
274  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
275  // otherwise - N should be replaced by the returned Operand.
276  //
277  SDValue visitTokenFactor(SDNode *N);
278  SDValue visitMERGE_VALUES(SDNode *N);
279  SDValue visitADD(SDNode *N);
280  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
281  SDValue visitSUB(SDNode *N);
282  SDValue visitADDC(SDNode *N);
283  SDValue visitUADDO(SDNode *N);
284  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
285  SDValue visitSUBC(SDNode *N);
286  SDValue visitUSUBO(SDNode *N);
287  SDValue visitADDE(SDNode *N);
288  SDValue visitADDCARRY(SDNode *N);
289  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
290  SDValue visitSUBE(SDNode *N);
291  SDValue visitSUBCARRY(SDNode *N);
292  SDValue visitMUL(SDNode *N);
293  SDValue useDivRem(SDNode *N);
294  SDValue visitSDIV(SDNode *N);
295  SDValue visitUDIV(SDNode *N);
296  SDValue visitREM(SDNode *N);
297  SDValue visitMULHU(SDNode *N);
298  SDValue visitMULHS(SDNode *N);
299  SDValue visitSMUL_LOHI(SDNode *N);
300  SDValue visitUMUL_LOHI(SDNode *N);
301  SDValue visitSMULO(SDNode *N);
302  SDValue visitUMULO(SDNode *N);
303  SDValue visitIMINMAX(SDNode *N);
304  SDValue visitAND(SDNode *N);
305  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
306  SDValue visitOR(SDNode *N);
307  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
308  SDValue visitXOR(SDNode *N);
309  SDValue SimplifyVBinOp(SDNode *N);
310  SDValue visitSHL(SDNode *N);
311  SDValue visitSRA(SDNode *N);
312  SDValue visitSRL(SDNode *N);
313  SDValue visitRotate(SDNode *N);
314  SDValue visitABS(SDNode *N);
315  SDValue visitBSWAP(SDNode *N);
316  SDValue visitBITREVERSE(SDNode *N);
317  SDValue visitCTLZ(SDNode *N);
318  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
319  SDValue visitCTTZ(SDNode *N);
320  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
321  SDValue visitCTPOP(SDNode *N);
322  SDValue visitSELECT(SDNode *N);
323  SDValue visitVSELECT(SDNode *N);
324  SDValue visitSELECT_CC(SDNode *N);
325  SDValue visitSETCC(SDNode *N);
326  SDValue visitSETCCE(SDNode *N);
327  SDValue visitSETCCCARRY(SDNode *N);
328  SDValue visitSIGN_EXTEND(SDNode *N);
329  SDValue visitZERO_EXTEND(SDNode *N);
330  SDValue visitANY_EXTEND(SDNode *N);
331  SDValue visitAssertExt(SDNode *N);
332  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
333  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
334  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
335  SDValue visitTRUNCATE(SDNode *N);
336  SDValue visitBITCAST(SDNode *N);
337  SDValue visitBUILD_PAIR(SDNode *N);
338  SDValue visitFADD(SDNode *N);
339  SDValue visitFSUB(SDNode *N);
340  SDValue visitFMUL(SDNode *N);
341  SDValue visitFMA(SDNode *N);
342  SDValue visitFDIV(SDNode *N);
343  SDValue visitFREM(SDNode *N);
344  SDValue visitFSQRT(SDNode *N);
345  SDValue visitFCOPYSIGN(SDNode *N);
346  SDValue visitSINT_TO_FP(SDNode *N);
347  SDValue visitUINT_TO_FP(SDNode *N);
348  SDValue visitFP_TO_SINT(SDNode *N);
349  SDValue visitFP_TO_UINT(SDNode *N);
350  SDValue visitFP_ROUND(SDNode *N);
351  SDValue visitFP_ROUND_INREG(SDNode *N);
352  SDValue visitFP_EXTEND(SDNode *N);
353  SDValue visitFNEG(SDNode *N);
354  SDValue visitFABS(SDNode *N);
355  SDValue visitFCEIL(SDNode *N);
356  SDValue visitFTRUNC(SDNode *N);
357  SDValue visitFFLOOR(SDNode *N);
358  SDValue visitFMINNUM(SDNode *N);
359  SDValue visitFMAXNUM(SDNode *N);
360  SDValue visitBRCOND(SDNode *N);
361  SDValue visitBR_CC(SDNode *N);
362  SDValue visitLOAD(SDNode *N);
363 
364  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
365  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
366 
367  SDValue visitSTORE(SDNode *N);
368  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
369  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
370  SDValue visitBUILD_VECTOR(SDNode *N);
371  SDValue visitCONCAT_VECTORS(SDNode *N);
372  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
373  SDValue visitVECTOR_SHUFFLE(SDNode *N);
374  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
375  SDValue visitINSERT_SUBVECTOR(SDNode *N);
376  SDValue visitMLOAD(SDNode *N);
377  SDValue visitMSTORE(SDNode *N);
378  SDValue visitMGATHER(SDNode *N);
379  SDValue visitMSCATTER(SDNode *N);
380  SDValue visitFP_TO_FP16(SDNode *N);
381  SDValue visitFP16_TO_FP(SDNode *N);
382 
383  SDValue visitFADDForFMACombine(SDNode *N);
384  SDValue visitFSUBForFMACombine(SDNode *N);
385  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
386 
387  SDValue XformToShuffleWithZero(SDNode *N);
388  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
389  SDValue RHS);
390 
391  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
392 
393  SDValue foldSelectOfConstants(SDNode *N);
394  SDValue foldVSelectOfConstants(SDNode *N);
395  SDValue foldBinOpIntoSelect(SDNode *BO);
396  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
397  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
398  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
399  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
400  SDValue N2, SDValue N3, ISD::CondCode CC,
401  bool NotExtCompare = false);
402  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
403  SDValue N2, SDValue N3, ISD::CondCode CC);
404  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
405  const SDLoc &DL);
406  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
407  const SDLoc &DL, bool foldBooleans = true);
408 
409  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
410  SDValue &CC) const;
411  bool isOneUseSetCC(SDValue N) const;
412 
413  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
414  unsigned HiOp);
415  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
416  SDValue CombineExtLoad(SDNode *N);
417  SDValue combineRepeatedFPDivisors(SDNode *N);
418  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
419  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
420  SDValue BuildSDIV(SDNode *N);
421  SDValue BuildSDIVPow2(SDNode *N);
422  SDValue BuildUDIV(SDNode *N);
423  SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
424  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
425  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
426  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
427  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
428  SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
429  SDNodeFlags Flags, bool Reciprocal);
430  SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
431  SDNodeFlags Flags, bool Reciprocal);
432  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
433  bool DemandHighBits = true);
434  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
435  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
436  SDValue InnerPos, SDValue InnerNeg,
437  unsigned PosOpcode, unsigned NegOpcode,
438  const SDLoc &DL);
439  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
440  SDValue MatchLoadCombine(SDNode *N);
441  SDValue ReduceLoadWidth(SDNode *N);
442  SDValue ReduceLoadOpStoreWidth(SDNode *N);
444  SDValue TransformFPLoadStorePair(SDNode *N);
445  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
446  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
447  SDValue reduceBuildVecToShuffle(SDNode *N);
448  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
449  ArrayRef<int> VectorMask, SDValue VecIn1,
450  SDValue VecIn2, unsigned LeftIdx);
451  SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
452 
453  /// Walk up chain skipping non-aliasing memory nodes,
454  /// looking for aliasing nodes and adding them to the Aliases vector.
455  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
456  SmallVectorImpl<SDValue> &Aliases);
457 
458  /// Return true if there is any possibility that the two addresses overlap.
459  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
460 
461  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
462  /// chain (aliasing node.)
463  SDValue FindBetterChain(SDNode *N, SDValue Chain);
464 
465  /// Try to replace a store and any possibly adjacent stores on
466  /// consecutive chains with better chains. Return true only if St is
467  /// replaced.
468  ///
469  /// Notice that other chains may still be replaced even if the function
470  /// returns false.
471  bool findBetterNeighborChains(StoreSDNode *St);
472 
473  /// Match "(X shl/srl V1) & V2" where V2 may not be present.
474  bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
475 
476  /// Holds a pointer to an LSBaseSDNode as well as information on where it
477  /// is located in a sequence of memory operations connected by a chain.
478  struct MemOpLink {
479  // Ptr to the mem node.
480  LSBaseSDNode *MemNode;
481 
482  // Offset from the base ptr.
483  int64_t OffsetFromBase;
484 
485  MemOpLink(LSBaseSDNode *N, int64_t Offset)
486  : MemNode(N), OffsetFromBase(Offset) {}
487  };
488 
489  /// This is a helper function for visitMUL to check the profitability
490  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
491  /// MulNode is the original multiply, AddNode is (add x, c1),
492  /// and ConstNode is c2.
493  bool isMulAddWithConstProfitable(SDNode *MulNode,
494  SDValue &AddNode,
495  SDValue &ConstNode);
496 
497  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
498  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
499  /// the type of the loaded value to be extended. LoadedVT returns the type
500  /// of the original loaded value. NarrowLoad returns whether the load would
501  /// need to be narrowed in order to match.
502  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
503  EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
504  bool &NarrowLoad);
505 
506  /// Helper function for MergeConsecutiveStores which merges the
507  /// component store chains.
508  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
509  unsigned NumStores);
510 
511  /// This is a helper function for MergeConsecutiveStores. When the
512  /// source elements of the consecutive stores are all constants or
513  /// all extracted vector elements, try to merge them into one
514  /// larger store introducing bitcasts if necessary. \return True
515  /// if a merged store was created.
516  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
517  EVT MemVT, unsigned NumStores,
518  bool IsConstantSrc, bool UseVector,
519  bool UseTrunc);
520 
521  /// This is a helper function for MergeConsecutiveStores. Stores
522  /// that potentially may be merged with St are placed in
523  /// StoreNodes.
524  void getStoreMergeCandidates(StoreSDNode *St,
525  SmallVectorImpl<MemOpLink> &StoreNodes);
526 
527  /// Helper function for MergeConsecutiveStores. Checks if
528  /// candidate stores have indirect dependency through their
529  /// operands. \return True if safe to merge.
530  bool checkMergeStoreCandidatesForDependencies(
531  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
532 
533  /// Merge consecutive store operations into a wide store.
534  /// This optimization uses wide integers or vectors when possible.
535  /// \return number of stores that were merged into a merged store (the
536  /// affected nodes are stored as a prefix in \p StoreNodes).
537  bool MergeConsecutiveStores(StoreSDNode *N);
538 
539  /// \brief Try to transform a truncation where C is a constant:
540  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
541  ///
542  /// \p N needs to be a truncation and its first operand an AND. Other
543  /// requirements are checked by the function (e.g. that trunc is
544  /// single-use) and if missed an empty SDValue is returned.
545  SDValue distributeTruncateThroughAnd(SDNode *N);
546 
547  public:
548  /// Runs the dag combiner on all nodes in the work list
549  void Run(CombineLevel AtLevel);
550 
551  SelectionDAG &getDAG() const { return DAG; }
552 
553  /// Returns a type large enough to hold any valid shift amount - before type
554  /// legalization these can be huge.
555  EVT getShiftAmountTy(EVT LHSTy) {
556  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
557  if (LHSTy.isVector())
558  return LHSTy;
559  auto &DL = DAG.getDataLayout();
560  return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
561  : TLI.getPointerTy(DL);
562  }
563 
564  /// This method returns true if we are running before type legalization or
565  /// if the specified VT is legal.
566  bool isTypeLegal(const EVT &VT) {
567  if (!LegalTypes) return true;
568  return TLI.isTypeLegal(VT);
569  }
570 
571  /// Convenience wrapper around TargetLowering::getSetCCResultType
572  EVT getSetCCResultType(EVT VT) const {
573  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
574  }
575  };
576 
577 /// This class is a DAGUpdateListener that removes any deleted
578 /// nodes from the worklist.
579 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
580  DAGCombiner &DC;
581 
582 public:
583  explicit WorklistRemover(DAGCombiner &dc)
584  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
585 
586  void NodeDeleted(SDNode *N, SDNode *E) override {
587  DC.removeFromWorklist(N);
588  }
589 };
590 
591 } // end anonymous namespace
592 
593 //===----------------------------------------------------------------------===//
594 // TargetLowering::DAGCombinerInfo implementation
595 //===----------------------------------------------------------------------===//
596 
598  ((DAGCombiner*)DC)->AddToWorklist(N);
599 }
600 
602 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
603  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
604 }
605 
607 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
608  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
609 }
610 
612 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
613  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
614 }
615 
618  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
619 }
620 
621 //===----------------------------------------------------------------------===//
622 // Helper Functions
623 //===----------------------------------------------------------------------===//
624 
625 void DAGCombiner::deleteAndRecombine(SDNode *N) {
626  removeFromWorklist(N);
627 
628  // If the operands of this node are only used by the node, they will now be
629  // dead. Make sure to re-visit them and recursively delete dead nodes.
630  for (const SDValue &Op : N->ops())
631  // For an operand generating multiple values, one of the values may
632  // become dead allowing further simplification (e.g. split index
633  // arithmetic from an indexed load).
634  if (Op->hasOneUse() || Op->getNumValues() > 1)
635  AddToWorklist(Op.getNode());
636 
637  DAG.DeleteNode(N);
638 }
639 
640 /// Return 1 if we can compute the negated form of the specified expression for
641 /// the same cost as the expression itself, or 2 if we can compute the negated
642 /// form more cheaply than the expression itself.
643 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
644  const TargetLowering &TLI,
645  const TargetOptions *Options,
646  unsigned Depth = 0) {
647  // fneg is removable even if it has multiple uses.
648  if (Op.getOpcode() == ISD::FNEG) return 2;
649 
650  // Don't allow anything with multiple uses.
651  if (!Op.hasOneUse()) return 0;
652 
653  // Don't recurse exponentially.
654  if (Depth > 6) return 0;
655 
656  switch (Op.getOpcode()) {
657  default: return false;
658  case ISD::ConstantFP: {
659  if (!LegalOperations)
660  return 1;
661 
662  // Don't invert constant FP values after legalization unless the target says
663  // the negated constant is legal.
664  EVT VT = Op.getValueType();
665  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
666  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
667  }
668  case ISD::FADD:
669  // FIXME: determine better conditions for this xform.
670  if (!Options->UnsafeFPMath) return 0;
671 
672  // After operation legalization, it might not be legal to create new FSUBs.
673  if (LegalOperations &&
675  return 0;
676 
677  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
678  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
679  Options, Depth + 1))
680  return V;
681  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
682  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
683  Depth + 1);
684  case ISD::FSUB:
685  // We can't turn -(A-B) into B-A when we honor signed zeros.
686  if (!Options->NoSignedZerosFPMath &&
687  !Op.getNode()->getFlags().hasNoSignedZeros())
688  return 0;
689 
690  // fold (fneg (fsub A, B)) -> (fsub B, A)
691  return 1;
692 
693  case ISD::FMUL:
694  case ISD::FDIV:
695  if (Options->HonorSignDependentRoundingFPMath()) return 0;
696 
697  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
698  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
699  Options, Depth + 1))
700  return V;
701 
702  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
703  Depth + 1);
704 
705  case ISD::FP_EXTEND:
706  case ISD::FP_ROUND:
707  case ISD::FSIN:
708  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
709  Depth + 1);
710  }
711 }
712 
713 /// If isNegatibleForFree returns true, return the newly negated expression.
715  bool LegalOperations, unsigned Depth = 0) {
716  const TargetOptions &Options = DAG.getTarget().Options;
717  // fneg is removable even if it has multiple uses.
718  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
719 
720  // Don't allow anything with multiple uses.
721  assert(Op.hasOneUse() && "Unknown reuse!");
722 
723  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
724 
725  const SDNodeFlags Flags = Op.getNode()->getFlags();
726 
727  switch (Op.getOpcode()) {
728  default: llvm_unreachable("Unknown code");
729  case ISD::ConstantFP: {
730  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
731  V.changeSign();
732  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
733  }
734  case ISD::FADD:
735  // FIXME: determine better conditions for this xform.
736  assert(Options.UnsafeFPMath);
737 
738  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
739  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
740  DAG.getTargetLoweringInfo(), &Options, Depth+1))
741  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
742  GetNegatedExpression(Op.getOperand(0), DAG,
743  LegalOperations, Depth+1),
744  Op.getOperand(1), Flags);
745  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
746  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
747  GetNegatedExpression(Op.getOperand(1), DAG,
748  LegalOperations, Depth+1),
749  Op.getOperand(0), Flags);
750  case ISD::FSUB:
751  // fold (fneg (fsub 0, B)) -> B
752  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
753  if (N0CFP->isZero())
754  return Op.getOperand(1);
755 
756  // fold (fneg (fsub A, B)) -> (fsub B, A)
757  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
758  Op.getOperand(1), Op.getOperand(0), Flags);
759 
760  case ISD::FMUL:
761  case ISD::FDIV:
763 
764  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
765  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
766  DAG.getTargetLoweringInfo(), &Options, Depth+1))
767  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
768  GetNegatedExpression(Op.getOperand(0), DAG,
769  LegalOperations, Depth+1),
770  Op.getOperand(1), Flags);
771 
772  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
773  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
774  Op.getOperand(0),
775  GetNegatedExpression(Op.getOperand(1), DAG,
776  LegalOperations, Depth+1), Flags);
777 
778  case ISD::FP_EXTEND:
779  case ISD::FSIN:
780  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
781  GetNegatedExpression(Op.getOperand(0), DAG,
782  LegalOperations, Depth+1));
783  case ISD::FP_ROUND:
784  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
785  GetNegatedExpression(Op.getOperand(0), DAG,
786  LegalOperations, Depth+1),
787  Op.getOperand(1));
788  }
789 }
790 
791 // APInts must be the same size for most operations, this helper
792 // function zero extends the shorter of the pair so that they match.
793 // We provide an Offset so that we can create bitwidths that won't overflow.
794 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
795  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
796  LHS = LHS.zextOrSelf(Bits);
797  RHS = RHS.zextOrSelf(Bits);
798 }
799 
800 // Return true if this node is a setcc, or is a select_cc
801 // that selects between the target values used for true and false, making it
802 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
803 // the appropriate nodes based on the type of node we are checking. This
804 // simplifies life a bit for the callers.
805 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
806  SDValue &CC) const {
807  if (N.getOpcode() == ISD::SETCC) {
808  LHS = N.getOperand(0);
809  RHS = N.getOperand(1);
810  CC = N.getOperand(2);
811  return true;
812  }
813 
814  if (N.getOpcode() != ISD::SELECT_CC ||
815  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
816  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
817  return false;
818 
819  if (TLI.getBooleanContents(N.getValueType()) ==
821  return false;
822 
823  LHS = N.getOperand(0);
824  RHS = N.getOperand(1);
825  CC = N.getOperand(4);
826  return true;
827 }
828 
829 /// Return true if this is a SetCC-equivalent operation with only one use.
830 /// If this is true, it allows the users to invert the operation for free when
831 /// it is profitable to do so.
832 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
833  SDValue N0, N1, N2;
834  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
835  return true;
836  return false;
837 }
838 
839 // \brief Returns the SDNode if it is a constant float BuildVector
840 // or constant float.
842  if (isa<ConstantFPSDNode>(N))
843  return N.getNode();
845  return N.getNode();
846  return nullptr;
847 }
848 
849 // Determines if it is a constant integer or a build vector of constant
850 // integers (and undefs).
851 // Do not permit build vector implicit truncation.
852 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
853  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
854  return !(Const->isOpaque() && NoOpaques);
855  if (N.getOpcode() != ISD::BUILD_VECTOR)
856  return false;
857  unsigned BitWidth = N.getScalarValueSizeInBits();
858  for (const SDValue &Op : N->op_values()) {
859  if (Op.isUndef())
860  continue;
862  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
863  (Const->isOpaque() && NoOpaques))
864  return false;
865  }
866  return true;
867 }
868 
869 // Determines if it is a constant null integer or a splatted vector of a
870 // constant null integer (with no undefs).
871 // Build vector implicit truncation is not an issue for null values.
873  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
874  return Splat->isNullValue();
875  return false;
876 }
877 
878 // Determines if it is a constant integer of one or a splatted vector of a
879 // constant integer of one (with no undefs).
880 // Do not permit build vector implicit truncation.
882  unsigned BitWidth = N.getScalarValueSizeInBits();
883  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
884  return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
885  return false;
886 }
887 
888 // Determines if it is a constant integer of all ones or a splatted vector of a
889 // constant integer of all ones (with no undefs).
890 // Do not permit build vector implicit truncation.
892  unsigned BitWidth = N.getScalarValueSizeInBits();
893  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
894  return Splat->isAllOnesValue() &&
895  Splat->getAPIntValue().getBitWidth() == BitWidth;
896  return false;
897 }
898 
899 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
900 // undef's.
901 static bool isAnyConstantBuildVector(const SDNode *N) {
904 }
905 
906 // Attempt to match a unary predicate against a scalar/splat constant or
907 // every element of a constant BUILD_VECTOR.
909  std::function<bool(ConstantSDNode *)> Match) {
910  if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
911  return Match(Cst);
912 
913  if (ISD::BUILD_VECTOR != Op.getOpcode())
914  return false;
915 
916  EVT SVT = Op.getValueType().getScalarType();
917  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
918  auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
919  if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
920  return false;
921  }
922  return true;
923 }
924 
925 // Attempt to match a binary predicate against a pair of scalar/splat constants
926 // or every element of a pair of constant BUILD_VECTORs.
928  SDValue LHS, SDValue RHS,
929  std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
930  if (LHS.getValueType() != RHS.getValueType())
931  return false;
932 
933  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
934  if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
935  return Match(LHSCst, RHSCst);
936 
937  if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
938  ISD::BUILD_VECTOR != RHS.getOpcode())
939  return false;
940 
941  EVT SVT = LHS.getValueType().getScalarType();
942  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
943  auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
944  auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
945  if (!LHSCst || !RHSCst)
946  return false;
947  if (LHSCst->getValueType(0) != SVT ||
948  LHSCst->getValueType(0) != RHSCst->getValueType(0))
949  return false;
950  if (!Match(LHSCst, RHSCst))
951  return false;
952  }
953  return true;
954 }
955 
956 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
957  SDValue N1) {
958  EVT VT = N0.getValueType();
959  if (N0.getOpcode() == Opc) {
962  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
963  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
964  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
965  return SDValue();
966  }
967  if (N0.hasOneUse()) {
968  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
969  // use
970  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
971  if (!OpNode.getNode())
972  return SDValue();
973  AddToWorklist(OpNode.getNode());
974  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
975  }
976  }
977  }
978 
979  if (N1.getOpcode() == Opc) {
982  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
983  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
984  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
985  return SDValue();
986  }
987  if (N1.hasOneUse()) {
988  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
989  // use
990  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
991  if (!OpNode.getNode())
992  return SDValue();
993  AddToWorklist(OpNode.getNode());
994  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
995  }
996  }
997  }
998 
999  return SDValue();
1000 }
1001 
1002 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1003  bool AddTo) {
1004  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1005  ++NodesCombined;
1006  DEBUG(dbgs() << "\nReplacing.1 ";
1007  N->dump(&DAG);
1008  dbgs() << "\nWith: ";
1009  To[0].getNode()->dump(&DAG);
1010  dbgs() << " and " << NumTo-1 << " other values\n");
1011  for (unsigned i = 0, e = NumTo; i != e; ++i)
1012  assert((!To[i].getNode() ||
1013  N->getValueType(i) == To[i].getValueType()) &&
1014  "Cannot combine value to value of different type!");
1015 
1016  WorklistRemover DeadNodes(*this);
1017  DAG.ReplaceAllUsesWith(N, To);
1018  if (AddTo) {
1019  // Push the new nodes and any users onto the worklist
1020  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1021  if (To[i].getNode()) {
1022  AddToWorklist(To[i].getNode());
1023  AddUsersToWorklist(To[i].getNode());
1024  }
1025  }
1026  }
1027 
1028  // Finally, if the node is now dead, remove it from the graph. The node
1029  // may not be dead if the replacement process recursively simplified to
1030  // something else needing this node.
1031  if (N->use_empty())
1032  deleteAndRecombine(N);
1033  return SDValue(N, 0);
1034 }
1035 
1036 void DAGCombiner::
1037 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1038  // Replace all uses. If any nodes become isomorphic to other nodes and
1039  // are deleted, make sure to remove them from our worklist.
1040  WorklistRemover DeadNodes(*this);
1041  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1042 
1043  // Push the new node and any (possibly new) users onto the worklist.
1044  AddToWorklist(TLO.New.getNode());
1045  AddUsersToWorklist(TLO.New.getNode());
1046 
1047  // Finally, if the node is now dead, remove it from the graph. The node
1048  // may not be dead if the replacement process recursively simplified to
1049  // something else needing this node.
1050  if (TLO.Old.getNode()->use_empty())
1051  deleteAndRecombine(TLO.Old.getNode());
1052 }
1053 
1054 /// Check the specified integer node value to see if it can be simplified or if
1055 /// things it uses can be simplified by bit propagation. If so, return true.
1056 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1057  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1058  KnownBits Known;
1059  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1060  return false;
1061 
1062  // Revisit the node.
1063  AddToWorklist(Op.getNode());
1064 
1065  // Replace the old value with the new one.
1066  ++NodesCombined;
1067  DEBUG(dbgs() << "\nReplacing.2 ";
1068  TLO.Old.getNode()->dump(&DAG);
1069  dbgs() << "\nWith: ";
1070  TLO.New.getNode()->dump(&DAG);
1071  dbgs() << '\n');
1072 
1073  CommitTargetLoweringOpt(TLO);
1074  return true;
1075 }
1076 
1077 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1078  SDLoc DL(Load);
1079  EVT VT = Load->getValueType(0);
1080  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1081 
1082  DEBUG(dbgs() << "\nReplacing.9 ";
1083  Load->dump(&DAG);
1084  dbgs() << "\nWith: ";
1085  Trunc.getNode()->dump(&DAG);
1086  dbgs() << '\n');
1087  WorklistRemover DeadNodes(*this);
1088  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1089  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1090  deleteAndRecombine(Load);
1091  AddToWorklist(Trunc.getNode());
1092 }
1093 
1094 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1095  Replace = false;
1096  SDLoc DL(Op);
1097  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1098  LoadSDNode *LD = cast<LoadSDNode>(Op);
1099  EVT MemVT = LD->getMemoryVT();
1100  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1101  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1102  : ISD::EXTLOAD)
1103  : LD->getExtensionType();
1104  Replace = true;
1105  return DAG.getExtLoad(ExtType, DL, PVT,
1106  LD->getChain(), LD->getBasePtr(),
1107  MemVT, LD->getMemOperand());
1108  }
1109 
1110  unsigned Opc = Op.getOpcode();
1111  switch (Opc) {
1112  default: break;
1113  case ISD::AssertSext:
1114  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1115  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1116  break;
1117  case ISD::AssertZext:
1118  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1119  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1120  break;
1121  case ISD::Constant: {
1122  unsigned ExtOpc =
1124  return DAG.getNode(ExtOpc, DL, PVT, Op);
1125  }
1126  }
1127 
1128  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1129  return SDValue();
1130  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1131 }
1132 
1133 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1135  return SDValue();
1136  EVT OldVT = Op.getValueType();
1137  SDLoc DL(Op);
1138  bool Replace = false;
1139  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1140  if (!NewOp.getNode())
1141  return SDValue();
1142  AddToWorklist(NewOp.getNode());
1143 
1144  if (Replace)
1145  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1146  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1147  DAG.getValueType(OldVT));
1148 }
1149 
1150 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1151  EVT OldVT = Op.getValueType();
1152  SDLoc DL(Op);
1153  bool Replace = false;
1154  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1155  if (!NewOp.getNode())
1156  return SDValue();
1157  AddToWorklist(NewOp.getNode());
1158 
1159  if (Replace)
1160  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1161  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1162 }
1163 
1164 /// Promote the specified integer binary operation if the target indicates it is
1165 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1166 /// i32 since i16 instructions are longer.
1167 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1168  if (!LegalOperations)
1169  return SDValue();
1170 
1171  EVT VT = Op.getValueType();
1172  if (VT.isVector() || !VT.isInteger())
1173  return SDValue();
1174 
1175  // If operation type is 'undesirable', e.g. i16 on x86, consider
1176  // promoting it.
1177  unsigned Opc = Op.getOpcode();
1178  if (TLI.isTypeDesirableForOp(Opc, VT))
1179  return SDValue();
1180 
1181  EVT PVT = VT;
1182  // Consult target whether it is a good idea to promote this operation and
1183  // what's the right type to promote it to.
1184  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1185  assert(PVT != VT && "Don't know what type to promote to!");
1186 
1187  DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1188 
1189  bool Replace0 = false;
1190  SDValue N0 = Op.getOperand(0);
1191  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1192 
1193  bool Replace1 = false;
1194  SDValue N1 = Op.getOperand(1);
1195  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1196  SDLoc DL(Op);
1197 
1198  SDValue RV =
1199  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1200 
1201  // We are always replacing N0/N1's use in N and only need
1202  // additional replacements if there are additional uses.
1203  Replace0 &= !N0->hasOneUse();
1204  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1205 
1206  // Combine Op here so it is preserved past replacements.
1207  CombineTo(Op.getNode(), RV);
1208 
1209  // If operands have a use ordering, make sure we deal with
1210  // predecessor first.
1211  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1212  std::swap(N0, N1);
1213  std::swap(NN0, NN1);
1214  }
1215 
1216  if (Replace0) {
1217  AddToWorklist(NN0.getNode());
1218  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1219  }
1220  if (Replace1) {
1221  AddToWorklist(NN1.getNode());
1222  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1223  }
1224  return Op;
1225  }
1226  return SDValue();
1227 }
1228 
1229 /// Promote the specified integer shift operation if the target indicates it is
1230 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1231 /// i32 since i16 instructions are longer.
1232 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1233  if (!LegalOperations)
1234  return SDValue();
1235 
1236  EVT VT = Op.getValueType();
1237  if (VT.isVector() || !VT.isInteger())
1238  return SDValue();
1239 
1240  // If operation type is 'undesirable', e.g. i16 on x86, consider
1241  // promoting it.
1242  unsigned Opc = Op.getOpcode();
1243  if (TLI.isTypeDesirableForOp(Opc, VT))
1244  return SDValue();
1245 
1246  EVT PVT = VT;
1247  // Consult target whether it is a good idea to promote this operation and
1248  // what's the right type to promote it to.
1249  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1250  assert(PVT != VT && "Don't know what type to promote to!");
1251 
1252  DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1253 
1254  bool Replace = false;
1255  SDValue N0 = Op.getOperand(0);
1256  SDValue N1 = Op.getOperand(1);
1257  if (Opc == ISD::SRA)
1258  N0 = SExtPromoteOperand(N0, PVT);
1259  else if (Opc == ISD::SRL)
1260  N0 = ZExtPromoteOperand(N0, PVT);
1261  else
1262  N0 = PromoteOperand(N0, PVT, Replace);
1263 
1264  if (!N0.getNode())
1265  return SDValue();
1266 
1267  SDLoc DL(Op);
1268  SDValue RV =
1269  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1270 
1271  AddToWorklist(N0.getNode());
1272  if (Replace)
1273  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1274 
1275  // Deal with Op being deleted.
1276  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1277  return RV;
1278  }
1279  return SDValue();
1280 }
1281 
1282 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1283  if (!LegalOperations)
1284  return SDValue();
1285 
1286  EVT VT = Op.getValueType();
1287  if (VT.isVector() || !VT.isInteger())
1288  return SDValue();
1289 
1290  // If operation type is 'undesirable', e.g. i16 on x86, consider
1291  // promoting it.
1292  unsigned Opc = Op.getOpcode();
1293  if (TLI.isTypeDesirableForOp(Opc, VT))
1294  return SDValue();
1295 
1296  EVT PVT = VT;
1297  // Consult target whether it is a good idea to promote this operation and
1298  // what's the right type to promote it to.
1299  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1300  assert(PVT != VT && "Don't know what type to promote to!");
1301  // fold (aext (aext x)) -> (aext x)
1302  // fold (aext (zext x)) -> (zext x)
1303  // fold (aext (sext x)) -> (sext x)
1304  DEBUG(dbgs() << "\nPromoting ";
1305  Op.getNode()->dump(&DAG));
1306  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1307  }
1308  return SDValue();
1309 }
1310 
1311 bool DAGCombiner::PromoteLoad(SDValue Op) {
1312  if (!LegalOperations)
1313  return false;
1314 
1315  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1316  return false;
1317 
1318  EVT VT = Op.getValueType();
1319  if (VT.isVector() || !VT.isInteger())
1320  return false;
1321 
1322  // If operation type is 'undesirable', e.g. i16 on x86, consider
1323  // promoting it.
1324  unsigned Opc = Op.getOpcode();
1325  if (TLI.isTypeDesirableForOp(Opc, VT))
1326  return false;
1327 
1328  EVT PVT = VT;
1329  // Consult target whether it is a good idea to promote this operation and
1330  // what's the right type to promote it to.
1331  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1332  assert(PVT != VT && "Don't know what type to promote to!");
1333 
1334  SDLoc DL(Op);
1335  SDNode *N = Op.getNode();
1336  LoadSDNode *LD = cast<LoadSDNode>(N);
1337  EVT MemVT = LD->getMemoryVT();
1338  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1339  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1340  : ISD::EXTLOAD)
1341  : LD->getExtensionType();
1342  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1343  LD->getChain(), LD->getBasePtr(),
1344  MemVT, LD->getMemOperand());
1345  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1346 
1347  DEBUG(dbgs() << "\nPromoting ";
1348  N->dump(&DAG);
1349  dbgs() << "\nTo: ";
1350  Result.getNode()->dump(&DAG);
1351  dbgs() << '\n');
1352  WorklistRemover DeadNodes(*this);
1353  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1354  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1355  deleteAndRecombine(N);
1356  AddToWorklist(Result.getNode());
1357  return true;
1358  }
1359  return false;
1360 }
1361 
1362 /// \brief Recursively delete a node which has no uses and any operands for
1363 /// which it is the only use.
1364 ///
1365 /// Note that this both deletes the nodes and removes them from the worklist.
1366 /// It also adds any nodes who have had a user deleted to the worklist as they
1367 /// may now have only one use and subject to other combines.
1368 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1369  if (!N->use_empty())
1370  return false;
1371 
1373  Nodes.insert(N);
1374  do {
1375  N = Nodes.pop_back_val();
1376  if (!N)
1377  continue;
1378 
1379  if (N->use_empty()) {
1380  for (const SDValue &ChildN : N->op_values())
1381  Nodes.insert(ChildN.getNode());
1382 
1383  removeFromWorklist(N);
1384  DAG.DeleteNode(N);
1385  } else {
1386  AddToWorklist(N);
1387  }
1388  } while (!Nodes.empty());
1389  return true;
1390 }
1391 
1392 //===----------------------------------------------------------------------===//
1393 // Main DAG Combiner implementation
1394 //===----------------------------------------------------------------------===//
1395 
1396 void DAGCombiner::Run(CombineLevel AtLevel) {
1397  // set the instance variables, so that the various visit routines may use it.
1398  Level = AtLevel;
1399  LegalOperations = Level >= AfterLegalizeVectorOps;
1400  LegalTypes = Level >= AfterLegalizeTypes;
1401 
1402  // Add all the dag nodes to the worklist.
1403  for (SDNode &Node : DAG.allnodes())
1404  AddToWorklist(&Node);
1405 
1406  // Create a dummy node (which is not added to allnodes), that adds a reference
1407  // to the root node, preventing it from being deleted, and tracking any
1408  // changes of the root.
1409  HandleSDNode Dummy(DAG.getRoot());
1410 
1411  // While the worklist isn't empty, find a node and try to combine it.
1412  while (!WorklistMap.empty()) {
1413  SDNode *N;
1414  // The Worklist holds the SDNodes in order, but it may contain null entries.
1415  do {
1416  N = Worklist.pop_back_val();
1417  } while (!N);
1418 
1419  bool GoodWorklistEntry = WorklistMap.erase(N);
1420  (void)GoodWorklistEntry;
1421  assert(GoodWorklistEntry &&
1422  "Found a worklist entry without a corresponding map entry!");
1423 
1424  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1425  // N is deleted from the DAG, since they too may now be dead or may have a
1426  // reduced number of uses, allowing other xforms.
1427  if (recursivelyDeleteUnusedNodes(N))
1428  continue;
1429 
1430  WorklistRemover DeadNodes(*this);
1431 
1432  // If this combine is running after legalizing the DAG, re-legalize any
1433  // nodes pulled off the worklist.
1434  if (Level == AfterLegalizeDAG) {
1435  SmallSetVector<SDNode *, 16> UpdatedNodes;
1436  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1437 
1438  for (SDNode *LN : UpdatedNodes) {
1439  AddToWorklist(LN);
1440  AddUsersToWorklist(LN);
1441  }
1442  if (!NIsValid)
1443  continue;
1444  }
1445 
1446  DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1447 
1448  // Add any operands of the new node which have not yet been combined to the
1449  // worklist as well. Because the worklist uniques things already, this
1450  // won't repeatedly process the same operand.
1451  CombinedNodes.insert(N);
1452  for (const SDValue &ChildN : N->op_values())
1453  if (!CombinedNodes.count(ChildN.getNode()))
1454  AddToWorklist(ChildN.getNode());
1455 
1456  SDValue RV = combine(N);
1457 
1458  if (!RV.getNode())
1459  continue;
1460 
1461  ++NodesCombined;
1462 
1463  // If we get back the same node we passed in, rather than a new node or
1464  // zero, we know that the node must have defined multiple values and
1465  // CombineTo was used. Since CombineTo takes care of the worklist
1466  // mechanics for us, we have no work to do in this case.
1467  if (RV.getNode() == N)
1468  continue;
1469 
1471  RV.getOpcode() != ISD::DELETED_NODE &&
1472  "Node was deleted but visit returned new node!");
1473 
1474  DEBUG(dbgs() << " ... into: ";
1475  RV.getNode()->dump(&DAG));
1476 
1477  if (N->getNumValues() == RV.getNode()->getNumValues())
1478  DAG.ReplaceAllUsesWith(N, RV.getNode());
1479  else {
1480  assert(N->getValueType(0) == RV.getValueType() &&
1481  N->getNumValues() == 1 && "Type mismatch");
1482  DAG.ReplaceAllUsesWith(N, &RV);
1483  }
1484 
1485  // Push the new node and any users onto the worklist
1486  AddToWorklist(RV.getNode());
1487  AddUsersToWorklist(RV.getNode());
1488 
1489  // Finally, if the node is now dead, remove it from the graph. The node
1490  // may not be dead if the replacement process recursively simplified to
1491  // something else needing this node. This will also take care of adding any
1492  // operands which have lost a user to the worklist.
1493  recursivelyDeleteUnusedNodes(N);
1494  }
1495 
1496  // If the root changed (e.g. it was a dead load, update the root).
1497  DAG.setRoot(Dummy.getValue());
1498  DAG.RemoveDeadNodes();
1499 }
1500 
/// Dispatch \p N to the visit routine for its opcode. Each visitXXX routine
/// implements the opcode-specific combines; opcodes without an entry are left
/// untouched and a null SDValue is returned.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::UADDO:              return visitUADDO(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::USUBO:              return visitUSUBO(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  // Signed and unsigned remainder share one visitor.
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  // All four min/max flavors share one visitor.
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  // Both rotate directions share one visitor.
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::ABS:                return visitABS(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::BITREVERSE:         return visitBITREVERSE(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  // Both assert-extension kinds share one visitor.
  case ISD::AssertSext:
  case ISD::AssertZext:         return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1607 
1608 SDValue DAGCombiner::combine(SDNode *N) {
1609  SDValue RV = visit(N);
1610 
1611  // If nothing happened, try a target-specific DAG combine.
1612  if (!RV.getNode()) {
1614  "Node was deleted but visit returned NULL!");
1615 
1616  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1618 
1619  // Expose the DAG combiner to the target combiner impls.
1621  DagCombineInfo(DAG, Level, false, this);
1622 
1623  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1624  }
1625  }
1626 
1627  // If nothing happened still, try promoting the operation.
1628  if (!RV.getNode()) {
1629  switch (N->getOpcode()) {
1630  default: break;
1631  case ISD::ADD:
1632  case ISD::SUB:
1633  case ISD::MUL:
1634  case ISD::AND:
1635  case ISD::OR:
1636  case ISD::XOR:
1637  RV = PromoteIntBinOp(SDValue(N, 0));
1638  break;
1639  case ISD::SHL:
1640  case ISD::SRA:
1641  case ISD::SRL:
1642  RV = PromoteIntShiftOp(SDValue(N, 0));
1643  break;
1644  case ISD::SIGN_EXTEND:
1645  case ISD::ZERO_EXTEND:
1646  case ISD::ANY_EXTEND:
1647  RV = PromoteExtend(SDValue(N, 0));
1648  break;
1649  case ISD::LOAD:
1650  if (PromoteLoad(SDValue(N, 0)))
1651  RV = SDValue(N, 0);
1652  break;
1653  }
1654  }
1655 
1656  // If N is a commutative binary node, try eliminate it if the commuted
1657  // version is already present in the DAG.
1658  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1659  N->getNumValues() == 1) {
1660  SDValue N0 = N->getOperand(0);
1661  SDValue N1 = N->getOperand(1);
1662 
1663  // Constant operands are canonicalized to RHS.
1664  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1665  SDValue Ops[] = {N1, N0};
1666  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1667  N->getFlags());
1668  if (CSENode)
1669  return SDValue(CSENode, 0);
1670  }
1671  }
1672 
1673  return RV;
1674 }
1675 
1676 /// Given a node, return its input chain if it has one, otherwise return a null
1677 /// sd operand.
1679  if (unsigned NumOps = N->getNumOperands()) {
1680  if (N->getOperand(0).getValueType() == MVT::Other)
1681  return N->getOperand(0);
1682  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1683  return N->getOperand(NumOps-1);
1684  for (unsigned i = 1; i < NumOps-1; ++i)
1685  if (N->getOperand(i).getValueType() == MVT::Other)
1686  return N->getOperand(i);
1687  }
1688  return SDValue();
1689 }
1690 
1691 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1692  // If N has two operands, where one has an input chain equal to the other,
1693  // the 'other' chain is redundant.
1694  if (N->getNumOperands() == 2) {
1695  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1696  return N->getOperand(0);
1697  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1698  return N->getOperand(1);
1699  }
1700 
1701  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1702  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1703  SmallPtrSet<SDNode*, 16> SeenOps;
1704  bool Changed = false; // If we should replace this token factor.
1705 
1706  // Start out with this token factor.
1707  TFs.push_back(N);
1708 
1709  // Iterate through token factors. The TFs grows when new token factors are
1710  // encountered.
1711  for (unsigned i = 0; i < TFs.size(); ++i) {
1712  SDNode *TF = TFs[i];
1713 
1714  // Check each of the operands.
1715  for (const SDValue &Op : TF->op_values()) {
1716  switch (Op.getOpcode()) {
1717  case ISD::EntryToken:
1718  // Entry tokens don't need to be added to the list. They are
1719  // redundant.
1720  Changed = true;
1721  break;
1722 
1723  case ISD::TokenFactor:
1724  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1725  // Queue up for processing.
1726  TFs.push_back(Op.getNode());
1727  // Clean up in case the token factor is removed.
1728  AddToWorklist(Op.getNode());
1729  Changed = true;
1730  break;
1731  }
1733 
1734  default:
1735  // Only add if it isn't already in the list.
1736  if (SeenOps.insert(Op.getNode()).second)
1737  Ops.push_back(Op);
1738  else
1739  Changed = true;
1740  break;
1741  }
1742  }
1743  }
1744 
1745  // Remove Nodes that are chained to another node in the list. Do so
1746  // by walking up chains breath-first stopping when we've seen
1747  // another operand. In general we must climb to the EntryNode, but we can exit
1748  // early if we find all remaining work is associated with just one operand as
1749  // no further pruning is possible.
1750 
1751  // List of nodes to search through and original Ops from which they originate.
1753  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1754  SmallPtrSet<SDNode *, 16> SeenChains;
1755  bool DidPruneOps = false;
1756 
1757  unsigned NumLeftToConsider = 0;
1758  for (const SDValue &Op : Ops) {
1759  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1760  OpWorkCount.push_back(1);
1761  }
1762 
1763  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1764  // If this is an Op, we can remove the op from the list. Remark any
1765  // search associated with it as from the current OpNumber.
1766  if (SeenOps.count(Op) != 0) {
1767  Changed = true;
1768  DidPruneOps = true;
1769  unsigned OrigOpNumber = 0;
1770  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1771  OrigOpNumber++;
1772  assert((OrigOpNumber != Ops.size()) &&
1773  "expected to find TokenFactor Operand");
1774  // Re-mark worklist from OrigOpNumber to OpNumber
1775  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1776  if (Worklist[i].second == OrigOpNumber) {
1777  Worklist[i].second = OpNumber;
1778  }
1779  }
1780  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1781  OpWorkCount[OrigOpNumber] = 0;
1782  NumLeftToConsider--;
1783  }
1784  // Add if it's a new chain
1785  if (SeenChains.insert(Op).second) {
1786  OpWorkCount[OpNumber]++;
1787  Worklist.push_back(std::make_pair(Op, OpNumber));
1788  }
1789  };
1790 
1791  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1792  // We need at least be consider at least 2 Ops to prune.
1793  if (NumLeftToConsider <= 1)
1794  break;
1795  auto CurNode = Worklist[i].first;
1796  auto CurOpNumber = Worklist[i].second;
1797  assert((OpWorkCount[CurOpNumber] > 0) &&
1798  "Node should not appear in worklist");
1799  switch (CurNode->getOpcode()) {
1800  case ISD::EntryToken:
1801  // Hitting EntryToken is the only way for the search to terminate without
1802  // hitting
1803  // another operand's search. Prevent us from marking this operand
1804  // considered.
1805  NumLeftToConsider++;
1806  break;
1807  case ISD::TokenFactor:
1808  for (const SDValue &Op : CurNode->op_values())
1809  AddToWorklist(i, Op.getNode(), CurOpNumber);
1810  break;
1811  case ISD::CopyFromReg:
1812  case ISD::CopyToReg:
1813  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1814  break;
1815  default:
1816  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1817  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1818  break;
1819  }
1820  OpWorkCount[CurOpNumber]--;
1821  if (OpWorkCount[CurOpNumber] == 0)
1822  NumLeftToConsider--;
1823  }
1824 
1825  // If we've changed things around then replace token factor.
1826  if (Changed) {
1827  SDValue Result;
1828  if (Ops.empty()) {
1829  // The entry token is the only possible outcome.
1830  Result = DAG.getEntryNode();
1831  } else {
1832  if (DidPruneOps) {
1833  SmallVector<SDValue, 8> PrunedOps;
1834  //
1835  for (const SDValue &Op : Ops) {
1836  if (SeenChains.count(Op.getNode()) == 0)
1837  PrunedOps.push_back(Op);
1838  }
1839  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1840  } else {
1841  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1842  }
1843  }
1844  return Result;
1845  }
1846  return SDValue();
1847 }
1848 
1849 /// MERGE_VALUES can always be eliminated.
1850 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1851  WorklistRemover DeadNodes(*this);
1852  // Replacing results may cause a different MERGE_VALUES to suddenly
1853  // be CSE'd with N, and carry its uses with it. Iterate until no
1854  // uses remain, to ensure that the node can be safely deleted.
1855  // First add the users of this node to the work list so that they
1856  // can be tried again once they have new operands.
1857  AddUsersToWorklist(N);
1858  do {
1859  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1860  DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1861  } while (!N->use_empty());
1862  deleteAndRecombine(N);
1863  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1864 }
1865 
1866 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1867 /// ConstantSDNode pointer else nullptr.
1870  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1871 }
1872 
// Fold (binop (select Cond, CT, CF), C1) into
// (select Cond, (binop CT, C1), (binop CF, C1)) when both select arms and
// the RHS are foldable constants, so the binop disappears entirely.
// NOTE(review): several condition lines in this function (upstream FP-constant
// checks paired with the integer checks below) appear to have been lost during
// extraction — confirm against upstream DAGCombiner.cpp before editing.
1873 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1874  auto BinOpcode = BO->getOpcode();
  // Restrict to the binary operators this transform is known-safe for.
1875  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1876  BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1877  BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1878  BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1879  BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1880  BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1881  BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1882  BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1883  BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1884  "Unexpected binary operator");
1885 
1886  // Bail out if any constants are opaque because we can't constant fold those.
1887  SDValue C1 = BO->getOperand(1);
  // (second half of this condition lost in extraction — see NOTE above)
1888  if (!isConstantOrConstantVector(C1, true) &&
1890  return SDValue();
1891 
1892  // Don't do this unless the old select is going away. We want to eliminate the
1893  // binary operator, not replace a binop with a select.
1894  // TODO: Handle ISD::SELECT_CC.
1895  SDValue Sel = BO->getOperand(0);
1896  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1897  return SDValue();
1898 
  // Both select arms must be foldable constants as well.
1899  SDValue CT = Sel.getOperand(1);
1900  if (!isConstantOrConstantVector(CT, true) &&
1902  return SDValue();
1903 
1904  SDValue CF = Sel.getOperand(2);
1905  if (!isConstantOrConstantVector(CF, true) &&
1907  return SDValue();
1908 
1909  // We have a select-of-constants followed by a binary operator with a
1910  // constant. Eliminate the binop by pulling the constant math into the select.
1911  // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1912  EVT VT = Sel.getValueType();
1913  SDLoc DL(Sel);
1914  SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1915  assert((NewCT.isUndef() || isConstantOrConstantVector(NewCT) ||
1917  "Failed to constant fold a binop with constant operands");
1918 
1919  SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1920  assert((NewCF.isUndef() || isConstantOrConstantVector(NewCF) ||
1922  "Failed to constant fold a binop with constant operands");
1923 
1924  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1925 }
1926 
// Combine an ISD::ADD node: constant folding, canonicalization, algebraic
// identities, and delegation to visitADDLike for carry-related patterns.
// NOTE(review): several guard lines in this function were lost during
// extraction (the embedded numbering skips 1939, 1941, 1952, 1954, 1980,
// 2010, 2015, 2052) — braces below may look unbalanced as a result. Confirm
// against upstream DAGCombiner.cpp before editing.
1927 SDValue DAGCombiner::visitADD(SDNode *N) {
1928  SDValue N0 = N->getOperand(0);
1929  SDValue N1 = N->getOperand(1);
1930  EVT VT = N0.getValueType();
1931  SDLoc DL(N);
1932 
1933  // fold vector ops
1934  if (VT.isVector()) {
1935  if (SDValue FoldedVOp = SimplifyVBinOp(N))
1936  return FoldedVOp;
1937 
1938  // fold (add x, 0) -> x, vector edition
  // (the null-splat guards for the two returns below were lost in extraction)
1940  return N0;
1942  return N1;
1943  }
1944 
1945  // fold (add x, undef) -> undef
1946  if (N0.isUndef())
1947  return N0;
1948 
1949  if (N1.isUndef())
1950  return N1;
1951 
  // (the constant-int guards around this region were lost in extraction;
  // the '}' at line 1959 closes a lost 'if')
1953  // canonicalize constant to RHS
1955  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1956  // fold (add c1, c2) -> c1+c2
1957  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1958  N1.getNode());
1959  }
1960 
1961  // fold (add x, 0) -> x
1962  if (isNullConstant(N1))
1963  return N0;
1964 
1965  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1966  // fold ((c1-A)+c2) -> (c1+c2)-A
1967  if (N0.getOpcode() == ISD::SUB &&
1968  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1969  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1970  return DAG.getNode(ISD::SUB, DL, VT,
1971  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1972  N0.getOperand(1));
1973  }
1974 
1975  // add (sext i1 X), 1 -> zext (not i1 X)
1976  // We don't transform this pattern:
1977  // add (zext i1 X), -1 -> sext (not i1 X)
1978  // because most (?) targets generate better code for the zext form.
  // (the "N1 is one" half of this condition was lost in extraction)
1979  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
1981  SDValue X = N0.getOperand(0);
1982  if ((!LegalOperations ||
1983  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1984  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1985  X.getScalarValueSizeInBits() == 1) {
1986  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1987  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1988  }
1989  }
1990 
1991  // Undo the add -> or combine to merge constant offsets from a frame index.
1992  if (N0.getOpcode() == ISD::OR &&
1993  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
1994  isa<ConstantSDNode>(N0.getOperand(1)) &&
1995  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
1996  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
1997  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
1998  }
1999  }
2000 
2001  if (SDValue NewSel = foldBinOpIntoSelect(N))
2002  return NewSel;
2003 
2004  // reassociate add
2005  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
2006  return RADD;
2007 
2008  // fold ((0-A) + B) -> B-A
  // (the "N0 operand 0 is zero" guard was lost in extraction)
2009  if (N0.getOpcode() == ISD::SUB &&
2011  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2012 
2013  // fold (A + (0-B)) -> A-B
  // (the "N1 operand 0 is zero" guard was lost in extraction)
2014  if (N1.getOpcode() == ISD::SUB &&
2016  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2017 
2018  // fold (A+(B-A)) -> B
2019  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2020  return N1.getOperand(0);
2021 
2022  // fold ((B-A)+A) -> B
2023  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2024  return N0.getOperand(0);
2025 
2026  // fold (A+(B-(A+C))) to (B-C)
2027  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2028  N0 == N1.getOperand(1).getOperand(0))
2029  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2030  N1.getOperand(1).getOperand(1));
2031 
2032  // fold (A+(B-(C+A))) to (B-C)
2033  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2034  N0 == N1.getOperand(1).getOperand(1))
2035  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2036  N1.getOperand(1).getOperand(0));
2037 
2038  // fold (A+((B-A)+or-C)) to (B+or-C)
2039  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2040  N1.getOperand(0).getOpcode() == ISD::SUB &&
2041  N0 == N1.getOperand(0).getOperand(1))
2042  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2043  N1.getOperand(1));
2044 
2045  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2046  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2047  SDValue N00 = N0.getOperand(0);
2048  SDValue N01 = N0.getOperand(1);
2049  SDValue N10 = N1.getOperand(0);
2050  SDValue N11 = N1.getOperand(1);
2051 
  // (the "N00 or N10 is constant" guard was lost in extraction)
2053  return DAG.getNode(ISD::SUB, DL, VT,
2054  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2055  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2056  }
2057 
2058  if (SimplifyDemandedBits(SDValue(N, 0)))
2059  return SDValue(N, 0);
2060 
2061  // fold (a+b) -> (a|b) iff a and b share no bits.
2062  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2063  DAG.haveNoCommonBitsSet(N0, N1))
2064  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2065 
  // Try the carry-related ADD patterns with both operand orders.
2066  if (SDValue Combined = visitADDLike(N0, N1, N))
2067  return Combined;
2068 
2069  if (SDValue Combined = visitADDLike(N1, N0, N))
2070  return Combined;
2071 
2072  return SDValue();
2073 }
2074 
/// If \p V is (possibly wrapped in TRUNCATE/ZERO_EXTEND/AND-with-1) the
/// boolean/carry result (result #1) of a carry-producing node, return that
/// carry value; otherwise return a null SDValue.
2075 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2076  bool Masked = false;
2077 
2078  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2079  while (true) {
2080  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2081  V = V.getOperand(0);
2082  continue;
2083  }
2084 
  // An explicit AND with 1 guarantees the value is already 0 or 1.
2085  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2086  Masked = true;
2087  V = V.getOperand(0);
2088  continue;
2089  }
2090 
2091  break;
2092  }
2093 
2094  // If this is not a carry, return.
2095  if (V.getResNo() != 1)
2096  return SDValue();
2097 
  // Only these opcodes produce a carry/borrow as result #1.
2098  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2099  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2100  return SDValue();
2101 
2102  // If the result is masked, then no matter what kind of bool it is we can
2103  // return. If it isn't, then we need to make sure the bool type is either 0 or
2104  // 1 and not other values.
  // NOTE(review): the right-hand side of this comparison (upstream:
  // TargetLowering::ZeroOrOneBooleanContent) was lost in extraction — confirm.
2105  if (Masked ||
2106  TLI.getBooleanContents(V.getValueType()) ==
2108  return V;
2109 
2110  return SDValue();
2111 }
2112 
// Helper for visitADD: patterns that are tried with both (N0, N1) and
// (N1, N0) operand orders, mainly sign-extension and carry rewrites.
// NOTE(review): guard lines 2119, 2133, 2140 and 2161 were lost in
// extraction — confirm against upstream DAGCombiner.cpp before editing.
2113 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2114  EVT VT = N0.getValueType();
2115  SDLoc DL(LocReference);
2116 
2117  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  // (the "shl source is 0-y" zero guard was lost in extraction)
2118  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2120  return DAG.getNode(ISD::SUB, DL, VT, N0,
2121  DAG.getNode(ISD::SHL, DL, VT,
2122  N1.getOperand(0).getOperand(1),
2123  N1.getOperand(1)));
2124 
2125  if (N1.getOpcode() == ISD::AND) {
2126  SDValue AndOp0 = N1.getOperand(0);
2127  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2128  unsigned DestBits = VT.getScalarSizeInBits();
2129 
2130  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2131  // and similar xforms where the inner op is either ~0 or 0.
  // (the "mask is 1" half of this condition was lost in extraction)
2132  if (NumSignBits == DestBits &&
2134  return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2135  }
2136 
2137  // add (sext i1), X -> sub X, (zext i1)
  // (a legality/profitability guard was lost here in extraction)
2138  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2139  N0.getOperand(0).getValueType() == MVT::i1 &&
2141  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2142  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2143  }
2144 
2145  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2146  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2147  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2148  if (TN->getVT() == MVT::i1) {
2149  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2150  DAG.getConstant(1, DL, VT));
2151  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2152  }
2153  }
2154 
2155  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2156  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
2157  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2158  N0, N1.getOperand(0), N1.getOperand(2));
2159 
2160  // (add X, Carry) -> (addcarry X, 0, Carry)
  // (the ADDCARRY legality guard was lost in extraction)
2162  if (SDValue Carry = getAsCarry(TLI, N1))
2163  return DAG.getNode(ISD::ADDCARRY, DL,
2164  DAG.getVTList(VT, Carry.getValueType()), N0,
2165  DAG.getConstant(0, DL, VT), Carry);
2166 
2167  return SDValue();
2168 }
2169 
// Combine an ISD::ADDC node (add producing a glue carry-out).
// NOTE(review): the declarations of N0C/N1C (upstream: ConstantSDNode*
// obtained via dyn_cast on N0/N1) were lost in extraction at lines
// 2182-2183 — confirm against upstream DAGCombiner.cpp.
2170 SDValue DAGCombiner::visitADDC(SDNode *N) {
2171  SDValue N0 = N->getOperand(0);
2172  SDValue N1 = N->getOperand(1);
2173  EVT VT = N0.getValueType();
2174  SDLoc DL(N);
2175 
2176  // If the flag result is dead, turn this into an ADD.
2177  if (!N->hasAnyUseOfValue(1))
2178  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2179  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2180 
2181  // canonicalize constant to RHS.
2184  if (N0C && !N1C)
2185  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2186 
2187  // fold (addc x, 0) -> x + no carry out
2188  if (isNullConstant(N1))
2189  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2190  DL, MVT::Glue));
2191 
2192  // If it cannot overflow, transform into an add.
2193  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2194  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2195  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue))2195 ;
2196 
2197  return SDValue();
2198 }
2199 
// Combine an ISD::UADDO node (unsigned add producing an overflow bit).
// NOTE(review): the declarations of N0C/N1C (upstream: ConstantSDNode*
// obtained via dyn_cast on N0/N1) were lost in extraction at lines
// 2216-2217 — confirm against upstream DAGCombiner.cpp.
2200 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2201  SDValue N0 = N->getOperand(0);
2202  SDValue N1 = N->getOperand(1);
2203  EVT VT = N0.getValueType();
  // Vector UADDO is not handled here.
2204  if (VT.isVector())
2205  return SDValue();
2206 
2207  EVT CarryVT = N->getValueType(1);
2208  SDLoc DL(N);
2209 
2210  // If the flag result is dead, turn this into an ADD.
2211  if (!N->hasAnyUseOfValue(1))
2212  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2213  DAG.getUNDEF(CarryVT));
2214 
2215  // canonicalize constant to RHS.
2218  if (N0C && !N1C)
2219  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2220 
2221  // fold (uaddo x, 0) -> x + no carry out
2222  if (isNullConstant(N1))
2223  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2224 
2225  // If it cannot overflow, transform into an add.
2226  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2227  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2228  DAG.getConstant(0, DL, CarryVT));
2229 
  // Try the carry-related patterns with both operand orders.
2230  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2231  return Combined;
2232 
2233  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2234  return Combined;
2235 
2236  return SDValue();
2237 }
2238 
// Helper for visitUADDO: carry-absorption patterns tried with both
// operand orders.
2239 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2240  auto VT = N0.getValueType();
2241 
2242  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2243  // If Y + 1 cannot overflow.
2244  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2245  SDValue Y = N1.getOperand(0);
2246  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2247  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2248  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2249  N1.getOperand(2));
2250  }
2251 
2252  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  // NOTE(review): the ADDCARRY legality guard (line 2253 upstream) was
  // lost in extraction — confirm against upstream DAGCombiner.cpp.
2254  if (SDValue Carry = getAsCarry(TLI, N1))
2255  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2256  DAG.getConstant(0, SDLoc(N), VT), Carry);
2257 
2258  return SDValue();
2259 }
2260 
// Combine an ISD::ADDE node (add with glue carry-in and carry-out).
// NOTE(review): the declarations of N0C/N1C (upstream: ConstantSDNode*
// obtained via dyn_cast on N0/N1) were lost in extraction at lines
// 2267-2268 — confirm against upstream DAGCombiner.cpp.
2261 SDValue DAGCombiner::visitADDE(SDNode *N) {
2262  SDValue N0 = N->getOperand(0);
2263  SDValue N1 = N->getOperand(1);
2264  SDValue CarryIn = N->getOperand(2);
2265 
2266  // canonicalize constant to RHS
2269  if (N0C && !N1C)
2270  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2271  N1, N0, CarryIn);
2272 
2273  // fold (adde x, y, false) -> (addc x, y)
2274  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2275  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2276 
2277  return SDValue();
2278 }
2279 
// Combine an ISD::ADDCARRY node (add with boolean carry-in and carry-out).
// NOTE(review): the declarations of N0C/N1C (upstream: ConstantSDNode*
// obtained via dyn_cast on N0/N1) were lost in extraction at lines
// 2287-2288 — confirm against upstream DAGCombiner.cpp.
2280 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2281  SDValue N0 = N->getOperand(0);
2282  SDValue N1 = N->getOperand(1);
2283  SDValue CarryIn = N->getOperand(2);
2284  SDLoc DL(N);
2285 
2286  // canonicalize constant to RHS
2289  if (N0C && !N1C)
2290  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2291 
2292  // fold (addcarry x, y, false) -> (uaddo x, y)
2293  if (isNullConstant(CarryIn))
2294  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2295 
2296  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2297  if (isNullConstant(N0) && isNullConstant(N1)) {
2298  EVT VT = N0.getValueType();
2299  EVT CarryVT = CarryIn.getValueType();
2300  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2301  AddToWorklist(CarryExt.getNode());
  // Mask to a single bit since the carry-in may be a wider boolean.
2302  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2303  DAG.getConstant(1, DL, VT)),
2304  DAG.getConstant(0, DL, CarryVT));
2305  }
2306 
  // Try the deeper carry patterns with both operand orders.
2307  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2308  return Combined;
2309 
2310  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2311  return Combined;
2312 
2313  return SDValue();
2314 }
2315 
2316 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2317  SDNode *N) {
2318  // Iff the flag result is dead:
2319  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2320  if ((N0.getOpcode() == ISD::ADD ||
2321  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2322  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2323  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2324  N0.getOperand(0), N0.getOperand(1), CarryIn);
2325 
2326  /**
2327  * When one of the addcarry argument is itself a carry, we may be facing
2328  * a diamond carry propagation. In which case we try to transform the DAG
2329  * to ensure linear carry propagation if that is possible.
2330  *
2331  * We are trying to get:
2332  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2333  */
2334  if (auto Y = getAsCarry(TLI, N1)) {
2335  /**
2336  * (uaddo A, B)
2337  * / \
2338  * Carry Sum
2339  * | \
2340  * | (addcarry *, 0, Z)
2341  * | /
2342  * \ Carry
2343  * | /
2344  * (addcarry X, *, *)
2345  */
2346  if (Y.getOpcode() == ISD::UADDO &&
2347  CarryIn.getResNo() == 1 &&
2348  CarryIn.getOpcode() == ISD::ADDCARRY &&
2349  isNullConstant(CarryIn.getOperand(1)) &&
2350  CarryIn.getOperand(0) == Y.getValue(0)) {
2351  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2352  Y.getOperand(0), Y.getOperand(1),
2353  CarryIn.getOperand(2));
2354  AddToWorklist(NewY.getNode());
2355  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2356  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2357  NewY.getValue(1));
2358  }
2359  }
2360 
2361  return SDValue();
2362 }
2363 
2364 // Since it may not be valid to emit a fold to zero for vector initializers
2365 // check if we can before folding.
2366 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2367  SelectionDAG &DAG, bool LegalOperations,
2368  bool LegalTypes) {
2369  if (!VT.isVector())
2370  return DAG.getConstant(0, DL, VT);
2371  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2372  return DAG.getConstant(0, DL, VT);
2373  return SDValue();
2374 }
2375 
// Combine an ISD::SUB node: constant folding, algebraic identities,
// shift/negation rewrites, and symbol-offset folding.
// NOTE(review): several guard lines in this function were lost during
// extraction (the embedded numbering skips 2388, 2396-2397, 2406, 2414,
// 2445) — braces below may look unbalanced as a result. In particular the
// declaration of N1C (used at lines 2409 and 2500) is among the lost
// lines. Confirm against upstream DAGCombiner.cpp before editing.
2376 SDValue DAGCombiner::visitSUB(SDNode *N) {
2377  SDValue N0 = N->getOperand(0);
2378  SDValue N1 = N->getOperand(1);
2379  EVT VT = N0.getValueType();
2380  SDLoc DL(N);
2381 
2382  // fold vector ops
2383  if (VT.isVector()) {
2384  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2385  return FoldedVOp;
2386 
2387  // fold (sub x, 0) -> x, vector edition
  // (the null-splat guard for the return below was lost in extraction)
2389  return N0;
2390  }
2391 
2392  // fold (sub x, x) -> 0
2393  // FIXME: Refactor this and xor and other similar operations together.
2394  if (N0 == N1)
2395  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  // (the constant-int guard opening this block was lost in extraction;
  // the '}' at line 2401 closes it)
2398  // fold (sub c1, c2) -> c1-c2
2399  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2400  N1.getNode());
2401  }
2402 
2403  if (SDValue NewSel = foldBinOpIntoSelect(N))
2404  return NewSel;
2405 
  // (the declaration of N1C was lost in extraction here)
2407 
2408  // fold (sub x, c) -> (add x, -c)
2409  if (N1C) {
2410  return DAG.getNode(ISD::ADD, DL, VT, N0,
2411  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2412  }
2413 
  // (the "N0 is zero" guard opening this negation block was lost in
  // extraction; the '}' at line 2442 closes it)
2415  unsigned BitWidth = VT.getScalarSizeInBits();
2416  // Right-shifting everything out but the sign bit followed by negation is
2417  // the same as flipping arithmetic/logical shift type without the negation:
2418  // -(X >>u 31) -> (X >>s 31)
2419  // -(X >>s 31) -> (X >>u 31)
2420  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2421  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2422  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2423  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2424  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2425  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2426  }
2427  }
2428 
2429  // 0 - X --> 0 if the sub is NUW.
2430  if (N->getFlags().hasNoUnsignedWrap())
2431  return N0;
2432 
2433  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2434  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2435  // N1 must be 0 because negating the minimum signed value is undefined.
2436  if (N->getFlags().hasNoSignedWrap())
2437  return N0;
2438 
2439  // 0 - X --> X if X is 0 or the minimum signed value.
2440  return N1;
2441  }
2442  }
2443 
2444  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  // (the all-ones guard for the return below was lost in extraction)
2446  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2447 
2448  // fold A-(A-B) -> B
2449  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2450  return N1.getOperand(1);
2451 
2452  // fold (A+B)-A -> B
2453  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2454  return N0.getOperand(1);
2455 
2456  // fold (A+B)-B -> A
2457  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2458  return N0.getOperand(0);
2459 
2460  // fold C2-(A+C1) -> (C2-C1)-A
2461  if (N1.getOpcode() == ISD::ADD) {
2462  SDValue N11 = N1.getOperand(1);
2463  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2464  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2465  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2466  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2467  }
2468  }
2469 
2470  // fold ((A+(B+or-C))-B) -> A+or-C
2471  if (N0.getOpcode() == ISD::ADD &&
2472  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2473  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2474  N0.getOperand(1).getOperand(0) == N1)
2475  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2476  N0.getOperand(1).getOperand(1));
2477 
2478  // fold ((A+(C+B))-B) -> A+C
2479  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2480  N0.getOperand(1).getOperand(1) == N1)
2481  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2482  N0.getOperand(1).getOperand(0));
2483 
2484  // fold ((A-(B-C))-C) -> A-B
2485  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2486  N0.getOperand(1).getOperand(1) == N1)
2487  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2488  N0.getOperand(1).getOperand(0));
2489 
2490  // If either operand of a sub is undef, the result is undef
2491  if (N0.isUndef())
2492  return N0;
2493  if (N1.isUndef())
2494  return N1;
2495 
2496  // If the relocation model supports it, consider symbol offsets.
2497  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2498  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2499  // fold (sub Sym, c) -> Sym-c
2500  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2501  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2502  GA->getOffset() -
2503  (uint64_t)N1C->getSExtValue());
2504  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2505  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2506  if (GA->getGlobal() == GB->getGlobal())
2507  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2508  DL, VT);
2509  }
2510 
2511  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2512  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2513  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2514  if (TN->getVT() == MVT::i1) {
2515  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2516  DAG.getConstant(1, DL, VT));
2517  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2518  }
2519  }
2520 
2521  return SDValue();
2522 }
2523 
2524 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2525  SDValue N0 = N->getOperand(0);
2526  SDValue N1 = N->getOperand(1);
2527  EVT VT = N0.getValueType();
2528  SDLoc DL(N);
2529 
2530  // If the flag result is dead, turn this into an SUB.
2531  if (!N->hasAnyUseOfValue(1))
2532  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2533  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2534 
2535  // fold (subc x, x) -> 0 + no borrow
2536  if (N0 == N1)
2537  return CombineTo(N, DAG.getConstant(0, DL, VT),
2538  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2539 
2540  // fold (subc x, 0) -> x + no borrow
2541  if (isNullConstant(N1))
2542  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2543 
2544  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2545  if (isAllOnesConstant(N0))
2546  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2547  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2548 
2549  return SDValue();
2550 }
2551 
2552 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2553  SDValue N0 = N->getOperand(0);
2554  SDValue N1 = N->getOperand(1);
2555  EVT VT = N0.getValueType();
2556  if (VT.isVector())
2557  return SDValue();
2558 
2559  EVT CarryVT = N->getValueType(1);
2560  SDLoc DL(N);
2561 
2562  // If the flag result is dead, turn this into an SUB.
2563  if (!N->hasAnyUseOfValue(1))
2564  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2565  DAG.getUNDEF(CarryVT));
2566 
2567  // fold (usubo x, x) -> 0 + no borrow
2568  if (N0 == N1)
2569  return CombineTo(N, DAG.getConstant(0, DL, VT),
2570  DAG.getConstant(0, DL, CarryVT));
2571 
2572  // fold (usubo x, 0) -> x + no borrow
2573  if (isNullConstant(N1))
2574  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2575 
2576  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2577  if (isAllOnesConstant(N0))
2578  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2579  DAG.getConstant(0, DL, CarryVT));
2580 
2581  return SDValue();
2582 }
2583 
2584 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2585  SDValue N0 = N->getOperand(0);
2586  SDValue N1 = N->getOperand(1);
2587  SDValue CarryIn = N->getOperand(2);
2588 
2589  // fold (sube x, y, false) -> (subc x, y)
2590  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2591  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2592 
2593  return SDValue();
2594 }
2595 
2596 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2597  SDValue N0 = N->getOperand(0);
2598  SDValue N1 = N->getOperand(1);
2599  SDValue CarryIn = N->getOperand(2);
2600 
2601  // fold (subcarry x, y, false) -> (usubo x, y)
2602  if (isNullConstant(CarryIn))
2603  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2604 
2605  return SDValue();
2606 }
2607 
2608 SDValue DAGCombiner::visitMUL(SDNode *N) {
2609  SDValue N0 = N->getOperand(0);
2610  SDValue N1 = N->getOperand(1);
2611  EVT VT = N0.getValueType();
2612 
2613  // fold (mul x, undef) -> 0
2614  if (N0.isUndef() || N1.isUndef())
2615  return DAG.getConstant(0, SDLoc(N), VT);
2616 
2617  bool N0IsConst = false;
2618  bool N1IsConst = false;
2619  bool N1IsOpaqueConst = false;
2620  bool N0IsOpaqueConst = false;
2621  APInt ConstValue0, ConstValue1;
2622  // fold vector ops
2623  if (VT.isVector()) {
2624  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2625  return FoldedVOp;
2626 
2627  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2628  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2629  assert((!N0IsConst ||
2630  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2631  "Splat APInt should be element width");
2632  assert((!N1IsConst ||
2633  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2634  "Splat APInt should be element width");
2635  } else {
2636  N0IsConst = isa<ConstantSDNode>(N0);
2637  if (N0IsConst) {
2638  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2639  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2640  }
2641  N1IsConst = isa<ConstantSDNode>(N1);
2642  if (N1IsConst) {
2643  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2644  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2645  }
2646  }
2647 
2648  // fold (mul c1, c2) -> c1*c2
2649  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2650  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2651  N0.getNode(), N1.getNode());
2652 
2653  // canonicalize constant to RHS (vector doesn't have to splat)
2656  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2657  // fold (mul x, 0) -> 0
2658  if (N1IsConst && ConstValue1.isNullValue())
2659  return N1;
2660  // fold (mul x, 1) -> x
2661  if (N1IsConst && ConstValue1.isOneValue())
2662  return N0;
2663 
2664  if (SDValue NewSel = foldBinOpIntoSelect(N))
2665  return NewSel;
2666 
2667  // fold (mul x, -1) -> 0-x
2668  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2669  SDLoc DL(N);
2670  return DAG.getNode(ISD::SUB, DL, VT,
2671  DAG.getConstant(0, DL, VT), N0);
2672  }
2673  // fold (mul x, (1 << c)) -> x << c
2674  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2675  DAG.isKnownToBeAPowerOfTwo(N1)) {
2676  SDLoc DL(N);
2677  SDValue LogBase2 = BuildLogBase2(N1, DL);
2678  AddToWorklist(LogBase2.getNode());
2679 
2680  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2681  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2682  AddToWorklist(Trunc.getNode());
2683  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2684  }
2685  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2686  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2687  unsigned Log2Val = (-ConstValue1).logBase2();
2688  SDLoc DL(N);
2689  // FIXME: If the input is something that is easily negated (e.g. a
2690  // single-use add), we should put the negate there.
2691  return DAG.getNode(ISD::SUB, DL, VT,
2692  DAG.getConstant(0, DL, VT),
2693  DAG.getNode(ISD::SHL, DL, VT, N0,
2694  DAG.getConstant(Log2Val, DL,
2695  getShiftAmountTy(N0.getValueType()))));
2696  }
2697 
2698  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2699  if (N0.getOpcode() == ISD::SHL &&
2700  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2701  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2702  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2704  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2705  }
2706 
2707  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2708  // use.
2709  {
2710  SDValue Sh(nullptr, 0), Y(nullptr, 0);
2711 
2712  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2713  if (N0.getOpcode() == ISD::SHL &&
2715  N0.getNode()->hasOneUse()) {
2716  Sh = N0; Y = N1;
2717  } else if (N1.getOpcode() == ISD::SHL &&
2719  N1.getNode()->hasOneUse()) {
2720  Sh = N1; Y = N0;
2721  }
2722 
2723  if (Sh.getNode()) {
2724  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2725  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2726  }
2727  }
2728 
2729  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2731  N0.getOpcode() == ISD::ADD &&
2733  isMulAddWithConstProfitable(N, N0, N1))
2734  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2735  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2736  N0.getOperand(0), N1),
2737  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2738  N0.getOperand(1), N1));
2739 
2740  // reassociate mul
2741  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2742  return RMUL;
2743 
2744  return SDValue();
2745 }
2746 
2747 /// Return true if divmod libcall is available.
2748 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2749  const TargetLowering &TLI) {
2750  RTLIB::Libcall LC;
2751  EVT NodeType = Node->getValueType(0);
2752  if (!NodeType.isSimple())
2753  return false;
2754  switch (NodeType.getSimpleVT().SimpleTy) {
2755  default: return false; // No libcall for vector types.
2756  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
2757  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2758  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2759  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2760  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2761  }
2762 
2763  return TLI.getLibcallName(LC) != nullptr;
2764 }
2765 
/// Issue divrem if both quotient and remainder are needed.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  // OtherOpcode is the "sibling" operation: the rem for a div, or the div
  // for a rem; a matching sibling user is what makes DIVREM worthwhile.
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  // Walk all users of Op0 looking for div/rem/divrem nodes with the same
  // operands; they can all share one DIVREM. The iterator is advanced
  // before the body runs because CombineTo below can modify the use list.
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE;) {
    SDNode *User = *UI++;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Lazily create the DIVREM on the first matching user, or reuse an
        // existing DIVREM node if one is already present.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          combined = SDValue(User, 0);
        } else {
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Result 0 of DIVREM is the quotient, result 1 the remainder.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
2835 
2837  SDValue N0 = N->getOperand(0);
2838  SDValue N1 = N->getOperand(1);
2839  EVT VT = N->getValueType(0);
2840  SDLoc DL(N);
2841 
2842  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2843  return DAG.getUNDEF(VT);
2844 
2845  // undef / X -> 0
2846  // undef % X -> 0
2847  if (N0.isUndef())
2848  return DAG.getConstant(0, DL, VT);
2849 
2850  return SDValue();
2851 }
2852 
2853 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2854  SDValue N0 = N->getOperand(0);
2855  SDValue N1 = N->getOperand(1);
2856  EVT VT = N->getValueType(0);
2857 
2858  // fold vector ops
2859  if (VT.isVector())
2860  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2861  return FoldedVOp;
2862 
2863  SDLoc DL(N);
2864 
2865  // fold (sdiv c1, c2) -> c1/c2
2868  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2869  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2870  // fold (sdiv X, 1) -> X
2871  if (N1C && N1C->isOne())
2872  return N0;
2873  // fold (sdiv X, -1) -> 0-X
2874  if (N1C && N1C->isAllOnesValue())
2875  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2876 
2877  if (SDValue V = simplifyDivRem(N, DAG))
2878  return V;
2879 
2880  if (SDValue NewSel = foldBinOpIntoSelect(N))
2881  return NewSel;
2882 
2883  // If we know the sign bits of both operands are zero, strength reduce to a
2884  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
2885  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2886  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2887 
2888  // fold (sdiv X, pow2) -> simple ops after legalize
2889  // FIXME: We check for the exact bit here because the generic lowering gives
2890  // better results in that case. The target-specific lowering should learn how
2891  // to handle exact sdivs efficiently.
2892  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2893  !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
2894  (-N1C->getAPIntValue()).isPowerOf2())) {
2895  // Target-specific implementation of sdiv x, pow2.
2896  if (SDValue Res = BuildSDIVPow2(N))
2897  return Res;
2898 
2899  unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2900 
2901  // Splat the sign bit into the register
2902  SDValue SGN =
2903  DAG.getNode(ISD::SRA, DL, VT, N0,
2904  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2905  getShiftAmountTy(N0.getValueType())));
2906  AddToWorklist(SGN.getNode());
2907 
2908  // Add (N0 < 0) ? abs2 - 1 : 0;
2909  SDValue SRL =
2910  DAG.getNode(ISD::SRL, DL, VT, SGN,
2911  DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2912  getShiftAmountTy(SGN.getValueType())));
2913  SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2914  AddToWorklist(SRL.getNode());
2915  AddToWorklist(ADD.getNode()); // Divide by pow2
2916  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2917  DAG.getConstant(lg2, DL,
2918  getShiftAmountTy(ADD.getValueType())));
2919 
2920  // If we're dividing by a positive value, we're done. Otherwise, we must
2921  // negate the result.
2922  if (N1C->getAPIntValue().isNonNegative())
2923  return SRA;
2924 
2925  AddToWorklist(SRA.getNode());
2926  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2927  }
2928 
2929  // If integer divide is expensive and we satisfy the requirements, emit an
2930  // alternate sequence. Targets may check function attributes for size/speed
2931  // trade-offs.
2933  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2934  if (SDValue Op = BuildSDIV(N))
2935  return Op;
2936 
2937  // sdiv, srem -> sdivrem
2938  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2939  // true. Otherwise, we break the simplification logic in visitREM().
2940  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2941  if (SDValue DivRem = useDivRem(N))
2942  return DivRem;
2943 
2944  return SDValue();
2945 }
2946 
2947 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2948  SDValue N0 = N->getOperand(0);
2949  SDValue N1 = N->getOperand(1);
2950  EVT VT = N->getValueType(0);
2951 
2952  // fold vector ops
2953  if (VT.isVector())
2954  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2955  return FoldedVOp;
2956 
2957  SDLoc DL(N);
2958 
2959  // fold (udiv c1, c2) -> c1/c2
2962  if (N0C && N1C)
2963  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2964  N0C, N1C))
2965  return Folded;
2966 
2967  if (SDValue V = simplifyDivRem(N, DAG))
2968  return V;
2969 
2970  if (SDValue NewSel = foldBinOpIntoSelect(N))
2971  return NewSel;
2972 
2973  // fold (udiv x, (1 << c)) -> x >>u c
2974  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2975  DAG.isKnownToBeAPowerOfTwo(N1)) {
2976  SDValue LogBase2 = BuildLogBase2(N1, DL);
2977  AddToWorklist(LogBase2.getNode());
2978 
2979  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2980  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2981  AddToWorklist(Trunc.getNode());
2982  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2983  }
2984 
2985  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2986  if (N1.getOpcode() == ISD::SHL) {
2987  SDValue N10 = N1.getOperand(0);
2988  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2989  DAG.isKnownToBeAPowerOfTwo(N10)) {
2990  SDValue LogBase2 = BuildLogBase2(N10, DL);
2991  AddToWorklist(LogBase2.getNode());
2992 
2993  EVT ADDVT = N1.getOperand(1).getValueType();
2994  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2995  AddToWorklist(Trunc.getNode());
2996  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2997  AddToWorklist(Add.getNode());
2998  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2999  }
3000  }
3001 
3002  // fold (udiv x, c) -> alternate
3004  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
3005  if (SDValue Op = BuildUDIV(N))
3006  return Op;
3007 
3008  // sdiv, srem -> sdivrem
3009  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3010  // true. Otherwise, we break the simplification logic in visitREM().
3011  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3012  if (SDValue DivRem = useDivRem(N))
3013  return DivRem;
3014 
3015  return SDValue();
3016 }
3017 
3018 // handles ISD::SREM and ISD::UREM
3019 SDValue DAGCombiner::visitREM(SDNode *N) {
3020  unsigned Opcode = N->getOpcode();
3021  SDValue N0 = N->getOperand(0);
3022  SDValue N1 = N->getOperand(1);
3023  EVT VT = N->getValueType(0);
3024  bool isSigned = (Opcode == ISD::SREM);
3025  SDLoc DL(N);
3026 
3027  // fold (rem c1, c2) -> c1%c2
3030  if (N0C && N1C)
3031  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3032  return Folded;
3033 
3034  if (SDValue V = simplifyDivRem(N, DAG))
3035  return V;
3036 
3037  if (SDValue NewSel = foldBinOpIntoSelect(N))
3038  return NewSel;
3039 
3040  if (isSigned) {
3041  // If we know the sign bits of both operands are zero, strength reduce to a
3042  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3043  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3044  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3045  } else {
3046  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3047  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3048  // fold (urem x, pow2) -> (and x, pow2-1)
3049  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3050  AddToWorklist(Add.getNode());
3051  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3052  }
3053  if (N1.getOpcode() == ISD::SHL &&
3054  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3055  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3056  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3057  AddToWorklist(Add.getNode());
3058  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3059  }
3060  }
3061 
3063 
3064  // If X/C can be simplified by the division-by-constant logic, lower
3065  // X%C to the equivalent of X-X/C*C.
3066  // To avoid mangling nodes, this simplification requires that the combine()
3067  // call for the speculative DIV must not cause a DIVREM conversion. We guard
3068  // against this by skipping the simplification if isIntDivCheap(). When
3069  // div is not cheap, combine will not return a DIVREM. Regardless,
3070  // checking cheapness here makes sense since the simplification results in
3071  // fatter code.
3072  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
3073  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3074  SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
3075  AddToWorklist(Div.getNode());
3076  SDValue OptimizedDiv = combine(Div.getNode());
3077  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
3078  assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
3079  (OptimizedDiv.getOpcode() != ISD::SDIVREM));
3080  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3081  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3082  AddToWorklist(Mul.getNode());
3083  return Sub;
3084  }
3085  }
3086 
3087  // sdiv, srem -> sdivrem
3088  if (SDValue DivRem = useDivRem(N))
3089  return DivRem.getValue(1);
3090 
3091  return SDValue();
3092 }
3093 
3094 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3095  SDValue N0 = N->getOperand(0);
3096  SDValue N1 = N->getOperand(1);
3097  EVT VT = N->getValueType(0);
3098  SDLoc DL(N);
3099 
3100  // fold (mulhs x, 0) -> 0
3101  if (isNullConstant(N1))
3102  return N1;
3103  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3104  if (isOneConstant(N1)) {
3105  SDLoc DL(N);
3106  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3107  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3108  getShiftAmountTy(N0.getValueType())));
3109  }
3110  // fold (mulhs x, undef) -> 0
3111  if (N0.isUndef() || N1.isUndef())
3112  return DAG.getConstant(0, SDLoc(N), VT);
3113 
3114  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3115  // plus a shift.
3116  if (VT.isSimple() && !VT.isVector()) {
3117  MVT Simple = VT.getSimpleVT();
3118  unsigned SimpleSize = Simple.getSizeInBits();
3119  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3120  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3121  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3122  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3123  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3124  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3125  DAG.getConstant(SimpleSize, DL,
3126  getShiftAmountTy(N1.getValueType())));
3127  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3128  }
3129  }
3130 
3131  return SDValue();
3132 }
3133 
3134 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3135  SDValue N0 = N->getOperand(0);
3136  SDValue N1 = N->getOperand(1);
3137  EVT VT = N->getValueType(0);
3138  SDLoc DL(N);
3139 
3140  // fold (mulhu x, 0) -> 0
3141  if (isNullConstant(N1))
3142  return N1;
3143  // fold (mulhu x, 1) -> 0
3144  if (isOneConstant(N1))
3145  return DAG.getConstant(0, DL, N0.getValueType());
3146  // fold (mulhu x, undef) -> 0
3147  if (N0.isUndef() || N1.isUndef())
3148  return DAG.getConstant(0, DL, VT);
3149 
3150  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3151  // plus a shift.
3152  if (VT.isSimple() && !VT.isVector()) {
3153  MVT Simple = VT.getSimpleVT();
3154  unsigned SimpleSize = Simple.getSizeInBits();
3155  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3156  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3157  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3158  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3159  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3160  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3161  DAG.getConstant(SimpleSize, DL,
3162  getShiftAmountTy(N1.getValueType())));
3163  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3164  }
3165  }
3166 
3167  return SDValue();
3168 }
3169 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    // Replace both results of N with the single-result LoOp node.
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): this branch checks isOperationLegal while the low-half
  // branch above accepts isOperationLegalOrCustom — confirm the asymmetry
  // is intentional.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the low-half op and see if combine() improves it.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    // Only accept a genuinely different node that is legal (or legality
    // checking is off).
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative-combine dance for the high half.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
3220 
3221 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3222  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3223  return Res;
3224 
3225  EVT VT = N->getValueType(0);
3226  SDLoc DL(N);
3227 
3228  // If the type is twice as wide is legal, transform the mulhu to a wider
3229  // multiply plus a shift.
3230  if (VT.isSimple() && !VT.isVector()) {
3231  MVT Simple = VT.getSimpleVT();
3232  unsigned SimpleSize = Simple.getSizeInBits();
3233  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3234  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3235  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3236  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3237  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3238  // Compute the high part as N1.
3239  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3240  DAG.getConstant(SimpleSize, DL,
3241  getShiftAmountTy(Lo.getValueType())));
3242  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3243  // Compute the low part as N0.
3244  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3245  return CombineTo(N, Lo, Hi);
3246  }
3247  }
3248 
3249  return SDValue();
3250 }
3251 
3252 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3253  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3254  return Res;
3255 
3256  EVT VT = N->getValueType(0);
3257  SDLoc DL(N);
3258 
3259  // If the type is twice as wide is legal, transform the mulhu to a wider
3260  // multiply plus a shift.
3261  if (VT.isSimple() && !VT.isVector()) {
3262  MVT Simple = VT.getSimpleVT();
3263  unsigned SimpleSize = Simple.getSizeInBits();
3264  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3265  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3266  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3267  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3268  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3269  // Compute the high part as N1.
3270  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3271  DAG.getConstant(SimpleSize, DL,
3272  getShiftAmountTy(Lo.getValueType())));
3273  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3274  // Compute the low part as N0.
3275  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3276  return CombineTo(N, Lo, Hi);
3277  }
3278  }
3279 
3280  return SDValue();
3281 }
3282 
3283 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3284  // (smulo x, 2) -> (saddo x, x)
3285  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3286  if (C2->getAPIntValue() == 2)
3287  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3288  N->getOperand(0), N->getOperand(0));
3289 
3290  return SDValue();
3291 }
3292 
3293 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3294  // (umulo x, 2) -> (uaddo x, x)
3295  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3296  if (C2->getAPIntValue() == 2)
3297  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3298  N->getOperand(0), N->getOperand(0));
3299 
3300  return SDValue();
3301 }
3302 
3303 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3304  SDValue N0 = N->getOperand(0);
3305  SDValue N1 = N->getOperand(1);
3306  EVT VT = N0.getValueType();
3307 
3308  // fold vector ops
3309  if (VT.isVector())
3310  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3311  return FoldedVOp;
3312 
3313  // fold operation with constant operands.
3316  if (N0C && N1C)
3317  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3318 
3319  // canonicalize constant to RHS
3322  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3323 
3324  return SDValue();
3325 }
3326 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Apply the logic op in the narrower pre-extend/pre-bswap type, then
    // re-apply the unary op to the combined result.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    // The shared second operand (z) is hoisted out of the logic op.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR)  &&
       Level <= AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    SVN0->getMask());
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, SDLoc(N), VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorklist(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    SVN0->getMask());
      }
    }
  }

  return SDValue();
}
3479 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
///
/// \param IsAnd true when the two setcc values are combined by ISD::AND
///              semantics, false for ISD::OR; it selects which of the paired
///              folds below may fire.
/// \param N0,N1 the two operands of the logic op; each must be a setcc or a
///              setcc-equivalent node (see isSetCCEquivalent).
/// \param DL    debug location used for newly created nodes.
/// \returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Decompose both operands into (LHS, RHS, condition code) triples.
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Case 1: both compares test against the same constant (0 or -1) with the
  // same predicate; merge the compared values with one OR/AND and compare once.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
    // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
    // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // Case 2: both compares test the same value X with SETNE against 0 and -1
  // (in either order); X+1 maps {0,-1} to {1,0}, so "X is neither" becomes a
  // single unsigned comparison.
  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  if (IsAnd && LL == RL && CC0 == CC1 && IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // Case 3: identical operand pairs; combine the two predicates into one.
  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    // Post-legalization the merged condition code and a SETCC on OpVT must
    // both be legal for the target before we emit the combined compare.
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3598 
3599 /// This contains all DAGCombine rules which reduce two values combined by
3600 /// an And operation to a single value. This makes them reusable in the context
3601 /// of visitSELECT(). Rules involving constants are not included as
3602 /// visitSELECT() already handles those cases.
3603 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3604  EVT VT = N1.getValueType();
3605  SDLoc DL(N);
3606 
3607  // fold (and x, undef) -> 0
3608  if (N0.isUndef() || N1.isUndef())
3609  return DAG.getConstant(0, DL, VT);
3610 
3611  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3612  return V;
3613 
3614  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3615  VT.getSizeInBits() <= 64) {
3616  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3617  APInt ADDC = ADDI->getAPIntValue();
3618  if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3619  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3620  // immediate for an add, but it is legal if its top c2 bits are set,
3621  // transform the ADD so the immediate doesn't need to be materialized
3622  // in a register.
3623  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3625  SRLI->getZExtValue());
3626  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3627  ADDC |= Mask;
3628  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3629  SDLoc DL0(N0);
3630  SDValue NewAdd =
3631  DAG.getNode(ISD::ADD, DL0, VT,
3632  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3633  CombineTo(N0.getNode(), NewAdd);
3634  // Return N so it doesn't get rechecked!
3635  return SDValue(N, 0);
3636  }
3637  }
3638  }
3639  }
3640  }
3641  }
3642 
3643  // Reduce bit extract of low half of an integer to the narrower type.
3644  // (and (srl i64:x, K), KMask) ->
3645  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3646  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3647  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3648  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3649  unsigned Size = VT.getSizeInBits();
3650  const APInt &AndMask = CAnd->getAPIntValue();
3651  unsigned ShiftBits = CShift->getZExtValue();
3652 
3653  // Bail out, this node will probably disappear anyway.
3654  if (ShiftBits == 0)
3655  return SDValue();
3656 
3657  unsigned MaskBits = AndMask.countTrailingOnes();
3658  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3659 
3660  if (AndMask.isMask() &&
3661  // Required bits must not span the two halves of the integer and
3662  // must fit in the half size type.
3663  (ShiftBits + MaskBits <= Size / 2) &&
3664  TLI.isNarrowingProfitable(VT, HalfVT) &&
3665  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3666  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3667  TLI.isTruncateFree(VT, HalfVT) &&
3668  TLI.isZExtFree(HalfVT, VT)) {
3669  // The isNarrowingProfitable is to avoid regressions on PPC and
3670  // AArch64 which match a few 64-bit bit insert / bit extract patterns
3671  // on downstream users of this. Those patterns could probably be
3672  // extended to handle extensions mixed in.
3673 
3674  SDValue SL(N0);
3675  assert(MaskBits <= Size);
3676 
3677  // Extracting the highest bit of the low half.
3678  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3679  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3680  N0.getOperand(0));
3681 
3682  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3683  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3684  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3685  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3686  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3687  }
3688  }
3689  }
3690  }
3691 
3692  return SDValue();
3693 }
3694 
3695 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3696  EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3697  bool &NarrowLoad) {
3698  if (!AndC->getAPIntValue().isMask())
3699  return false;
3700 
3701  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3702 
3703  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3704  LoadedVT = LoadN->getMemoryVT();
3705 
3706  if (ExtVT == LoadedVT &&
3707  (!LegalOperations ||
3708  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3709  // ZEXTLOAD will match without needing to change the size of the value being
3710  // loaded.
3711  NarrowLoad = false;
3712  return true;
3713  }
3714 
3715  // Do not change the width of a volatile load.
3716  if (LoadN->isVolatile())
3717  return false;
3718 
3719  // Do not generate loads of non-round integer types since these can
3720  // be expensive (and would be wrong if the type is not byte sized).
3721  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3722  return false;
3723 
3724  if (LegalOperations &&
3725  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3726  return false;
3727 
3728  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3729  return false;
3730 
3731  NarrowLoad = true;
3732  return true;
3733 }
3734 
/// Visit an ISD::AND node and try all AND-specific combines; returns the
/// replacement value or an empty SDValue if nothing folded.
///
/// NOTE(review): this routine was recovered from a Doxygen HTML dump and
/// several code lines that carried cross-reference links appear to have been
/// dropped by the extractor (flagged inline below). The surviving fragments
/// are kept verbatim — re-sync this function against upstream DAGCombiner.cpp
/// before compiling.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    // NOTE(review): the guarding 'if (ISD::isBuildVectorAllZeros(...))' lines
    // and the first lines of both getConstant calls appear to be missing here
    // — only the argument continuations survive. Confirm against upstream.
    // do not return N0, because undef node may exist in N0
    SDLoc(N), N0.getValueType());
    // do not return N1, because undef node may exist in N1
    SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    // NOTE(review): the 'if (ISD::isBuildVectorAllOnes(...))' guards for the
    // two returns below appear to have been dropped as well.
    return N1;
    return N0;
  }

  // fold (and c1, c2) -> c1&c2
  // NOTE(review): the declarations of N0C and N1C (presumably initialized
  // from N0/N1 constant queries) appear to be missing above this use.
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  // NOTE(review): the guarding condition for this canonicalization swap
  // appears to be missing; as written the return is unconditional.
  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // Fold a binop with a select operand into a select of binops.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
      if (N1C->getAPIntValue().isSubsetOf(ORI->getAPIntValue()))
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // Mask holds the bits the AND clears, restricted to V's width; if V has
    // none of them set the any_extend already behaves like a zero_extend.
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  // NOTE(review): the doc extractor dropped one line inside this condition
  // (between the EXTRACT_VECTOR_ELT check and the LOAD check) — verify the
  // full predicate against upstream.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // NOTE(review): the declaration of the APInt 'Constant' used throughout
    // this scope appears to have been dropped just above this point.
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B records whether the AND is redundant for this extension kind: a
    // sign-extending load cannot be folded, and an any-extend only when a
    // ZEXTLOAD is profitable for the target.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
      auto NarrowLoad = false;
      EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
      EVT ExtVT, LoadedVT;
      if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
                           NarrowLoad)) {
        if (!NarrowLoad) {
          // Same-width case: just reissue the load as a ZEXTLOAD.
          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
                           LN0->getMemOperand());
          AddToWorklist(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        } else {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes. For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
          if (DAG.getDataLayout().isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            SDLoc DL(LN0);
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorklist(NewPtr.getNode());

          SDValue Load = DAG.getExtLoad(
              ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
              LN0->getPointerInfo(), ExtVT, Alignment,
              LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
          AddToWorklist(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  // Rules shared with visitSELECT() live in visitANDLike().
  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  // NOTE(review): the inner guard checking that the SUB's first operand is
  // zero appears to have been dropped here — the extra closing brace below
  // matches that missing 'if'. Confirm against upstream.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    SDValue SubRHS = N0.getOperand(1);
    if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
        SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
      return SubRHS;
    if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
        SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
  }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  return SDValue();
}
4058 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// \param N  the OR node whose operands are being matched.
/// \param N0,N1 the OR's operands (may be swapped freely during matching).
/// \param DemandHighBits when true, bits above the low halfword of the
///        result matter, so the masks must prove they are zero.
/// \returns the replacement value, or an empty SDValue if no match.
///
/// NOTE(review): this function was recovered from a Doxygen HTML dump; the
/// declarations of the mask/shift-amount constants (N01C, N11C, N001C, N101C
/// — presumably dyn_cast<ConstantSDNode> of the relevant operands) appear to
/// have been dropped by the extractor. Flagged inline; re-sync with upstream
/// before compiling.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer types with a legal/custom BSWAP are handled.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): declaration of N01C appears dropped here.
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): declaration of N11C appears dropped here.
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After peeling the masks, expect a matched shl/srl pair by 8.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
    return SDValue();

  // NOTE(review): declarations of N01C/N11C (shift amounts) appear dropped.
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): declaration of N001C appears dropped here.
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): declaration of N101C appears dropped here.
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must originate from the same value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit the bswap and shift the swapped halfword down into the low bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
4165 
/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success the source node for the matched byte is recorded in
/// Parts[MaskByteOffset]; a slot that is already occupied rejects the match.
// NOTE(review): the function's signature line was dropped by the doc
// extractor — presumably 'static bool isBSwapHWordElement(SDValue N,
// MutableArrayRef<SDNode *> Parts) {' — restore it before compiling.
  if (!N.getNode()->hasOneUse())
    return false;

  // An element is an AND/SHL/SRL wrapped around another of those ops.
  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  SDValue N0 = N.getOperand(0);
  unsigned Opc0 = N0.getOpcode();
  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
    return false;

  ConstantSDNode *N1C = nullptr;
  // SHL or SRL: look upstream for AND mask operand
  if (Opc == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  else if (Opc0 == ISD::AND)
    N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!N1C)
    return false;

  // The mask identifies which of the four source bytes this element carries.
  unsigned MaskByteOffset;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF: MaskByteOffset = 0; break;
  case 0xFF00: MaskByteOffset = 1; break;
  case 0xFF0000: MaskByteOffset = 2; break;
  case 0xFF000000: MaskByteOffset = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  // NOTE(review): in each branch below the declaration of 'C' (presumably a
  // dyn_cast<ConstantSDNode> of the shift amount) was dropped by the doc
  // extractor — confirm against upstream.
  if (Opc == ISD::AND) {
    if (MaskByteOffset == 0 || MaskByteOffset == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (Opc0 != ISD::SRL)
        return false;
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (Opc0 != ISD::SHL)
        return false;
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (MaskByteOffset != 0 && MaskByteOffset != 2)
      return false;
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (MaskByteOffset != 1 && MaskByteOffset != 3)
      return false;
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each byte slot may only be claimed once across the four elements.
  if (Parts[MaskByteOffset])
    return false;

  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
  return true;
}
4247 
4248 /// Match a 32-bit packed halfword bswap. That is
4249 /// ((x & 0x000000ff) << 8) |
4250 /// ((x & 0x0000ff00) >> 8) |
4251 /// ((x & 0x00ff0000) << 8) |
4252 /// ((x & 0xff000000) >> 8)
4253 /// => (rotl (bswap x), 16)
4254 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4255  if (!LegalOperations)
4256  return SDValue();
4257 
4258  EVT VT = N->getValueType(0);
4259  if (VT != MVT::i32)
4260  return SDValue();
4261  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4262  return SDValue();
4263 
4264  // Look for either
4265  // (or (or (and), (and)), (or (and), (and)))
4266  // (or (or (or (and), (and)), (and)), (and))
4267  if (N0.getOpcode() != ISD::OR)
4268  return SDValue();
4269  SDValue N00 = N0.getOperand(0);
4270  SDValue N01 = N0.getOperand(1);
4271  SDNode *Parts[4] = {};
4272 
4273  if (N1.getOpcode() == ISD::OR &&
4274  N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4275  // (or (or (and), (and)), (or (and), (and)))
4276  if (!isBSwapHWordElement(N00, Parts))
4277  return SDValue();
4278 
4279  if (!isBSwapHWordElement(N01, Parts))
4280  return SDValue();
4281  SDValue N10 = N1.getOperand(0);
4282  if (!isBSwapHWordElement(N10, Parts))
4283  return SDValue();
4284  SDValue N11 = N1.getOperand(1);
4285  if (!isBSwapHWordElement(N11, Parts))
4286  return SDValue();
4287  } else {
4288  // (or (or (or (and), (and)), (and)), (and))
4289  if (!isBSwapHWordElement(N1, Parts))
4290  return SDValue();
4291  if (!isBSwapHWordElement(N01, Parts))
4292  return SDValue();
4293  if (N00.getOpcode() != ISD::OR)
4294  return SDValue();
4295  SDValue N000 = N00.getOperand(0);
4296  if (!isBSwapHWordElement(N000, Parts))
4297  return SDValue();
4298  SDValue N001 = N00.getOperand(1);
4299  if (!isBSwapHWordElement(N001, Parts))
4300  return SDValue();
4301  }
4302 
4303  // Make sure the parts are all coming from the same node.
4304  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4305  return SDValue();
4306 
4307  SDLoc DL(N);
4308  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4309  SDValue(Parts[0], 0));
4310 
4311  // Result of the bswap should be rotated by 16. If it's not legal, then
4312  // do (x << 16) | (x >> 16).
4313  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4314  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4315  return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4316  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4317  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4318  return DAG.getNode(ISD::OR, DL, VT,
4319  DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4320  DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4321 }
4322 
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
///
/// \param N0,N1 the two values being ORed.
/// \param N     the combining node.
/// \returns the replacement value, or an empty SDValue if no rule applied.
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
  EVT VT = N1.getValueType();
  SDLoc DL(N);

  // fold (or x, undef) -> -1
  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
    return DAG.getAllOnesConstant(DL, VT);

  // Shared setcc folds; IsAnd=false selects the OR-flavored patterns.
  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
    return V;

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    // NOTE(review): the initializers of N0O1C and N1O1C — presumably
    // constant queries on N0.getOperand(1) / N1.getOperand(1) — were dropped
    // by the doc extractor; confirm against upstream before compiling.
    if (const ConstantSDNode *N0O1C =
      if (const ConstantSDNode *N1O1C =
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero. Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
  }

  return SDValue();
}
4375 
4376 SDValue DAGCombiner::visitOR(SDNode *N) {
  // Top-level combine for ISD::OR. Returns the replacement value,
  // SDValue(N, 0) when N was updated in place (demanded-bits path), or an
  // empty SDValue when no combine applied.
4377  SDValue N0 = N->getOperand(0);
4378  SDValue N1 = N->getOperand(1);
4379  EVT VT = N1.getValueType();
4380 
4381  // x | x --> x
4382  if (N0 == N1)
4383  return N0;
4384 
4385  // fold vector ops
4386  if (VT.isVector()) {
4387  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4388  return FoldedVOp;
4389 
4390  // fold (or x, 0) -> x, vector edition
  // NOTE(review): the all-zeros build-vector guards for the next two
  // returns are missing from this listing — confirm upstream.
4392  return N1;
4394  return N0;
4395 
4396  // fold (or x, -1) -> -1, vector edition
  // NOTE(review): likewise, the all-ones build-vector guards for the next
  // two returns are missing from this listing.
4398  // do not return N0, because undef node may exist in N0
4399  return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4401  // do not return N1, because undef node may exist in N1
4402  return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4403 
4404  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4405  // Do this only if the resulting shuffle is legal.
4406  if (isa<ShuffleVectorSDNode>(N0) &&
4407  isa<ShuffleVectorSDNode>(N1) &&
4408  // Avoid folding a node with illegal type.
4409  TLI.isTypeLegal(VT)) {
  // ZeroNxy records whether operand y of shuffle x is an all-zeros vector.
4410  bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4411  bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4412  bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4413  bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4414  // Ensure both shuffles have a zero input.
4415  if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4416  assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4417  assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4418  const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4419  const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4420  bool CanFold = true;
4421  int NumElts = VT.getVectorNumElements();
4422  SmallVector<int, 4> Mask(NumElts);
4423 
  // Build the combined mask element-by-element; each lane must draw its
  // value from exactly one of the two non-zero shuffle inputs.
4424  for (int i = 0; i != NumElts; ++i) {
4425  int M0 = SV0->getMaskElt(i);
4426  int M1 = SV1->getMaskElt(i);
4427 
4428  // Determine if either index is pointing to a zero vector.
4429  bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4430  bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4431 
4432  // If one element is zero and the otherside is undef, keep undef.
4433  // This also handles the case that both are undef.
4434  if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4435  Mask[i] = -1;
4436  continue;
4437  }
4438 
4439  // Make sure only one of the elements is zero.
4440  if (M0Zero == M1Zero) {
4441  CanFold = false;
4442  break;
4443  }
4444 
4445  assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4446 
4447  // We have a zero and non-zero element. If the non-zero came from
4448  // SV0 make the index a LHS index. If it came from SV1, make it
4449  // a RHS index. We need to mod by NumElts because we don't care
4450  // which operand it came from in the original shuffles.
4451  Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4452  }
4453 
4454  if (CanFold) {
  // Pick each shuffle's non-zero input as the new shuffle operands.
4455  SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4456  SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4457 
4458  bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4459  if (!LegalMask) {
  // Retry with the operands commuted.
  // NOTE(review): the line commuting Mask to match the swapped operands
  // is missing from this listing — confirm upstream.
4460  std::swap(NewLHS, NewRHS);
4462  LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4463  }
4464 
4465  if (LegalMask)
4466  return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4467  }
4468  }
4469  }
4470  }
4471 
4472  // fold (or c1, c2) -> c1|c2
  // NOTE(review): the declarations extracting N0C/N1C as ConstantSDNodes
  // from N0/N1 are missing from this listing.
4475  if (N0C && N1C && !N1C->isOpaque())
4476  return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4477  // canonicalize constant to RHS
  // NOTE(review): the guard (constant LHS, non-constant RHS) for this
  // canonicalization is missing from this listing.
4480  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4481  // fold (or x, 0) -> x
4482  if (isNullConstant(N1))
4483  return N0;
4484  // fold (or x, -1) -> -1
4485  if (isAllOnesConstant(N1))
4486  return N1;
4487 
4488  if (SDValue NewSel = foldBinOpIntoSelect(N))
4489  return NewSel;
4490 
4491  // fold (or x, c) -> c iff (x & ~c) == 0
4492  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4493  return N1;
4494 
4495  if (SDValue Combined = visitORLike(N0, N1, N))
4496  return Combined;
4497 
4498  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4499  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4500  return BSwap;
4501  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4502  return BSwap;
4503 
4504  // reassociate or
4505  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4506  return ROR;
4507 
4508  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4509  // iff (c1 & c2) != 0.
4510  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse()) {
4511  if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4512  if (C1->getAPIntValue().intersects(N1C->getAPIntValue())) {
4513  if (SDValue COR =
4514  DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, N1C, C1))
4515  return DAG.getNode(
4516  ISD::AND, SDLoc(N), VT,
4517  DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
4518  return SDValue();
4519  }
4520  }
4521  }
4522 
4523  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
4524  if (N0.getOpcode() == N1.getOpcode())
4525  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4526  return Tmp;
4527 
4528  // See if this is some rotate idiom.
4529  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4530  return SDValue(Rot, 0);
4531 
  // Presumably merges an OR tree of individually loaded bytes into one wide
  // load — see the ByteProvider machinery below.
4532  if (SDValue Load = MatchLoadCombine(N))
4533  return Load;
4534 
4535  // Simplify the operands using demanded-bits information.
4536  if (SimplifyDemandedBits(SDValue(N, 0)))
4537  return SDValue(N, 0);
4538 
4539  return SDValue();
4540 }
4541 
4542 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
  /// On success, \p Shift receives the shl/srl node and \p Mask receives the
  /// AND mask operand (Mask is left untouched when there is no AND).
4543 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4544  if (Op.getOpcode() == ISD::AND) {
  // NOTE(review): an inner guard on the AND's mask operand is missing from
  // this listing (note the unbalanced closing brace below) — confirm
  // against the upstream source.
4546  Mask = Op.getOperand(1);
4547  Op = Op.getOperand(0);
4548  } else {
4549  return false;
4550  }
4551  }
4552 
  // After stripping the optional AND, the remaining node must be a shift.
4553  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4554  Shift = Op;
4555  return true;
4556  }
4557 
4558  return false;
4559 }
4560 
4561 // Return true if we can prove that, whenever Neg and Pos are both in the
4562 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
4563 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4564 //
4565 // (or (shift1 X, Neg), (shift2 X, Pos))
4566 //
4567 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4568 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
4569 // to consider shift amounts with defined behavior.
4570 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4571  // If EltSize is a power of 2 then:
4572  //
4573  // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4574  // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4575  //
4576  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4577  // for the stronger condition:
4578  //
4579  // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
4580  //
4581  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4582  // we can just replace Neg with Neg' for the rest of the function.
4583  //
4584  // In other cases we check for the even stronger condition:
4585  //
4586  // Neg == EltSize - Pos [B]
4587  //
4588  // for all Neg and Pos. Note that the (or ...) then invokes undefined
4589  // behavior if Pos == 0 (and consequently Neg == EltSize).
4590  //
4591  // We could actually use [A] whenever EltSize is a power of 2, but the
4592  // only extra cases that it would match are those uninteresting ones
4593  // where Neg and Pos are never in range at the same time. E.g. for
4594  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4595  // as well as (sub 32, Pos), but:
4596  //
4597  // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4598  //
4599  // always invokes undefined behavior for 32-bit X.
4600  //
4601  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  // MaskLoBits == 0 encodes "no mask" (condition [B]); a non-zero value is
  // log2(EltSize) and selects condition [A].
4602  unsigned MaskLoBits = 0;
4603  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4604  if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4605  if (NegC->getAPIntValue() == EltSize - 1) {
4606  Neg = Neg.getOperand(0);
4607  MaskLoBits = Log2_64(EltSize);
4608  }
4609  }
4610  }
4611 
4612  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4613  if (Neg.getOpcode() != ISD::SUB)
4614  return false;
  // NOTE(review): the initializer of NegC — a constant-or-splat match of
  // Neg.getOperand(0) — is missing from this listing; confirm upstream.
4616  if (!NegC)
4617  return false;
4618  SDValue NegOp1 = Neg.getOperand(1);
4619 
4620  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4621  // Pos'. The truncation is redundant for the purpose of the equality.
4622  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4623  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4624  if (PosC->getAPIntValue() == EltSize - 1)
4625  Pos = Pos.getOperand(0);
4626 
4627  // The condition we need is now:
4628  //
4629  // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4630  //
4631  // If NegOp1 == Pos then we need:
4632  //
4633  // EltSize & Mask == NegC & Mask
4634  //
4635  // (because "x & Mask" is a truncation and distributes through subtraction).
4636  APInt Width;
4637  if (Pos == NegOp1)
4638  Width = NegC->getAPIntValue();
4639 
4640  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4641  // Then the condition we want to prove becomes:
4642  //
4643  // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4644  //
4645  // which, again because "x & Mask" is a truncation, becomes:
4646  //
4647  // NegC & Mask == (EltSize - PosC) & Mask
4648  // EltSize & Mask == (NegC + PosC) & Mask
4649  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4650  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4651  Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4652  else
4653  return false;
4654  } else
4655  return false;
4656 
4657  // Now we just need to check that EltSize & Mask == Width & Mask.
4658  if (MaskLoBits)
4659  // EltSize & Mask is 0 since Mask is EltSize - 1.
4660  return Width.getLoBits(MaskLoBits) == 0;
4661  return Width == EltSize;
4662 }
4663 
4664 // A subroutine of MatchRotate used once we have found an OR of two opposite
4665 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
4666 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4667 // former being preferred if supported. InnerPos and InnerNeg are Pos and
4668 // Neg with outer conversions stripped away.
4669 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4670  SDValue Neg, SDValue InnerPos,
4671  SDValue InnerNeg, unsigned PosOpcode,
4672  unsigned NegOpcode, const SDLoc &DL) {
4673  // fold (or (shl x, (*ext y)),
4674  // (srl x, (*ext (sub 32, y)))) ->
4675  // (rotl x, y) or (rotr x, (sub 32, y))
4676  //
4677  // fold (or (shl x, (*ext (sub 32, y))),
4678  // (srl x, (*ext y))) ->
4679  // (rotr x, y) or (rotl x, (sub 32, y))
4680  EVT VT = Shifted.getValueType();
4681  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4682  bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4683  return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4684  HasPos ? Pos : Neg).getNode();
4685  }
4686 
4687  return nullptr;
4688 }
4689 
4690 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
4691 // idioms for rotate, and if the target supports rotation instructions, generate
4692 // a rot[lr].
4693 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Returns the rotate node (possibly wrapped in an AND that re-applies the
  // matched masks) on success, or nullptr if no rotate idiom matched.
4694  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
4695  EVT VT = LHS.getValueType();
4696  if (!TLI.isTypeLegal(VT)) return nullptr;
4697 
4698  // The target must have at least one rotate flavor.
4699  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4700  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4701  if (!HasROTL && !HasROTR) return nullptr;
4702 
4703  // Match "(X shl/srl V1) & V2" where V2 may not be present.
4704  SDValue LHSShift; // The shift.
4705  SDValue LHSMask; // AND value if any.
4706  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4707  return nullptr; // Not part of a rotate.
4708 
4709  SDValue RHSShift; // The shift.
4710  SDValue RHSMask; // AND value if any.
4711  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4712  return nullptr; // Not part of a rotate.
4713 
4714  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4715  return nullptr; // Not shifting the same value.
4716 
4717  if (LHSShift.getOpcode() == RHSShift.getOpcode())
4718  return nullptr; // Shifts must disagree.
4719 
4720  // Canonicalize shl to left side in a shl/srl pair.
4721  if (RHSShift.getOpcode() == ISD::SHL) {
4722  std::swap(LHS, RHS);
4723  std::swap(LHSShift, RHSShift);
4724  std::swap(LHSMask, RHSMask);
4725  }
4726 
4727  unsigned EltSizeInBits = VT.getScalarSizeInBits();
4728  SDValue LHSShiftArg = LHSShift.getOperand(0);
4729  SDValue LHSShiftAmt = LHSShift.getOperand(1);
4730  SDValue RHSShiftArg = RHSShift.getOperand(0);
4731  SDValue RHSShiftAmt = RHSShift.getOperand(1);
4732 
4733  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4734  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // Constant shift amounts that sum to the element size form a rotate:
  // by the shl amount (rotl) or the srl amount (rotr).
4735  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
4736  ConstantSDNode *RHS) {
4737  return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
4738  };
4739  if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
4740  SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4741  LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4742 
4743  // If there is an AND of either shifted operand, apply it to the result.
4744  if (LHSMask.getNode() || RHSMask.getNode()) {
4745  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4746  SDValue Mask = AllOnes;
4747 
  // A mask on one half must also keep the bits rotated in from the other
  // half: LHSMask | (AllOnes >> RHSShiftAmt), and symmetrically below.
4748  if (LHSMask.getNode()) {
4749  SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
4750  Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4751  DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
4752  }
4753  if (RHSMask.getNode()) {
4754  SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
4755  Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4756  DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
4757  }
4758 
4759  Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4760  }
4761 
4762  return Rot.getNode();
4763  }
4764 
4765  // If there is a mask here, and we have a variable shift, we can't be sure
4766  // that we're masking out the right stuff.
4767  if (LHSMask.getNode() || RHSMask.getNode())
4768  return nullptr;
4769 
4770  // If the shift amount is sign/zext/any-extended just peel it off.
4771  SDValue LExtOp0 = LHSShiftAmt;
4772  SDValue RExtOp0 = RHSShiftAmt;
4773  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4774  LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4775  LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4776  LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4777  (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4778  RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4779  RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4780  RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4781  LExtOp0 = LHSShiftAmt.getOperand(0);
4782  RExtOp0 = RHSShiftAmt.getOperand(0);
4783  }
4784 
  // Variable amounts: try to prove the srl amount is the negation of the
  // shl amount (rotl direction), then the reverse (rotr direction).
4785  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4786  LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4787  if (TryL)
4788  return TryL;
4789 
4790  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4791  RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4792  if (TryR)
4793  return TryR;
4794 
4795  return nullptr;
4796 }
4797 
4798 namespace {
4799 
4800 /// Represents known origin of an individual byte in load combine pattern. The
4801 /// value of the byte is either constant zero or comes from memory.
4802 struct ByteProvider {
4803  // For constant zero providers Load is set to nullptr. For memory providers
4804  // Load represents the node which loads the byte from memory.
4805  // ByteOffset is the offset of the byte in the value produced by the load.
4806  LoadSDNode *Load = nullptr;
4807  unsigned ByteOffset = 0;
4808 
4809  ByteProvider() = default;
4810 
4811  static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
4812  return ByteProvider(Load, ByteOffset);
4813  }
4814 
4815  static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
4816 
4817  bool isConstantZero() const { return !Load; }
4818  bool isMemory() const { return Load; }
4819 
4820  bool operator==(const ByteProvider &Other) const {
4821  return Other.Load == Load && Other.ByteOffset == ByteOffset;
4822  }
4823 
4824 private:
4825  ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
4826  : Load(Load), ByteOffset(ByteOffset) {}
4827 };
4828 
4829 } // end anonymous namespace
4830 
4831 /// Recursively traverses the expression calculating the origin of the requested
4832 /// byte of the given value. Returns None if the provider can't be calculated.
4833 ///
4834 /// For all the values except the root of the expression verifies that the value
4835 /// has exactly one use and if it's not true return None. This way if the origin
4836 /// of the byte is returned it's guaranteed that the values which contribute to
4837 /// the byte are not used outside of this expression.
4838 ///
4839 /// Because the parts of the expression are not allowed to have more than one
4840 /// use this function iterates over trees, not DAGs. So it never visits the same
4841 /// node more than once.
4842 static const Optional<ByteProvider>
4843 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
4844  bool Root = false) {
4845  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
4846  if (Depth == 10)
4847  return None;
4848 
4849  if (!Root && !Op.hasOneUse())
4850  return None;
4851 
4852  assert(Op.getValueType().isScalarInteger() && "can't handle other types");
4853  unsigned BitWidth = Op.getValueSizeInBits();
4854  if (BitWidth % 8 != 0)
4855  return None;
4856  unsigned ByteWidth = BitWidth / 8;
4857  assert(Index < ByteWidth && "invalid index requested");
4858  (void) ByteWidth;
4859 
4860  switch (Op.getOpcode()) {
4861  case ISD::OR: {
4862  auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
4863  if (!LHS)
4864  return None;
4865  auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
4866  if (!RHS)
4867  return None;
4868 
4869  if (LHS->isConstantZero())
4870  return RHS;
4871  if (RHS->isConstantZero())
4872  return LHS;
4873  return None;
4874  }
4875  case ISD::SHL: {
4876  auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
4877  if (!ShiftOp)
4878  return None;
4879 
4880  uint64_t BitShift = ShiftOp->getZExtValue();
4881  if (BitShift % 8 != 0)
4882  return None;
4883  uint64_t ByteShift = BitShift / 8;
4884 
4885  return Index < ByteShift
4886  ? ByteProvider::getConstantZero()
4887  : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
4888  Depth + 1);
4889  }
4890  case ISD::ANY_EXTEND:
4891  case ISD::SIGN_EXTEND:
4892  case ISD::ZERO_EXTEND: {
4893  SDValue NarrowOp = Op->getOperand(0);
4894  unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
4895  if (NarrowBitWidth % 8 != 0)
4896  return None;
4897  uint64_t NarrowByteWidth = NarrowBitWidth / 8;
4898 
4899  if (Index >= NarrowByteWidth)
4900  return Op.getOpcode() == ISD::ZERO_EXTEND
4901  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
4902  : None;
4903  return calculateByteProvider(NarrowOp, Index, Depth + 1);
4904  }
4905  case ISD::BSWAP:
4906  return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
4907  Depth + 1);
4908  case ISD::LOAD: {