LLVM  7.0.0svn
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
49 #include "llvm/IR/Attributes.h"
50 #include "llvm/IR/Constant.h"
51 #include "llvm/IR/DataLayout.h"
52 #include "llvm/IR/DerivedTypes.h"
53 #include "llvm/IR/Function.h"
54 #include "llvm/IR/LLVMContext.h"
55 #include "llvm/IR/Metadata.h"
56 #include "llvm/Support/Casting.h"
57 #include "llvm/Support/CodeGen.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
62 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 #include <vector>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
// Pass-level statistics counters, reported with -stats.
81 STATISTIC(NodesCombined , "Number of dag nodes combined");
82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86 STATISTIC(SlicedLoads, "Number of load sliced");
87 
// Command-line knobs controlling the combiner's alias-analysis behavior.
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
// NOTE(review): the declarator line for this option (original line 97,
// presumably 'static cl::opt<std::string>') was dropped in this copy --
// confirm against the upstream file.
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
114 namespace {
115 
// NOTE(review): this copy is a doxygen scrape -- every line carries an
// embedded source line number, and the gaps in that numbering mark lines
// dropped in transit (flagged individually below). Kept byte-identical;
// only comments added.
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
// NOTE(review): a member declaration (original line 119, presumably
// 'CombineLevel Level;') is missing here -- the constructor below
// initializes Level(BeforeLegalizeTypes) and Run() takes a CombineLevel.
// TODO confirm against upstream.
120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// \brief Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// \brief Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140 
141  /// \brief Set of nodes which have been combined (at least once).
142  ///
143  /// This is used to allow us to reliably add any operands of a DAG node
144  /// which have not yet been combined to the worklist.
145  SmallPtrSet<SDNode *, 32> CombinedNodes;
146 
147  // AA - Used for DAG load/store alias analysis.
148  AliasAnalysis *AA;
149 
150  /// When an instruction is simplified, add all users of the instruction to
151  /// the work lists because they might get more simplified now.
152  void AddUsersToWorklist(SDNode *N) {
153  for (SDNode *Node : N->uses())
154  AddToWorklist(Node);
155  }
156 
157  /// Call the node-specific routine that folds each particular type of node.
158  SDValue visit(SDNode *N);
159 
160  public:
// Constructor: caches the target lowering info, derives ForCodeSize from
// the function's optsize attribute, and precomputes the widest legal
// store width by scanning every simple legal MVT.
161  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163  OptLevel(OL), AA(AA) {
164  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165 
166  MaximumLegalStoreInBits = 0;
167  for (MVT VT : MVT::all_valuetypes())
168  if (EVT(VT).isSimple() && VT != MVT::Other &&
169  TLI.isTypeLegal(EVT(VT)) &&
170  VT.getSizeInBits() >= MaximumLegalStoreInBits)
171  MaximumLegalStoreInBits = VT.getSizeInBits();
172  }
173 
174  /// Add to the worklist making sure its instance is at the back (next to be
175  /// processed.)
176  void AddToWorklist(SDNode *N) {
// NOTE(review): the first line of an assert (original line 177) was
// dropped in this copy -- only the message continuation below survived.
// Presumably it asserted N's opcode is not ISD::DELETED_NODE; confirm
// against upstream.
178  "Deleted Node added to Worklist");
179 
180  // Skip handle nodes as they can't usefully be combined and confuse the
181  // zero-use deletion strategy.
182  if (N->getOpcode() == ISD::HANDLENODE)
183  return;
184 
// Insert into the map first; only push onto the worklist if N was not
// already present (the map's insert().second tells us).
185  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186  Worklist.push_back(N);
187  }
188 
189  /// Remove all instances of N from the worklist.
190  void removeFromWorklist(SDNode *N) {
191  CombinedNodes.erase(N);
192 
193  auto It = WorklistMap.find(N);
194  if (It == WorklistMap.end())
195  return; // Not in the worklist.
196 
197  // Null out the entry rather than erasing it to avoid a linear operation.
198  Worklist[It->second] = nullptr;
199  WorklistMap.erase(It);
200  }
201 
202  void deleteAndRecombine(SDNode *N);
203  bool recursivelyDeleteUnusedNodes(SDNode *N);
204 
205  /// Replaces all uses of the results of one DAG node with new values.
206  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207  bool AddTo = true);
208 
209  /// Replaces all uses of the results of one DAG node with new values.
210  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211  return CombineTo(N, &Res, 1, AddTo);
212  }
213 
214  /// Replaces all uses of the results of one DAG node with new values.
215  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216  bool AddTo = true) {
217  SDValue To[] = { Res0, Res1 };
218  return CombineTo(N, To, 2, AddTo);
219  }
220 
221  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222 
223  private:
// Widest store width (in bits) that is legal for the target; computed
// once in the constructor above.
224  unsigned MaximumLegalStoreInBits;
225 
226  /// Check the specified integer node value to see if it can be simplified or
227  /// if things it uses can be simplified by bit propagation.
228  /// If so, return true.
229  bool SimplifyDemandedBits(SDValue Op) {
230  unsigned BitWidth = Op.getScalarValueSizeInBits();
231  APInt Demanded = APInt::getAllOnesValue(BitWidth);
232  return SimplifyDemandedBits(Op, Demanded);
233  }
234 
235  /// Check the specified vector node value to see if it can be simplified or
236  /// if things it uses can be simplified as it only uses some of the
237  /// elements. If so, return true.
238  bool SimplifyDemandedVectorElts(SDValue Op) {
239  unsigned NumElts = Op.getValueType().getVectorNumElements();
240  APInt Demanded = APInt::getAllOnesValue(NumElts);
241  return SimplifyDemandedVectorElts(Op, Demanded);
242  }
243 
244  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
245  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
246 
247  bool CombineToPreIndexedLoadStore(SDNode *N);
248  bool CombineToPostIndexedLoadStore(SDNode *N);
249  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
250  bool SliceUpLoad(SDNode *N);
251 
252  /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
253  /// load.
254  ///
255  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
256  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
257  /// \param EltNo index of the vector element to load.
258  /// \param OriginalLoad load that EVE came from to be replaced.
259  /// \returns EVE on success SDValue() on failure.
260  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
261  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
262  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
263  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
264  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
265  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
266  SDValue PromoteIntBinOp(SDValue Op);
267  SDValue PromoteIntShiftOp(SDValue Op);
268  SDValue PromoteExtend(SDValue Op);
269  bool PromoteLoad(SDValue Op);
270 
271  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
272  SDValue OrigLoad, SDValue ExtLoad,
273  const SDLoc &DL,
274  ISD::NodeType ExtType);
275 
276  /// Call the node-specific routine that knows how to fold each
277  /// particular type of node. If that doesn't do anything, try the
278  /// target-specific DAG combines.
279  SDValue combine(SDNode *N);
280 
281  // Visitation implementation - Implement dag node combining for different
282  // node types. The semantics are as follows:
283  // Return Value:
284  // SDValue.getNode() == 0 - No change was made
285  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
286  // otherwise - N should be replaced by the returned Operand.
287  //
288  SDValue visitTokenFactor(SDNode *N);
289  SDValue visitMERGE_VALUES(SDNode *N);
290  SDValue visitADD(SDNode *N);
291  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
292  SDValue visitSUB(SDNode *N);
293  SDValue visitADDC(SDNode *N);
294  SDValue visitUADDO(SDNode *N);
295  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
296  SDValue visitSUBC(SDNode *N);
297  SDValue visitUSUBO(SDNode *N);
298  SDValue visitADDE(SDNode *N);
299  SDValue visitADDCARRY(SDNode *N);
300  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
301  SDValue visitSUBE(SDNode *N);
302  SDValue visitSUBCARRY(SDNode *N);
303  SDValue visitMUL(SDNode *N);
304  SDValue useDivRem(SDNode *N);
305  SDValue visitSDIV(SDNode *N);
306  SDValue visitUDIV(SDNode *N);
307  SDValue visitREM(SDNode *N);
308  SDValue visitMULHU(SDNode *N);
309  SDValue visitMULHS(SDNode *N);
310  SDValue visitSMUL_LOHI(SDNode *N);
311  SDValue visitUMUL_LOHI(SDNode *N);
312  SDValue visitSMULO(SDNode *N);
313  SDValue visitUMULO(SDNode *N);
314  SDValue visitIMINMAX(SDNode *N);
315  SDValue visitAND(SDNode *N);
316  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
317  SDValue visitOR(SDNode *N);
318  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
319  SDValue visitXOR(SDNode *N);
320  SDValue SimplifyVBinOp(SDNode *N);
321  SDValue visitSHL(SDNode *N);
322  SDValue visitSRA(SDNode *N);
323  SDValue visitSRL(SDNode *N);
324  SDValue visitRotate(SDNode *N);
325  SDValue visitABS(SDNode *N);
326  SDValue visitBSWAP(SDNode *N);
327  SDValue visitBITREVERSE(SDNode *N);
328  SDValue visitCTLZ(SDNode *N);
329  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
330  SDValue visitCTTZ(SDNode *N);
331  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
332  SDValue visitCTPOP(SDNode *N);
333  SDValue visitSELECT(SDNode *N);
334  SDValue visitVSELECT(SDNode *N);
335  SDValue visitSELECT_CC(SDNode *N);
336  SDValue visitSETCC(SDNode *N);
337  SDValue visitSETCCE(SDNode *N);
338  SDValue visitSETCCCARRY(SDNode *N);
339  SDValue visitSIGN_EXTEND(SDNode *N);
340  SDValue visitZERO_EXTEND(SDNode *N);
341  SDValue visitANY_EXTEND(SDNode *N);
342  SDValue visitAssertExt(SDNode *N);
343  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
344  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
345  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
346  SDValue visitTRUNCATE(SDNode *N);
347  SDValue visitBITCAST(SDNode *N);
348  SDValue visitBUILD_PAIR(SDNode *N);
349  SDValue visitFADD(SDNode *N);
350  SDValue visitFSUB(SDNode *N);
351  SDValue visitFMUL(SDNode *N);
352  SDValue visitFMA(SDNode *N);
353  SDValue visitFDIV(SDNode *N);
354  SDValue visitFREM(SDNode *N);
355  SDValue visitFSQRT(SDNode *N);
356  SDValue visitFCOPYSIGN(SDNode *N);
357  SDValue visitSINT_TO_FP(SDNode *N);
358  SDValue visitUINT_TO_FP(SDNode *N);
359  SDValue visitFP_TO_SINT(SDNode *N);
360  SDValue visitFP_TO_UINT(SDNode *N);
361  SDValue visitFP_ROUND(SDNode *N);
362  SDValue visitFP_ROUND_INREG(SDNode *N);
363  SDValue visitFP_EXTEND(SDNode *N);
364  SDValue visitFNEG(SDNode *N);
365  SDValue visitFABS(SDNode *N);
366  SDValue visitFCEIL(SDNode *N);
367  SDValue visitFTRUNC(SDNode *N);
368  SDValue visitFFLOOR(SDNode *N);
369  SDValue visitFMINNUM(SDNode *N);
370  SDValue visitFMAXNUM(SDNode *N);
371  SDValue visitBRCOND(SDNode *N);
372  SDValue visitBR_CC(SDNode *N);
373  SDValue visitLOAD(SDNode *N);
374 
375  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
376  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
377 
378  SDValue visitSTORE(SDNode *N);
379  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
380  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
381  SDValue visitBUILD_VECTOR(SDNode *N);
382  SDValue visitCONCAT_VECTORS(SDNode *N);
383  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
384  SDValue visitVECTOR_SHUFFLE(SDNode *N);
385  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
386  SDValue visitINSERT_SUBVECTOR(SDNode *N);
387  SDValue visitMLOAD(SDNode *N);
388  SDValue visitMSTORE(SDNode *N);
389  SDValue visitMGATHER(SDNode *N);
390  SDValue visitMSCATTER(SDNode *N);
391  SDValue visitFP_TO_FP16(SDNode *N);
392  SDValue visitFP16_TO_FP(SDNode *N);
393 
394  SDValue visitFADDForFMACombine(SDNode *N);
395  SDValue visitFSUBForFMACombine(SDNode *N);
396  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
397 
398  SDValue XformToShuffleWithZero(SDNode *N);
399  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
400  SDValue RHS);
401 
402  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
403 
404  SDValue foldSelectOfConstants(SDNode *N);
405  SDValue foldVSelectOfConstants(SDNode *N);
406  SDValue foldBinOpIntoSelect(SDNode *BO);
407  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
408  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
409  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
410  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
411  SDValue N2, SDValue N3, ISD::CondCode CC,
412  bool NotExtCompare = false);
413  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
414  SDValue N2, SDValue N3, ISD::CondCode CC);
415  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
416  const SDLoc &DL);
417  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
418  const SDLoc &DL, bool foldBooleans = true);
419 
420  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
421  SDValue &CC) const;
422  bool isOneUseSetCC(SDValue N) const;
423 
424  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
425  unsigned HiOp);
426  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
427  SDValue CombineExtLoad(SDNode *N);
428  SDValue combineRepeatedFPDivisors(SDNode *N);
429  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
430  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
431  SDValue BuildSDIV(SDNode *N);
432  SDValue BuildSDIVPow2(SDNode *N);
433  SDValue BuildUDIV(SDNode *N);
434  SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
435  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
436  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
437  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
438  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
439  SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
440  SDNodeFlags Flags, bool Reciprocal);
441  SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
442  SDNodeFlags Flags, bool Reciprocal);
443  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
444  bool DemandHighBits = true);
445  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
446  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
447  SDValue InnerPos, SDValue InnerNeg,
448  unsigned PosOpcode, unsigned NegOpcode,
449  const SDLoc &DL);
450  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
451  SDValue MatchLoadCombine(SDNode *N);
452  SDValue ReduceLoadWidth(SDNode *N);
453  SDValue ReduceLoadOpStoreWidth(SDNode *N);
// NOTE(review): a declaration (original line 454) is missing between the
// two lines above/below in this copy.
455  SDValue TransformFPLoadStorePair(SDNode *N);
456  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
457  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
458  SDValue reduceBuildVecToShuffle(SDNode *N);
459  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
460  ArrayRef<int> VectorMask, SDValue VecIn1,
461  SDValue VecIn2, unsigned LeftIdx);
462  SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
463 
464  /// Walk up chain skipping non-aliasing memory nodes,
465  /// looking for aliasing nodes and adding them to the Aliases vector.
466  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
467  SmallVectorImpl<SDValue> &Aliases);
468 
469  /// Return true if there is any possibility that the two addresses overlap.
470  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
471 
472  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
473  /// chain (aliasing node.)
474  SDValue FindBetterChain(SDNode *N, SDValue Chain);
475 
476  /// Try to replace a store and any possibly adjacent stores on
477  /// consecutive chains with better chains. Return true only if St is
478  /// replaced.
479  ///
480  /// Notice that other chains may still be replaced even if the function
481  /// returns false.
482  bool findBetterNeighborChains(StoreSDNode *St);
483 
484  /// Match "(X shl/srl V1) & V2" where V2 may not be present.
485  bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
486 
487  /// Holds a pointer to an LSBaseSDNode as well as information on where it
488  /// is located in a sequence of memory operations connected by a chain.
489  struct MemOpLink {
490  // Ptr to the mem node.
491  LSBaseSDNode *MemNode;
492 
493  // Offset from the base ptr.
494  int64_t OffsetFromBase;
495 
496  MemOpLink(LSBaseSDNode *N, int64_t Offset)
497  : MemNode(N), OffsetFromBase(Offset) {}
498  };
499 
500  /// This is a helper function for visitMUL to check the profitability
501  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
502  /// MulNode is the original multiply, AddNode is (add x, c1),
503  /// and ConstNode is c2.
504  bool isMulAddWithConstProfitable(SDNode *MulNode,
505  SDValue &AddNode,
506  SDValue &ConstNode);
507 
508  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
509  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
510  /// the type of the loaded value to be extended.
511  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
512  EVT LoadResultTy, EVT &ExtVT);
513 
514  /// Helper function to calculate whether the given Load can have its
515  /// width reduced to ExtVT.
516  bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
517  EVT &ExtVT, unsigned ShAmt = 0);
518 
519  /// Used by BackwardsPropagateMask to find suitable loads.
520  bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
521  SmallPtrSetImpl<SDNode*> &NodeWithConsts,
522  ConstantSDNode *Mask, SDNode *&UncombinedNode);
523  /// Attempt to propagate a given AND node back to load leaves so that they
524  /// can be combined into narrow loads.
525  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
526 
527  /// Helper function for MergeConsecutiveStores which merges the
528  /// component store chains.
529  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
530  unsigned NumStores);
531 
532  /// This is a helper function for MergeConsecutiveStores. When the
533  /// source elements of the consecutive stores are all constants or
534  /// all extracted vector elements, try to merge them into one
535  /// larger store introducing bitcasts if necessary. \return True
536  /// if a merged store was created.
537  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
538  EVT MemVT, unsigned NumStores,
539  bool IsConstantSrc, bool UseVector,
540  bool UseTrunc);
541 
542  /// This is a helper function for MergeConsecutiveStores. Stores
543  /// that potentially may be merged with St are placed in
544  /// StoreNodes.
545  void getStoreMergeCandidates(StoreSDNode *St,
546  SmallVectorImpl<MemOpLink> &StoreNodes);
547 
548  /// Helper function for MergeConsecutiveStores. Checks if
549  /// candidate stores have indirect dependency through their
550  /// operands. \return True if safe to merge.
551  bool checkMergeStoreCandidatesForDependencies(
552  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
553 
554  /// Merge consecutive store operations into a wide store.
555  /// This optimization uses wide integers or vectors when possible.
556  /// \return number of stores that were merged into a merged store (the
557  /// affected nodes are stored as a prefix in \p StoreNodes).
558  bool MergeConsecutiveStores(StoreSDNode *N);
559 
560  /// \brief Try to transform a truncation where C is a constant:
561  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
562  ///
563  /// \p N needs to be a truncation and its first operand an AND. Other
564  /// requirements are checked by the function (e.g. that trunc is
565  /// single-use) and if missed an empty SDValue is returned.
566  SDValue distributeTruncateThroughAnd(SDNode *N);
567 
568  public:
569  /// Runs the dag combiner on all nodes in the work list
570  void Run(CombineLevel AtLevel);
571 
572  SelectionDAG &getDAG() const { return DAG; }
573 
574  /// Returns a type large enough to hold any valid shift amount - before type
575  /// legalization these can be huge.
576  EVT getShiftAmountTy(EVT LHSTy) {
577  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
578  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
579  }
580 
581  /// This method returns true if we are running before type legalization or
582  /// if the specified VT is legal.
583  bool isTypeLegal(const EVT &VT) {
584  if (!LegalTypes) return true;
585  return TLI.isTypeLegal(VT);
586  }
587 
588  /// Convenience wrapper around TargetLowering::getSetCCResultType
589  EVT getSetCCResultType(EVT VT) const {
590  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
591  }
592  };
593 
594 /// This class is a DAGUpdateListener that removes any deleted
595 /// nodes from the worklist.
596 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
597  DAGCombiner &DC;
598 
599 public:
600  explicit WorklistRemover(DAGCombiner &dc)
601  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
602 
603  void NodeDeleted(SDNode *N, SDNode *E) override {
604  DC.removeFromWorklist(N);
605  }
606 };
607 
608 } // end anonymous namespace
609 
610 //===----------------------------------------------------------------------===//
611 // TargetLowering::DAGCombinerInfo implementation
612 //===----------------------------------------------------------------------===//
613 
// NOTE(review): the signature lines of all four TargetLowering::
// DAGCombinerInfo member definitions below (original lines 614, 618, 623,
// 628 and 633-634) were dropped by the scrape -- only the bodies remain.
// Each body forwards to the DAGCombiner instance hidden behind the opaque
// 'DC' pointer. Confirm the exact signatures against the upstream file.
615  ((DAGCombiner*)DC)->AddToWorklist(N);
616 }
617 
619 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
620  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
621 }
622 
624 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
625  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
626 }
627 
629 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
630  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
631 }
632 
635  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
636 }
637 
638 //===----------------------------------------------------------------------===//
639 // Helper Functions
640 //===----------------------------------------------------------------------===//
641 
642 void DAGCombiner::deleteAndRecombine(SDNode *N) {
643  removeFromWorklist(N);
644 
645  // If the operands of this node are only used by the node, they will now be
646  // dead. Make sure to re-visit them and recursively delete dead nodes.
647  for (const SDValue &Op : N->ops())
648  // For an operand generating multiple values, one of the values may
649  // become dead allowing further simplification (e.g. split index
650  // arithmetic from an indexed load).
651  if (Op->hasOneUse() || Op->getNumValues() > 1)
652  AddToWorklist(Op.getNode());
653 
654  DAG.DeleteNode(N);
655 }
656 
657 /// Return 1 if we can compute the negated form of the specified expression for
658 /// the same cost as the expression itself, or 2 if we can compute the negated
659 /// form more cheaply than the expression itself.
// Returns 0 when negation is not free. Depth bounds the recursion (<= 6).
660 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
661  const TargetLowering &TLI,
662  const TargetOptions *Options,
663  unsigned Depth = 0) {
664  // fneg is removable even if it has multiple uses.
665  if (Op.getOpcode() == ISD::FNEG) return 2;
666 
667  // Don't allow anything with multiple uses.
668  if (!Op.hasOneUse()) return 0;
669 
670  // Don't recurse exponentially.
671  if (Depth > 6) return 0;
672 
673  switch (Op.getOpcode()) {
// NOTE(review): 'return false' converts to char 0 here, i.e. "not free";
// stylistically this should be 'return 0' to match the other returns.
674  default: return false;
675  case ISD::ConstantFP: {
676  if (!LegalOperations)
677  return 1;
678 
679  // Don't invert constant FP values after legalization unless the target says
680  // the negated constant is legal.
681  EVT VT = Op.getValueType();
682  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
683  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
684  }
685  case ISD::FADD:
686  // FIXME: determine better conditions for this xform.
687  if (!Options->UnsafeFPMath) return 0;
688 
689  // After operation legalization, it might not be legal to create new FSUBs.
690  if (LegalOperations &&
// NOTE(review): the continuation of this condition (original line 691,
// presumably an FSUB legality query on TLI) is missing in this copy --
// confirm against upstream.
692  return 0;
693 
694  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
695  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
696  Options, Depth + 1))
697  return V;
698  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
699  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
700  Depth + 1);
701  case ISD::FSUB:
702  // We can't turn -(A-B) into B-A when we honor signed zeros.
703  if (!Options->NoSignedZerosFPMath &&
704  !Op.getNode()->getFlags().hasNoSignedZeros())
705  return 0;
706 
707  // fold (fneg (fsub A, B)) -> (fsub B, A)
708  return 1;
709 
710  case ISD::FMUL:
711  case ISD::FDIV:
712  if (Options->HonorSignDependentRoundingFPMath()) return 0;
713 
714  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
715  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
716  Options, Depth + 1))
717  return V;
718 
719  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
720  Depth + 1);
721 
722  case ISD::FP_EXTEND:
723  case ISD::FP_ROUND:
724  case ISD::FSIN:
// Negation commutes with these unary ops, so Op is free to negate iff
// its operand is.
725  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
726  Depth + 1);
727  }
728 }
729 
730 /// If isNegatibleForFree returns true, return the newly negated expression.
// NOTE(review): the signature line (original line 731) is missing in this
// copy; from the body it presumably reads
//   static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
// paired with the (bool LegalOperations, unsigned Depth = 0) below.
// Confirm against upstream.
732  bool LegalOperations, unsigned Depth = 0) {
733  const TargetOptions &Options = DAG.getTarget().Options;
734  // fneg is removable even if it has multiple uses.
735  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
736 
737  // Don't allow anything with multiple uses.
738  assert(Op.hasOneUse() && "Unknown reuse!");
739 
740  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
741 
742  const SDNodeFlags Flags = Op.getNode()->getFlags();
743 
744  switch (Op.getOpcode()) {
745  default: llvm_unreachable("Unknown code");
746  case ISD::ConstantFP: {
// Negate the constant directly by flipping its sign bit.
747  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
748  V.changeSign();
749  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
750  }
751  case ISD::FADD:
752  // FIXME: determine better conditions for this xform.
753  assert(Options.UnsafeFPMath);
754 
755  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
756  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
757  DAG.getTargetLoweringInfo(), &Options, Depth+1))
758  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
759  GetNegatedExpression(Op.getOperand(0), DAG,
760  LegalOperations, Depth+1),
761  Op.getOperand(1), Flags);
762  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
763  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
764  GetNegatedExpression(Op.getOperand(1), DAG,
765  LegalOperations, Depth+1),
766  Op.getOperand(0), Flags);
767  case ISD::FSUB:
768  // fold (fneg (fsub 0, B)) -> B
769  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
770  if (N0CFP->isZero())
771  return Op.getOperand(1);
772 
773  // fold (fneg (fsub A, B)) -> (fsub B, A)
774  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
775  Op.getOperand(1), Op.getOperand(0), Flags);
776 
777  case ISD::FMUL:
778  case ISD::FDIV:
// NOTE(review): a line (original 779, presumably an assert mirroring the
// HonorSignDependentRoundingFPMath() guard in isNegatibleForFree) is
// missing in this copy -- confirm against upstream.
780 
781  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
782  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
783  DAG.getTargetLoweringInfo(), &Options, Depth+1))
784  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
785  GetNegatedExpression(Op.getOperand(0), DAG,
786  LegalOperations, Depth+1),
787  Op.getOperand(1), Flags);
788 
789  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
790  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
791  Op.getOperand(0),
792  GetNegatedExpression(Op.getOperand(1), DAG,
793  LegalOperations, Depth+1), Flags);
794 
795  case ISD::FP_EXTEND:
796  case ISD::FSIN:
// Push the negation through the unary op onto its operand.
797  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
798  GetNegatedExpression(Op.getOperand(0), DAG,
799  LegalOperations, Depth+1));
800  case ISD::FP_ROUND:
// FP_ROUND carries a truncation flag as operand 1 that must be kept.
801  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
802  GetNegatedExpression(Op.getOperand(0), DAG,
803  LegalOperations, Depth+1),
804  Op.getOperand(1));
805  }
806 }
807 
808 // APInts must be the same size for most operations, this helper
809 // function zero extends the shorter of the pair so that they match.
810 // We provide an Offset so that we can create bitwidths that won't overflow.
811 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
812  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
813  LHS = LHS.zextOrSelf(Bits);
814  RHS = RHS.zextOrSelf(Bits);
815 }
816 
817 // Return true if this node is a setcc, or is a select_cc
818 // that selects between the target values used for true and false, making it
819 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
820 // the appropriate nodes based on the type of node we are checking. This
821 // simplifies life a bit for the callers.
822 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
823  SDValue &CC) const {
// A plain SETCC matches trivially: operands 0/1 are the compared values,
// operand 2 is the condition code.
824  if (N.getOpcode() == ISD::SETCC) {
825  LHS = N.getOperand(0);
826  RHS = N.getOperand(1);
827  CC = N.getOperand(2);
828  return true;
829  }
830 
// Otherwise only a SELECT_CC whose true/false operands are the target's
// canonical true/false constants can stand in for a setcc.
831  if (N.getOpcode() != ISD::SELECT_CC ||
832  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
833  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
834  return false;
835 
836  if (TLI.getBooleanContents(N.getValueType()) ==
// NOTE(review): the right-hand side of this comparison (original line 837,
// presumably a TargetLowering boolean-contents enumerator) is missing in
// this copy -- confirm against upstream.
838  return false;
839 
// For SELECT_CC the condition code lives in operand 4.
840  LHS = N.getOperand(0);
841  RHS = N.getOperand(1);
842  CC = N.getOperand(4);
843  return true;
844 }
845 
846 /// Return true if this is a SetCC-equivalent operation with only one use.
847 /// If this is true, it allows the users to invert the operation for free when
848 /// it is profitable to do so.
849 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
850  SDValue N0, N1, N2;
851  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
852  return true;
853  return false;
854 }
855 
856 // \brief Returns the SDNode if it is a constant float BuildVector
857 // or constant float.
859  if (isa<ConstantFPSDNode>(N))
860  return N.getNode();
862  return N.getNode();
863  return nullptr;
864 }
865 
866 // Determines if it is a constant integer or a build vector of constant
867 // integers (and undefs).
868 // Do not permit build vector implicit truncation.
869 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
870  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
871  return !(Const->isOpaque() && NoOpaques);
872  if (N.getOpcode() != ISD::BUILD_VECTOR)
873  return false;
874  unsigned BitWidth = N.getScalarValueSizeInBits();
875  for (const SDValue &Op : N->op_values()) {
876  if (Op.isUndef())
877  continue;
879  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
880  (Const->isOpaque() && NoOpaques))
881  return false;
882  }
883  return true;
884 }
885 
886 // Determines if it is a constant null integer or a splatted vector of a
887 // constant null integer (with no undefs).
888 // Build vector implicit truncation is not an issue for null values.
890  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
891  return Splat->isNullValue();
892  return false;
893 }
894 
895 // Determines if it is a constant integer of one or a splatted vector of a
896 // constant integer of one (with no undefs).
897 // Do not permit build vector implicit truncation.
899  unsigned BitWidth = N.getScalarValueSizeInBits();
900  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
901  return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
902  return false;
903 }
904 
905 // Determines if it is a constant integer of all ones or a splatted vector of a
906 // constant integer of all ones (with no undefs).
907 // Do not permit build vector implicit truncation.
909  unsigned BitWidth = N.getScalarValueSizeInBits();
910  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
911  return Splat->isAllOnesValue() &&
912  Splat->getAPIntValue().getBitWidth() == BitWidth;
913  return false;
914 }
915 
916 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
917 // undef's.
918 static bool isAnyConstantBuildVector(const SDNode *N) {
921 }
922 
923 // Attempt to match a unary predicate against a scalar/splat constant or
924 // every element of a constant BUILD_VECTOR.
926  std::function<bool(ConstantSDNode *)> Match) {
927  if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
928  return Match(Cst);
929 
930  if (ISD::BUILD_VECTOR != Op.getOpcode())
931  return false;
932 
933  EVT SVT = Op.getValueType().getScalarType();
934  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
935  auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
936  if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
937  return false;
938  }
939  return true;
940 }
941 
942 // Attempt to match a binary predicate against a pair of scalar/splat constants
943 // or every element of a pair of constant BUILD_VECTORs.
945  SDValue LHS, SDValue RHS,
946  std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
947  if (LHS.getValueType() != RHS.getValueType())
948  return false;
949 
950  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
951  if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
952  return Match(LHSCst, RHSCst);
953 
954  if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
955  ISD::BUILD_VECTOR != RHS.getOpcode())
956  return false;
957 
958  EVT SVT = LHS.getValueType().getScalarType();
959  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
960  auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
961  auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
962  if (!LHSCst || !RHSCst)
963  return false;
964  if (LHSCst->getValueType(0) != SVT ||
965  LHSCst->getValueType(0) != RHSCst->getValueType(0))
966  return false;
967  if (!Match(LHSCst, RHSCst))
968  return false;
969  }
970  return true;
971 }
972 
973 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
974  SDValue N1) {
975  EVT VT = N0.getValueType();
976  if (N0.getOpcode() == Opc) {
979  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
980  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
981  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
982  return SDValue();
983  }
984  if (N0.hasOneUse()) {
985  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
986  // use
987  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
988  if (!OpNode.getNode())
989  return SDValue();
990  AddToWorklist(OpNode.getNode());
991  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
992  }
993  }
994  }
995 
996  if (N1.getOpcode() == Opc) {
999  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
1000  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
1001  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
1002  return SDValue();
1003  }
1004  if (N1.hasOneUse()) {
1005  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
1006  // use
1007  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
1008  if (!OpNode.getNode())
1009  return SDValue();
1010  AddToWorklist(OpNode.getNode());
1011  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
1012  }
1013  }
1014  }
1015 
1016  return SDValue();
1017 }
1018 
1019 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1020  bool AddTo) {
1021  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1022  ++NodesCombined;
1023  DEBUG(dbgs() << "\nReplacing.1 ";
1024  N->dump(&DAG);
1025  dbgs() << "\nWith: ";
1026  To[0].getNode()->dump(&DAG);
1027  dbgs() << " and " << NumTo-1 << " other values\n");
1028  for (unsigned i = 0, e = NumTo; i != e; ++i)
1029  assert((!To[i].getNode() ||
1030  N->getValueType(i) == To[i].getValueType()) &&
1031  "Cannot combine value to value of different type!");
1032 
1033  WorklistRemover DeadNodes(*this);
1034  DAG.ReplaceAllUsesWith(N, To);
1035  if (AddTo) {
1036  // Push the new nodes and any users onto the worklist
1037  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1038  if (To[i].getNode()) {
1039  AddToWorklist(To[i].getNode());
1040  AddUsersToWorklist(To[i].getNode());
1041  }
1042  }
1043  }
1044 
1045  // Finally, if the node is now dead, remove it from the graph. The node
1046  // may not be dead if the replacement process recursively simplified to
1047  // something else needing this node.
1048  if (N->use_empty())
1049  deleteAndRecombine(N);
1050  return SDValue(N, 0);
1051 }
1052 
1053 void DAGCombiner::
1054 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1055  // Replace all uses. If any nodes become isomorphic to other nodes and
1056  // are deleted, make sure to remove them from our worklist.
1057  WorklistRemover DeadNodes(*this);
1058  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1059 
1060  // Push the new node and any (possibly new) users onto the worklist.
1061  AddToWorklist(TLO.New.getNode());
1062  AddUsersToWorklist(TLO.New.getNode());
1063 
1064  // Finally, if the node is now dead, remove it from the graph. The node
1065  // may not be dead if the replacement process recursively simplified to
1066  // something else needing this node.
1067  if (TLO.Old.getNode()->use_empty())
1068  deleteAndRecombine(TLO.Old.getNode());
1069 }
1070 
1071 /// Check the specified integer node value to see if it can be simplified or if
1072 /// things it uses can be simplified by bit propagation. If so, return true.
1073 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1074  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1075  KnownBits Known;
1076  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1077  return false;
1078 
1079  // Revisit the node.
1080  AddToWorklist(Op.getNode());
1081 
1082  // Replace the old value with the new one.
1083  ++NodesCombined;
1084  DEBUG(dbgs() << "\nReplacing.2 ";
1085  TLO.Old.getNode()->dump(&DAG);
1086  dbgs() << "\nWith: ";
1087  TLO.New.getNode()->dump(&DAG);
1088  dbgs() << '\n');
1089 
1090  CommitTargetLoweringOpt(TLO);
1091  return true;
1092 }
1093 
1094 /// Check the specified vector node value to see if it can be simplified or
1095 /// if things it uses can be simplified as it only uses some of the elements.
1096 /// If so, return true.
1097 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1098  const APInt &Demanded) {
1099  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1100  APInt KnownUndef, KnownZero;
1101  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
1102  return false;
1103 
1104  // Revisit the node.
1105  AddToWorklist(Op.getNode());
1106 
1107  // Replace the old value with the new one.
1108  ++NodesCombined;
1109  DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1110  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); dbgs() << '\n');
1111 
1112  CommitTargetLoweringOpt(TLO);
1113  return true;
1114 }
1115 
1116 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1117  SDLoc DL(Load);
1118  EVT VT = Load->getValueType(0);
1119  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1120 
1121  DEBUG(dbgs() << "\nReplacing.9 ";
1122  Load->dump(&DAG);
1123  dbgs() << "\nWith: ";
1124  Trunc.getNode()->dump(&DAG);
1125  dbgs() << '\n');
1126  WorklistRemover DeadNodes(*this);
1127  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1128  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1129  deleteAndRecombine(Load);
1130  AddToWorklist(Trunc.getNode());
1131 }
1132 
1133 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1134  Replace = false;
1135  SDLoc DL(Op);
1136  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1137  LoadSDNode *LD = cast<LoadSDNode>(Op);
1138  EVT MemVT = LD->getMemoryVT();
1139  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1140  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1141  : ISD::EXTLOAD)
1142  : LD->getExtensionType();
1143  Replace = true;
1144  return DAG.getExtLoad(ExtType, DL, PVT,
1145  LD->getChain(), LD->getBasePtr(),
1146  MemVT, LD->getMemOperand());
1147  }
1148 
1149  unsigned Opc = Op.getOpcode();
1150  switch (Opc) {
1151  default: break;
1152  case ISD::AssertSext:
1153  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1154  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1155  break;
1156  case ISD::AssertZext:
1157  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1158  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1159  break;
1160  case ISD::Constant: {
1161  unsigned ExtOpc =
1163  return DAG.getNode(ExtOpc, DL, PVT, Op);
1164  }
1165  }
1166 
1167  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1168  return SDValue();
1169  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1170 }
1171 
1172 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1174  return SDValue();
1175  EVT OldVT = Op.getValueType();
1176  SDLoc DL(Op);
1177  bool Replace = false;
1178  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1179  if (!NewOp.getNode())
1180  return SDValue();
1181  AddToWorklist(NewOp.getNode());
1182 
1183  if (Replace)
1184  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1185  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1186  DAG.getValueType(OldVT));
1187 }
1188 
1189 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1190  EVT OldVT = Op.getValueType();
1191  SDLoc DL(Op);
1192  bool Replace = false;
1193  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1194  if (!NewOp.getNode())
1195  return SDValue();
1196  AddToWorklist(NewOp.getNode());
1197 
1198  if (Replace)
1199  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1200  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1201 }
1202 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Returns Op itself when the promotion was performed (CombineTo has already
/// rewired its uses), or a null SDValue when nothing was done.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion only makes sense once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integers are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Promote both operands to PVT; ReplaceN tracks whether the operand was a
    // load that must be replaced throughout the DAG.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation at the wide type and truncate back.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1267 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Only the value operand is promoted; the shift amount is used unchanged.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  // Promotion only makes sense once operations have been legalized.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integers are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // The extension kind must match the shift: SRA needs the sign bits
    // replicated, SRL needs the high bits zeroed, SHL doesn't care.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    // Shift at the wide type, then truncate back.
    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}
1320 
1321 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1322  if (!LegalOperations)
1323  return SDValue();
1324 
1325  EVT VT = Op.getValueType();
1326  if (VT.isVector() || !VT.isInteger())
1327  return SDValue();
1328 
1329  // If operation type is 'undesirable', e.g. i16 on x86, consider
1330  // promoting it.
1331  unsigned Opc = Op.getOpcode();
1332  if (TLI.isTypeDesirableForOp(Opc, VT))
1333  return SDValue();
1334 
1335  EVT PVT = VT;
1336  // Consult target whether it is a good idea to promote this operation and
1337  // what's the right type to promote it to.
1338  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1339  assert(PVT != VT && "Don't know what type to promote to!");
1340  // fold (aext (aext x)) -> (aext x)
1341  // fold (aext (zext x)) -> (zext x)
1342  // fold (aext (sext x)) -> (sext x)
1343  DEBUG(dbgs() << "\nPromoting ";
1344  Op.getNode()->dump(&DAG));
1345  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1346  }
1347  return SDValue();
1348 }
1349 
/// Promote an unindexed integer load to a wider, target-preferred type by
/// re-emitting it as an extending load and truncating the result. Returns
/// true if the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  // Promotion only makes sense once operations have been legalized.
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  // Only scalar integer loads are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Prefer ZEXTLOAD when legal since it leaves the high bits in a known
    // state; otherwise fall back to EXTLOAD, or keep the original extension.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    // Rewire value uses to the truncated wide load and chain uses to the new
    // load's chain, then delete the original load.
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
1400 
1401 /// \brief Recursively delete a node which has no uses and any operands for
1402 /// which it is the only use.
1403 ///
1404 /// Note that this both deletes the nodes and removes them from the worklist.
1405 /// It also adds any nodes who have had a user deleted to the worklist as they
1406 /// may now have only one use and subject to other combines.
1407 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1408  if (!N->use_empty())
1409  return false;
1410 
1412  Nodes.insert(N);
1413  do {
1414  N = Nodes.pop_back_val();
1415  if (!N)
1416  continue;
1417 
1418  if (N->use_empty()) {
1419  for (const SDValue &ChildN : N->op_values())
1420  Nodes.insert(ChildN.getNode());
1421 
1422  removeFromWorklist(N);
1423  DAG.DeleteNode(N);
1424  } else {
1425  AddToWorklist(N);
1426  }
1427  } while (!Nodes.empty());
1428  return true;
1429 }
1430 
1431 //===----------------------------------------------------------------------===//
1432 // Main DAG Combiner implementation
1433 //===----------------------------------------------------------------------===//
1434 
1435 void DAGCombiner::Run(CombineLevel AtLevel) {
1436  // set the instance variables, so that the various visit routines may use it.
1437  Level = AtLevel;
1438  LegalOperations = Level >= AfterLegalizeVectorOps;
1439  LegalTypes = Level >= AfterLegalizeTypes;
1440 
1441  // Add all the dag nodes to the worklist.
1442  for (SDNode &Node : DAG.allnodes())
1443  AddToWorklist(&Node);
1444 
1445  // Create a dummy node (which is not added to allnodes), that adds a reference
1446  // to the root node, preventing it from being deleted, and tracking any
1447  // changes of the root.
1448  HandleSDNode Dummy(DAG.getRoot());
1449 
1450  // While the worklist isn't empty, find a node and try to combine it.
1451  while (!WorklistMap.empty()) {
1452  SDNode *N;
1453  // The Worklist holds the SDNodes in order, but it may contain null entries.
1454  do {
1455  N = Worklist.pop_back_val();
1456  } while (!N);
1457 
1458  bool GoodWorklistEntry = WorklistMap.erase(N);
1459  (void)GoodWorklistEntry;
1460  assert(GoodWorklistEntry &&
1461  "Found a worklist entry without a corresponding map entry!");
1462 
1463  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1464  // N is deleted from the DAG, since they too may now be dead or may have a
1465  // reduced number of uses, allowing other xforms.
1466  if (recursivelyDeleteUnusedNodes(N))
1467  continue;
1468 
1469  WorklistRemover DeadNodes(*this);
1470 
1471  // If this combine is running after legalizing the DAG, re-legalize any
1472  // nodes pulled off the worklist.
1473  if (Level == AfterLegalizeDAG) {
1474  SmallSetVector<SDNode *, 16> UpdatedNodes;
1475  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1476 
1477  for (SDNode *LN : UpdatedNodes) {
1478  AddToWorklist(LN);
1479  AddUsersToWorklist(LN);
1480  }
1481  if (!NIsValid)
1482  continue;
1483  }
1484 
1485  DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1486 
1487  // Add any operands of the new node which have not yet been combined to the
1488  // worklist as well. Because the worklist uniques things already, this
1489  // won't repeatedly process the same operand.
1490  CombinedNodes.insert(N);
1491  for (const SDValue &ChildN : N->op_values())
1492  if (!CombinedNodes.count(ChildN.getNode()))
1493  AddToWorklist(ChildN.getNode());
1494 
1495  SDValue RV = combine(N);
1496 
1497  if (!RV.getNode())
1498  continue;
1499 
1500  ++NodesCombined;
1501 
1502  // If we get back the same node we passed in, rather than a new node or
1503  // zero, we know that the node must have defined multiple values and
1504  // CombineTo was used. Since CombineTo takes care of the worklist
1505  // mechanics for us, we have no work to do in this case.
1506  if (RV.getNode() == N)
1507  continue;
1508 
1510  RV.getOpcode() != ISD::DELETED_NODE &&
1511  "Node was deleted but visit returned new node!");
1512 
1513  DEBUG(dbgs() << " ... into: ";
1514  RV.getNode()->dump(&DAG));
1515 
1516  if (N->getNumValues() == RV.getNode()->getNumValues())
1517  DAG.ReplaceAllUsesWith(N, RV.getNode());
1518  else {
1519  assert(N->getValueType(0) == RV.getValueType() &&
1520  N->getNumValues() == 1 && "Type mismatch");
1521  DAG.ReplaceAllUsesWith(N, &RV);
1522  }
1523 
1524  // Push the new node and any users onto the worklist
1525  AddToWorklist(RV.getNode());
1526  AddUsersToWorklist(RV.getNode());
1527 
1528  // Finally, if the node is now dead, remove it from the graph. The node
1529  // may not be dead if the replacement process recursively simplified to
1530  // something else needing this node. This will also take care of adding any
1531  // operands which have lost a user to the worklist.
1532  recursivelyDeleteUnusedNodes(N);
1533  }
1534 
1535  // If the root changed (e.g. it was a dead load, update the root).
1536  DAG.setRoot(Dummy.getValue());
1537  DAG.RemoveDeadNodes();
1538 }
1539 
/// Dispatch to the opcode-specific combine routine for N. Returns the
/// replacement value, SDValue(N, 0) if CombineTo was used, or a null SDValue
/// if no combine applied.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  // Integer arithmetic.
  case ISD::ADD: return visitADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::UADDO: return visitUADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::USUBO: return visitUSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::ADDCARRY: return visitADDCARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::SUBCARRY: return visitSUBCARRY(N);
  case ISD::MUL: return visitMUL(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO: return visitSMULO(N);
  case ISD::UMULO: return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: return visitIMINMAX(N);
  // Bitwise logic and shifts.
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::ABS: return visitABS(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);
  // Selects and comparisons.
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCE: return visitSETCCE(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
  // Extensions, truncation and bitcasts.
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  // Floating point.
  case ISD::FADD: return visitFADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM: return visitFMINNUM(N);
  case ISD::FMAXNUM: return visitFMAXNUM(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  // Control flow.
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  // Memory operations.
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  // Vector operations.
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  // Masked memory operations.
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1646 
1647 SDValue DAGCombiner::combine(SDNode *N) {
1648  SDValue RV = visit(N);
1649 
1650  // If nothing happened, try a target-specific DAG combine.
1651  if (!RV.getNode()) {
1653  "Node was deleted but visit returned NULL!");
1654 
1655  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1657 
1658  // Expose the DAG combiner to the target combiner impls.
1660  DagCombineInfo(DAG, Level, false, this);
1661 
1662  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1663  }
1664  }
1665 
1666  // If nothing happened still, try promoting the operation.
1667  if (!RV.getNode()) {
1668  switch (N->getOpcode()) {
1669  default: break;
1670  case ISD::ADD:
1671  case ISD::SUB:
1672  case ISD::MUL:
1673  case ISD::AND:
1674  case ISD::OR:
1675  case ISD::XOR:
1676  RV = PromoteIntBinOp(SDValue(N, 0));
1677  break;
1678  case ISD::SHL:
1679  case ISD::SRA:
1680  case ISD::SRL:
1681  RV = PromoteIntShiftOp(SDValue(N, 0));
1682  break;
1683  case ISD::SIGN_EXTEND:
1684  case ISD::ZERO_EXTEND:
1685  case ISD::ANY_EXTEND:
1686  RV = PromoteExtend(SDValue(N, 0));
1687  break;
1688  case ISD::LOAD:
1689  if (PromoteLoad(SDValue(N, 0)))
1690  RV = SDValue(N, 0);
1691  break;
1692  }
1693  }
1694 
1695  // If N is a commutative binary node, try eliminate it if the commuted
1696  // version is already present in the DAG.
1697  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1698  N->getNumValues() == 1) {
1699  SDValue N0 = N->getOperand(0);
1700  SDValue N1 = N->getOperand(1);
1701 
1702  // Constant operands are canonicalized to RHS.
1703  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1704  SDValue Ops[] = {N1, N0};
1705  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1706  N->getFlags());
1707  if (CSENode)
1708  return SDValue(CSENode, 0);
1709  }
1710  }
1711 
1712  return RV;
1713 }
1714 
/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
// NOTE(review): the function signature line (presumably
// `static SDValue getInputChainForNode(SDNode *N) {`) was lost in extraction;
// the body below is otherwise intact — confirm against upstream.
 if (unsigned NumOps = N->getNumOperands()) {
   // The code probes the first and last operands before scanning the middle:
   // chain operands (type MVT::Other), when present, usually sit at an end.
   if (N->getOperand(0).getValueType() == MVT::Other)
     return N->getOperand(0);
   if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
     return N->getOperand(NumOps-1);
   // Fall back to a linear scan of the interior operands.
   for (unsigned i = 1; i < NumOps-1; ++i)
     if (N->getOperand(i).getValueType() == MVT::Other)
       return N->getOperand(i);
 }
 // No chain operand found.
 return SDValue();
}
1729 
/// Simplify a TokenFactor: drop redundant chains (EntryToken, duplicates,
/// nested single-use TokenFactors) and prune operands whose chains are
/// already reachable from another operand.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;  // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;         // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {
      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // NOTE(review): a fall-through annotation (LLVM_FALLTHROUGH) appears
        // to have been dropped here by extraction; multi-use nested token
        // factors intentionally fall through and are kept as plain operands.

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  // Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breath-first stopping when we've seen
  // another operand. In general we must climb to the EntryNode, but we can exit
  // early if we find all remaining work is associated with just one operand as
  // no further pruning is possible.

  // List of nodes to search through and original Ops from which they originate.
  // NOTE(review): the declaration of `Worklist` (presumably a SmallVector of
  // (SDNode *, unsigned) pairs, given the uses below) was lost in extraction
  // here — confirm against upstream.
  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
  SmallPtrSet<SDNode *, 16> SeenChains;
  bool DidPruneOps = false;

  unsigned NumLeftToConsider = 0;
  for (const SDValue &Op : Ops) {
    Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
    OpWorkCount.push_back(1);
  }

  // Note: this local lambda shadows the DAGCombiner::AddToWorklist member
  // within the remainder of this function.
  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Remark any
    // search associated with it as from the current OpNumber.
    if (SeenOps.count(Op) != 0) {
      Changed = true;
      DidPruneOps = true;
      unsigned OrigOpNumber = 0;
      while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
        OrigOpNumber++;
      assert((OrigOpNumber != Ops.size()) &&
             "expected to find TokenFactor Operand");
      // Re-mark worklist from OrigOpNumber to OpNumber
      for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
        if (Worklist[i].second == OrigOpNumber) {
          Worklist[i].second = OpNumber;
        }
      }
      OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
      OpWorkCount[OrigOpNumber] = 0;
      NumLeftToConsider--;
    }
    // Add if it's a new chain
    if (SeenChains.insert(Op).second) {
      OpWorkCount[OpNumber]++;
      Worklist.push_back(std::make_pair(Op, OpNumber));
    }
  };

  // The 1024 bound caps the breadth-first walk to keep compile time in check.
  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need at least be consider at least 2 Ops to prune.
    if (NumLeftToConsider <= 1)
      break;
    auto CurNode = Worklist[i].first;
    auto CurOpNumber = Worklist[i].second;
    assert((OpWorkCount[CurOpNumber] > 0) &&
           "Node should not appear in worklist");
    switch (CurNode->getOpcode()) {
    case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting
      // another operand's search. Prevent us from marking this operand
      // considered.
      NumLeftToConsider++;
      break;
    case ISD::TokenFactor:
      for (const SDValue &Op : CurNode->op_values())
        AddToWorklist(i, Op.getNode(), CurOpNumber);
      break;
    case ISD::CopyFromReg:
    case ISD::CopyToReg:
      // These nodes thread their chain through operand 0.
      AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
      break;
    default:
      // Memory operations expose their chain via getChain().
      if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
        AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
      break;
    }
    OpWorkCount[CurOpNumber]--;
    if (OpWorkCount[CurOpNumber] == 0)
      NumLeftToConsider--;
  }

  // If we've changed things around then replace token factor.
  if (Changed) {
    SDValue Result;
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      if (DidPruneOps) {
        SmallVector<SDValue, 8> PrunedOps;
        // Keep only operands whose chains were never reached while walking up
        // from another operand.
        for (const SDValue &Op : Ops) {
          if (SeenChains.count(Op.getNode()) == 0)
            PrunedOps.push_back(Op);
        }
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
      } else {
        Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
      }
    }
    return Result;
  }
  return SDValue();
}
1887 
1888 /// MERGE_VALUES can always be eliminated.
1889 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1890  WorklistRemover DeadNodes(*this);
1891  // Replacing results may cause a different MERGE_VALUES to suddenly
1892  // be CSE'd with N, and carry its uses with it. Iterate until no
1893  // uses remain, to ensure that the node can be safely deleted.
1894  // First add the users of this node to the work list so that they
1895  // can be tried again once they have new operands.
1896  AddUsersToWorklist(N);
1897  do {
1898  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1899  DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1900  } while (!N->use_empty());
1901  deleteAndRecombine(N);
1902  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1903 }
1904 
/// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
/// ConstantSDNode pointer else nullptr.
// NOTE(review): the function header and the dyn_cast that initializes `Const`
// were lost in extraction; only the final return statement survives.
 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
1911 
/// Try to eliminate a binop on a select-of-constants by pulling the constant
/// math into the select:
///   binop (select Cond, CT, CF), C1 --> select Cond, (binop CT C1), (binop CF C1)
// NOTE(review): each multi-line condition in this function lost its second
// conjunct in extraction (the surviving lines end in `&&` followed directly by
// the `return`); confirm the dropped FP/int-constant checks against upstream.
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
  auto BinOpcode = BO->getOpcode();
  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
          BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
          BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
          BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
          BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
          BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
          BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
          BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
          BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
         "Unexpected binary operator");

  // Bail out if any constants are opaque because we can't constant fold those.
  SDValue C1 = BO->getOperand(1);
  if (!isConstantOrConstantVector(C1, true) &&
    return SDValue();

  // Don't do this unless the old select is going away. We want to eliminate the
  // binary operator, not replace a binop with a select.
  // TODO: Handle ISD::SELECT_CC.
  SDValue Sel = BO->getOperand(0);
  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
    return SDValue();

  // Both select arms must be (non-opaque) constants for the fold to pay off.
  SDValue CT = Sel.getOperand(1);
  if (!isConstantOrConstantVector(CT, true) &&
    return SDValue();

  SDValue CF = Sel.getOperand(2);
  if (!isConstantOrConstantVector(CF, true) &&
    return SDValue();

  // We have a select-of-constants followed by a binary operator with a
  // constant. Eliminate the binop by pulling the constant math into the select.
  // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
  EVT VT = Sel.getValueType();
  SDLoc DL(Sel);
  SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
  if (!NewCT.isUndef() &&
      !isConstantOrConstantVector(NewCT, true) &&
    return SDValue();

  SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
  if (!NewCF.isUndef() &&
      !isConstantOrConstantVector(NewCF, true) &&
    return SDValue();

  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
}
1967 
/// Combine an integer ADD node: constant folding, canonicalization, and a
/// collection of add/sub algebraic identities.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    // NOTE(review): the two `if` conditions guarding these returns
    // (null-splat checks on N1 and N0 respectively) were lost in extraction;
    // the returns below are orphaned as shown.
      return N0;
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;

  if (N1.isUndef())
    return N1;

    // NOTE(review): the `if (...) {` opening this constant-operand block
    // (tests that N0/N1 are integer constants) and the condition guarding the
    // first return were lost in extraction; the `}` below closes it.
    // canonicalize constant to RHS
      return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;

  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (N0.getOpcode() == ISD::SUB &&
        isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
      // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
                         N0.getOperand(1));
    }

    // add (sext i1 X), 1 -> zext (not i1 X)
    // We don't transform this pattern:
    //   add (zext i1 X), -1 -> sext (not i1 X)
    // because most (?) targets generate better code for the zext form.
    // NOTE(review): the last conjunct of this condition (a one-constant check
    // on N1) was lost in extraction.
    if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
      SDValue X = N0.getOperand(0);
      if ((!LegalOperations ||
           (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
            TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
          X.getScalarValueSizeInBits() == 1) {
        SDValue Not = DAG.getNOT(DL, X, X.getValueType());
        return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
      }
    }

    // Undo the add -> or combine to merge constant offsets from a frame index.
    if (N0.getOpcode() == ISD::OR &&
        isa<FrameIndexSDNode>(N0.getOperand(0)) &&
        isa<ConstantSDNode>(N0.getOperand(1)) &&
        DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
      SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
      return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
    }
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
    return RADD;

  // fold ((0-A) + B) -> B-A
  // NOTE(review): the second conjunct (null check on the SUB's LHS) was lost
  // in extraction.
  if (N0.getOpcode() == ISD::SUB &&
    return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));

  // fold (A + (0-B)) -> A-B
  // NOTE(review): the second conjunct (null check on the SUB's LHS) was lost
  // in extraction.
  if (N1.getOpcode() == ISD::SUB &&
    return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));

  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);

  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);

  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));

  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));

  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
                       N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    // NOTE(review): the `if` guarding this return (constant check on N00/N10)
    // was lost in extraction.
      return DAG.getNode(ISD::SUB, DL, VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, DL, VT, N0, N1);

  // Try the generalized add combines with both operand orders.
  if (SDValue Combined = visitADDLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitADDLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2115 
/// If \p V (possibly wrapped in TRUNCATE/ZERO_EXTEND/AND-with-1 introduced by
/// legalization) is the carry/borrow result (result #1) of an
/// ADDCARRY/SUBCARRY/UADDO/USUBO node, return that carry value; otherwise
/// return a null SDValue.
static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
  bool Masked = false;

  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
  while (true) {
    if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
      V = V.getOperand(0);
      continue;
    }

    if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
      // Remember that the value was explicitly masked to a single bit.
      Masked = true;
      V = V.getOperand(0);
      continue;
    }

    break;
  }

  // If this is not a carry, return.
  if (V.getResNo() != 1)
    return SDValue();

  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
      V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
    return SDValue();

  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool type is either 0 or
  // 1 and not other values.
  // NOTE(review): the right-hand side of this comparison (presumably
  // TargetLowering::ZeroOrOneBooleanContent) was lost in extraction.
  if (Masked ||
      TLI.getBooleanContents(V.getValueType()) ==
    return V;

  return SDValue();
}
2153 
/// Generalized ADD combines tried with both operand orders; \p LocReference
/// only supplies the debug location for any new nodes.
SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N0.getValueType();
  SDLoc DL(LocReference);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  // NOTE(review): the final conjunct of this condition (a null check on the
  // inner SUB's LHS) was lost in extraction.
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
    return DAG.getNode(ISD::SUB, DL, VT, N0,
                       DAG.getNode(ISD::SHL, DL, VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NOTE(review): the second conjunct of this condition (a one-constant
    // check on the AND mask) was lost in extraction.
    if (NumSignBits == DestBits &&
      return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
  }

  // add (sext i1), X -> sub X, (zext i1)
  // NOTE(review): the trailing conjunct of this condition was lost in
  // extraction.
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
      N1.getResNo() == 0)
    return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
                       N0, N1.getOperand(0), N1.getOperand(2));

  // (add X, Carry) -> (addcarry X, 0, Carry)
  // NOTE(review): a guarding `if` line (presumably an ADDCARRY legality
  // check) was lost in extraction here.
  if (SDValue Carry = getAsCarry(TLI, N1))
    return DAG.getNode(ISD::ADDCARRY, DL,
                       DAG.getVTList(VT, Carry.getValueType()), N0,
                       DAG.getConstant(0, DL, VT), Carry);

  return SDValue();
}
2211 
/// Combine an ADDC node (add producing a glue carry-out).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // canonicalize constant to RHS.
  // NOTE(review): the declarations initializing N0C/N1C (ConstantSDNode
  // casts of N0/N1) were lost in extraction here.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        DL, MVT::Glue));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}
2241 
/// Combine a UADDO node (add producing a boolean overflow flag).
SDValue DAGCombiner::visitUADDO(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  // Vector UADDO is not handled by these combines.
  if (VT.isVector())
    return SDValue();

  EVT CarryVT = N->getValueType(1);
  SDLoc DL(N);

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getUNDEF(CarryVT));

  // canonicalize constant to RHS.
  // NOTE(review): the declarations initializing N0C/N1C (ConstantSDNode
  // casts of N0/N1) were lost in extraction here.
  if (N0C && !N1C)
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);

  // fold (uaddo x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));

  // If it cannot overflow, transform into an add.
  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
    return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                     DAG.getConstant(0, DL, CarryVT));

  // Try the generalized combines with both operand orders.
  if (SDValue Combined = visitUADDOLike(N0, N1, N))
    return Combined;

  if (SDValue Combined = visitUADDOLike(N1, N0, N))
    return Combined;

  return SDValue();
}
2280 
/// Generalized UADDO combines tried with both operand orders.
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
  auto VT = N0.getValueType();

  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
  // If Y + 1 cannot overflow.
  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
    SDValue Y = N1.getOperand(0);
    SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
    if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
      return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
                         N1.getOperand(2));
  }

  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
  // NOTE(review): a guarding `if` line (presumably an ADDCARRY legality
  // check) was lost in extraction here.
  if (SDValue Carry = getAsCarry(TLI, N1))
    return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
                       DAG.getConstant(0, SDLoc(N), VT), Carry);

  return SDValue();
}
2302 
/// Combine an ADDE node (add with glue carry-in and carry-out).
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  // NOTE(review): the declarations initializing N0C/N1C (ConstantSDNode
  // casts of N0/N1) were lost in extraction here.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}
2321 
/// Combine an ADDCARRY node (add with boolean carry-in and carry-out).
SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  SDLoc DL(N);

  // canonicalize constant to RHS
  // NOTE(review): the declarations initializing N0C/N1C (ConstantSDNode
  // casts of N0/N1) were lost in extraction here.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

  // fold (addcarry x, y, false) -> (uaddo x, y)
  if (isNullConstant(CarryIn))
    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);

  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
  if (isNullConstant(N0) && isNullConstant(N1)) {
    EVT VT = N0.getValueType();
    EVT CarryVT = CarryIn.getValueType();
    SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
    AddToWorklist(CarryExt.getNode());
    return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
                                    DAG.getConstant(1, DL, VT)),
                     DAG.getConstant(0, DL, CarryVT));
  }

  // Try the generalized combines with both operand orders.
  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
    return Combined;

  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
    return Combined;

  return SDValue();
}
2357 
2358 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2359  SDNode *N) {
2360  // Iff the flag result is dead:
2361  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2362  if ((N0.getOpcode() == ISD::ADD ||
2363  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2364  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2365  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2366  N0.getOperand(0), N0.getOperand(1), CarryIn);
2367 
2368  /**
2369  * When one of the addcarry argument is itself a carry, we may be facing
2370  * a diamond carry propagation. In which case we try to transform the DAG
2371  * to ensure linear carry propagation if that is possible.
2372  *
2373  * We are trying to get:
2374  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2375  */
2376  if (auto Y = getAsCarry(TLI, N1)) {
2377  /**
2378  * (uaddo A, B)
2379  * / \
2380  * Carry Sum
2381  * | \
2382  * | (addcarry *, 0, Z)
2383  * | /
2384  * \ Carry
2385  * | /
2386  * (addcarry X, *, *)
2387  */
2388  if (Y.getOpcode() == ISD::UADDO &&
2389  CarryIn.getResNo() == 1 &&
2390  CarryIn.getOpcode() == ISD::ADDCARRY &&
2391  isNullConstant(CarryIn.getOperand(1)) &&
2392  CarryIn.getOperand(0) == Y.getValue(0)) {
2393  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2394  Y.getOperand(0), Y.getOperand(1),
2395  CarryIn.getOperand(2));
2396  AddToWorklist(NewY.getNode());
2397  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2398  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2399  NewY.getValue(1));
2400  }
2401  }
2402 
2403  return SDValue();
2404 }
2405 
2406 // Since it may not be valid to emit a fold to zero for vector initializers
2407 // check if we can before folding.
2408 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2409  SelectionDAG &DAG, bool LegalOperations,
2410  bool LegalTypes) {
2411  if (!VT.isVector())
2412  return DAG.getConstant(0, DL, VT);
2413  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2414  return DAG.getConstant(0, DL, VT);
2415  return SDValue();
2416 }
2417 
/// Combine an integer SUB node: constant folding and sub/add algebraic
/// identities.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    // NOTE(review): the `if` guarding this return (a null-splat check on N1)
    // was lost in extraction; the return below is orphaned as shown.
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
  // NOTE(review): the `if (...) {` opening this block (integer-constant
  // checks on N0/N1) was lost in extraction; the `}` below closes it.
  // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
                                      N1.getNode());
  }

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // NOTE(review): the declaration initializing N1C (used below) was lost in
  // extraction here.

  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }

  // NOTE(review): an `if (...) {` guard (the 0 - X patterns below only make
  // sense when N0 is a zero constant/splat) was lost in extraction; the extra
  // `}` below closes it.
    unsigned BitWidth = VT.getScalarSizeInBits();
    // Right-shifting everything out but the sign bit followed by negation is
    // the same as flipping arithmetic/logical shift type without the negation:
    // -(X >>u 31) -> (X >>s 31)
    // -(X >>s 31) -> (X >>u 31)
    if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
      ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
      if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
        auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
        if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
          return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
      }
    }

    // 0 - X --> 0 if the sub is NUW.
    if (N->getFlags().hasNoUnsignedWrap())
      return N0;

    if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
      // N1 is either 0 or the minimum signed value. If the sub is NSW, then
      // N1 must be 0 because negating the minimum signed value is undefined.
      if (N->getFlags().hasNoSignedWrap())
        return N0;

      // 0 - X --> X if X is 0 or the minimum signed value.
      return N1;
    }
  }

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  // NOTE(review): the `if` guarding this return (an all-ones check on N0) was
  // lost in extraction.
    return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);

  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);

  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);

  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD) {
    SDValue N11 = N1.getOperand(1);
    if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
        isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
      SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
      return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
    }
  }

  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(1));

  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                       N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                        (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 DL, VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}
2565 
2566 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2567  SDValue N0 = N->getOperand(0);
2568  SDValue N1 = N->getOperand(1);
2569  EVT VT = N0.getValueType();
2570  SDLoc DL(N);
2571 
2572  // If the flag result is dead, turn this into an SUB.
2573  if (!N->hasAnyUseOfValue(1))
2574  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2575  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2576 
2577  // fold (subc x, x) -> 0 + no borrow
2578  if (N0 == N1)
2579  return CombineTo(N, DAG.getConstant(0, DL, VT),
2580  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2581 
2582  // fold (subc x, 0) -> x + no borrow
2583  if (isNullConstant(N1))
2584  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2585 
2586  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2587  if (isAllOnesConstant(N0))
2588  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2589  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2590 
2591  return SDValue();
2592 }
2593 
2594 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2595  SDValue N0 = N->getOperand(0);
2596  SDValue N1 = N->getOperand(1);
2597  EVT VT = N0.getValueType();
2598  if (VT.isVector())
2599  return SDValue();
2600 
2601  EVT CarryVT = N->getValueType(1);
2602  SDLoc DL(N);
2603 
2604  // If the flag result is dead, turn this into an SUB.
2605  if (!N->hasAnyUseOfValue(1))
2606  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2607  DAG.getUNDEF(CarryVT));
2608 
2609  // fold (usubo x, x) -> 0 + no borrow
2610  if (N0 == N1)
2611  return CombineTo(N, DAG.getConstant(0, DL, VT),
2612  DAG.getConstant(0, DL, CarryVT));
2613 
2614  // fold (usubo x, 0) -> x + no borrow
2615  if (isNullConstant(N1))
2616  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2617 
2618  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2619  if (isAllOnesConstant(N0))
2620  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2621  DAG.getConstant(0, DL, CarryVT));
2622 
2623  return SDValue();
2624 }
2625 
2626 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2627  SDValue N0 = N->getOperand(0);
2628  SDValue N1 = N->getOperand(1);
2629  SDValue CarryIn = N->getOperand(2);
2630 
2631  // fold (sube x, y, false) -> (subc x, y)
2632  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2633  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2634 
2635  return SDValue();
2636 }
2637 
2638 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2639  SDValue N0 = N->getOperand(0);
2640  SDValue N1 = N->getOperand(1);
2641  SDValue CarryIn = N->getOperand(2);
2642 
2643  // fold (subcarry x, y, false) -> (usubo x, y)
2644  if (isNullConstant(CarryIn))
2645  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2646 
2647  return SDValue();
2648 }
2649 
// Combine integer multiplies: constant folding, canonicalization of the
// constant operand to the RHS, strength reduction of power-of-two multiplies
// to shifts, and reassociation.
// NOTE(review): this is a Doxygen-extracted listing; several guard lines were
// lost (the embedded source line numbers jump). The flagged spots below are
// missing their enclosing 'if' conditions — confirm against the original
// DAGCombiner.cpp before compiling.
2650 SDValue DAGCombiner::visitMUL(SDNode *N) {
2651  SDValue N0 = N->getOperand(0);
2652  SDValue N1 = N->getOperand(1);
2653  EVT VT = N0.getValueType();
2654 
2655  // fold (mul x, undef) -> 0
2656  if (N0.isUndef() || N1.isUndef())
2657  return DAG.getConstant(0, SDLoc(N), VT);
2658 
2659  bool N0IsConst = false;
2660  bool N1IsConst = false;
2661  bool N1IsOpaqueConst = false;
2662  bool N0IsOpaqueConst = false;
2663  APInt ConstValue0, ConstValue1;
2664  // fold vector ops
2665  if (VT.isVector()) {
2666  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2667  return FoldedVOp;
2668 
// For vectors, a "constant" operand means a splat of the same element value.
2669  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2670  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2671  assert((!N0IsConst ||
2672  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2673  "Splat APInt should be element width");
2674  assert((!N1IsConst ||
2675  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2676  "Splat APInt should be element width");
2677  } else {
2678  N0IsConst = isa<ConstantSDNode>(N0);
2679  if (N0IsConst) {
2680  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2681  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2682  }
2683  N1IsConst = isa<ConstantSDNode>(N1);
2684  if (N1IsConst) {
2685  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2686  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2687  }
2688  }
2689 
2690  // fold (mul c1, c2) -> c1*c2
2691  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2692  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2693  N0.getNode(), N1.getNode());
2694 
2695  // canonicalize constant to RHS (vector doesn't have to splat)
// NOTE(review): the guard condition for this canonicalization (original
// lines 2696-2697) was lost in extraction — the return below lacks its
// enclosing 'if'.
2698  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2699  // fold (mul x, 0) -> 0
2700  if (N1IsConst && ConstValue1.isNullValue())
2701  return N1;
2702  // fold (mul x, 1) -> x
2703  if (N1IsConst && ConstValue1.isOneValue())
2704  return N0;
2705 
2706  if (SDValue NewSel = foldBinOpIntoSelect(N))
2707  return NewSel;
2708 
2709  // fold (mul x, -1) -> 0-x
2710  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2711  SDLoc DL(N);
2712  return DAG.getNode(ISD::SUB, DL, VT,
2713  DAG.getConstant(0, DL, VT), N0);
2714  }
2715  // fold (mul x, (1 << c)) -> x << c
2716  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2717  DAG.isKnownToBeAPowerOfTwo(N1) &&
2718  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2719  SDLoc DL(N);
2720  SDValue LogBase2 = BuildLogBase2(N1, DL);
2721  AddToWorklist(LogBase2.getNode());
2722 
// The shift amount may need a different type than the value being shifted.
2723  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2724  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2725  AddToWorklist(Trunc.getNode());
2726  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2727  }
2728  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2729  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2730  unsigned Log2Val = (-ConstValue1).logBase2();
2731  SDLoc DL(N);
2732  // FIXME: If the input is something that is easily negated (e.g. a
2733  // single-use add), we should put the negate there.
2734  return DAG.getNode(ISD::SUB, DL, VT,
2735  DAG.getConstant(0, DL, VT),
2736  DAG.getNode(ISD::SHL, DL, VT, N0,
2737  DAG.getConstant(Log2Val, DL,
2738  getShiftAmountTy(N0.getValueType()))));
2739  }
2740 
2741  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2742  if (N0.getOpcode() == ISD::SHL &&
2743  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2744  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2745  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
// NOTE(review): a guard (original line 2746, presumably checking that C3
// constant-folded) was lost in extraction.
2747  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2748  }
2749 
2750  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2751  // use.
2752  {
2753  SDValue Sh(nullptr, 0), Y(nullptr, 0);
2754 
2755  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2756  if (N0.getOpcode() == ISD::SHL &&
// NOTE(review): one conjunct of this condition (original line 2757) was lost
// in extraction.
2758  N0.getNode()->hasOneUse()) {
2759  Sh = N0; Y = N1;
2760  } else if (N1.getOpcode() == ISD::SHL &&
// NOTE(review): one conjunct of this condition (original line 2761) was lost
// in extraction.
2762  N1.getNode()->hasOneUse()) {
2763  Sh = N1; Y = N0;
2764  }
2765 
2766  if (Sh.getNode()) {
2767  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2768  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2769  }
2770  }
2771 
2772  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
// NOTE(review): the leading 'if (' and one conjunct (original lines
// 2773/2775) were lost in extraction; only part of the condition remains.
2774  N0.getOpcode() == ISD::ADD &&
2776  isMulAddWithConstProfitable(N, N0, N1))
2777  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2778  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2779  N0.getOperand(0), N1),
2780  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2781  N0.getOperand(1), N1));
2782 
2783  // reassociate mul
2784  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2785  return RMUL;
2786 
2787  return SDValue();
2788 }
2789 
2790 /// Return true if divmod libcall is available.
2791 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2792  const TargetLowering &TLI) {
2793  RTLIB::Libcall LC;
2794  EVT NodeType = Node->getValueType(0);
2795  if (!NodeType.isSimple())
2796  return false;
2797  switch (NodeType.getSimpleVT().SimpleTy) {
2798  default: return false; // No libcall for vector types.
2799  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
2800  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2801  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2802  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2803  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2804  }
2805 
2806  return TLI.getLibcallName(LC) != nullptr;
2807 }
2808 
2809 /// Issue divrem if both quotient and remainder are needed.
2810 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2811  if (Node->use_empty())
2812  return SDValue(); // This is a dead node, leave it alone.
2813 
2814  unsigned Opcode = Node->getOpcode();
2815  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2816  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2817 
2818  // DivMod lib calls can still work on non-legal types if using lib-calls.
2819  EVT VT = Node->getValueType(0);
2820  if (VT.isVector() || !VT.isInteger())
2821  return SDValue();
2822 
2823  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2824  return SDValue();
2825 
2826  // If DIVREM is going to get expanded into a libcall,
2827  // but there is no libcall available, then don't combine.
2828  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2829  !isDivRemLibcallAvailable(Node, isSigned, TLI))
2830  return SDValue();
2831 
2832  // If div is legal, it's better to do the normal expansion
2833  unsigned OtherOpcode = 0;
2834  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2835  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2836  if (TLI.isOperationLegalOrCustom(Opcode, VT))
2837  return SDValue();
2838  } else {
2839  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2840  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2841  return SDValue();
2842  }
2843 
2844  SDValue Op0 = Node->getOperand(0);
2845  SDValue Op1 = Node->getOperand(1);
2846  SDValue combined;
2847  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2848  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
2849  SDNode *User = *UI;
2850  if (User == Node || User->use_empty())
2851  continue;
2852  // Convert the other matching node(s), too;
2853  // otherwise, the DIVREM may get target-legalized into something
2854  // target-specific that we won't be able to recognize.
2855  unsigned UserOpc = User->getOpcode();
2856  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2857  User->getOperand(0) == Op0 &&
2858  User->getOperand(1) == Op1) {
2859  if (!combined) {
2860  if (UserOpc == OtherOpcode) {
2861  SDVTList VTs = DAG.getVTList(VT, VT);
2862  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2863  } else if (UserOpc == DivRemOpc) {
2864  combined = SDValue(User, 0);
2865  } else {
2866  assert(UserOpc == Opcode);
2867  continue;
2868  }
2869  }
2870  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2871  CombineTo(User, combined);
2872  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2873  CombineTo(User, combined.getValue(1));
2874  }
2875  }
2876  return combined;
2877 }
2878 
// NOTE(review): this function's signature line (original line 2879) was lost
// in extraction. From the call sites below (simplifyDivRem(N, DAG)), it is
// presumably 'static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG)'
// — confirm against the original file. It handles the div/rem cases that
// fold away because an operand is undef.
2880  SDValue N0 = N->getOperand(0);
2881  SDValue N1 = N->getOperand(1);
2882  EVT VT = N->getValueType(0);
2883  SDLoc DL(N);
2884 
// If the DAG says this opcode with these operands is undefined (e.g. a
// division by undef/zero), fold the whole node to undef.
2885  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2886  return DAG.getUNDEF(VT);
2887 
2888  // undef / X -> 0
2889  // undef % X -> 0
2890  if (N0.isUndef())
2891  return DAG.getConstant(0, DL, VT);
2892 
2893  return SDValue();
2894 }
2895 
// Combine signed division: constant folding, trivial divisors (1, -1),
// strength reduction to udiv or shifts for power-of-two divisors, and the
// sdiv+srem -> sdivrem combine.
// NOTE(review): Doxygen extraction dropped two spots in this function — the
// declarations of N0C/N1C (original lines 2909-2910, presumably
// ConstantSDNode casts of N0/N1) and the declaration of 'Attr' (original
// line 2975, presumably the function's AttributeList). Confirm against the
// original file.
2896 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2897  SDValue N0 = N->getOperand(0);
2898  SDValue N1 = N->getOperand(1);
2899  EVT VT = N->getValueType(0);
2900 
2901  // fold vector ops
2902  if (VT.isVector())
2903  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2904  return FoldedVOp;
2905 
2906  SDLoc DL(N);
2907 
2908  // fold (sdiv c1, c2) -> c1/c2
// NOTE(review): N0C/N1C declarations (original lines 2909-2910) lost here.
2911  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2912  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2913  // fold (sdiv X, 1) -> X
2914  if (N1C && N1C->isOne())
2915  return N0;
2916  // fold (sdiv X, -1) -> 0-X
2917  if (N1C && N1C->isAllOnesValue())
2918  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2919 
2920  if (SDValue V = simplifyDivRem(N, DAG))
2921  return V;
2922 
2923  if (SDValue NewSel = foldBinOpIntoSelect(N))
2924  return NewSel;
2925 
2926  // If we know the sign bits of both operands are zero, strength reduce to a
2927  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
2928  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2929  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2930 
2931  // fold (sdiv X, pow2) -> simple ops after legalize
2932  // FIXME: We check for the exact bit here because the generic lowering gives
2933  // better results in that case. The target-specific lowering should learn how
2934  // to handle exact sdivs efficiently.
2935  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2936  !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
2937  (-N1C->getAPIntValue()).isPowerOf2())) {
2938  // Target-specific implementation of sdiv x, pow2.
2939  if (SDValue Res = BuildSDIVPow2(N))
2940  return Res;
2941 
2942  unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2943 
2944  // Splat the sign bit into the register
2945  SDValue SGN =
2946  DAG.getNode(ISD::SRA, DL, VT, N0,
2947  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2948  getShiftAmountTy(N0.getValueType())));
2949  AddToWorklist(SGN.getNode());
2950 
2951  // Add (N0 < 0) ? abs2 - 1 : 0;
2952  SDValue SRL =
2953  DAG.getNode(ISD::SRL, DL, VT, SGN,
2954  DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2955  getShiftAmountTy(SGN.getValueType())));
2956  SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2957  AddToWorklist(SRL.getNode());
2958  AddToWorklist(ADD.getNode()); // Divide by pow2
2959  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2960  DAG.getConstant(lg2, DL,
2961  getShiftAmountTy(ADD.getValueType())));
2962 
2963  // If we're dividing by a positive value, we're done. Otherwise, we must
2964  // negate the result.
2965  if (N1C->getAPIntValue().isNonNegative())
2966  return SRA;
2967 
2968  AddToWorklist(SRA.getNode());
2969  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2970  }
2971 
2972  // If integer divide is expensive and we satisfy the requirements, emit an
2973  // alternate sequence. Targets may check function attributes for size/speed
2974  // trade-offs.
// NOTE(review): the declaration of 'Attr' (original line 2975) lost here.
2976  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2977  if (SDValue Op = BuildSDIV(N))
2978  return Op;
2979 
2980  // sdiv, srem -> sdivrem
2981  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2982  // true. Otherwise, we break the simplification logic in visitREM().
2983  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2984  if (SDValue DivRem = useDivRem(N))
2985  return DivRem;
2986 
2987  return SDValue();
2988 }
2989 
// Combine unsigned division: constant folding, strength reduction of
// power-of-two divisors to shifts, and the udiv+urem -> udivrem combine.
// NOTE(review): Doxygen extraction dropped the N0C/N1C declarations
// (original lines 3003-3004) and the 'Attr' declaration (original line
// 3046). Confirm against the original file.
2990 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2991  SDValue N0 = N->getOperand(0);
2992  SDValue N1 = N->getOperand(1);
2993  EVT VT = N->getValueType(0);
2994 
2995  // fold vector ops
2996  if (VT.isVector())
2997  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2998  return FoldedVOp;
2999 
3000  SDLoc DL(N);
3001 
3002  // fold (udiv c1, c2) -> c1/c2
// NOTE(review): N0C/N1C declarations (original lines 3003-3004) lost here.
3005  if (N0C && N1C)
3006  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3007  N0C, N1C))
3008  return Folded;
3009 
3010  if (SDValue V = simplifyDivRem(N, DAG))
3011  return V;
3012 
3013  if (SDValue NewSel = foldBinOpIntoSelect(N))
3014  return NewSel;
3015 
3016  // fold (udiv x, (1 << c)) -> x >>u c
3017  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3018  DAG.isKnownToBeAPowerOfTwo(N1)) {
3019  SDValue LogBase2 = BuildLogBase2(N1, DL);
3020  AddToWorklist(LogBase2.getNode());
3021 
// Shift amounts may use a narrower type than the shifted value.
3022  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3023  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3024  AddToWorklist(Trunc.getNode());
3025  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3026  }
3027 
3028  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3029  if (N1.getOpcode() == ISD::SHL) {
3030  SDValue N10 = N1.getOperand(0);
3031  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3032  DAG.isKnownToBeAPowerOfTwo(N10)) {
3033  SDValue LogBase2 = BuildLogBase2(N10, DL);
3034  AddToWorklist(LogBase2.getNode());
3035 
3036  EVT ADDVT = N1.getOperand(1).getValueType();
3037  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3038  AddToWorklist(Trunc.getNode());
3039  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3040  AddToWorklist(Add.getNode());
3041  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3042  }
3043  }
3044 
3045  // fold (udiv x, c) -> alternate
// NOTE(review): the declaration of 'Attr' (original line 3046) lost here.
3047  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
3048  if (SDValue Op = BuildUDIV(N))
3049  return Op;
3050 
3051  // sdiv, srem -> sdivrem
3052  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3053  // true. Otherwise, we break the simplification logic in visitREM().
3054  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3055  if (SDValue DivRem = useDivRem(N))
3056  return DivRem;
3057 
3058  return SDValue();
3059 }
3060 
3061 // handles ISD::SREM and ISD::UREM
// handles ISD::SREM and ISD::UREM
// NOTE(review): Doxygen extraction dropped the N0C/N1C declarations
// (original lines 3071-3072) and the 'Attr' declaration (original line
// 3105). Confirm against the original file.
3062 SDValue DAGCombiner::visitREM(SDNode *N) {
3063  unsigned Opcode = N->getOpcode();
3064  SDValue N0 = N->getOperand(0);
3065  SDValue N1 = N->getOperand(1);
3066  EVT VT = N->getValueType(0);
3067  bool isSigned = (Opcode == ISD::SREM);
3068  SDLoc DL(N);
3069 
3070  // fold (rem c1, c2) -> c1%c2
// NOTE(review): N0C/N1C declarations (original lines 3071-3072) lost here.
3073  if (N0C && N1C)
3074  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3075  return Folded;
3076 
3077  if (SDValue V = simplifyDivRem(N, DAG))
3078  return V;
3079 
3080  if (SDValue NewSel = foldBinOpIntoSelect(N))
3081  return NewSel;
3082 
3083  if (isSigned) {
3084  // If we know the sign bits of both operands are zero, strength reduce to a
3085  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3086  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3087  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3088  } else {
3089  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3090  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3091  // fold (urem x, pow2) -> (and x, pow2-1)
3092  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3093  AddToWorklist(Add.getNode());
3094  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3095  }
// A left-shifted power of two is still a power of two, so the same mask
// trick applies with the shifted value.
3096  if (N1.getOpcode() == ISD::SHL &&
3097  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3098  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3099  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3100  AddToWorklist(Add.getNode());
3101  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3102  }
3103  }
3104 
// NOTE(review): the declaration of 'Attr' (original line 3105) lost here.
3106 
3107  // If X/C can be simplified by the division-by-constant logic, lower
3108  // X%C to the equivalent of X-X/C*C.
3109  // To avoid mangling nodes, this simplification requires that the combine()
3110  // call for the speculative DIV must not cause a DIVREM conversion. We guard
3111  // against this by skipping the simplification if isIntDivCheap(). When
3112  // div is not cheap, combine will not return a DIVREM. Regardless,
3113  // checking cheapness here makes sense since the simplification results in
3114  // fatter code.
3115  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
3116  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3117  SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
3118  AddToWorklist(Div.getNode());
3119  SDValue OptimizedDiv = combine(Div.getNode());
3120  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
3121  assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
3122  (OptimizedDiv.getOpcode() != ISD::SDIVREM));
3123  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3124  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3125  AddToWorklist(Mul.getNode());
3126  return Sub;
3127  }
3128  }
3129 
3130  // sdiv, srem -> sdivrem
3131  if (SDValue DivRem = useDivRem(N))
3132  return DivRem.getValue(1);
3133 
3134  return SDValue();
3135 }
3136 
// Combine MULHS (signed multiply-high): trivial constant cases, and widening
// to a full multiply plus shift when the double-width type is legal.
// NOTE(review): Doxygen extraction dropped the two 'if' conditions in the
// vector branch (original lines 3145 and 3147) — the bare returns below are
// missing their guards (presumably all-zeros / undef checks on the
// operands). Confirm against the original file.
3137 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3138  SDValue N0 = N->getOperand(0);
3139  SDValue N1 = N->getOperand(1);
3140  EVT VT = N->getValueType(0);
3141  SDLoc DL(N);
3142 
3143  if (VT.isVector()) {
3144  // fold (mulhs x, 0) -> 0
// NOTE(review): guard condition (original line 3145) lost here.
3146  return N1;
// NOTE(review): guard condition (original line 3147) lost here.
3148  return N0;
3149  }
3150 
3151  // fold (mulhs x, 0) -> 0
3152  if (isNullConstant(N1))
3153  return N1;
3154  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3155  if (isOneConstant(N1))
3156  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3157  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3158  getShiftAmountTy(N0.getValueType())));
3159 
3160  // fold (mulhs x, undef) -> 0
3161  if (N0.isUndef() || N1.isUndef())
3162  return DAG.getConstant(0, DL, VT);
3163 
3164  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3165  // plus a shift.
3166  if (VT.isSimple() && !VT.isVector()) {
3167  MVT Simple = VT.getSimpleVT();
3168  unsigned SimpleSize = Simple.getSizeInBits();
3169  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3170  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3171  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3172  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3173  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
// The high half of the product is obtained by shifting the wide result down
// by the narrow width, then truncating.
3174  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3175  DAG.getConstant(SimpleSize, DL,
3176  getShiftAmountTy(N1.getValueType())));
3177  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3178  }
3179  }
3180 
3181  return SDValue();
3182 }
3183 
// Combine MULHU (unsigned multiply-high): trivial constant cases, and
// widening to a full multiply plus shift when the double-width type is legal.
// NOTE(review): as in visitMULHS, the two 'if' conditions in the vector
// branch (original lines 3192 and 3194) were lost in extraction — the bare
// returns below are missing their guards. Confirm against the original file.
3184 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3185  SDValue N0 = N->getOperand(0);
3186  SDValue N1 = N->getOperand(1);
3187  EVT VT = N->getValueType(0);
3188  SDLoc DL(N);
3189 
3190  if (VT.isVector()) {
3191  // fold (mulhu x, 0) -> 0
// NOTE(review): guard condition (original line 3192) lost here.
3193  return N1;
// NOTE(review): guard condition (original line 3194) lost here.
3195  return N0;
3196  }
3197 
3198  // fold (mulhu x, 0) -> 0
3199  if (isNullConstant(N1))
3200  return N1;
3201  // fold (mulhu x, 1) -> 0
3202  if (isOneConstant(N1))
3203  return DAG.getConstant(0, DL, N0.getValueType());
3204  // fold (mulhu x, undef) -> 0
3205  if (N0.isUndef() || N1.isUndef())
3206  return DAG.getConstant(0, DL, VT);
3207 
3208  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3209  // plus a shift.
3210  if (VT.isSimple() && !VT.isVector()) {
3211  MVT Simple = VT.getSimpleVT();
3212  unsigned SimpleSize = Simple.getSizeInBits();
3213  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3214  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3215  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3216  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3217  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
// The high half of the product is the wide result shifted down by the
// narrow width, then truncated.
3218  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3219  DAG.getConstant(SimpleSize, DL,
3220  getShiftAmountTy(N1.getValueType())));
3221  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3222  }
3223  }
3224 
3225  return SDValue();
3226 }
3227 
3228 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3229 /// give the opcodes for the two computations that are being performed. Return
3230 /// true if a simplification was made.
3231 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3232  unsigned HiOp) {
3233  // If the high half is not needed, just compute the low half.
3234  bool HiExists = N->hasAnyUseOfValue(1);
3235  if (!HiExists &&
3236  (!LegalOperations ||
3237  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3238  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3239  return CombineTo(N, Res, Res);
3240  }
3241 
3242  // If the low half is not needed, just compute the high half.
3243  bool LoExists = N->hasAnyUseOfValue(0);
3244  if (!LoExists &&
3245  (!LegalOperations ||
3246  TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3247  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3248  return CombineTo(N, Res, Res);
3249  }
3250 
3251  // If both halves are used, return as it is.
3252  if (LoExists && HiExists)
3253  return SDValue();
3254 
3255  // If the two computed results can be simplified separately, separate them.
3256  if (LoExists) {
3257  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3258  AddToWorklist(Lo.getNode());
3259  SDValue LoOpt = combine(Lo.getNode());
3260  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3261  (!LegalOperations ||
3262  TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3263  return CombineTo(N, LoOpt, LoOpt);
3264  }
3265 
3266  if (HiExists) {
3267  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3268  AddToWorklist(Hi.getNode());
3269  SDValue HiOpt = combine(Hi.getNode());
3270  if (HiOpt.getNode() && HiOpt != Hi &&
3271  (!LegalOperations ||
3272  TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3273  return CombineTo(N, HiOpt, HiOpt);
3274  }
3275 
3276  return SDValue();
3277 }
3278 
3279 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3280  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3281  return Res;
3282 
3283  EVT VT = N->getValueType(0);
3284  SDLoc DL(N);
3285 
3286  // If the type is twice as wide is legal, transform the mulhu to a wider
3287  // multiply plus a shift.
3288  if (VT.isSimple() && !VT.isVector()) {
3289  MVT Simple = VT.getSimpleVT();
3290  unsigned SimpleSize = Simple.getSizeInBits();
3291  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3292  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3293  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3294  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3295  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3296  // Compute the high part as N1.
3297  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3298  DAG.getConstant(SimpleSize, DL,
3299  getShiftAmountTy(Lo.getValueType())));
3300  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3301  // Compute the low part as N0.
3302  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3303  return CombineTo(N, Lo, Hi);
3304  }
3305  }
3306 
3307  return SDValue();
3308 }
3309 
3310 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3311  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3312  return Res;
3313 
3314  EVT VT = N->getValueType(0);
3315  SDLoc DL(N);
3316 
3317  // If the type is twice as wide is legal, transform the mulhu to a wider
3318  // multiply plus a shift.
3319  if (VT.isSimple() && !VT.isVector()) {
3320  MVT Simple = VT.getSimpleVT();
3321  unsigned SimpleSize = Simple.getSizeInBits();
3322  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3323  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3324  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3325  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3326  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3327  // Compute the high part as N1.
3328  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3329  DAG.getConstant(SimpleSize, DL,
3330  getShiftAmountTy(Lo.getValueType())));
3331  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3332  // Compute the low part as N0.
3333  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3334  return CombineTo(N, Lo, Hi);
3335  }
3336  }
3337 
3338  return SDValue();
3339 }
3340 
3341 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3342  // (smulo x, 2) -> (saddo x, x)
3343  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3344  if (C2->getAPIntValue() == 2)
3345  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3346  N->getOperand(0), N->getOperand(0));
3347 
3348  return SDValue();
3349 }
3350 
3351 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3352  // (umulo x, 2) -> (uaddo x, x)
3353  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3354  if (C2->getAPIntValue() == 2)
3355  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3356  N->getOperand(0), N->getOperand(0));
3357 
3358  return SDValue();
3359 }
3360 
// Combine integer SMIN/SMAX/UMIN/UMAX: constant folding, canonicalization of
// the constant to the RHS, and flipping signed<->unsigned variants when the
// sign bits are known zero and only the flipped form is legal.
// NOTE(review): Doxygen extraction dropped the N0C/N1C declarations
// (original lines 3372-3373) and the guard condition of the RHS
// canonicalization (original lines 3378-3379) — the bare return below lacks
// its enclosing 'if'. Confirm against the original file.
3361 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3362  SDValue N0 = N->getOperand(0);
3363  SDValue N1 = N->getOperand(1);
3364  EVT VT = N0.getValueType();
3365 
3366  // fold vector ops
3367  if (VT.isVector())
3368  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3369  return FoldedVOp;
3370 
3371  // fold operation with constant operands.
// NOTE(review): N0C/N1C declarations (original lines 3372-3373) lost here.
3374  if (N0C && N1C)
3375  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3376 
3377  // canonicalize constant to RHS
// NOTE(review): guard condition (original lines 3378-3379) lost here.
3380  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3381 
3382  // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3383  // Only do this if the current op isn't legal and the flipped is.
3384  unsigned Opcode = N->getOpcode();
3385  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3386  if (!TLI.isOperationLegal(Opcode, VT) &&
3387  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3388  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3389  unsigned AltOpcode;
3390  switch (Opcode) {
3391  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3392  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3393  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3394  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3395  default: llvm_unreachable("Unknown MINMAX opcode");
3396  }
3397  if (TLI.isOperationLegal(AltOpcode, VT))
3398  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3399  }
3400 
3401  return SDValue();
3402 }
3403 
3404 /// If this is a binary operator with two operands of the same opcode, try to
3405 /// simplify it.
// Hoists a bitwise binary operator (AND/OR/XOR) above two operands that share
// the same opcode: through extends/bswap/truncate, through matching shifts,
// through bitcasts/scalar_to_vector, and through identical-mask shuffles.
// The statement order is load-bearing: each transform returns immediately.
3406 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3407  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3408  EVT VT = N0.getValueType();
3409  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3410 
3411  // Bail early if none of these transforms apply.
3412  if (N0.getNumOperands() == 0) return SDValue();
3413 
3414  // For each of OP in AND/OR/XOR:
3415  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3416  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3417  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3418  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3419  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3420  //
3421  // do not sink logical op inside of a vector extend, since it may combine
3422  // into a vsetcc.
3423  EVT Op0VT = N0.getOperand(0).getValueType();
3424  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3425  N0.getOpcode() == ISD::SIGN_EXTEND ||
3426  N0.getOpcode() == ISD::BSWAP ||
3427  // Avoid infinite looping with PromoteIntBinOp.
3428  (N0.getOpcode() == ISD::ANY_EXTEND &&
3429  (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3430  (N0.getOpcode() == ISD::TRUNCATE &&
3431  (!TLI.isZExtFree(VT, Op0VT) ||
3432  !TLI.isTruncateFree(Op0VT, VT)) &&
3433  TLI.isTypeLegal(Op0VT))) &&
3434  !VT.isVector() &&
3435  Op0VT == N1.getOperand(0).getValueType() &&
3436  (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
// Build the logical op in the narrower/inner type, then re-apply the
// conversion on top.
3437  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3438  N0.getOperand(0).getValueType(),
3439  N0.getOperand(0), N1.getOperand(0));
3440  AddToWorklist(ORNode.getNode());
3441  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3442  }
3443 
3444  // For each of OP in SHL/SRL/SRA/AND...
3445  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3446  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
3447  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3448  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3449  N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3450  N0.getOperand(1) == N1.getOperand(1)) {
3451  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3452  N0.getOperand(0).getValueType(),
3453  N0.getOperand(0), N1.getOperand(0));
3454  AddToWorklist(ORNode.getNode());
3455  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3456  ORNode, N0.getOperand(1));
3457  }
3458 
3459  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3460  // Only perform this optimization up until type legalization, before
3461  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3462  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3463  // we don't want to undo this promotion.
3464  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3465  // on scalars.
3466  if ((N0.getOpcode() == ISD::BITCAST ||
3467  N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3468  Level <= AfterLegalizeTypes) {
3469  SDValue In0 = N0.getOperand(0);
3470  SDValue In1 = N1.getOperand(0);
3471  EVT In0Ty = In0.getValueType();
3472  EVT In1Ty = In1.getValueType();
3473  SDLoc DL(N);
3474  // If both incoming values are integers, and the original types are the
3475  // same.
3476  if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3477  SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3478  SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3479  AddToWorklist(Op.getNode());
3480  return BC;
3481  }
3482  }
3483 
3484  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3485  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3486  // If both shuffles use the same mask, and both shuffle within a single
3487  // vector, then it is worthwhile to move the swizzle after the operation.
3488  // The type-legalizer generates this pattern when loading illegal
3489  // vector types from memory. In many cases this allows additional shuffle
3490  // optimizations.
3491  // There are other cases where moving the shuffle after the xor/and/or
3492  // is profitable even if shuffles don't perform a swizzle.
3493  // If both shuffles use the same mask, and both shuffles have the same first
3494  // or second operand, then it might still be profitable to move the shuffle
3495  // after the xor/and/or operation.
3496  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3497  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3498  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3499 
3500  assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3501  "Inputs to shuffles are not the same type");
3502 
3503  // Check that both shuffles use the same mask. The masks are known to be of
3504  // the same length because the result vector type is the same.
3505  // Check also that shuffles have only one use to avoid introducing extra
3506  // instructions.
3507  if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3508  SVN0->getMask().equals(SVN1->getMask())) {
3509  SDValue ShOp = N0->getOperand(1);
3510 
3511  // Don't try to fold this node if it requires introducing a
3512  // build vector of all zeros that might be illegal at this stage.
3513  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3514  if (!LegalTypes)
3515  ShOp = DAG.getConstant(0, SDLoc(N), VT);
3516  else
3517  ShOp = SDValue();
3518  }
3519 
3520  // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3521  // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
3522  // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3523  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3524  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3525  N0->getOperand(0), N1->getOperand(0));
3526  AddToWorklist(NewNode.getNode());
3527  return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3528  SVN0->getMask());
3529  }
3530 
3531  // Don't try to fold this node if it requires introducing a
3532  // build vector of all zeros that might be illegal at this stage.
3533  ShOp = N0->getOperand(0);
3534  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3535  if (!LegalTypes)
3536  ShOp = DAG.getConstant(0, SDLoc(N), VT);
3537  else
3538  ShOp = SDValue();
3539  }
3540 
3541  // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3542  // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
3543  // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3544  if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3545  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3546  N0->getOperand(1), N1->getOperand(1));
3547  AddToWorklist(NewNode.getNode());
3548  return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3549  SVN0->getMask());
3550  }
3551  }
3552  }
3553 
3554  return SDValue();
3555 }
3556 
3557 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
3558 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
3559  const SDLoc &DL) {
3560  SDValue LL, LR, RL, RR, N0CC, N1CC;
3561  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
3562  !isSetCCEquivalent(N1, RL, RR, N1CC))
3563  return SDValue();
3564 
3565  assert(N0.getValueType() == N1.getValueType() &&
3566  "Unexpected operand types for bitwise logic op");
3567  assert(LL.getValueType() == LR.getValueType() &&
3568  RL.getValueType() == RR.getValueType() &&
3569  "Unexpected operand types for setcc");
3570 
3571  // If we're here post-legalization or the logic op type is not i1, the logic
3572  // op type must match a setcc result type. Also, all folds require new
3573  // operations on the left and right operands, so those types must match.
3574  EVT VT = N0.getValueType();
3575  EVT OpVT = LL.getValueType();
3576  if (LegalOperations || VT.getScalarType() != MVT::i1)
3577  if (VT != getSetCCResultType(OpVT))
3578  return SDValue();
3579  if (OpVT != RL.getValueType())
3580  return SDValue();
3581 
3582  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
3583  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
3584  bool IsInteger = OpVT.isInteger();
3585  if (LR == RR && CC0 == CC1 && IsInteger) {
3586  bool IsZero = isNullConstantOrNullSplatConstant(LR);
3587  bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);
3588 
3589  // All bits clear?
3590  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
3591  // All sign bits clear?
3592  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
3593  // Any bits set?
3594  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
3595  // Any sign bits set?
3596  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
3597 
3598  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
3599  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
3600  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
3601  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
3602  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
3603  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
3604  AddToWorklist(Or.getNode());
3605  return DAG.getSetCC(DL, VT, Or, LR, CC1);
3606  }
3607 
3608  // All bits set?
3609  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
3610  // All sign bits set?
3611  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
3612  // Any bits clear?
3613  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
3614  // Any sign bits clear?
3615  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
3616 
3617  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
3618  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
3619  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
3620  // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
3621  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
3622  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
3623  AddToWorklist(And.getNode());
3624  return DAG.getSetCC(DL, VT, And, LR, CC1);
3625  }
3626  }
3627 
3628  // TODO: What is the 'or' equivalent of this fold?
3629  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
3630  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
3631  IsInteger && CC0 == ISD::SETNE &&
3632  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
3633  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
3634  SDValue One = DAG.getConstant(1, DL, OpVT);
3635  SDValue Two = DAG.getConstant(2, DL, OpVT);
3636  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
3637  AddToWorklist(Add.getNode());
3638  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
3639  }
3640 
3641  // Try more general transforms if the predicates match and the only user of
3642  // the compares is the 'and' or 'or'.
3643  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
3644  N0.hasOneUse() && N1.hasOneUse()) {
3645  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
3646  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
3647  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
3648  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
3649  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
3650  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
3651  SDValue Zero = DAG.getConstant(0, DL, OpVT);
3652  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
3653  }
3654  }
3655 
3656  // Canonicalize equivalent operands to LL == RL.
3657  if (LL == RR && LR == RL) {
3658  CC1 = ISD::getSetCCSwappedOperands(CC1);
3659  std::swap(RL, RR);
3660  }
3661 
3662  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3663  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
3664  if (LL == RL && LR == RR) {
3665  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
3666  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
3667  if (NewCC != ISD::SETCC_INVALID &&
3668  (!LegalOperations ||
3669  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
3670  TLI.isOperationLegal(ISD::SETCC, OpVT))))
3671  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
3672  }
3673 
3674  return SDValue();
3675 }
3676 
3677 /// This contains all DAGCombine rules which reduce two values combined by
3678 /// an And operation to a single value. This makes them reusable in the context
3679 /// of visitSELECT(). Rules involving constants are not included as
3680 /// visitSELECT() already handles those cases.
3681 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3682  EVT VT = N1.getValueType();
3683  SDLoc DL(N);
3684 
3685  // fold (and x, undef) -> 0
3686  if (N0.isUndef() || N1.isUndef())
3687  return DAG.getConstant(0, DL, VT);
3688 
3689  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3690  return V;
3691 
3692  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3693  VT.getSizeInBits() <= 64) {
3694  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3695  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3696  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3697  // immediate for an add, but it is legal if its top c2 bits are set,
3698  // transform the ADD so the immediate doesn't need to be materialized
3699  // in a register.
3700  APInt ADDC = ADDI->getAPIntValue();
3701  APInt SRLC = SRLI->getAPIntValue();
3702  if (ADDC.getMinSignedBits() <= 64 &&
3703  SRLC.ult(VT.getSizeInBits()) &&
3704  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3706  SRLC.getZExtValue());
3707  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3708  ADDC |= Mask;
3709  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3710  SDLoc DL0(N0);
3711  SDValue NewAdd =
3712  DAG.getNode(ISD::ADD, DL0, VT,
3713  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3714  CombineTo(N0.getNode(), NewAdd);
3715  // Return N so it doesn't get rechecked!
3716  return SDValue(N, 0);
3717  }
3718  }
3719  }
3720  }
3721  }
3722  }
3723 
3724  // Reduce bit extract of low half of an integer to the narrower type.
3725  // (and (srl i64:x, K), KMask) ->
3726  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3727  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3728  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3729  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3730  unsigned Size = VT.getSizeInBits();
3731  const APInt &AndMask = CAnd->getAPIntValue();
3732  unsigned ShiftBits = CShift->getZExtValue();
3733 
3734  // Bail out, this node will probably disappear anyway.
3735  if (ShiftBits == 0)
3736  return SDValue();
3737 
3738  unsigned MaskBits = AndMask.countTrailingOnes();
3739  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3740 
3741  if (AndMask.isMask() &&
3742  // Required bits must not span the two halves of the integer and
3743  // must fit in the half size type.
3744  (ShiftBits + MaskBits <= Size / 2) &&
3745  TLI.isNarrowingProfitable(VT, HalfVT) &&
3746  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3747  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3748  TLI.isTruncateFree(VT, HalfVT) &&
3749  TLI.isZExtFree(HalfVT, VT)) {
3750  // The isNarrowingProfitable is to avoid regressions on PPC and
3751  // AArch64 which match a few 64-bit bit insert / bit extract patterns
3752  // on downstream users of this. Those patterns could probably be
3753  // extended to handle extensions mixed in.
3754 
3755  SDValue SL(N0);
3756  assert(MaskBits <= Size);
3757 
3758  // Extracting the highest bit of the low half.
3759  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3760  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3761  N0.getOperand(0));
3762 
3763  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3764  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3765  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3766  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3767  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3768  }
3769  }
3770  }
3771  }
3772 
3773  return SDValue();
3774 }
3775 
3776 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3777  EVT LoadResultTy, EVT &ExtVT) {
3778  if (!AndC->getAPIntValue().isMask())
3779  return false;
3780 
3781  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3782 
3783  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3784  EVT LoadedVT = LoadN->getMemoryVT();
3785 
3786  if (ExtVT == LoadedVT &&
3787  (!LegalOperations ||
3788  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3789  // ZEXTLOAD will match without needing to change the size of the value being
3790  // loaded.
3791  return true;
3792  }
3793 
3794  // Do not change the width of a volatile load.
3795  if (LoadN->isVolatile())
3796  return false;
3797 
3798  // Do not generate loads of non-round integer types since these can
3799  // be expensive (and would be wrong if the type is not byte sized).
3800  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3801  return false;
3802 
3803  if (LegalOperations &&
3804  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3805  return false;
3806 
3807  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3808  return false;
3809 
3810  return true;
3811 }
3812 
3813 bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
3814  EVT &ExtVT, unsigned ShAmt) {
3815  // Don't transform one with multiple uses, this would require adding a new
3816  // load.
3817  if (!SDValue(LoadN, 0).hasOneUse())
3818  return false;
3819 
3820  if (LegalOperations &&
3821  !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
3822  return false;
3823 
3824  // Do not generate loads of non-round integer types since these can
3825  // be expensive (and would be wrong if the type is not byte sized).
3826  if (!ExtVT.isRound())
3827  return false;
3828 
3829  // Don't change the width of a volatile load.
3830  if (LoadN->isVolatile())
3831  return false;
3832 
3833  // Verify that we are actually reducing a load width here.
3834  if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
3835  return false;
3836 
3837  // For the transform to be legal, the load must produce only two values
3838  // (the value loaded and the chain). Don't transform a pre-increment
3839  // load, for example, which produces an extra value. Otherwise the
3840  // transformation is not equivalent, and the downstream logic to replace
3841  // uses gets things wrong.
3842  if (LoadN->getNumValues() > 2)
3843  return false;
3844 
3845  // If the load that we're shrinking is an extload and we're not just
3846  // discarding the extension we can't simply shrink the load. Bail.
3847  // TODO: It would be possible to merge the extensions in some cases.
3848  if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
3849  LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
3850  return false;
3851 
3852  if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
3853  return false;
3854 
3855  // It's not possible to generate a constant of extended or untyped type.
3856  EVT PtrType = LoadN->getOperand(1).getValueType();
3857  if (PtrType == MVT::Untyped || PtrType.isExtended())
3858  return false;
3859 
3860  return true;
3861 }
3862 
3863 bool DAGCombiner::SearchForAndLoads(SDNode *N,
3865  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
3867  SDNode *&NodeToMask) {
3868  // Recursively search for the operands, looking for loads which can be
3869  // narrowed.
3870  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
3871  SDValue Op = N->getOperand(i);
3872 
3873  if (Op.getValueType().isVector())
3874  return false;
3875 
3876  // Some constants may need fixing up later if they are too large.
3877  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3878  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
3879  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
3880  NodesWithConsts.insert(N);
3881  continue;
3882  }
3883 
3884  if (!Op.hasOneUse())
3885  return false;
3886 
3887  switch(Op.getOpcode()) {
3888  case ISD::LOAD: {
3889  auto *Load = cast<LoadSDNode>(Op);
3890  EVT ExtVT;
3891  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
3892  isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
3893 
3894  // ZEXTLOAD is already small enough.
3895  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
3896  ExtVT.bitsGE(Load->getMemoryVT()))
3897  continue;
3898 
3899  // Use LE to convert equal sized loads to zext.
3900  if (ExtVT.bitsLE(Load->getMemoryVT()))
3901  Loads.insert(Load);
3902 
3903  continue;
3904  }
3905  return false;
3906  }
3907  case ISD::ZERO_EXTEND:
3908  case ISD::AssertZext: {
3909  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
3910  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3911  EVT VT = Op.getOpcode() == ISD::AssertZext ?
3912  cast<VTSDNode>(Op.getOperand(1))->getVT() :
3913  Op.getOperand(0).getValueType();
3914 
3915  // We can accept extending nodes if the mask is wider or an equal
3916  // width to the original type.
3917  if (ExtVT.bitsGE(VT))
3918  continue;
3919  break;
3920  }
3921  case ISD::OR:
3922  case ISD::XOR:
3923  case ISD::AND:
3924  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
3925  NodeToMask))
3926  return false;
3927  continue;
3928  }
3929 
3930  // Allow one node which will masked along with any loads found.
3931  if (NodeToMask)
3932  return false;
3933  NodeToMask = Op.getNode();
3934  }
3935  return true;
3936 }
3937 
3938 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
3939  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
3940  if (!Mask)
3941  return false;
3942 
3943  if (!Mask->getAPIntValue().isMask())
3944  return false;
3945 
3946  // No need to do anything if the and directly uses a load.
3947  if (isa<LoadSDNode>(N->getOperand(0)))
3948  return false;
3949 
3951  SmallPtrSet<SDNode*, 2> NodesWithConsts;
3952  SDNode *FixupNode = nullptr;
3953  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
3954  if (Loads.size() == 0)
3955  return false;
3956 
3957  DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
3958  SDValue MaskOp = N->getOperand(1);
3959 
3960  // If it exists, fixup the single node we allow in the tree that needs
3961  // masking.
3962  if (FixupNode) {
3963  DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
3964  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
3965  FixupNode->getValueType(0),
3966  SDValue(FixupNode, 0), MaskOp);
3967  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
3968  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
3969  MaskOp);
3970  }
3971 
3972  // Narrow any constants that need it.
3973  for (auto *LogicN : NodesWithConsts) {
3974  SDValue Op0 = LogicN->getOperand(0);
3975  SDValue Op1 = LogicN->getOperand(1);
3976 
3977  if (isa<ConstantSDNode>(Op0))
3978  std::swap(Op0, Op1);
3979 
3980  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
3981  Op1, MaskOp);
3982 
3983  DAG.UpdateNodeOperands(LogicN, Op0, And);
3984  }
3985 
3986  // Create narrow loads.
3987  for (auto *Load : Loads) {
3988  DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
3989  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
3990  SDValue(Load, 0), MaskOp);
3991  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
3992  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
3993  SDValue NewLoad = ReduceLoadWidth(And.getNode());
3994  assert(NewLoad &&
3995  "Shouldn't be masking the load if it can't be narrowed");
3996  CombineTo(Load, NewLoad, NewLoad.getValue(1));
3997  }
3998  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
3999  return true;
4000  }
4001  return false;
4002 }
4003 
4004 SDValue DAGCombiner::visitAND(SDNode *N) {
4005  SDValue N0 = N->getOperand(0);
4006  SDValue N1 = N->getOperand(1);
4007  EVT VT = N1.getValueType();
4008 
4009  // x & x --> x
4010  if (N0 == N1)
4011  return N0;
4012 
4013  // fold vector ops
4014  if (VT.isVector()) {
4015  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4016  return FoldedVOp;
4017 
4018  // fold (and x, 0) -> 0, vector edition
4020  // do not return N0, because undef node may exist in N0
4022  SDLoc(N), N0.getValueType());
4024  // do not return N1, because undef node may exist in N1
4026  SDLoc(N), N1.getValueType());
4027 
4028  // fold (and x, -1) -> x, vector edition
4030  return N1;
4032  return N0;
4033  }
4034 
4035  // fold (and c1, c2) -> c1&c2
4038  if (N0C && N1C && !N1C->isOpaque())
4039  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4040  // canonicalize constant to RHS
4043  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4044  // fold (and x, -1) -> x
4045  if (isAllOnesConstant(N1))
4046  return N0;
4047  // if (and x, c) is known to be zero, return 0
4048  unsigned BitWidth = VT.getScalarSizeInBits();
4049  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4050  APInt::getAllOnesValue(BitWidth)))
4051  return DAG.getConstant(0, SDLoc(N), VT);
4052 
4053  if (SDValue NewSel = foldBinOpIntoSelect(N))
4054  return NewSel;
4055 
4056  // reassociate and
4057  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
4058  return RAND;
4059 
4060  // Try to convert a constant mask AND into a shuffle clear mask.
4061  if (VT.isVector())
4062  if (SDValue Shuffle = XformToShuffleWithZero(N))
4063  return Shuffle;
4064 
4065  // fold (and (or x, C), D) -> D if (C & D) == D
4066  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4067  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4068  };
4069  if (N0.getOpcode() == ISD::OR &&
4070  matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4071  return N1;
4072  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4073  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4074  SDValue N0Op0 = N0.getOperand(0);
4075  APInt Mask = ~N1C->getAPIntValue();
4076  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4077  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4078  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4079  N0.getValueType(), N0Op0);
4080 
4081  // Replace uses of the AND with uses of the Zero extend node.
4082  CombineTo(N, Zext);
4083 
4084  // We actually want to replace all uses of the any_extend with the
4085  // zero_extend, to avoid duplicating things. This will later cause this
4086  // AND to be folded.
4087  CombineTo(N0.getNode(), Zext);
4088  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4089  }
4090  }
4091  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4092  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4093  // already be zero by virtue of the width of the base type of the load.
4094  //
4095  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4096  // more cases.
4097  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4099  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4100  N0.getOperand(0).getResNo() == 0) ||
4101  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4102  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4103  N0 : N0.getOperand(0) );
4104 
4105  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4106  // This can be a pure constant or a vector splat, in which case we treat the
4107  // vector as a scalar and use the splat value.
4109  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4110  Constant = C->getAPIntValue();
4111  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4112  APInt SplatValue, SplatUndef;
4113  unsigned SplatBitSize;
4114  bool HasAnyUndefs;
4115  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4116  SplatBitSize, HasAnyUndefs);
4117  if (IsSplat) {
4118  // Undef bits can contribute to a possible optimisation if set, so
4119  // set them.
4120  SplatValue |= SplatUndef;
4121 
4122  // The splat value may be something like "0x00FFFFFF", which means 0 for
4123  // the first vector value and FF for the rest, repeating. We need a mask
4124  // that will apply equally to all members of the vector, so AND all the
4125  // lanes of the constant together.
4126  EVT VT = Vector->getValueType(0);
4127  unsigned BitWidth = VT.getScalarSizeInBits();
4128 
4129  // If the splat value has been compressed to a bitlength lower
4130  // than the size of the vector lane, we need to re-expand it to
4131  // the lane size.
4132  if (BitWidth > SplatBitSize)
4133  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4134  SplatBitSize < BitWidth;
4135  SplatBitSize = SplatBitSize * 2)
4136  SplatValue |= SplatValue.shl(SplatBitSize);
4137 
4138  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4139  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4140  if (SplatBitSize % BitWidth == 0) {
4141  Constant = APInt::getAllOnesValue(BitWidth);
4142  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4143  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4144  }
4145  }
4146  }
4147 
4148  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4149  // actually legal and isn't going to get expanded, else this is a false
4150  // optimisation.
4151  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4152  Load->getValueType(0),
4153  Load->getMemoryVT());
4154 
4155  // Resize the constant to the same size as the original memory access before
4156  // extension. If it is still the AllOnesValue then this AND is completely
4157  // unneeded.
4158  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4159 
4160  bool B;
4161  switch (Load->getExtensionType()) {
4162  default: B = false; break;
4163  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4164  case ISD::ZEXTLOAD:
4165  case ISD::NON_EXTLOAD: B = true; break;
4166  }
4167 
4168  if (B && Constant.isAllOnesValue()) {
4169  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4170  // preserve semantics once we get rid of the AND.
4171  SDValue NewLoad(Load, 0);
4172 
4173  // Fold the AND away. NewLoad may get replaced immediately.
4174  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4175 
4176  if (Load->getExtensionType() == ISD::EXTLOAD) {
4177  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4178  Load->getValueType(0), SDLoc(Load),
4179  Load->getChain(), Load->getBasePtr(),
4180  Load->getOffset(), Load->getMemoryVT(),
4181  Load->getMemOperand());
4182  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4183  if (Load->getNumValues() == 3) {
4184  // PRE/POST_INC loads have 3 values.
4185  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4186  NewLoad.getValue(2) };
4187  CombineTo(Load, To, 3, true);
4188  } else {
4189  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4190  }
4191  }
4192 
4193  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4194  }
4195  }
4196 
4197  // fold (and (load x), 255) -> (zextload x, i8)
4198  // fold (and (extload x, i16), 255) -> (zextload x, i8)
4199  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4200  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4201  (N0.getOpcode() == ISD::ANY_EXTEND &&
4202  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4203  if (SDValue Res = ReduceLoadWidth(N)) {
4204  LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4205  ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4206 
4207  AddToWorklist(N);
4208  CombineTo(LN0, Res, Res.getValue(1));
4209  return SDValue(N, 0);
4210  }
4211  }
4212 
4213  if (Level >= AfterLegalizeTypes) {
4214  // Attempt to propagate the AND back up to the leaves which, if they're
4215  // loads, can be combined to narrow loads and the AND node can be removed.
4216  // Perform after legalization so that extend nodes will already be
4217  // combined into the loads.
4218  if (BackwardsPropagateMask(N, DAG)) {
4219  return SDValue(N, 0);
4220  }
4221  }
4222 
4223  if (SDValue Combined = visitANDLike(N0, N1, N))
4224  return Combined;
4225 
4226  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4227  if (N0.getOpcode() == N1.getOpcode())
4228  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4229  return Tmp;
4230 
4231  // Masking the negated extension of a boolean is just the zero-extended
4232  // boolean:
4233  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4234  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4235  //
4236  // Note: the SimplifyDemandedBits fold below can make an information-losing
4237  // transform, and then we have no way to find this better fold.
4238  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4240  SDValue SubRHS = N0.getOperand(1);
4241  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4242  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4243  return SubRHS;
4244  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4245  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4246  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4247  }
4248  }
4249 
4250  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4251  // fold (and (sra)) -> (and (srl)) when possible.
4252  if (SimplifyDemandedBits(SDValue(N, 0)))
4253  return SDValue(N, 0);
4254 
4255  // fold (zext_inreg (extload x)) -> (zextload x)
4256  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
4257  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4258  EVT MemVT = LN0->getMemoryVT();
4259  // If we zero all the possible extended bits, then we can turn this into
4260  // a zextload if we are running before legalize or the operation is legal.
4261  unsigned BitWidth = N1.getScalarValueSizeInBits();
4262  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4263  BitWidth - MemVT.getScalarSizeInBits())) &&
4264  ((!LegalOperations && !LN0->isVolatile()) ||
4265  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4266  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4267  LN0->getChain(), LN0->getBasePtr(),
4268  MemVT, LN0->getMemOperand());
4269  AddToWorklist(N);
4270  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4271  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4272  }
4273  }
4274  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
4275  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4276  N0.hasOneUse()) {
4277  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4278  EVT MemVT = LN0->getMemoryVT();
4279  // If we zero all the possible extended bits, then we can turn this into
4280  // a zextload if we are running before legalize or the operation is legal.
4281  unsigned BitWidth = N1.getScalarValueSizeInBits();
4282  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
4283  BitWidth - MemVT.getScalarSizeInBits())) &&
4284  ((!LegalOperations && !LN0->isVolatile()) ||
4285  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
4286  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
4287  LN0->getChain(), LN0->getBasePtr(),
4288  MemVT, LN0->getMemOperand());
4289  AddToWorklist(N);
4290  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4291  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4292  }
4293  }
4294  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
4295  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
4296  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
4297  N0.getOperand(1), false))
4298  return BSwap;
4299  }
4300 
4301  return SDValue();
4302 }
4303 
4304 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4305 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4306  bool DemandHighBits) {
4307  if (!LegalOperations)
4308  return SDValue();
4309 
4310  EVT VT = N->getValueType(0);
4311  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4312  return SDValue();
4313  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4314  return SDValue();
4315 
4316  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4317  bool LookPassAnd0 = false;
4318  bool LookPassAnd1 = false;
4319  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4320  std::swap(N0, N1);
4321  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4322  std::swap(N0, N1);
4323  if (N0.getOpcode() == ISD::AND) {
4324  if (!N0.getNode()->hasOneUse())
4325  return SDValue();
4327  if (!N01C || N01C->getZExtValue() != 0xFF00)
4328  return SDValue();
4329  N0 = N0.getOperand(0);
4330  LookPassAnd0 = true;
4331  }
4332 
4333  if (N1.getOpcode() == ISD::AND) {
4334  if (!N1.getNode()->hasOneUse())
4335  return SDValue();
4337  if (!N11C || N11C->getZExtValue() != 0xFF)
4338  return SDValue();
4339  N1 = N1.getOperand(0);
4340  LookPassAnd1 = true;
4341  }
4342 
4343  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4344  std::swap(N0, N1);
4345  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4346  return SDValue();
4347  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4348  return SDValue();
4349 
4352  if (!N01C || !N11C)
4353  return SDValue();
4354  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4355  return SDValue();
4356 
4357  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4358  SDValue N00 = N0->getOperand(0);
4359  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4360  if (!N00.getNode()->hasOneUse())
4361  return SDValue();
4363  if (!N001C || N001C->getZExtValue() != 0xFF)
4364  return SDValue();
4365  N00 = N00.getOperand(0);
4366  LookPassAnd0 = true;
4367  }
4368 
4369  SDValue N10 = N1->getOperand(0);
4370  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4371  if (!N10.getNode()->hasOneUse())
4372  return SDValue();
4374  if (!N101C || N101C->getZExtValue() != 0xFF00)
4375  return SDValue();
4376  N10 = N10.getOperand(0);
4377  LookPassAnd1 = true;
4378  }
4379 
4380  if (N00 != N10)
4381  return SDValue();
4382 
4383  // Make sure everything beyond the low halfword gets set to zero since the SRL
4384  // 16 will clear the top bits.
4385  unsigned OpSizeInBits = VT.getSizeInBits();
4386  if (DemandHighBits && OpSizeInBits > 16) {
4387  // If the left-shift isn't masked out then the only way this is a bswap is
4388  // if all bits beyond the low 8 are 0. In that case the entire pattern
4389  // reduces to a left shift anyway: leave it for other parts of the combiner.
4390  if (!LookPassAnd0)
4391  return SDValue();
4392 
4393  // However, if the right shift isn't masked out then it might be because
4394  // it's not needed. See if we can spot that too.
4395  if (!LookPassAnd1 &&
4396  !DAG.MaskedValueIsZero(
4397  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4398  return SDValue();
4399  }
4400 
4401  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4402  if (OpSizeInBits > 16) {
4403  SDLoc DL(N);
4404  Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4405  DAG.getConstant(OpSizeInBits - 16, DL,
4406  getShiftAmountTy(VT)));
4407  }
4408  return Res;
4409 }
4410 
4411 /// Return true if the specified node is an element that makes up a 32-bit
4412 /// packed halfword byteswap.
4413 /// ((x & 0x000000ff) << 8) |
4414 /// ((x & 0x0000ff00) >> 8) |
4415 /// ((x & 0x00ff0000) << 8) |
4416 /// ((x & 0xff000000) >> 8)
4418  if (!N.getNode()->hasOneUse())
4419  return false;
4420 
4421  unsigned Opc = N.getOpcode();
4422  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4423  return false;
4424 
4425  SDValue N0 = N.getOperand(0);
4426  unsigned Opc0 = N0.getOpcode();
4427  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4428  return false;
4429 
4430  ConstantSDNode *N1C = nullptr;
4431  // SHL or SRL: look upstream for AND mask operand
4432  if (Opc == ISD::AND)
4433  N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4434  else if (Opc0 == ISD::AND)
4435  N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4436  if (!N1C)
4437  return false;
4438 
4439  unsigned MaskByteOffset;
4440  switch (N1C->getZExtValue()) {
4441  default:
4442  return false;
4443  case 0xFF: MaskByteOffset = 0; break;
4444  case 0xFF00: MaskByteOffset = 1; break;
4445  case 0xFF0000: MaskByteOffset = 2; break;
4446  case 0xFF000000: MaskByteOffset = 3; break;
4447  }
4448 
4449  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4450  if (Opc == ISD::AND) {
4451  if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4452  // (x >> 8) & 0xff
4453  // (x >> 8) & 0xff0000
4454  if (Opc0 != ISD::SRL)
4455  return false;
4457  if (!C || C->getZExtValue() != 8)
4458  return false;
4459  } else {
4460  // (x << 8) & 0xff00
4461  // (x << 8) & 0xff000000
4462  if (Opc0 != ISD::SHL)
4463  return false;
4465  if (!C || C->getZExtValue() != 8)
4466  return false;
4467  }
4468  } else if (Opc == ISD::SHL) {
4469  // (x & 0xff) << 8
4470  // (x & 0xff0000) << 8
4471  if (MaskByteOffset != 0 && MaskByteOffset != 2)
4472  return false;
4474  if (!C || C->getZExtValue() != 8)
4475  return false;
4476  } else { // Opc == ISD::SRL
4477  // (x & 0xff00) >> 8
4478  // (x & 0xff000000) >> 8
4479  if (MaskByteOffset != 1 && MaskByteOffset != 3)
4480  return false;
4482  if (!C || C->getZExtValue() != 8)
4483  return false;
4484  }
4485 
4486  if (Parts[MaskByteOffset])
4487  return false;
4488 
4489  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4490  return true;
4491 }
4492 
4493 /// Match a 32-bit packed halfword bswap. That is
4494 /// ((x & 0x000000ff) << 8) |
4495 /// ((x & 0x0000ff00) >> 8) |
4496 /// ((x & 0x00ff0000) << 8) |
4497 /// ((x & 0xff000000) >> 8)
4498 /// => (rotl (bswap x), 16)
4499 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4500  if (!LegalOperations)
4501  return SDValue();
4502 
4503  EVT VT = N->getValueType(0);
4504  if (VT != MVT::i32)
4505  return SDValue();
4506  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4507  return SDValue();
4508 
4509  // Look for either
4510  // (or (or (and), (and)), (or (and), (and)))
4511  // (or (or (or (and), (and)), (and)), (and))
4512  if (N0.getOpcode() != ISD::OR)
4513  return SDValue();
4514  SDValue N00 = N0.getOperand(0);
4515  SDValue N01 = N0.getOperand(1);
4516  SDNode *Parts[4] = {};
4517 
4518  if (N1.getOpcode() == ISD::OR &&
4519  N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4520  // (or (or (and), (and)), (or (and), (and)))
4521  if (!isBSwapHWordElement(N00, Parts))
4522  return SDValue();
4523 
4524  if (!isBSwapHWordElement(N01, Parts))
4525  return SDValue();
4526  SDValue N10 = N1.getOperand(0);
4527  if (!isBSwapHWordElement(N10, Parts))
4528  return SDValue();
4529  SDValue N11 = N1.getOperand(1);
4530  if (!isBSwapHWordElement(N11, Parts))
4531  return SDValue();
4532  } else {
4533  // (or (or (or (and), (and)), (and)), (and))
4534  if (!isBSwapHWordElement(N1, Parts))
4535  return SDValue();
4536  if (!isBSwapHWordElement(N01, Parts))
4537  return SDValue();
4538  if (N00.getOpcode() != ISD::OR)
4539  return SDValue();
4540  SDValue N000 = N00.getOperand(0);
4541  if (!isBSwapHWordElement(N000, Parts))
4542  return SDValue();
4543  SDValue N001 = N00.getOperand(1);
4544  if (!isBSwapHWordElement(N001, Parts))
4545  return SDValue();
4546  }
4547 
4548  // Make sure the parts are all coming from the same node.
4549  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4550  return SDValue();
4551 
4552  SDLoc DL(N);
4553  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4554  SDValue(Parts[0], 0));
4555 
4556  // Result of the bswap should be rotated by 16. If it's not legal, then
4557  // do (x << 16) | (x >> 16).
4558  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4559  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4560  return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4561  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4562  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4563  return DAG.getNode(ISD::OR, DL, VT,
4564  DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4565  DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4566 }
4567 
4568 /// This contains all DAGCombine rules which reduce two values combined by
4569 /// an Or operation to a single value \see visitANDLike().
4570 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4571  EVT VT = N1.getValueType();
4572  SDLoc DL(N);
4573 
4574  // fold (or x, undef) -> -1
4575  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4576  return DAG.getAllOnesConstant(DL, VT);
4577 
4578  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4579  return V;
4580 
4581  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
4582  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4583  // Don't increase # computations.
4584  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4585  // We can only do this xform if we know that bits from X that are set in C2
4586  // but not in C1 are already zero. Likewise for Y.
4587  if (const ConstantSDNode *N0O1C =
4589  if (const ConstantSDNode *N1O1C =
4591  // We can only do this xform if we know that bits from X that are set in
4592  // C2 but not in C1 are already zero. Likewise for Y.
4593  const APInt &LHSMask = N0O1C->getAPIntValue();
4594  const APInt &RHSMask = N1O1C->getAPIntValue();
4595 
4596  if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4597  DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4598  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4599  N0.getOperand(0), N1.getOperand(0));
4600  return DAG.getNode(ISD::AND, DL, VT, X,
4601  DAG.getConstant(LHSMask | RHSMask, DL, VT));
4602  }
4603  }
4604  }
4605  }
4606 
4607  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4608  if (N0.getOpcode() == ISD::AND &&
4609  N1.getOpcode() == ISD::AND &&
4610  N0.getOperand(0) == N1.getOperand(0) &&
4611  // Don't increase # computations.
4612  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4613  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4614  N0.getOperand(1), N1.getOperand(1));
4615  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4616  }
4617 
4618  return SDValue();
4619 }
4620 
4621 SDValue DAGCombiner::visitOR(SDNode *N) {
4622  SDValue N0 = N->getOperand(0);
4623  SDValue N1 = N->getOperand(1);
4624  EVT VT = N1.getValueType();
4625 
4626  // x | x --> x
4627  if (N0 == N1)
4628  return N0;
4629 
4630  // fold vector ops
4631  if (VT.isVector()) {
4632  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4633  return FoldedVOp;
4634 
4635  // fold (or x, 0) -> x, vector edition
4637  return N1;
4639  return N0;
4640 
4641  // fold (or x, -1) -> -1, vector edition
4643  // do not return N0, because undef node may exist in N0
4644  return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4646  // do not return N1, because undef node may exist in N1
4647  return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4648 
4649  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4650  // Do this only if the resulting shuffle is legal.
4651  if (isa<ShuffleVectorSDNode>(N0) &&
4652  isa<ShuffleVectorSDNode>(N1) &&
4653  // Avoid folding a node with illegal type.
4654  TLI.isTypeLegal(VT)) {
4655  bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4656  bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4657  bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4658  bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4659  // Ensure both shuffles have a zero input.
4660  if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4661  assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4662  assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4663  const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4664  const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4665  bool CanFold = true;
4666  int NumElts = VT.getVectorNumElements();
4667  SmallVector<int, 4> Mask(NumElts);
4668 
4669  for (int i = 0; i != NumElts; ++i) {
4670  int M0 = SV0->getMaskElt(i);
4671  int M1 = SV1->getMaskElt(i);
4672 
4673  // Determine if either index is pointing to a zero vector.
4674  bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4675  bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4676 
4677  // If one element is zero and the otherside is undef, keep undef.
4678  // This also handles the case that both are undef.
4679  if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4680  Mask[i] = -1;
4681  continue;
4682  }
4683 
4684  // Make sure only one of the elements is zero.
4685  if (M0Zero == M1Zero) {
4686  CanFold = false;
4687  break;
4688  }
4689 
4690  assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4691 
4692  // We have a zero and non-zero element. If the non-zero came from
4693  // SV0 make the index a LHS index. If it came from SV1, make it
4694  // a RHS index. We need to mod by NumElts because we don't care
4695  // which operand it came from in the original shuffles.
4696  Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4697  }
4698 
4699  if (CanFold) {
4700  SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4701  SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4702 
4703  bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4704  if (!LegalMask) {
4705  std::swap(NewLHS, NewRHS);
4707  LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4708  }
4709 
4710  if (LegalMask)
4711  return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4712  }
4713  }
4714  }
4715  }
4716 
4717  // fold (or c1, c2) -> c1|c2
4720  if (N0C && N1C && !N1C->isOpaque())
4721  return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4722  // canonicalize constant to RHS
4725  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4726  // fold (or x, 0) -> x
4727  if (isNullConstant(N1))
4728  return N0;
4729  // fold (or x, -1) -> -1
4730  if (isAllOnesConstant(N1))
4731  return N1;
4732 
4733  if (SDValue NewSel = foldBinOpIntoSelect(N))
4734  return NewSel;
4735 
4736  // fold (or x, c) -> c iff (x & ~c) == 0
4737  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4738  return N1;
4739 
4740  if (SDValue Combined = visitORLike(N0, N1, N))
4741  return Combined;
4742 
4743  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4744  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4745  return BSwap;
4746  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4747  return BSwap;
4748 
4749  // reassociate or
4750  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4751  return ROR;
4752 
4753  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4754  // iff (c1 & c2) != 0.
4755  auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4756  return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
4757  };
4758  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4759  matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
4760  if (SDValue COR = DAG.FoldConstantArithmetic(
4761  ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
4762  SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
4763  AddToWorklist(IOR.getNode());
4764  return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
4765  }
4766  }
4767 
4768  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
4769  if (N0.getOpcode() == N1.getOpcode())
4770  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4771  return Tmp;
4772 
4773  // See if this is some rotate idiom.
4774  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4775  return SDValue(Rot, 0);
4776 
4777  if (SDValue Load = MatchLoadCombine(N))
4778  return Load;
4779 
4780  // Simplify the operands using demanded-bits information.
4781  if (SimplifyDemandedBits(SDValue(N, 0)))
4782  return SDValue(N, 0);
4783 
4784  return SDValue();
4785 }
4786 
4787 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4788 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4789  if (Op.getOpcode() == ISD::AND) {
4791  Mask = Op.getOperand(1);
4792  Op = Op.getOperand(0);
4793  } else {
4794  return false;
4795  }
4796  }
4797 
4798  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4799  Shift = Op;
4800  return true;
4801  }
4802 
4803  return false;
4804 }
4805 
4806 // Return true if we can prove that, whenever Neg and Pos are both in the
4807 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
4808 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4809 //
4810 // (or (shift1 X, Neg), (shift2 X, Pos))
4811 //
4812 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4813 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
4814 // to consider shift amounts with defined behavior.
4815 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4816  // If EltSize is a power of 2 then:
4817  //
4818  // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4819  // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4820  //
4821  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4822  // for the stronger condition:
4823  //
4824  // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
4825  //
4826  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4827  // we can just replace Neg with Neg' for the rest of the function.
4828  //
4829  // In other cases we check for the even stronger condition:
4830  //
4831  // Neg == EltSize - Pos [B]
4832  //
4833  // for all Neg and Pos. Note that the (or ...) then invokes undefined
4834  // behavior if Pos == 0 (and consequently Neg == EltSize).
4835  //
4836  // We could actually use [A] whenever EltSize is a power of 2, but the
4837  // only extra cases that it would match are those uninteresting ones
4838  // where Neg and Pos are never in range at the same time. E.g. for
4839  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4840  // as well as (sub 32, Pos), but:
4841  //
4842  // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4843  //
4844  // always invokes undefined behavior for 32-bit X.
4845  //
4846  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4847  unsigned MaskLoBits = 0;
4848  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4849  if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4850  if (NegC->getAPIntValue() == EltSize - 1) {
4851  Neg = Neg.getOperand(0);
4852  MaskLoBits = Log2_64(EltSize);
4853  }
4854  }
4855  }
4856 
4857  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4858  if (Neg.getOpcode() != ISD::SUB)
4859  return false;
4861  if (!NegC)
4862  return false;
4863  SDValue NegOp1 = Neg.getOperand(1);
4864 
4865  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4866  // Pos'. The truncation is redundant for the purpose of the equality.
4867  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4868  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4869  if (PosC->getAPIntValue() == EltSize - 1)
4870  Pos = Pos.getOperand(0);
4871 
4872  // The condition we need is now:
4873  //
4874  // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4875  //
4876  // If NegOp1 == Pos then we need:
4877  //
4878  // EltSize & Mask == NegC & Mask
4879  //
4880  // (because "x & Mask" is a truncation and distributes through subtraction).
4881  APInt Width;
4882  if (Pos == NegOp1)
4883  Width = NegC->getAPIntValue();
4884 
4885  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4886  // Then the condition we want to prove becomes:
4887  //
4888  // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4889  //
4890  // which, again because "x & Mask" is a truncation, becomes:
4891  //
4892  // NegC & Mask == (EltSize - PosC) & Mask
4893  // EltSize & Mask == (NegC + PosC) & Mask
4894  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4895  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4896  Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4897  else
4898  return false;
4899  } else
4900  return false;
4901 
4902  // Now we just need to check that EltSize & Mask == Width & Mask.
4903  if (MaskLoBits)
4904  // EltSize & Mask is 0 since Mask is EltSize - 1.
4905  return Width.getLoBits(MaskLoBits) == 0;
4906  return Width == EltSize;
4907 }
4908 
4909 // A subroutine of MatchRotate used once we have found an OR of two opposite
4910 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
4911 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4912 // former being preferred if supported. InnerPos and InnerNeg are Pos and
4913 // Neg with outer conversions stripped away.