LLVM  7.0.0svn
DAGCombiner.cpp
Go to the documentation of this file.
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/APFloat.h"
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 #include <vector>
76 
77 using namespace llvm;
78 
79 #define DEBUG_TYPE "dagcombine"
80 
81 STATISTIC(NodesCombined , "Number of dag nodes combined");
82 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
83 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
84 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
85 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
86 STATISTIC(SlicedLoads, "Number of load sliced");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140 
141  /// Set of nodes which have been combined (at least once).
142  ///
143  /// This is used to allow us to reliably add any operands of a DAG node
144  /// which have not yet been combined to the worklist.
145  SmallPtrSet<SDNode *, 32> CombinedNodes;
146 
147  // AA - Used for DAG load/store alias analysis.
148  AliasAnalysis *AA;
149 
150  /// When an instruction is simplified, add all users of the instruction to
151  /// the work lists because they might get more simplified now.
152  void AddUsersToWorklist(SDNode *N) {
153  for (SDNode *Node : N->uses())
154  AddToWorklist(Node);
155  }
156 
157  /// Call the node-specific routine that folds each particular type of node.
158  SDValue visit(SDNode *N);
159 
160  public:
161  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
162  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
163  OptLevel(OL), AA(AA) {
164  ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
165 
166  MaximumLegalStoreInBits = 0;
167  for (MVT VT : MVT::all_valuetypes())
168  if (EVT(VT).isSimple() && VT != MVT::Other &&
169  TLI.isTypeLegal(EVT(VT)) &&
170  VT.getSizeInBits() >= MaximumLegalStoreInBits)
171  MaximumLegalStoreInBits = VT.getSizeInBits();
172  }
173 
174  /// Add to the worklist making sure its instance is at the back (next to be
175  /// processed.)
176  void AddToWorklist(SDNode *N) {
178  "Deleted Node added to Worklist");
179 
180  // Skip handle nodes as they can't usefully be combined and confuse the
181  // zero-use deletion strategy.
182  if (N->getOpcode() == ISD::HANDLENODE)
183  return;
184 
185  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
186  Worklist.push_back(N);
187  }
188 
189  /// Remove all instances of N from the worklist.
190  void removeFromWorklist(SDNode *N) {
191  CombinedNodes.erase(N);
192 
193  auto It = WorklistMap.find(N);
194  if (It == WorklistMap.end())
195  return; // Not in the worklist.
196 
197  // Null out the entry rather than erasing it to avoid a linear operation.
198  Worklist[It->second] = nullptr;
199  WorklistMap.erase(It);
200  }
201 
202  void deleteAndRecombine(SDNode *N);
203  bool recursivelyDeleteUnusedNodes(SDNode *N);
204 
205  /// Replaces all uses of the results of one DAG node with new values.
206  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
207  bool AddTo = true);
208 
209  /// Replaces all uses of the results of one DAG node with new values.
210  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
211  return CombineTo(N, &Res, 1, AddTo);
212  }
213 
214  /// Replaces all uses of the results of one DAG node with new values.
215  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
216  bool AddTo = true) {
217  SDValue To[] = { Res0, Res1 };
218  return CombineTo(N, To, 2, AddTo);
219  }
220 
221  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
222 
223  private:
224  unsigned MaximumLegalStoreInBits;
225 
226  /// Check the specified integer node value to see if it can be simplified or
227  /// if things it uses can be simplified by bit propagation.
228  /// If so, return true.
229  bool SimplifyDemandedBits(SDValue Op) {
230  unsigned BitWidth = Op.getScalarValueSizeInBits();
231  APInt Demanded = APInt::getAllOnesValue(BitWidth);
232  return SimplifyDemandedBits(Op, Demanded);
233  }
234 
235  /// Check the specified vector node value to see if it can be simplified or
236  /// if things it uses can be simplified as it only uses some of the
237  /// elements. If so, return true.
238  bool SimplifyDemandedVectorElts(SDValue Op) {
239  unsigned NumElts = Op.getValueType().getVectorNumElements();
240  APInt Demanded = APInt::getAllOnesValue(NumElts);
241  return SimplifyDemandedVectorElts(Op, Demanded);
242  }
243 
244  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
245  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded);
246 
247  bool CombineToPreIndexedLoadStore(SDNode *N);
248  bool CombineToPostIndexedLoadStore(SDNode *N);
249  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
250  bool SliceUpLoad(SDNode *N);
251 
252  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
253  /// load.
254  ///
255  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
256  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
257  /// \param EltNo index of the vector element to load.
258  /// \param OriginalLoad load that EVE came from to be replaced.
259  /// \returns EVE on success SDValue() on failure.
260  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
261  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
262  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
263  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
264  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
265  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
266  SDValue PromoteIntBinOp(SDValue Op);
267  SDValue PromoteIntShiftOp(SDValue Op);
268  SDValue PromoteExtend(SDValue Op);
269  bool PromoteLoad(SDValue Op);
270 
271  /// Call the node-specific routine that knows how to fold each
272  /// particular type of node. If that doesn't do anything, try the
273  /// target-specific DAG combines.
274  SDValue combine(SDNode *N);
275 
276  // Visitation implementation - Implement dag node combining for different
277  // node types. The semantics are as follows:
278  // Return Value:
279  // SDValue.getNode() == 0 - No change was made
280  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
281  // otherwise - N should be replaced by the returned Operand.
282  //
283  SDValue visitTokenFactor(SDNode *N);
284  SDValue visitMERGE_VALUES(SDNode *N);
285  SDValue visitADD(SDNode *N);
286  SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
287  SDValue visitSUB(SDNode *N);
288  SDValue visitADDC(SDNode *N);
289  SDValue visitUADDO(SDNode *N);
290  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
291  SDValue visitSUBC(SDNode *N);
292  SDValue visitUSUBO(SDNode *N);
293  SDValue visitADDE(SDNode *N);
294  SDValue visitADDCARRY(SDNode *N);
295  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
296  SDValue visitSUBE(SDNode *N);
297  SDValue visitSUBCARRY(SDNode *N);
298  SDValue visitMUL(SDNode *N);
299  SDValue useDivRem(SDNode *N);
300  SDValue visitSDIV(SDNode *N);
301  SDValue visitUDIV(SDNode *N);
302  SDValue visitREM(SDNode *N);
303  SDValue visitMULHU(SDNode *N);
304  SDValue visitMULHS(SDNode *N);
305  SDValue visitSMUL_LOHI(SDNode *N);
306  SDValue visitUMUL_LOHI(SDNode *N);
307  SDValue visitSMULO(SDNode *N);
308  SDValue visitUMULO(SDNode *N);
309  SDValue visitIMINMAX(SDNode *N);
310  SDValue visitAND(SDNode *N);
311  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
312  SDValue visitOR(SDNode *N);
313  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
314  SDValue visitXOR(SDNode *N);
315  SDValue SimplifyVBinOp(SDNode *N);
316  SDValue visitSHL(SDNode *N);
317  SDValue visitSRA(SDNode *N);
318  SDValue visitSRL(SDNode *N);
319  SDValue visitRotate(SDNode *N);
320  SDValue visitABS(SDNode *N);
321  SDValue visitBSWAP(SDNode *N);
322  SDValue visitBITREVERSE(SDNode *N);
323  SDValue visitCTLZ(SDNode *N);
324  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
325  SDValue visitCTTZ(SDNode *N);
326  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
327  SDValue visitCTPOP(SDNode *N);
328  SDValue visitSELECT(SDNode *N);
329  SDValue visitVSELECT(SDNode *N);
330  SDValue visitSELECT_CC(SDNode *N);
331  SDValue visitSETCC(SDNode *N);
332  SDValue visitSETCCE(SDNode *N);
333  SDValue visitSETCCCARRY(SDNode *N);
334  SDValue visitSIGN_EXTEND(SDNode *N);
335  SDValue visitZERO_EXTEND(SDNode *N);
336  SDValue visitANY_EXTEND(SDNode *N);
337  SDValue visitAssertExt(SDNode *N);
338  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
339  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
340  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
341  SDValue visitTRUNCATE(SDNode *N);
342  SDValue visitBITCAST(SDNode *N);
343  SDValue visitBUILD_PAIR(SDNode *N);
344  SDValue visitFADD(SDNode *N);
345  SDValue visitFSUB(SDNode *N);
346  SDValue visitFMUL(SDNode *N);
347  SDValue visitFMA(SDNode *N);
348  SDValue visitFDIV(SDNode *N);
349  SDValue visitFREM(SDNode *N);
350  SDValue visitFSQRT(SDNode *N);
351  SDValue visitFCOPYSIGN(SDNode *N);
352  SDValue visitSINT_TO_FP(SDNode *N);
353  SDValue visitUINT_TO_FP(SDNode *N);
354  SDValue visitFP_TO_SINT(SDNode *N);
355  SDValue visitFP_TO_UINT(SDNode *N);
356  SDValue visitFP_ROUND(SDNode *N);
357  SDValue visitFP_ROUND_INREG(SDNode *N);
358  SDValue visitFP_EXTEND(SDNode *N);
359  SDValue visitFNEG(SDNode *N);
360  SDValue visitFABS(SDNode *N);
361  SDValue visitFCEIL(SDNode *N);
362  SDValue visitFTRUNC(SDNode *N);
363  SDValue visitFFLOOR(SDNode *N);
364  SDValue visitFMINNUM(SDNode *N);
365  SDValue visitFMAXNUM(SDNode *N);
366  SDValue visitBRCOND(SDNode *N);
367  SDValue visitBR_CC(SDNode *N);
368  SDValue visitLOAD(SDNode *N);
369 
370  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
371  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
372 
373  SDValue visitSTORE(SDNode *N);
374  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
375  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
376  SDValue visitBUILD_VECTOR(SDNode *N);
377  SDValue visitCONCAT_VECTORS(SDNode *N);
378  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
379  SDValue visitVECTOR_SHUFFLE(SDNode *N);
380  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
381  SDValue visitINSERT_SUBVECTOR(SDNode *N);
382  SDValue visitMLOAD(SDNode *N);
383  SDValue visitMSTORE(SDNode *N);
384  SDValue visitMGATHER(SDNode *N);
385  SDValue visitMSCATTER(SDNode *N);
386  SDValue visitFP_TO_FP16(SDNode *N);
387  SDValue visitFP16_TO_FP(SDNode *N);
388 
389  SDValue visitFADDForFMACombine(SDNode *N);
390  SDValue visitFSUBForFMACombine(SDNode *N);
391  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
392 
393  SDValue XformToShuffleWithZero(SDNode *N);
394  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
395  SDValue RHS);
396 
397  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
398 
399  SDValue foldSelectOfConstants(SDNode *N);
400  SDValue foldVSelectOfConstants(SDNode *N);
401  SDValue foldBinOpIntoSelect(SDNode *BO);
402  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
403  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
404  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
405  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
406  SDValue N2, SDValue N3, ISD::CondCode CC,
407  bool NotExtCompare = false);
408  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
409  SDValue N2, SDValue N3, ISD::CondCode CC);
410  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
411  const SDLoc &DL);
412  SDValue unfoldMaskedMerge(SDNode *N);
413  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
414  const SDLoc &DL, bool foldBooleans);
415  SDValue rebuildSetCC(SDValue N);
416 
417  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
418  SDValue &CC) const;
419  bool isOneUseSetCC(SDValue N) const;
420 
421  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
422  unsigned HiOp);
423  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
424  SDValue CombineExtLoad(SDNode *N);
425  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
426  SDValue combineRepeatedFPDivisors(SDNode *N);
427  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
428  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
429  SDValue BuildSDIV(SDNode *N);
430  SDValue BuildSDIVPow2(SDNode *N);
431  SDValue BuildUDIV(SDNode *N);
432  SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
433  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
434  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
435  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
436  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
437  SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
438  SDNodeFlags Flags, bool Reciprocal);
439  SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
440  SDNodeFlags Flags, bool Reciprocal);
441  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
442  bool DemandHighBits = true);
443  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
444  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
445  SDValue InnerPos, SDValue InnerNeg,
446  unsigned PosOpcode, unsigned NegOpcode,
447  const SDLoc &DL);
448  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
449  SDValue MatchLoadCombine(SDNode *N);
450  SDValue ReduceLoadWidth(SDNode *N);
451  SDValue ReduceLoadOpStoreWidth(SDNode *N);
453  SDValue TransformFPLoadStorePair(SDNode *N);
454  SDValue convertBuildVecZextToZext(SDNode *N);
455  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
456  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
457  SDValue reduceBuildVecToShuffle(SDNode *N);
458  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
459  ArrayRef<int> VectorMask, SDValue VecIn1,
460  SDValue VecIn2, unsigned LeftIdx);
461  SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
462 
463  /// Walk up chain skipping non-aliasing memory nodes,
464  /// looking for aliasing nodes and adding them to the Aliases vector.
465  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
466  SmallVectorImpl<SDValue> &Aliases);
467 
468  /// Return true if there is any possibility that the two addresses overlap.
469  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
470 
471  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
472  /// chain (aliasing node.)
473  SDValue FindBetterChain(SDNode *N, SDValue Chain);
474 
475  /// Try to replace a store and any possibly adjacent stores on
476  /// consecutive chains with better chains. Return true only if St is
477  /// replaced.
478  ///
479  /// Notice that other chains may still be replaced even if the function
480  /// returns false.
481  bool findBetterNeighborChains(StoreSDNode *St);
482 
483  /// Match "(X shl/srl V1) & V2" where V2 may not be present.
484  bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
485 
486  /// Holds a pointer to an LSBaseSDNode as well as information on where it
487  /// is located in a sequence of memory operations connected by a chain.
488  struct MemOpLink {
489  // Ptr to the mem node.
490  LSBaseSDNode *MemNode;
491 
492  // Offset from the base ptr.
493  int64_t OffsetFromBase;
494 
495  MemOpLink(LSBaseSDNode *N, int64_t Offset)
496  : MemNode(N), OffsetFromBase(Offset) {}
497  };
498 
499  /// This is a helper function for visitMUL to check the profitability
500  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
501  /// MulNode is the original multiply, AddNode is (add x, c1),
502  /// and ConstNode is c2.
503  bool isMulAddWithConstProfitable(SDNode *MulNode,
504  SDValue &AddNode,
505  SDValue &ConstNode);
506 
507  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
508  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
509  /// the type of the loaded value to be extended.
510  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
511  EVT LoadResultTy, EVT &ExtVT);
512 
513  /// Helper function to calculate whether the given Load can have its
514  /// width reduced to ExtVT.
515  bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
516  EVT &ExtVT, unsigned ShAmt = 0);
517 
518  /// Used by BackwardsPropagateMask to find suitable loads.
519  bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
520  SmallPtrSetImpl<SDNode*> &NodeWithConsts,
521  ConstantSDNode *Mask, SDNode *&UncombinedNode);
522  /// Attempt to propagate a given AND node back to load leaves so that they
523  /// can be combined into narrow loads.
524  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
525 
526  /// Helper function for MergeConsecutiveStores which merges the
527  /// component store chains.
528  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
529  unsigned NumStores);
530 
531  /// This is a helper function for MergeConsecutiveStores. When the
532  /// source elements of the consecutive stores are all constants or
533  /// all extracted vector elements, try to merge them into one
534  /// larger store introducing bitcasts if necessary. \return True
535  /// if a merged store was created.
536  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
537  EVT MemVT, unsigned NumStores,
538  bool IsConstantSrc, bool UseVector,
539  bool UseTrunc);
540 
541  /// This is a helper function for MergeConsecutiveStores. Stores
542  /// that potentially may be merged with St are placed in
543  /// StoreNodes. RootNode is a chain predecessor to all store
544  /// candidates.
545  void getStoreMergeCandidates(StoreSDNode *St,
546  SmallVectorImpl<MemOpLink> &StoreNodes,
547  SDNode *&Root);
548 
549  /// Helper function for MergeConsecutiveStores. Checks if
550  /// candidate stores have indirect dependency through their
551  /// operands. RootNode is the predecessor to all stores calculated
552  /// by getStoreMergeCandidates and is used to prune the dependency check.
553  /// \return True if safe to merge.
554  bool checkMergeStoreCandidatesForDependencies(
555  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
556  SDNode *RootNode);
557 
558  /// Merge consecutive store operations into a wide store.
559  /// This optimization uses wide integers or vectors when possible.
560  /// \return number of stores that were merged into a merged store (the
561  /// affected nodes are stored as a prefix in \p StoreNodes).
562  bool MergeConsecutiveStores(StoreSDNode *N);
563 
564  /// Try to transform a truncation where C is a constant:
565  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
566  ///
567  /// \p N needs to be a truncation and its first operand an AND. Other
568  /// requirements are checked by the function (e.g. that trunc is
569  /// single-use) and if missed an empty SDValue is returned.
570  SDValue distributeTruncateThroughAnd(SDNode *N);
571 
572  public:
573  /// Runs the dag combiner on all nodes in the work list
574  void Run(CombineLevel AtLevel);
575 
576  SelectionDAG &getDAG() const { return DAG; }
577 
578  /// Returns a type large enough to hold any valid shift amount - before type
579  /// legalization these can be huge.
580  EVT getShiftAmountTy(EVT LHSTy) {
581  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
582  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
583  }
584 
585  /// This method returns true if we are running before type legalization or
586  /// if the specified VT is legal.
587  bool isTypeLegal(const EVT &VT) {
588  if (!LegalTypes) return true;
589  return TLI.isTypeLegal(VT);
590  }
591 
592  /// Convenience wrapper around TargetLowering::getSetCCResultType
593  EVT getSetCCResultType(EVT VT) const {
594  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
595  }
596 
597  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
598  SDValue OrigLoad, SDValue ExtLoad,
599  ISD::NodeType ExtType);
600  };
601 
602 /// This class is a DAGUpdateListener that removes any deleted
603 /// nodes from the worklist.
604 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
605  DAGCombiner &DC;
606 
607 public:
608  explicit WorklistRemover(DAGCombiner &dc)
609  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
610 
611  void NodeDeleted(SDNode *N, SDNode *E) override {
612  DC.removeFromWorklist(N);
613  }
614 };
615 
616 } // end anonymous namespace
617 
618 //===----------------------------------------------------------------------===//
619 // TargetLowering::DAGCombinerInfo implementation
620 //===----------------------------------------------------------------------===//
621 
623  ((DAGCombiner*)DC)->AddToWorklist(N);
624 }
625 
627 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
628  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
629 }
630 
632 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
633  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
634 }
635 
637 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
638  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
639 }
640 
643  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
644 }
645 
646 //===----------------------------------------------------------------------===//
647 // Helper Functions
648 //===----------------------------------------------------------------------===//
649 
650 void DAGCombiner::deleteAndRecombine(SDNode *N) {
651  removeFromWorklist(N);
652 
653  // If the operands of this node are only used by the node, they will now be
654  // dead. Make sure to re-visit them and recursively delete dead nodes.
655  for (const SDValue &Op : N->ops())
656  // For an operand generating multiple values, one of the values may
657  // become dead allowing further simplification (e.g. split index
658  // arithmetic from an indexed load).
659  if (Op->hasOneUse() || Op->getNumValues() > 1)
660  AddToWorklist(Op.getNode());
661 
662  DAG.DeleteNode(N);
663 }
664 
665 /// Return 1 if we can compute the negated form of the specified expression for
666 /// the same cost as the expression itself, or 2 if we can compute the negated
667 /// form more cheaply than the expression itself.
668 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
669  const TargetLowering &TLI,
670  const TargetOptions *Options,
671  unsigned Depth = 0) {
672  // fneg is removable even if it has multiple uses.
673  if (Op.getOpcode() == ISD::FNEG) return 2;
674 
675  // Don't allow anything with multiple uses unless we know it is free.
676  EVT VT = Op.getValueType();
677  if (!Op.hasOneUse())
678  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
679  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
680  return 0;
681 
682  // Don't recurse exponentially.
683  if (Depth > 6) return 0;
684 
685  switch (Op.getOpcode()) {
686  default: return false;
687  case ISD::ConstantFP: {
688  if (!LegalOperations)
689  return 1;
690 
691  // Don't invert constant FP values after legalization unless the target says
692  // the negated constant is legal.
693  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
694  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
695  }
696  case ISD::FADD:
697  // FIXME: determine better conditions for this xform.
698  if (!Options->UnsafeFPMath) return 0;
699 
700  // After operation legalization, it might not be legal to create new FSUBs.
701  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
702  return 0;
703 
704  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
705  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
706  Options, Depth + 1))
707  return V;
708  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
709  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
710  Depth + 1);
711  case ISD::FSUB:
712  // We can't turn -(A-B) into B-A when we honor signed zeros.
713  if (!Options->NoSignedZerosFPMath &&
714  !Op.getNode()->getFlags().hasNoSignedZeros())
715  return 0;
716 
717  // fold (fneg (fsub A, B)) -> (fsub B, A)
718  return 1;
719 
720  case ISD::FMUL:
721  case ISD::FDIV:
722  if (Options->HonorSignDependentRoundingFPMath()) return 0;
723 
724  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
725  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
726  Options, Depth + 1))
727  return V;
728 
729  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
730  Depth + 1);
731 
732  case ISD::FP_EXTEND:
733  case ISD::FP_ROUND:
734  case ISD::FSIN:
735  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
736  Depth + 1);
737  }
738 }
739 
740 /// If isNegatibleForFree returns true, return the newly negated expression.
742  bool LegalOperations, unsigned Depth = 0) {
743  const TargetOptions &Options = DAG.getTarget().Options;
744  // fneg is removable even if it has multiple uses.
745  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
746 
747  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
748 
749  const SDNodeFlags Flags = Op.getNode()->getFlags();
750 
751  switch (Op.getOpcode()) {
752  default: llvm_unreachable("Unknown code");
753  case ISD::ConstantFP: {
754  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
755  V.changeSign();
756  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
757  }
758  case ISD::FADD:
759  // FIXME: determine better conditions for this xform.
760  assert(Options.UnsafeFPMath);
761 
762  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
763  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
764  DAG.getTargetLoweringInfo(), &Options, Depth+1))
765  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
766  GetNegatedExpression(Op.getOperand(0), DAG,
767  LegalOperations, Depth+1),
768  Op.getOperand(1), Flags);
769  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
770  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
771  GetNegatedExpression(Op.getOperand(1), DAG,
772  LegalOperations, Depth+1),
773  Op.getOperand(0), Flags);
774  case ISD::FSUB:
775  // fold (fneg (fsub 0, B)) -> B
776  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
777  if (N0CFP->isZero())
778  return Op.getOperand(1);
779 
780  // fold (fneg (fsub A, B)) -> (fsub B, A)
781  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
782  Op.getOperand(1), Op.getOperand(0), Flags);
783 
784  case ISD::FMUL:
785  case ISD::FDIV:
787 
788  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
789  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
790  DAG.getTargetLoweringInfo(), &Options, Depth+1))
791  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
792  GetNegatedExpression(Op.getOperand(0), DAG,
793  LegalOperations, Depth+1),
794  Op.getOperand(1), Flags);
795 
796  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
797  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
798  Op.getOperand(0),
799  GetNegatedExpression(Op.getOperand(1), DAG,
800  LegalOperations, Depth+1), Flags);
801 
802  case ISD::FP_EXTEND:
803  case ISD::FSIN:
804  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
805  GetNegatedExpression(Op.getOperand(0), DAG,
806  LegalOperations, Depth+1));
807  case ISD::FP_ROUND:
808  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
809  GetNegatedExpression(Op.getOperand(0), DAG,
810  LegalOperations, Depth+1),
811  Op.getOperand(1));
812  }
813 }
814 
815 // APInts must be the same size for most operations, this helper
816 // function zero extends the shorter of the pair so that they match.
817 // We provide an Offset so that we can create bitwidths that won't overflow.
818 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
819  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
820  LHS = LHS.zextOrSelf(Bits);
821  RHS = RHS.zextOrSelf(Bits);
822 }
823 
824 // Return true if this node is a setcc, or is a select_cc
825 // that selects between the target values used for true and false, making it
826 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
827 // the appropriate nodes based on the type of node we are checking. This
828 // simplifies life a bit for the callers.
829 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
830  SDValue &CC) const {
831  if (N.getOpcode() == ISD::SETCC) {
832  LHS = N.getOperand(0);
833  RHS = N.getOperand(1);
834  CC = N.getOperand(2);
835  return true;
836  }
837 
838  if (N.getOpcode() != ISD::SELECT_CC ||
839  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
840  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
841  return false;
842 
843  if (TLI.getBooleanContents(N.getValueType()) ==
845  return false;
846 
847  LHS = N.getOperand(0);
848  RHS = N.getOperand(1);
849  CC = N.getOperand(4);
850  return true;
851 }
852 
853 /// Return true if this is a SetCC-equivalent operation with only one use.
854 /// If this is true, it allows the users to invert the operation for free when
855 /// it is profitable to do so.
856 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
857  SDValue N0, N1, N2;
858  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
859  return true;
860  return false;
861 }
862 
864  while (V.getOpcode() == ISD::BITCAST)
865  V = V.getOperand(0);
866  return V;
867 }
868 
869 // Returns the SDNode if it is a constant float BuildVector
870 // or constant float.
872  if (isa<ConstantFPSDNode>(N))
873  return N.getNode();
875  return N.getNode();
876  return nullptr;
877 }
878 
879 // Determines if it is a constant integer or a build vector of constant
880 // integers (and undefs).
881 // Do not permit build vector implicit truncation.
882 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
883  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
884  return !(Const->isOpaque() && NoOpaques);
885  if (N.getOpcode() != ISD::BUILD_VECTOR)
886  return false;
887  unsigned BitWidth = N.getScalarValueSizeInBits();
888  for (const SDValue &Op : N->op_values()) {
889  if (Op.isUndef())
890  continue;
892  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
893  (Const->isOpaque() && NoOpaques))
894  return false;
895  }
896  return true;
897 }
898 
899 // Determines if it is a constant null integer or a splatted vector of a
900 // constant null integer (with no undefs).
901 // Build vector implicit truncation is not an issue for null values.
903  // TODO: may want to use peekThroughBitcast() here.
904  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
905  return Splat->isNullValue();
906  return false;
907 }
908 
909 // Determines if it is a constant integer of one or a splatted vector of a
910 // constant integer of one (with no undefs).
911 // Do not permit build vector implicit truncation.
913  // TODO: may want to use peekThroughBitcast() here.
914  unsigned BitWidth = N.getScalarValueSizeInBits();
915  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
916  return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
917  return false;
918 }
919 
920 // Determines if it is a constant integer of all ones or a splatted vector of a
921 // constant integer of all ones (with no undefs).
922 // Do not permit build vector implicit truncation.
924  N = peekThroughBitcast(N);
925  unsigned BitWidth = N.getScalarValueSizeInBits();
926  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
927  return Splat->isAllOnesValue() &&
928  Splat->getAPIntValue().getBitWidth() == BitWidth;
929  return false;
930 }
931 
932 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
933 // undef's.
934 static bool isAnyConstantBuildVector(const SDNode *N) {
937 }
938 
939 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
940  SDValue N1) {
941  EVT VT = N0.getValueType();
942  if (N0.getOpcode() == Opc) {
945  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
946  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
947  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
948  return SDValue();
949  }
950  if (N0.hasOneUse()) {
951  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
952  // use
953  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
954  if (!OpNode.getNode())
955  return SDValue();
956  AddToWorklist(OpNode.getNode());
957  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
958  }
959  }
960  }
961 
962  if (N1.getOpcode() == Opc) {
965  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
966  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
967  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
968  return SDValue();
969  }
970  if (N1.hasOneUse()) {
971  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
972  // use
973  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
974  if (!OpNode.getNode())
975  return SDValue();
976  AddToWorklist(OpNode.getNode());
977  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
978  }
979  }
980  }
981 
982  return SDValue();
983 }
984 
985 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
986  bool AddTo) {
987  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
988  ++NodesCombined;
989  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
990  To[0].getNode()->dump(&DAG);
991  dbgs() << " and " << NumTo - 1 << " other values\n");
992  for (unsigned i = 0, e = NumTo; i != e; ++i)
993  assert((!To[i].getNode() ||
994  N->getValueType(i) == To[i].getValueType()) &&
995  "Cannot combine value to value of different type!");
996 
997  WorklistRemover DeadNodes(*this);
998  DAG.ReplaceAllUsesWith(N, To);
999  if (AddTo) {
1000  // Push the new nodes and any users onto the worklist
1001  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1002  if (To[i].getNode()) {
1003  AddToWorklist(To[i].getNode());
1004  AddUsersToWorklist(To[i].getNode());
1005  }
1006  }
1007  }
1008 
1009  // Finally, if the node is now dead, remove it from the graph. The node
1010  // may not be dead if the replacement process recursively simplified to
1011  // something else needing this node.
1012  if (N->use_empty())
1013  deleteAndRecombine(N);
1014  return SDValue(N, 0);
1015 }
1016 
1017 void DAGCombiner::
1018 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1019  // Replace all uses. If any nodes become isomorphic to other nodes and
1020  // are deleted, make sure to remove them from our worklist.
1021  WorklistRemover DeadNodes(*this);
1022  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1023 
1024  // Push the new node and any (possibly new) users onto the worklist.
1025  AddToWorklist(TLO.New.getNode());
1026  AddUsersToWorklist(TLO.New.getNode());
1027 
1028  // Finally, if the node is now dead, remove it from the graph. The node
1029  // may not be dead if the replacement process recursively simplified to
1030  // something else needing this node.
1031  if (TLO.Old.getNode()->use_empty())
1032  deleteAndRecombine(TLO.Old.getNode());
1033 }
1034 
1035 /// Check the specified integer node value to see if it can be simplified or if
1036 /// things it uses can be simplified by bit propagation. If so, return true.
1037 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
1038  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1039  KnownBits Known;
1040  if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
1041  return false;
1042 
1043  // Revisit the node.
1044  AddToWorklist(Op.getNode());
1045 
1046  // Replace the old value with the new one.
1047  ++NodesCombined;
1048  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1049  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1050  dbgs() << '\n');
1051 
1052  CommitTargetLoweringOpt(TLO);
1053  return true;
1054 }
1055 
1056 /// Check the specified vector node value to see if it can be simplified or
1057 /// if things it uses can be simplified as it only uses some of the elements.
1058 /// If so, return true.
1059 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1060  const APInt &Demanded) {
1061  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1062  APInt KnownUndef, KnownZero;
1063  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO))
1064  return false;
1065 
1066  // Revisit the node.
1067  AddToWorklist(Op.getNode());
1068 
1069  // Replace the old value with the new one.
1070  ++NodesCombined;
1071  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1072  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1073  dbgs() << '\n');
1074 
1075  CommitTargetLoweringOpt(TLO);
1076  return true;
1077 }
1078 
1079 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1080  SDLoc DL(Load);
1081  EVT VT = Load->getValueType(0);
1082  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1083 
1084  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1085  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1086  WorklistRemover DeadNodes(*this);
1087  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1088  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1089  deleteAndRecombine(Load);
1090  AddToWorklist(Trunc.getNode());
1091 }
1092 
1093 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1094  Replace = false;
1095  SDLoc DL(Op);
1096  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1097  LoadSDNode *LD = cast<LoadSDNode>(Op);
1098  EVT MemVT = LD->getMemoryVT();
1100  : LD->getExtensionType();
1101  Replace = true;
1102  return DAG.getExtLoad(ExtType, DL, PVT,
1103  LD->getChain(), LD->getBasePtr(),
1104  MemVT, LD->getMemOperand());
1105  }
1106 
1107  unsigned Opc = Op.getOpcode();
1108  switch (Opc) {
1109  default: break;
1110  case ISD::AssertSext:
1111  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1112  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1113  break;
1114  case ISD::AssertZext:
1115  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1116  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1117  break;
1118  case ISD::Constant: {
1119  unsigned ExtOpc =
1121  return DAG.getNode(ExtOpc, DL, PVT, Op);
1122  }
1123  }
1124 
1125  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1126  return SDValue();
1127  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1128 }
1129 
1130 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1132  return SDValue();
1133  EVT OldVT = Op.getValueType();
1134  SDLoc DL(Op);
1135  bool Replace = false;
1136  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1137  if (!NewOp.getNode())
1138  return SDValue();
1139  AddToWorklist(NewOp.getNode());
1140 
1141  if (Replace)
1142  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1143  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1144  DAG.getValueType(OldVT));
1145 }
1146 
1147 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1148  EVT OldVT = Op.getValueType();
1149  SDLoc DL(Op);
1150  bool Replace = false;
1151  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1152  if (!NewOp.getNode())
1153  return SDValue();
1154  AddToWorklist(NewOp.getNode());
1155 
1156  if (Replace)
1157  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1158  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1159 }
1160 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
/// Returns Op itself on success (CombineTo has already rewritten it), or a
/// null SDValue when no promotion was performed.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integers are candidates for promotion.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    // Widen both operands to PVT. ReplaceN records whether the operand was a
    // load that must itself be replaced with the promoted load.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    // Perform the operation in the wider type, then truncate back to VT.
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}
1225 
1226 /// Promote the specified integer shift operation if the target indicates it is
1227 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1228 /// i32 since i16 instructions are longer.
1229 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1230  if (!LegalOperations)
1231  return SDValue();
1232 
1233  EVT VT = Op.getValueType();
1234  if (VT.isVector() || !VT.isInteger())
1235  return SDValue();
1236 
1237  // If operation type is 'undesirable', e.g. i16 on x86, consider
1238  // promoting it.
1239  unsigned Opc = Op.getOpcode();
1240  if (TLI.isTypeDesirableForOp(Opc, VT))
1241  return SDValue();
1242 
1243  EVT PVT = VT;
1244  // Consult target whether it is a good idea to promote this operation and
1245  // what's the right type to promote it to.
1246  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1247  assert(PVT != VT && "Don't know what type to promote to!");
1248 
1249  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1250 
1251  bool Replace = false;
1252  SDValue N0 = Op.getOperand(0);
1253  SDValue N1 = Op.getOperand(1);
1254  if (Opc == ISD::SRA)
1255  N0 = SExtPromoteOperand(N0, PVT);
1256  else if (Opc == ISD::SRL)
1257  N0 = ZExtPromoteOperand(N0, PVT);
1258  else
1259  N0 = PromoteOperand(N0, PVT, Replace);
1260 
1261  if (!N0.getNode())
1262  return SDValue();
1263 
1264  SDLoc DL(Op);
1265  SDValue RV =
1266  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1267 
1268  AddToWorklist(N0.getNode());
1269  if (Replace)
1270  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1271 
1272  // Deal with Op being deleted.
1273  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1274  return RV;
1275  }
1276  return SDValue();
1277 }
1278 
1279 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1280  if (!LegalOperations)
1281  return SDValue();
1282 
1283  EVT VT = Op.getValueType();
1284  if (VT.isVector() || !VT.isInteger())
1285  return SDValue();
1286 
1287  // If operation type is 'undesirable', e.g. i16 on x86, consider
1288  // promoting it.
1289  unsigned Opc = Op.getOpcode();
1290  if (TLI.isTypeDesirableForOp(Opc, VT))
1291  return SDValue();
1292 
1293  EVT PVT = VT;
1294  // Consult target whether it is a good idea to promote this operation and
1295  // what's the right type to promote it to.
1296  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1297  assert(PVT != VT && "Don't know what type to promote to!");
1298  // fold (aext (aext x)) -> (aext x)
1299  // fold (aext (zext x)) -> (zext x)
1300  // fold (aext (sext x)) -> (sext x)
1301  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1302  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1303  }
1304  return SDValue();
1305 }
1306 
1307 bool DAGCombiner::PromoteLoad(SDValue Op) {
1308  if (!LegalOperations)
1309  return false;
1310 
1311  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1312  return false;
1313 
1314  EVT VT = Op.getValueType();
1315  if (VT.isVector() || !VT.isInteger())
1316  return false;
1317 
1318  // If operation type is 'undesirable', e.g. i16 on x86, consider
1319  // promoting it.
1320  unsigned Opc = Op.getOpcode();
1321  if (TLI.isTypeDesirableForOp(Opc, VT))
1322  return false;
1323 
1324  EVT PVT = VT;
1325  // Consult target whether it is a good idea to promote this operation and
1326  // what's the right type to promote it to.
1327  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1328  assert(PVT != VT && "Don't know what type to promote to!");
1329 
1330  SDLoc DL(Op);
1331  SDNode *N = Op.getNode();
1332  LoadSDNode *LD = cast<LoadSDNode>(N);
1333  EVT MemVT = LD->getMemoryVT();
1335  : LD->getExtensionType();
1336  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1337  LD->getChain(), LD->getBasePtr(),
1338  MemVT, LD->getMemOperand());
1339  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1340 
1341  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1342  Result.getNode()->dump(&DAG); dbgs() << '\n');
1343  WorklistRemover DeadNodes(*this);
1344  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1345  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1346  deleteAndRecombine(N);
1347  AddToWorklist(Result.getNode());
1348  return true;
1349  }
1350  return false;
1351 }
1352 
1353 /// Recursively delete a node which has no uses and any operands for
1354 /// which it is the only use.
1355 ///
1356 /// Note that this both deletes the nodes and removes them from the worklist.
1357 /// It also adds any nodes who have had a user deleted to the worklist as they
1358 /// may now have only one use and subject to other combines.
1359 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1360  if (!N->use_empty())
1361  return false;
1362 
1364  Nodes.insert(N);
1365  do {
1366  N = Nodes.pop_back_val();
1367  if (!N)
1368  continue;
1369 
1370  if (N->use_empty()) {
1371  for (const SDValue &ChildN : N->op_values())
1372  Nodes.insert(ChildN.getNode());
1373 
1374  removeFromWorklist(N);
1375  DAG.DeleteNode(N);
1376  } else {
1377  AddToWorklist(N);
1378  }
1379  } while (!Nodes.empty());
1380  return true;
1381 }
1382 
1383 //===----------------------------------------------------------------------===//
1384 // Main DAG Combiner implementation
1385 //===----------------------------------------------------------------------===//
1386 
1387 void DAGCombiner::Run(CombineLevel AtLevel) {
1388  // set the instance variables, so that the various visit routines may use it.
1389  Level = AtLevel;
1390  LegalOperations = Level >= AfterLegalizeVectorOps;
1391  LegalTypes = Level >= AfterLegalizeTypes;
1392 
1393  // Add all the dag nodes to the worklist.
1394  for (SDNode &Node : DAG.allnodes())
1395  AddToWorklist(&Node);
1396 
1397  // Create a dummy node (which is not added to allnodes), that adds a reference
1398  // to the root node, preventing it from being deleted, and tracking any
1399  // changes of the root.
1400  HandleSDNode Dummy(DAG.getRoot());
1401 
1402  // While the worklist isn't empty, find a node and try to combine it.
1403  while (!WorklistMap.empty()) {
1404  SDNode *N;
1405  // The Worklist holds the SDNodes in order, but it may contain null entries.
1406  do {
1407  N = Worklist.pop_back_val();
1408  } while (!N);
1409 
1410  bool GoodWorklistEntry = WorklistMap.erase(N);
1411  (void)GoodWorklistEntry;
1412  assert(GoodWorklistEntry &&
1413  "Found a worklist entry without a corresponding map entry!");
1414 
1415  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1416  // N is deleted from the DAG, since they too may now be dead or may have a
1417  // reduced number of uses, allowing other xforms.
1418  if (recursivelyDeleteUnusedNodes(N))
1419  continue;
1420 
1421  WorklistRemover DeadNodes(*this);
1422 
1423  // If this combine is running after legalizing the DAG, re-legalize any
1424  // nodes pulled off the worklist.
1425  if (Level == AfterLegalizeDAG) {
1426  SmallSetVector<SDNode *, 16> UpdatedNodes;
1427  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1428 
1429  for (SDNode *LN : UpdatedNodes) {
1430  AddToWorklist(LN);
1431  AddUsersToWorklist(LN);
1432  }
1433  if (!NIsValid)
1434  continue;
1435  }
1436 
1437  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1438 
1439  // Add any operands of the new node which have not yet been combined to the
1440  // worklist as well. Because the worklist uniques things already, this
1441  // won't repeatedly process the same operand.
1442  CombinedNodes.insert(N);
1443  for (const SDValue &ChildN : N->op_values())
1444  if (!CombinedNodes.count(ChildN.getNode()))
1445  AddToWorklist(ChildN.getNode());
1446 
1447  SDValue RV = combine(N);
1448 
1449  if (!RV.getNode())
1450  continue;
1451 
1452  ++NodesCombined;
1453 
1454  // If we get back the same node we passed in, rather than a new node or
1455  // zero, we know that the node must have defined multiple values and
1456  // CombineTo was used. Since CombineTo takes care of the worklist
1457  // mechanics for us, we have no work to do in this case.
1458  if (RV.getNode() == N)
1459  continue;
1460 
1462  RV.getOpcode() != ISD::DELETED_NODE &&
1463  "Node was deleted but visit returned new node!");
1464 
1465  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1466 
1467  if (N->getNumValues() == RV.getNode()->getNumValues())
1468  DAG.ReplaceAllUsesWith(N, RV.getNode());
1469  else {
1470  assert(N->getValueType(0) == RV.getValueType() &&
1471  N->getNumValues() == 1 && "Type mismatch");
1472  DAG.ReplaceAllUsesWith(N, &RV);
1473  }
1474 
1475  // Push the new node and any users onto the worklist
1476  AddToWorklist(RV.getNode());
1477  AddUsersToWorklist(RV.getNode());
1478 
1479  // Finally, if the node is now dead, remove it from the graph. The node
1480  // may not be dead if the replacement process recursively simplified to
1481  // something else needing this node. This will also take care of adding any
1482  // operands which have lost a user to the worklist.
1483  recursivelyDeleteUnusedNodes(N);
1484  }
1485 
1486  // If the root changed (e.g. it was a dead load, update the root).
1487  DAG.setRoot(Dummy.getValue());
1488  DAG.RemoveDeadNodes();
1489 }
1490 
/// Dispatch to the opcode-specific visit routine for N. Returns the
/// replacement value produced by that routine, or a null SDValue when no
/// combine applies (unhandled opcodes fall through to the default).
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor: return visitTokenFactor(N);
  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
  case ISD::ADD: return visitADD(N);
  case ISD::SUB: return visitSUB(N);
  case ISD::ADDC: return visitADDC(N);
  case ISD::UADDO: return visitUADDO(N);
  case ISD::SUBC: return visitSUBC(N);
  case ISD::USUBO: return visitUSUBO(N);
  case ISD::ADDE: return visitADDE(N);
  case ISD::ADDCARRY: return visitADDCARRY(N);
  case ISD::SUBE: return visitSUBE(N);
  case ISD::SUBCARRY: return visitSUBCARRY(N);
  case ISD::MUL: return visitMUL(N);
  case ISD::SDIV: return visitSDIV(N);
  case ISD::UDIV: return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM: return visitREM(N);
  case ISD::MULHU: return visitMULHU(N);
  case ISD::MULHS: return visitMULHS(N);
  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
  case ISD::SMULO: return visitSMULO(N);
  case ISD::UMULO: return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: return visitIMINMAX(N);
  case ISD::AND: return visitAND(N);
  case ISD::OR: return visitOR(N);
  case ISD::XOR: return visitXOR(N);
  case ISD::SHL: return visitSHL(N);
  case ISD::SRA: return visitSRA(N);
  case ISD::SRL: return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL: return visitRotate(N);
  case ISD::ABS: return visitABS(N);
  case ISD::BSWAP: return visitBSWAP(N);
  case ISD::BITREVERSE: return visitBITREVERSE(N);
  case ISD::CTLZ: return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ: return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP: return visitCTPOP(N);
  case ISD::SELECT: return visitSELECT(N);
  case ISD::VSELECT: return visitVSELECT(N);
  case ISD::SELECT_CC: return visitSELECT_CC(N);
  case ISD::SETCC: return visitSETCC(N);
  case ISD::SETCCE: return visitSETCCE(N);
  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
  case ISD::AssertSext:
  case ISD::AssertZext: return visitAssertExt(N);
  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE: return visitTRUNCATE(N);
  case ISD::BITCAST: return visitBITCAST(N);
  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
  case ISD::FADD: return visitFADD(N);
  case ISD::FSUB: return visitFSUB(N);
  case ISD::FMUL: return visitFMUL(N);
  case ISD::FMA: return visitFMA(N);
  case ISD::FDIV: return visitFDIV(N);
  case ISD::FREM: return visitFREM(N);
  case ISD::FSQRT: return visitFSQRT(N);
  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
  case ISD::FP_ROUND: return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
  case ISD::FNEG: return visitFNEG(N);
  case ISD::FABS: return visitFABS(N);
  case ISD::FFLOOR: return visitFFLOOR(N);
  case ISD::FMINNUM: return visitFMINNUM(N);
  case ISD::FMAXNUM: return visitFMAXNUM(N);
  case ISD::FCEIL: return visitFCEIL(N);
  case ISD::FTRUNC: return visitFTRUNC(N);
  case ISD::BRCOND: return visitBRCOND(N);
  case ISD::BR_CC: return visitBR_CC(N);
  case ISD::LOAD: return visitLOAD(N);
  case ISD::STORE: return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER: return visitMGATHER(N);
  case ISD::MLOAD: return visitMLOAD(N);
  case ISD::MSCATTER: return visitMSCATTER(N);
  case ISD::MSTORE: return visitMSTORE(N);
  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
  }
  return SDValue();
}
1597 
1598 SDValue DAGCombiner::combine(SDNode *N) {
1599  SDValue RV = visit(N);
1600 
1601  // If nothing happened, try a target-specific DAG combine.
1602  if (!RV.getNode()) {
1604  "Node was deleted but visit returned NULL!");
1605 
1606  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1608 
1609  // Expose the DAG combiner to the target combiner impls.
1611  DagCombineInfo(DAG, Level, false, this);
1612 
1613  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1614  }
1615  }
1616 
1617  // If nothing happened still, try promoting the operation.
1618  if (!RV.getNode()) {
1619  switch (N->getOpcode()) {
1620  default: break;
1621  case ISD::ADD:
1622  case ISD::SUB:
1623  case ISD::MUL:
1624  case ISD::AND:
1625  case ISD::OR:
1626  case ISD::XOR:
1627  RV = PromoteIntBinOp(SDValue(N, 0));
1628  break;
1629  case ISD::SHL:
1630  case ISD::SRA:
1631  case ISD::SRL:
1632  RV = PromoteIntShiftOp(SDValue(N, 0));
1633  break;
1634  case ISD::SIGN_EXTEND:
1635  case ISD::ZERO_EXTEND:
1636  case ISD::ANY_EXTEND:
1637  RV = PromoteExtend(SDValue(N, 0));
1638  break;
1639  case ISD::LOAD:
1640  if (PromoteLoad(SDValue(N, 0)))
1641  RV = SDValue(N, 0);
1642  break;
1643  }
1644  }
1645 
1646  // If N is a commutative binary node, try eliminate it if the commuted
1647  // version is already present in the DAG.
1648  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1649  N->getNumValues() == 1) {
1650  SDValue N0 = N->getOperand(0);
1651  SDValue N1 = N->getOperand(1);
1652 
1653  // Constant operands are canonicalized to RHS.
1654  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1655  SDValue Ops[] = {N1, N0};
1656  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1657  N->getFlags());
1658  if (CSENode)
1659  return SDValue(CSENode, 0);
1660  }
1661  }
1662 
1663  return RV;
1664 }
1665 
1666 /// Given a node, return its input chain if it has one, otherwise return a null
1667 /// sd operand.
1669  if (unsigned NumOps = N->getNumOperands()) {
1670  if (N->getOperand(0).getValueType() == MVT::Other)
1671  return N->getOperand(0);
1672  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1673  return N->getOperand(NumOps-1);
1674  for (unsigned i = 1; i < NumOps-1; ++i)
1675  if (N->getOperand(i).getValueType() == MVT::Other)
1676  return N->getOperand(i);
1677  }
1678  return SDValue();
1679 }
1680 
// Combine a TOKEN_FACTOR node: drop redundant operands (entry tokens and
// duplicates), inline single-use nested token factors, and prune operands
// that are transitively reachable through another operand's chain.
1681 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1682  // If N has two operands, where one has an input chain equal to the other,
1683  // the 'other' chain is redundant.
1684  if (N->getNumOperands() == 2) {
1685  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1686  return N->getOperand(0);
1687  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1688  return N->getOperand(1);
1689  }
1690 
1691  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1692  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1693  SmallPtrSet<SDNode*, 16> SeenOps;
1694  bool Changed = false; // If we should replace this token factor.
1695 
1696  // Start out with this token factor.
1697  TFs.push_back(N);
1698 
1699  // Iterate through token factors. The TFs grows when new token factors are
1700  // encountered.
1701  for (unsigned i = 0; i < TFs.size(); ++i) {
1702  SDNode *TF = TFs[i];
1703 
1704  // Check each of the operands.
1705  for (const SDValue &Op : TF->op_values()) {
1706  switch (Op.getOpcode()) {
1707  case ISD::EntryToken:
1708  // Entry tokens don't need to be added to the list. They are
1709  // redundant.
1710  Changed = true;
1711  break;
1712 
1713  case ISD::TokenFactor:
1714  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1715  // Queue up for processing.
1716  TFs.push_back(Op.getNode());
1717  // Clean up in case the token factor is removed.
1718  AddToWorklist(Op.getNode());
1719  Changed = true;
1720  break;
1721  }
// NOTE(review): original line 1722 (an LLVM_FALLTHROUGH into the default
// case, taken for multi-use or already-seen nested token factors) is missing
// from this extract -- restore it from upstream before building.
1723 
1724  default:
1725  // Only add if it isn't already in the list.
1726  if (SeenOps.insert(Op.getNode()).second)
1727  Ops.push_back(Op);
1728  else
1729  Changed = true;
1730  break;
1731  }
1732  }
1733  }
1734 
1735  // Remove Nodes that are chained to another node in the list. Do so
1736  // by walking up chains breadth-first stopping when we've seen
1737  // another operand. In general we must climb to the EntryNode, but we can exit
1738  // early if we find all remaining work is associated with just one operand as
1739  // no further pruning is possible.
1740 
1741  // List of nodes to search through and original Ops from which they originate.
// NOTE(review): original line 1742 declaring the search worklist (presumably
// "SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;") is missing from
// this extract. `Worklist` below refers to that local, not to any member.
1743  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1744  SmallPtrSet<SDNode *, 16> SeenChains;
1745  bool DidPruneOps = false;
1746 
1747  unsigned NumLeftToConsider = 0;
1748  for (const SDValue &Op : Ops) {
1749  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1750  OpWorkCount.push_back(1);
1751  }
1752 
// This local lambda shadows DAGCombiner::AddToWorklist for the remainder of
// the function body.
1753  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1754  // If this is an Op, we can remove the op from the list. Remark any
1755  // search associated with it as from the current OpNumber.
1756  if (SeenOps.count(Op) != 0) {
1757  Changed = true;
1758  DidPruneOps = true;
1759  unsigned OrigOpNumber = 0;
1760  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1761  OrigOpNumber++;
1762  assert((OrigOpNumber != Ops.size()) &&
1763  "expected to find TokenFactor Operand");
1764  // Re-mark worklist from OrigOpNumber to OpNumber
1765  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1766  if (Worklist[i].second == OrigOpNumber) {
1767  Worklist[i].second = OpNumber;
1768  }
1769  }
1770  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1771  OpWorkCount[OrigOpNumber] = 0;
1772  NumLeftToConsider--;
1773  }
1774  // Add if it's a new chain
1775  if (SeenChains.insert(Op).second) {
1776  OpWorkCount[OpNumber]++;
1777  Worklist.push_back(std::make_pair(Op, OpNumber));
1778  }
1779  };
1780 
// The 1024-entry cap bounds the amount of chain walking on very deep DAGs.
1781  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1782  // We need to consider at least 2 Ops to prune.
1783  if (NumLeftToConsider <= 1)
1784  break;
1785  auto CurNode = Worklist[i].first;
1786  auto CurOpNumber = Worklist[i].second;
1787  assert((OpWorkCount[CurOpNumber] > 0) &&
1788  "Node should not appear in worklist");
1789  switch (CurNode->getOpcode()) {
1790  case ISD::EntryToken:
1791  // Hitting EntryToken is the only way for the search to terminate without
1792  // hitting
1793  // another operand's search. Prevent us from marking this operand
1794  // considered.
1795  NumLeftToConsider++;
1796  break;
1797  case ISD::TokenFactor:
1798  for (const SDValue &Op : CurNode->op_values())
1799  AddToWorklist(i, Op.getNode(), CurOpNumber);
1800  break;
1801  case ISD::CopyFromReg:
1802  case ISD::CopyToReg:
1803  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1804  break;
1805  default:
1806  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1807  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1808  break;
1809  }
1810  OpWorkCount[CurOpNumber]--;
1811  if (OpWorkCount[CurOpNumber] == 0)
1812  NumLeftToConsider--;
1813  }
1814 
1815  // If we've changed things around then replace token factor.
1816  if (Changed) {
1817  SDValue Result;
1818  if (Ops.empty()) {
1819  // The entry token is the only possible outcome.
1820  Result = DAG.getEntryNode();
1821  } else {
1822  if (DidPruneOps) {
1823  SmallVector<SDValue, 8> PrunedOps;
1824  // Keep only ops whose chains were not reached from some other op.
1825  for (const SDValue &Op : Ops) {
1826  if (SeenChains.count(Op.getNode()) == 0)
1827  PrunedOps.push_back(Op);
1828  }
1829  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
1830  } else {
1831  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1832  }
1833  }
1834  return Result;
1835  }
1836  return SDValue();
1837 }
1838 
1839 /// MERGE_VALUES can always be eliminated.
1840 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1841  WorklistRemover DeadNodes(*this);
1842  // Replacing results may cause a different MERGE_VALUES to suddenly
1843  // be CSE'd with N, and carry its uses with it. Iterate until no
1844  // uses remain, to ensure that the node can be safely deleted.
1845  // First add the users of this node to the work list so that they
1846  // can be tried again once they have new operands.
1847  AddUsersToWorklist(N);
1848  do {
1849  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1850  DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1851  } while (!N->use_empty());
1852  deleteAndRecombine(N);
1853  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1854 }
1855 
1856 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1857 /// ConstantSDNode pointer else nullptr.
// NOTE(review): the function signature and the dyn_cast defining `Const`
// (original lines 1858-1859, presumably
// "static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {" and
// "ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);") are missing from
// this extract -- restore them from upstream before building.
1860  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1861 }
1862 
// Fold (binop (select Cond, CT, CF), C1) --> (select Cond, CT op C1,
// CF op C1) when the select has constant arms and one use, eliminating the
// binary operator entirely.
1863 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1864  auto BinOpcode = BO->getOpcode();
1865  assert((BinOpcode == ISD::ADD || BinOpcode == ISD::SUB ||
1866  BinOpcode == ISD::MUL || BinOpcode == ISD::SDIV ||
1867  BinOpcode == ISD::UDIV || BinOpcode == ISD::SREM ||
1868  BinOpcode == ISD::UREM || BinOpcode == ISD::AND ||
1869  BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
1870  BinOpcode == ISD::SHL || BinOpcode == ISD::SRL ||
1871  BinOpcode == ISD::SRA || BinOpcode == ISD::FADD ||
1872  BinOpcode == ISD::FSUB || BinOpcode == ISD::FMUL ||
1873  BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
1874  "Unexpected binary operator");
1875 
1876  // Bail out if any constants are opaque because we can't constant fold those.
1877  SDValue C1 = BO->getOperand(1);
1878  if (!isConstantOrConstantVector(C1, true) &&
// NOTE(review): the FP-constant half of this condition (original line 1879)
// is missing from this extract; the same one-line truncation recurs for the
// CT, CF, NewCT and NewCF checks below (lines 1891, 1896, 1907, 1913).
1880  return SDValue();
1881 
1882  // Don't do this unless the old select is going away. We want to eliminate the
1883  // binary operator, not replace a binop with a select.
1884  // TODO: Handle ISD::SELECT_CC.
1885  SDValue Sel = BO->getOperand(0);
1886  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1887  return SDValue();
1888 
1889  SDValue CT = Sel.getOperand(1);
1890  if (!isConstantOrConstantVector(CT, true) &&
// NOTE(review): original line 1891 (FP-constant check for CT) missing.
1892  return SDValue();
1893 
1894  SDValue CF = Sel.getOperand(2);
1895  if (!isConstantOrConstantVector(CF, true) &&
// NOTE(review): original line 1896 (FP-constant check for CF) missing.
1897  return SDValue();
1898 
1899  // We have a select-of-constants followed by a binary operator with a
1900  // constant. Eliminate the binop by pulling the constant math into the select.
1901  // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
1902  EVT VT = Sel.getValueType();
1903  SDLoc DL(Sel);
1904  SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
1905  if (!NewCT.isUndef() &&
1906  !isConstantOrConstantVector(NewCT, true) &&
// NOTE(review): original line 1907 (FP-constant check for NewCT) missing.
1908  return SDValue();
1909 
1910  SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
1911  if (!NewCF.isUndef() &&
1912  !isConstantOrConstantVector(NewCF, true) &&
// NOTE(review): original line 1913 (FP-constant check for NewCF) missing.
1914  return SDValue();
1915 
1916  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1917 }
1918 
// Combine an ISD::ADD node: constant folding, canonicalization of constants
// to the RHS, algebraic identities over ADD/SUB, add->or when operands share
// no bits, and carry-related patterns via visitADDLike.
1919 SDValue DAGCombiner::visitADD(SDNode *N) {
1920  SDValue N0 = N->getOperand(0);
1921  SDValue N1 = N->getOperand(1);
1922  EVT VT = N0.getValueType();
1923  SDLoc DL(N);
1924 
1925  // fold vector ops
1926  if (VT.isVector()) {
1927  if (SDValue FoldedVOp = SimplifyVBinOp(N))
1928  return FoldedVOp;
1929 
1930  // fold (add x, 0) -> x, vector edition
// NOTE(review): the all-zeros guard conditions (original lines 1931 and
// 1933) for the two returns below are missing from this extract.
1932  return N0;
1934  return N1;
1935  }
1936 
1937  // fold (add x, undef) -> undef
1938  if (N0.isUndef())
1939  return N0;
1940 
1941  if (N1.isUndef())
1942  return N1;
1943 
// NOTE(review): original line 1944 (the constant-int guard opening this
// block) and line 1946 (the "N1 is not a constant" guard for the
// canonicalization below) are missing from this extract.
1945  // canonicalize constant to RHS
1947  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1948  // fold (add c1, c2) -> c1+c2
1949  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1950  N1.getNode());
1951  }
1952 
1953  // fold (add x, 0) -> x
1954  if (isNullConstant(N1))
1955  return N0;
1956 
1957  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1958  // fold ((c1-A)+c2) -> (c1+c2)-A
1959  if (N0.getOpcode() == ISD::SUB &&
1960  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1961  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
1962  return DAG.getNode(ISD::SUB, DL, VT,
1963  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1964  N0.getOperand(1));
1965  }
1966 
1967  // add (sext i1 X), 1 -> zext (not i1 X)
1968  // We don't transform this pattern:
1969  // add (zext i1 X), -1 -> sext (not i1 X)
1970  // because most (?) targets generate better code for the zext form.
1971  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
// NOTE(review): original line 1972 (the "N1 is one" half of this condition)
// is missing from this extract.
1973  SDValue X = N0.getOperand(0);
1974  if ((!LegalOperations ||
1975  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
1976  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
1977  X.getScalarValueSizeInBits() == 1) {
1978  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
1979  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
1980  }
1981  }
1982 
1983  // Undo the add -> or combine to merge constant offsets from a frame index.
1984  if (N0.getOpcode() == ISD::OR &&
1985  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
1986  isa<ConstantSDNode>(N0.getOperand(1)) &&
1987  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
1988  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
1989  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
1990  }
1991  }
1992 
1993  if (SDValue NewSel = foldBinOpIntoSelect(N))
1994  return NewSel;
1995 
1996  // reassociate add
1997  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1998  return RADD;
1999 
2000  // fold ((0-A) + B) -> B-A
2001  if (N0.getOpcode() == ISD::SUB &&
// NOTE(review): original line 2002 (the "N0 operand 0 is zero" half of this
// condition) is missing from this extract; likewise line 2007 below.
2003  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2004 
2005  // fold (A + (0-B)) -> A-B
2006  if (N1.getOpcode() == ISD::SUB &&
2008  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2009 
2010  // fold (A+(B-A)) -> B
2011  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2012  return N1.getOperand(0);
2013 
2014  // fold ((B-A)+A) -> B
2015  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2016  return N0.getOperand(0);
2017 
2018  // fold (A+(B-(A+C))) to (B-C)
2019  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2020  N0 == N1.getOperand(1).getOperand(0))
2021  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2022  N1.getOperand(1).getOperand(1));
2023 
2024  // fold (A+(B-(C+A))) to (B-C)
2025  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2026  N0 == N1.getOperand(1).getOperand(1))
2027  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2028  N1.getOperand(1).getOperand(0));
2029 
2030  // fold (A+((B-A)+or-C)) to (B+or-C)
2031  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2032  N1.getOperand(0).getOpcode() == ISD::SUB &&
2033  N0 == N1.getOperand(0).getOperand(1))
2034  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2035  N1.getOperand(1));
2036 
2037  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2038  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2039  SDValue N00 = N0.getOperand(0);
2040  SDValue N01 = N0.getOperand(1);
2041  SDValue N10 = N1.getOperand(0);
2042  SDValue N11 = N1.getOperand(1);
2043 
// NOTE(review): original line 2044 (the constant-check guard on N00/N10 for
// the fold below) is missing from this extract.
2045  return DAG.getNode(ISD::SUB, DL, VT,
2046  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2047  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2048  }
2049 
2050  if (SimplifyDemandedBits(SDValue(N, 0)))
2051  return SDValue(N, 0);
2052 
2053  // fold (a+b) -> (a|b) iff a and b share no bits.
2054  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2055  DAG.haveNoCommonBitsSet(N0, N1))
2056  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2057 
// Try the ADD-like carry patterns with the operands in both orders.
2058  if (SDValue Combined = visitADDLike(N0, N1, N))
2059  return Combined;
2060 
2061  if (SDValue Combined = visitADDLike(N1, N0, N))
2062  return Combined;
2063 
2064  return SDValue();
2065 }
2066 
// If V (possibly wrapped in TRUNCATE/ZERO_EXTEND/AND-1 nodes inserted by
// legalization) is the carry result (result #1) of an overflow/carry node,
// return that carry value; otherwise return a null SDValue.
2067 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2068  bool Masked = false;
2069 
2070  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2071  while (true) {
2072  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2073  V = V.getOperand(0);
2074  continue;
2075  }
2076 
2077  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2078  Masked = true;
2079  V = V.getOperand(0);
2080  continue;
2081  }
2082 
2083  break;
2084  }
2085 
2086  // If this is not a carry, return.
2087  if (V.getResNo() != 1)
2088  return SDValue();
2089 
2090  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2091  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2092  return SDValue();
2093 
2094  // If the result is masked, then no matter what kind of bool it is we can
2095  // return. If it isn't, then we need to make sure the bool type is either 0 or
2096  // 1 and not other values.
2097  if (Masked ||
2098  TLI.getBooleanContents(V.getValueType()) ==
// NOTE(review): original line 2099 (the boolean-contents enumerator this is
// compared against, presumably TargetLowering::ZeroOrOneBooleanContent) is
// missing from this extract.
2100  return V;
2101 
2102  return SDValue();
2103 }
2104 
// ADD-pattern combines that are not symmetric in the operands; visitADD
// calls this twice with the operands swapped. LocReference only supplies the
// debug location for newly created nodes.
2105 SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
2106  EVT VT = N0.getValueType();
2107  SDLoc DL(LocReference);
2108 
2109  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2110  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
// NOTE(review): original line 2111 (the "shifted SUB has a zero LHS" half of
// this condition) is missing from this extract.
2112  return DAG.getNode(ISD::SUB, DL, VT, N0,
2113  DAG.getNode(ISD::SHL, DL, VT,
2114  N1.getOperand(0).getOperand(1),
2115  N1.getOperand(1)));
2116 
2117  if (N1.getOpcode() == ISD::AND) {
2118  SDValue AndOp0 = N1.getOperand(0);
2119  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
2120  unsigned DestBits = VT.getScalarSizeInBits();
2121 
2122  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
2123  // and similar xforms where the inner op is either ~0 or 0.
2124  if (NumSignBits == DestBits &&
// NOTE(review): original line 2125 (the "AND mask is one" half of this
// condition) is missing from this extract.
2126  return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
2127  }
2128 
2129  // add (sext i1), X -> sub X, (zext i1)
2130  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2131  N0.getOperand(0).getValueType() == MVT::i1 &&
// NOTE(review): original line 2132 (the final legality clause of this
// condition and its opening brace) is missing from this extract.
2133  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2134  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2135  }
2136 
2137  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2138  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2139  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2140  if (TN->getVT() == MVT::i1) {
2141  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2142  DAG.getConstant(1, DL, VT));
2143  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2144  }
2145  }
2146 
2147  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2148  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2149  N1.getResNo() == 0)
2150  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2151  N0, N1.getOperand(0), N1.getOperand(2));
2152 
2153  // (add X, Carry) -> (addcarry X, 0, Carry)
// NOTE(review): original line 2154 (the ADDCARRY legality guard for this
// fold) is missing from this extract.
2155  if (SDValue Carry = getAsCarry(TLI, N1))
2156  return DAG.getNode(ISD::ADDCARRY, DL,
2157  DAG.getVTList(VT, Carry.getValueType()), N0,
2158  DAG.getConstant(0, DL, VT), Carry);
2159 
2160  return SDValue();
2161 }
2162 
// Combine an ISD::ADDC node (add producing a glue carry-out, result #1).
2163 SDValue DAGCombiner::visitADDC(SDNode *N) {
2164  SDValue N0 = N->getOperand(0);
2165  SDValue N1 = N->getOperand(1);
2166  EVT VT = N0.getValueType();
2167  SDLoc DL(N);
2168 
2169  // If the flag result is dead, turn this into an ADD.
2170  if (!N->hasAnyUseOfValue(1))
2171  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2172  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2173 
2174  // canonicalize constant to RHS.
// NOTE(review): original lines 2175-2176 declaring N0C/N1C (presumably
// "ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);" and the N1
// counterpart) are missing from this extract.
2177  if (N0C && !N1C)
2178  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2179 
2180  // fold (addc x, 0) -> x + no carry out
2181  if (isNullConstant(N1))
2182  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2183  DL, MVT::Glue));
2184 
2185  // If it cannot overflow, transform into an add.
2186  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2187  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2188  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2189 
2190  return SDValue();
2191 }
2192 
// Combine an ISD::UADDO node (add producing a boolean overflow, result #1).
// Scalar only: vectors bail out immediately.
2193 SDValue DAGCombiner::visitUADDO(SDNode *N) {
2194  SDValue N0 = N->getOperand(0);
2195  SDValue N1 = N->getOperand(1);
2196  EVT VT = N0.getValueType();
2197  if (VT.isVector())
2198  return SDValue();
2199 
2200  EVT CarryVT = N->getValueType(1);
2201  SDLoc DL(N);
2202 
2203  // If the flag result is dead, turn this into an ADD.
2204  if (!N->hasAnyUseOfValue(1))
2205  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2206  DAG.getUNDEF(CarryVT));
2207 
2208  // canonicalize constant to RHS.
// NOTE(review): original lines 2209-2210 declaring N0C/N1C (dyn_casts of
// N0/N1 to ConstantSDNode) are missing from this extract.
2211  if (N0C && !N1C)
2212  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
2213 
2214  // fold (uaddo x, 0) -> x + no carry out
2215  if (isNullConstant(N1))
2216  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2217 
2218  // If it cannot overflow, transform into an add.
2219  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2220  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2221  DAG.getConstant(0, DL, CarryVT));
2222 
// Try the asymmetric UADDO patterns with operands in both orders.
2223  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2224  return Combined;
2225 
2226  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2227  return Combined;
2228 
2229  return SDValue();
2230 }
2231 
// UADDO patterns that are not symmetric in the operands; visitUADDO calls
// this twice with the operands swapped.
2232 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2233  auto VT = N0.getValueType();
2234 
2235  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2236  // If Y + 1 cannot overflow.
2237  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2238  SDValue Y = N1.getOperand(0);
2239  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2240  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2241  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2242  N1.getOperand(2));
2243  }
2244 
2245  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
// NOTE(review): original line 2246 (the ADDCARRY legality guard for this
// fold) is missing from this extract.
2247  if (SDValue Carry = getAsCarry(TLI, N1))
2248  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2249  DAG.getConstant(0, SDLoc(N), VT), Carry);
2250 
2251  return SDValue();
2252 }
2253 
// Combine an ISD::ADDE node (add with glue carry-in and carry-out).
2254 SDValue DAGCombiner::visitADDE(SDNode *N) {
2255  SDValue N0 = N->getOperand(0);
2256  SDValue N1 = N->getOperand(1);
2257  SDValue CarryIn = N->getOperand(2);
2258 
2259  // canonicalize constant to RHS
// NOTE(review): original lines 2260-2261 declaring N0C/N1C (dyn_casts of
// N0/N1 to ConstantSDNode) are missing from this extract.
2262  if (N0C && !N1C)
2263  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2264  N1, N0, CarryIn);
2265 
2266  // fold (adde x, y, false) -> (addc x, y)
2267  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2268  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2269 
2270  return SDValue();
2271 }
2272 
// Combine an ISD::ADDCARRY node (add with boolean carry-in and carry-out).
2273 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2274  SDValue N0 = N->getOperand(0);
2275  SDValue N1 = N->getOperand(1);
2276  SDValue CarryIn = N->getOperand(2);
2277  SDLoc DL(N);
2278 
2279  // canonicalize constant to RHS
// NOTE(review): original lines 2280-2281 declaring N0C/N1C (dyn_casts of
// N0/N1 to ConstantSDNode) are missing from this extract.
2282  if (N0C && !N1C)
2283  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2284 
2285  // fold (addcarry x, y, false) -> (uaddo x, y)
2286  if (isNullConstant(CarryIn))
2287  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2288 
2289  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2290  if (isNullConstant(N0) && isNullConstant(N1)) {
2291  EVT VT = N0.getValueType();
2292  EVT CarryVT = CarryIn.getValueType();
2293  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2294  AddToWorklist(CarryExt.getNode());
2295  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2296  DAG.getConstant(1, DL, VT)),
2297  DAG.getConstant(0, DL, CarryVT));
2298  }
2299 
// Try the asymmetric ADDCARRY patterns with operands in both orders.
2300  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2301  return Combined;
2302 
2303  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2304  return Combined;
2305 
2306  return SDValue();
2307 }
2308 
2309 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2310  SDNode *N) {
2311  // Iff the flag result is dead:
2312  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2313  if ((N0.getOpcode() == ISD::ADD ||
2314  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2315  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2316  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2317  N0.getOperand(0), N0.getOperand(1), CarryIn);
2318 
2319  /**
2320  * When one of the addcarry argument is itself a carry, we may be facing
2321  * a diamond carry propagation. In which case we try to transform the DAG
2322  * to ensure linear carry propagation if that is possible.
2323  *
2324  * We are trying to get:
2325  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2326  */
2327  if (auto Y = getAsCarry(TLI, N1)) {
2328  /**
2329  * (uaddo A, B)
2330  * / \
2331  * Carry Sum
2332  * | \
2333  * | (addcarry *, 0, Z)
2334  * | /
2335  * \ Carry
2336  * | /
2337  * (addcarry X, *, *)
2338  */
2339  if (Y.getOpcode() == ISD::UADDO &&
2340  CarryIn.getResNo() == 1 &&
2341  CarryIn.getOpcode() == ISD::ADDCARRY &&
2342  isNullConstant(CarryIn.getOperand(1)) &&
2343  CarryIn.getOperand(0) == Y.getValue(0)) {
2344  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2345  Y.getOperand(0), Y.getOperand(1),
2346  CarryIn.getOperand(2));
2347  AddToWorklist(NewY.getNode());
2348  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2349  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2350  NewY.getValue(1));
2351  }
2352  }
2353 
2354  return SDValue();
2355 }
2356 
2357 // Since it may not be valid to emit a fold to zero for vector initializers
2358 // check if we can before folding.
2359 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2360  SelectionDAG &DAG, bool LegalOperations,
2361  bool LegalTypes) {
2362  if (!VT.isVector())
2363  return DAG.getConstant(0, DL, VT);
2364  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2365  return DAG.getConstant(0, DL, VT);
2366  return SDValue();
2367 }
2368 
// Combine an ISD::SUB node: constant folding, sub->add/xor/shift rewrites,
// algebraic identities over ADD/SUB, and global-address offset folding.
2369 SDValue DAGCombiner::visitSUB(SDNode *N) {
2370  SDValue N0 = N->getOperand(0);
2371  SDValue N1 = N->getOperand(1);
2372  EVT VT = N0.getValueType();
2373  SDLoc DL(N);
2374 
2375  // fold vector ops
2376  if (VT.isVector()) {
2377  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2378  return FoldedVOp;
2379 
2380  // fold (sub x, 0) -> x, vector edition
// NOTE(review): original line 2381 (the all-zeros guard on N1 for the return
// below) is missing from this extract.
2382  return N0;
2383  }
2384 
2385  // fold (sub x, x) -> 0
2386  // FIXME: Refactor this and xor and other similar operations together.
2387  if (N0 == N1)
2388  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
// NOTE(review): original lines 2389-2390 (the constant-int guard opening
// this block) are missing from this extract.
2391  // fold (sub c1, c2) -> c1-c2
2392  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2393  N1.getNode());
2394  }
2395 
2396  if (SDValue NewSel = foldBinOpIntoSelect(N))
2397  return NewSel;
2398 
// NOTE(review): original line 2399 declaring N1C (presumably
// "ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);") is missing from this
// extract; N1C is used below.
2400 
2401  // fold (sub x, c) -> (add x, -c)
2402  if (N1C) {
2403  return DAG.getNode(ISD::ADD, DL, VT, N0,
2404  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2405  }
2406 
// NOTE(review): original line 2407 (the "N0 is zero" guard opening the
// 0-minus-X block that ends at line 2435) is missing from this extract.
2408  unsigned BitWidth = VT.getScalarSizeInBits();
2409  // Right-shifting everything out but the sign bit followed by negation is
2410  // the same as flipping arithmetic/logical shift type without the negation:
2411  // -(X >>u 31) -> (X >>s 31)
2412  // -(X >>s 31) -> (X >>u 31)
2413  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2414  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2415  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2416  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2417  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2418  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2419  }
2420  }
2421 
2422  // 0 - X --> 0 if the sub is NUW.
2423  if (N->getFlags().hasNoUnsignedWrap())
2424  return N0;
2425 
2426  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2427  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2428  // N1 must be 0 because negating the minimum signed value is undefined.
2429  if (N->getFlags().hasNoSignedWrap())
2430  return N0;
2431 
2432  // 0 - X --> X if X is 0 or the minimum signed value.
2433  return N1;
2434  }
2435  }
2436 
2437  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
// NOTE(review): original line 2438 (the "N0 is all-ones" guard for the
// return below) is missing from this extract.
2439  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2440 
2441  // fold A-(A-B) -> B
2442  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2443  return N1.getOperand(1);
2444 
2445  // fold (A+B)-A -> B
2446  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2447  return N0.getOperand(1);
2448 
2449  // fold (A+B)-B -> A
2450  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2451  return N0.getOperand(0);
2452 
2453  // fold C2-(A+C1) -> (C2-C1)-A
2454  if (N1.getOpcode() == ISD::ADD) {
2455  SDValue N11 = N1.getOperand(1);
2456  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2457  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2458  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2459  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2460  }
2461  }
2462 
2463  // fold ((A+(B+or-C))-B) -> A+or-C
2464  if (N0.getOpcode() == ISD::ADD &&
2465  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2466  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2467  N0.getOperand(1).getOperand(0) == N1)
2468  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2469  N0.getOperand(1).getOperand(1));
2470 
2471  // fold ((A+(C+B))-B) -> A+C
2472  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2473  N0.getOperand(1).getOperand(1) == N1)
2474  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2475  N0.getOperand(1).getOperand(0));
2476 
2477  // fold ((A-(B-C))-C) -> A-B
2478  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2479  N0.getOperand(1).getOperand(1) == N1)
2480  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2481  N0.getOperand(1).getOperand(0));
2482 
2483  // If either operand of a sub is undef, the result is undef
2484  if (N0.isUndef())
2485  return N0;
2486  if (N1.isUndef())
2487  return N1;
2488 
2489  // If the relocation model supports it, consider symbol offsets.
2490  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2491  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2492  // fold (sub Sym, c) -> Sym-c
2493  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2494  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2495  GA->getOffset() -
2496  (uint64_t)N1C->getSExtValue());
2497  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2498  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2499  if (GA->getGlobal() == GB->getGlobal())
2500  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2501  DL, VT);
2502  }
2503 
2504  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2505  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2506  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2507  if (TN->getVT() == MVT::i1) {
2508  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2509  DAG.getConstant(1, DL, VT));
2510  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2511  }
2512  }
2513 
2514  return SDValue();
2515 }
2516 
2517 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2518  SDValue N0 = N->getOperand(0);
2519  SDValue N1 = N->getOperand(1);
2520  EVT VT = N0.getValueType();
2521  SDLoc DL(N);
2522 
2523  // If the flag result is dead, turn this into an SUB.
2524  if (!N->hasAnyUseOfValue(1))
2525  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2526  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2527 
2528  // fold (subc x, x) -> 0 + no borrow
2529  if (N0 == N1)
2530  return CombineTo(N, DAG.getConstant(0, DL, VT),
2531  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2532 
2533  // fold (subc x, 0) -> x + no borrow
2534  if (isNullConstant(N1))
2535  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2536 
2537  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2538  if (isAllOnesConstant(N0))
2539  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2540  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2541 
2542  return SDValue();
2543 }
2544 
2545 SDValue DAGCombiner::visitUSUBO(SDNode *N) {
2546  SDValue N0 = N->getOperand(0);
2547  SDValue N1 = N->getOperand(1);
2548  EVT VT = N0.getValueType();
2549  if (VT.isVector())
2550  return SDValue();
2551 
2552  EVT CarryVT = N->getValueType(1);
2553  SDLoc DL(N);
2554 
2555  // If the flag result is dead, turn this into an SUB.
2556  if (!N->hasAnyUseOfValue(1))
2557  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2558  DAG.getUNDEF(CarryVT));
2559 
2560  // fold (usubo x, x) -> 0 + no borrow
2561  if (N0 == N1)
2562  return CombineTo(N, DAG.getConstant(0, DL, VT),
2563  DAG.getConstant(0, DL, CarryVT));
2564 
2565  // fold (usubo x, 0) -> x + no borrow
2566  if (isNullConstant(N1))
2567  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2568 
2569  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2570  if (isAllOnesConstant(N0))
2571  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2572  DAG.getConstant(0, DL, CarryVT));
2573 
2574  return SDValue();
2575 }
2576 
2577 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2578  SDValue N0 = N->getOperand(0);
2579  SDValue N1 = N->getOperand(1);
2580  SDValue CarryIn = N->getOperand(2);
2581 
2582  // fold (sube x, y, false) -> (subc x, y)
2583  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2584  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2585 
2586  return SDValue();
2587 }
2588 
2589 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
2590  SDValue N0 = N->getOperand(0);
2591  SDValue N1 = N->getOperand(1);
2592  SDValue CarryIn = N->getOperand(2);
2593 
2594  // fold (subcarry x, y, false) -> (usubo x, y)
2595  if (isNullConstant(CarryIn))
2596  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
2597 
2598  return SDValue();
2599 }
2600 
// Combine an integer multiply. Handles constant folding, canonicalization of
// constants to the RHS, strength reduction of multiplies by (+/-) powers of
// two into shifts, distribution over ADD, and reassociation.
// NOTE(review): this is an HTML extraction of the file; several original
// source lines (those containing cross-reference links) are missing below
// and are flagged at each gap.
2601 SDValue DAGCombiner::visitMUL(SDNode *N) {
2602  SDValue N0 = N->getOperand(0);
2603  SDValue N1 = N->getOperand(1);
2604  EVT VT = N0.getValueType();
2605 
2606  // fold (mul x, undef) -> 0
2607  if (N0.isUndef() || N1.isUndef())
2608  return DAG.getConstant(0, SDLoc(N), VT);
2609 
  // Track whether each operand is a (splat) constant and whether it is
  // "opaque" (a constant the target does not want folded).
2610  bool N0IsConst = false;
2611  bool N1IsConst = false;
2612  bool N1IsOpaqueConst = false;
2613  bool N0IsOpaqueConst = false;
2614  APInt ConstValue0, ConstValue1;
2615  // fold vector ops
2616  if (VT.isVector()) {
2617  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2618  return FoldedVOp;
2619 
  // For vectors, a constant operand means a constant splat; the splat value
  // is extracted at element width.
2620  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2621  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2622  assert((!N0IsConst ||
2623  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
2624  "Splat APInt should be element width");
2625  assert((!N1IsConst ||
2626  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
2627  "Splat APInt should be element width");
2628  } else {
2629  N0IsConst = isa<ConstantSDNode>(N0);
2630  if (N0IsConst) {
2631  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2632  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2633  }
2634  N1IsConst = isa<ConstantSDNode>(N1);
2635  if (N1IsConst) {
2636  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2637  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2638  }
2639  }
2640 
2641  // fold (mul c1, c2) -> c1*c2
2642  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2643  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2644  N0.getNode(), N1.getNode())
2645 
2646  // canonicalize constant to RHS (vector doesn't have to splat)
  // NOTE(review): the guarding condition lines (orig. 2647-2648) were lost
  // in extraction; this return is the body of that canonicalization fold.
2649  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2650  // fold (mul x, 0) -> 0
2651  if (N1IsConst && ConstValue1.isNullValue())
2652  return N1;
2653  // fold (mul x, 1) -> x
2654  if (N1IsConst && ConstValue1.isOneValue())
2655  return N0;
2656 
2657  if (SDValue NewSel = foldBinOpIntoSelect(N))
2658  return NewSel;
2659 
2660  // fold (mul x, -1) -> 0-x
2661  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2662  SDLoc DL(N);
2663  return DAG.getNode(ISD::SUB, DL, VT,
2664  DAG.getConstant(0, DL, VT), N0);
2665  }
2666  // fold (mul x, (1 << c)) -> x << c
2667  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2668  DAG.isKnownToBeAPowerOfTwo(N1) &&
2669  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
2670  SDLoc DL(N);
  // BuildLogBase2 recovers the shift amount c from the power-of-two operand.
2671  SDValue LogBase2 = BuildLogBase2(N1, DL);
2672  AddToWorklist(LogBase2.getNode());
2673 
2674  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2675  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2676  AddToWorklist(Trunc.getNode());
2677  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
2678  }
2679  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2680  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
2681  unsigned Log2Val = (-ConstValue1).logBase2();
2682  SDLoc DL(N);
2683  // FIXME: If the input is something that is easily negated (e.g. a
2684  // single-use add), we should put the negate there.
2685  return DAG.getNode(ISD::SUB, DL, VT,
2686  DAG.getConstant(0, DL, VT),
2687  DAG.getNode(ISD::SHL, DL, VT, N0,
2688  DAG.getConstant(Log2Val, DL,
2689  getShiftAmountTy(N0.getValueType()))));
2690  }
2691 
2692  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2693  if (N0.getOpcode() == ISD::SHL &&
2694  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2695  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2696  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
  // NOTE(review): a guard line (orig. 2697, presumably checking that C3
  // folded to a constant) was lost in extraction — confirm against upstream.
2698  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2699  }
2700 
2701  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2702  // use.
2703  {
2704  SDValue Sh(nullptr, 0), Y(nullptr, 0);
2705 
2706  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2707  if (N0.getOpcode() == ISD::SHL &&
  // NOTE(review): a conjunct line (orig. 2708) is missing here.
2709  N0.getNode()->hasOneUse()) {
2710  Sh = N0; Y = N1;
2711  } else if (N1.getOpcode() == ISD::SHL &&
  // NOTE(review): a conjunct line (orig. 2712) is missing here.
2713  N1.getNode()->hasOneUse()) {
2714  Sh = N1; Y = N0;
2715  }
2716 
2717  if (Sh.getNode()) {
2718  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2719  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2720  }
2721  }
2722 
2723  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  // NOTE(review): condition lines orig. 2724 and 2726 were lost in
  // extraction; only part of the guard remains below.
2725  N0.getOpcode() == ISD::ADD &&
2727  isMulAddWithConstProfitable(N, N0, N1))
2728  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2729  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2730  N0.getOperand(0), N1),
2731  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2732  N0.getOperand(1), N1));
2733 
2734  // reassociate mul
2735  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2736  return RMUL;
2737 
2738  return SDValue();
2739 }
2740 
2741 /// Return true if divmod libcall is available.
2742 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2743  const TargetLowering &TLI) {
2744  RTLIB::Libcall LC;
2745  EVT NodeType = Node->getValueType(0);
2746  if (!NodeType.isSimple())
2747  return false;
2748  switch (NodeType.getSimpleVT().SimpleTy) {
2749  default: return false; // No libcall for vector types.
2750  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
2751  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2752  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2753  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2754  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2755  }
2756 
2757  return TLI.getLibcallName(LC) != nullptr;
2758 }
2759 
2760 /// Issue divrem if both quotient and remainder are needed.
2760 /// Issue divrem if both quotient and remainder are needed.
// Given a DIV or REM node, look for sibling users of the same (Op0, Op1)
// pair that compute the complementary operation, and replace the whole
// group with a single SDIVREM/UDIVREM node. Returns the combined node (or
// an empty SDValue if no combining happened).
2761 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2762  if (Node->use_empty())
2763  return SDValue(); // This is a dead node, leave it alone.
2764 
2765  unsigned Opcode = Node->getOpcode();
2766  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2767  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2768 
2769  // DivMod lib calls can still work on non-legal types if using lib-calls.
2770  EVT VT = Node->getValueType(0);
2771  if (VT.isVector() || !VT.isInteger())
2772  return SDValue();
2773 
2774  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2775  return SDValue();
2776 
2777  // If DIVREM is going to get expanded into a libcall,
2778  // but there is no libcall available, then don't combine.
2779  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2780  !isDivRemLibcallAvailable(Node, isSigned, TLI))
2781  return SDValue();
2782 
2783  // If div is legal, it's better to do the normal expansion
2784  unsigned OtherOpcode = 0;
2785  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2786  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2787  if (TLI.isOperationLegalOrCustom(Opcode, VT))
2788  return SDValue();
2789  } else {
2790  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2791  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2792  return SDValue();
2793  }
2794 
  // Walk all users of the dividend looking for matching div/rem/divrem
  // nodes over the exact same operand pair.
2795  SDValue Op0 = Node->getOperand(0);
2796  SDValue Op1 = Node->getOperand(1);
2797  SDValue combined;
2798  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2799  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
2800  SDNode *User = *UI;
  // Skip the node itself, nodes already deleted by an earlier CombineTo in
  // this loop, and dead users.
2801  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
2802  User->use_empty())
2803  continue;
2804  // Convert the other matching node(s), too;
2805  // otherwise, the DIVREM may get target-legalized into something
2806  // target-specific that we won't be able to recognize.
2807  unsigned UserOpc = User->getOpcode();
2808  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2809  User->getOperand(0) == Op0 &&
2810  User->getOperand(1) == Op1) {
2811  if (!combined) {
  // Create (or reuse) the DIVREM node the first time a partner is found.
2812  if (UserOpc == OtherOpcode) {
2813  SDVTList VTs = DAG.getVTList(VT, VT);
2814  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2815  } else if (UserOpc == DivRemOpc) {
2816  combined = SDValue(User, 0);
2817  } else {
2818  assert(UserOpc == Opcode);
2819  continue;
2820  }
2821  }
  // Result 0 of DIVREM is the quotient, result 1 the remainder.
2822  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2823  CombineTo(User, combined);
2824  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2825  CombineTo(User, combined.getValue(1));
2826  }
2827  }
2828  return combined;
2829 }
2830 
// NOTE(review): the opening signature line of this helper (orig. 2831,
// presumably 'static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {'
// given the call sites below) was lost in extraction — confirm upstream.
// Folds div/rem cases involving undef operands to UNDEF or zero.
2832  SDValue N0 = N->getOperand(0);
2833  SDValue N1 = N->getOperand(1);
2834  EVT VT = N->getValueType(0);
2835  SDLoc DL(N);
2836 
  // Let the DAG decide whether this opcode with these operands is undef
  // (e.g. division by undef).
2837  if (DAG.isUndef(N->getOpcode(), {N0, N1}))
2838  return DAG.getUNDEF(VT);
2839 
2840  // undef / X -> 0
2841  // undef % X -> 0
2842  if (N0.isUndef())
2843  return DAG.getConstant(0, DL, VT);
2844 
2845  return SDValue();
2846 }
2847 
// Combine a signed integer divide: constant folding, strength reduction to
// UDIV when both sign bits are zero, expansion of division by (+/-) powers
// of two into shift sequences, and target-driven BuildSDIV/useDivRem.
// NOTE(review): several original lines were lost in extraction and are
// flagged at each gap below.
2848 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2849  SDValue N0 = N->getOperand(0);
2850  SDValue N1 = N->getOperand(1);
2851  EVT VT = N->getValueType(0);
2852 
2853  // fold vector ops
2854  if (VT.isVector())
2855  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2856  return FoldedVOp;
2857 
2858  SDLoc DL(N);
2859 
2860  // fold (sdiv c1, c2) -> c1/c2
  // NOTE(review): the declarations of N0C/N1C (orig. 2861-2862, presumably
  // isConstOrConstSplat lookups) were lost in extraction.
2863  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2864  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2865  // fold (sdiv X, 1) -> X
2866  if (N1C && N1C->isOne())
2867  return N0;
2868  // fold (sdiv X, -1) -> 0-X
2869  if (N1C && N1C->isAllOnesValue())
2870  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
2871 
2872  if (SDValue V = simplifyDivRem(N, DAG))
2873  return V;
2874 
2875  if (SDValue NewSel = foldBinOpIntoSelect(N))
2876  return NewSel;
2877 
2878  // If we know the sign bits of both operands are zero, strength reduce to a
2879  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
2880  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2881  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2882 
2883  // Helper for determining whether a value is a power-2 constant scalar or a
2884  // vector of such elements.
  // KnownNegatives records, per vector element, whether the divisor element
  // was a negative power of two (so the result must be negated).
2885  SmallBitVector KnownNegatives(
2886  (N1C || !VT.isVector()) ? 1 : VT.getVectorNumElements(), false);
2887  unsigned EltIndex = 0;
2888  auto IsPowerOfTwo = [&KnownNegatives, &EltIndex](ConstantSDNode *C) {
2889  unsigned Idx = EltIndex++;
2890  if (C->isNullValue() || C->isOpaque())
2891  return false;
2892  // The instruction sequence to be generated contains shifting C by (op size
2893  // in bits - # of trailing zeros in C), which results in an undef value when
2894  // C == 1. (e.g. if the op size in bits is 32, it will be (sra x , 32) if C
2895  // == 1)
2896  if (C->getAPIntValue().isOneValue())
2897  return false;
2898 
2899  if (C->getAPIntValue().isPowerOf2())
2900  return true;
2901  if ((-C->getAPIntValue()).isPowerOf2()) {
2902  KnownNegatives.set(Idx);
2903  return true;
2904  }
2905  return false;
2906  };
2907 
2908  // fold (sdiv X, pow2) -> simple ops after legalize
2909  // FIXME: We check for the exact bit here because the generic lowering gives
2910  // better results in that case. The target-specific lowering should learn how
2911  // to handle exact sdivs efficiently.
2912  if (!N->getFlags().hasExact() &&
2913  ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
2914  // Target-specific implementation of sdiv x, pow2.
2915  if (SDValue Res = BuildSDIVPow2(N))
2916  return Res;
2917 
2918  // Create constants that are functions of the shift amount value.
2919  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
2920  SDValue Bits = DAG.getConstant(VT.getScalarSizeInBits(), DL, ShiftAmtTy);
2921  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
2922  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
2923  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
2924  if (!isConstantOrConstantVector(Inexact))
2925  return SDValue();
2926  // Splat the sign bit into the register
2927  SDValue Sign = DAG.getNode(
2928  ISD::SRA, DL, VT, N0,
2929  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, ShiftAmtTy));
2930  AddToWorklist(Sign.getNode());
2931 
2932  // Add (N0 < 0) ? abs2 - 1 : 0;
  // This rounds toward zero (as sdiv requires) instead of toward -inf.
2933  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
2934  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
2935  AddToWorklist(Srl.getNode());
2936  AddToWorklist(Add.getNode()); // Divide by pow2
2937  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
2938 
2939  // If dividing by a positive value, we're done. Otherwise, the result must
2940  // be negated.
2941  if (KnownNegatives.none())
2942  return Sra;
2943 
2944  AddToWorklist(Sra.getNode());
2945  SDValue Sub =
2946  DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Sra);
2947  // If all shift amount elements are negative, we're done.
2948  if (KnownNegatives.all())
2949  return Sub;
2950 
2951  // Shift amount has both positive and negative elements.
2952  assert(VT.isVector() && !N0C &&
2953  "Expecting a non-splat vector shift amount");
2954 
  // Build an i1 mask selecting the negated result for negative divisors.
2955  SmallVector<SDValue, 64> VSelectMask;
2956  for (int i = 0, e = VT.getVectorNumElements(); i < e; ++i)
2957  VSelectMask.push_back(
2958  DAG.getConstant(KnownNegatives[i] ? -1 : 0, DL, MVT::i1));
2959 
2960  SDValue Mask =
  // NOTE(review): a line (orig. 2961, presumably a DAG.getBuildVector call
  // with the i1 vector type) was lost in extraction.
2962  VT.getVectorElementCount()),
2963  DL, VSelectMask);
2964  return DAG.getNode(ISD::VSELECT, DL, VT, Mask, Sub, Sra);
2965  }
2966 
2967  // If integer divide is expensive and we satisfy the requirements, emit an
2968  // alternate sequence. Targets may check function attributes for size/speed
2969  // trade-offs.
  // NOTE(review): the declaration of Attr (orig. 2970, presumably the
  // function's AttributeList) was lost in extraction.
2971  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2972  if (SDValue Op = BuildSDIV(N))
2973  return Op;
2974 
2975  // sdiv, srem -> sdivrem
2976  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2977  // true. Otherwise, we break the simplification logic in visitREM().
2978  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2979  if (SDValue DivRem = useDivRem(N))
2980  return DivRem;
2981 
2982  return SDValue();
2983 }
2984 
// Combine an unsigned integer divide: constant folding, division by a
// power of two (or a shifted power of two) reduced to a logical shift, and
// target-driven BuildUDIV/useDivRem.
// NOTE(review): some original lines were lost in extraction; see gap notes.
2985 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2986  SDValue N0 = N->getOperand(0);
2987  SDValue N1 = N->getOperand(1);
2988  EVT VT = N->getValueType(0);
2989 
2990  // fold vector ops
2991  if (VT.isVector())
2992  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2993  return FoldedVOp;
2994 
2995  SDLoc DL(N);
2996 
2997  // fold (udiv c1, c2) -> c1/c2
  // NOTE(review): the declarations of N0C/N1C (orig. 2998-2999) were lost
  // in extraction.
3000  if (N0C && N1C)
3001  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3002  N0C, N1C))
3003  return Folded;
3004 
3005  if (SDValue V = simplifyDivRem(N, DAG))
3006  return V;
3007 
3008  if (SDValue NewSel = foldBinOpIntoSelect(N))
3009  return NewSel;
3010 
3011  // fold (udiv x, (1 << c)) -> x >>u c
3012  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3013  DAG.isKnownToBeAPowerOfTwo(N1)) {
3014  SDValue LogBase2 = BuildLogBase2(N1, DL);
3015  AddToWorklist(LogBase2.getNode());
3016 
3017  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3018  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3019  AddToWorklist(Trunc.getNode());
3020  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3021  }
3022 
3023  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3024  if (N1.getOpcode() == ISD::SHL) {
3025  SDValue N10 = N1.getOperand(0);
3026  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3027  DAG.isKnownToBeAPowerOfTwo(N10)) {
3028  SDValue LogBase2 = BuildLogBase2(N10, DL);
3029  AddToWorklist(LogBase2.getNode());
3030 
  // The total shift is y + log2(c); compute it in the shift-amount type.
3031  EVT ADDVT = N1.getOperand(1).getValueType();
3032  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3033  AddToWorklist(Trunc.getNode());
3034  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3035  AddToWorklist(Add.getNode());
3036  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3037  }
3038  }
3039 
3040  // fold (udiv x, c) -> alternate
  // NOTE(review): the declaration of Attr (orig. 3041) was lost in
  // extraction.
3042  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
3043  if (SDValue Op = BuildUDIV(N))
3044  return Op;
3045 
3046  // sdiv, srem -> sdivrem
3047  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3048  // true. Otherwise, we break the simplification logic in visitREM().
3049  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3050  if (SDValue DivRem = useDivRem(N))
3051  return DivRem;
3052 
3053  return SDValue();
3054 }
3055 
3056 // handles ISD::SREM and ISD::UREM
3056 // handles ISD::SREM and ISD::UREM
// Combine a remainder node: constant folding, srem -> urem when sign bits
// are zero, urem by a power of two reduced to an AND mask, and lowering
// X % C to X - (X/C)*C when the speculative divide simplifies.
// NOTE(review): some original lines were lost in extraction; see gap notes.
3057 SDValue DAGCombiner::visitREM(SDNode *N) {
3058  unsigned Opcode = N->getOpcode();
3059  SDValue N0 = N->getOperand(0);
3060  SDValue N1 = N->getOperand(1);
3061  EVT VT = N->getValueType(0);
3062  bool isSigned = (Opcode == ISD::SREM);
3063  SDLoc DL(N);
3064 
3065  // fold (rem c1, c2) -> c1%c2
  // NOTE(review): the declarations of N0C/N1C (orig. 3066-3067) were lost
  // in extraction.
3068  if (N0C && N1C)
3069  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3070  return Folded;
3071 
3072  if (SDValue V = simplifyDivRem(N, DAG))
3073  return V;
3074 
3075  if (SDValue NewSel = foldBinOpIntoSelect(N))
3076  return NewSel;
3077 
3078  if (isSigned) {
3079  // If we know the sign bits of both operands are zero, strength reduce to a
3080  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3081  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3082  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3083  } else {
3084  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3085  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3086  // fold (urem x, pow2) -> (and x, pow2-1)
3087  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3088  AddToWorklist(Add.getNode());
3089  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3090  }
3091  if (N1.getOpcode() == ISD::SHL &&
3092  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3093  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3094  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3095  AddToWorklist(Add.getNode());
3096  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3097  }
3098  }
3099 
  // NOTE(review): a line (orig. 3100, presumably the AttributeList Attr
  // declaration used below) was lost in extraction.
3101 
3102  // If X/C can be simplified by the division-by-constant logic, lower
3103  // X%C to the equivalent of X-X/C*C.
3104  // To avoid mangling nodes, this simplification requires that the combine()
3105  // call for the speculative DIV must not cause a DIVREM conversion. We guard
3106  // against this by skipping the simplification if isIntDivCheap(). When
3107  // div is not cheap, combine will not return a DIVREM. Regardless,
3108  // checking cheapness here makes sense since the simplification results in
3109  // fatter code.
3110  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
3111  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3112  SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
3113  AddToWorklist(Div.getNode());
3114  SDValue OptimizedDiv = combine(Div.getNode());
3115  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode() &&
3116  OptimizedDiv.getOpcode() != ISD::UDIVREM &&
3117  OptimizedDiv.getOpcode() != ISD::SDIVREM) {
3118  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3119  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3120  AddToWorklist(Mul.getNode());
3121  return Sub;
3122  }
3123  }
3124 
3125  // sdiv, srem -> sdivrem
  // Result value 1 of the DIVREM node is the remainder.
3126  if (SDValue DivRem = useDivRem(N))
3127  return DivRem.getValue(1);
3128 
3129  return SDValue();
3130 }
3131 
// Combine a signed multiply-high: fold trivial multiplicands, and when a
// type twice as wide is legal, compute the full product in the wide type
// and shift the high half down.
// NOTE(review): two original lines were lost in extraction; see gap notes.
3132 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3133  SDValue N0 = N->getOperand(0);
3134  SDValue N1 = N->getOperand(1);
3135  EVT VT = N->getValueType(0);
3136  SDLoc DL(N);
3137 
3138  if (VT.isVector()) {
3139  // fold (mulhs x, 0) -> 0
  // NOTE(review): the guard lines for these two vector returns (orig. 3140
  // and 3142, presumably isBuildVectorAllZeros checks on N1 and N0) were
  // lost in extraction.
3141  return N1;
3143  return N0;
3144  }
3145 
3146  // fold (mulhs x, 0) -> 0
3147  if (isNullConstant(N1))
3148  return N1;
3149  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is just the sign extension of x.
3150  if (isOneConstant(N1))
3151  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3152  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3153  getShiftAmountTy(N0.getValueType())));
3154 
3155  // fold (mulhs x, undef) -> 0
3156  if (N0.isUndef() || N1.isUndef())
3157  return DAG.getConstant(0, DL, VT);
3158 
3159  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3160  // plus a shift.
3161  if (VT.isSimple() && !VT.isVector()) {
3162  MVT Simple = VT.getSimpleVT();
3163  unsigned SimpleSize = Simple.getSizeInBits();
3164  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3165  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3166  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3167  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3168  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3169  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3170  DAG.getConstant(SimpleSize, DL,
3171  getShiftAmountTy(N1.getValueType())));
3172  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3173  }
3174  }
3175 
3176  return SDValue();
3177 }
3178 
// Combine an unsigned multiply-high: fold trivial multiplicands, and when a
// type twice as wide is legal, compute the full product in the wide type
// and shift the high half down.
// NOTE(review): two original lines were lost in extraction; see gap notes.
3179 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3180  SDValue N0 = N->getOperand(0);
3181  SDValue N1 = N->getOperand(1);
3182  EVT VT = N->getValueType(0);
3183  SDLoc DL(N);
3184 
3185  if (VT.isVector()) {
3186  // fold (mulhu x, 0) -> 0
  // NOTE(review): the guard lines for these two vector returns (orig. 3187
  // and 3189, presumably isBuildVectorAllZeros checks on N1 and N0) were
  // lost in extraction.
3188  return N1;
3190  return N0;
3191  }
3192 
3193  // fold (mulhu x, 0) -> 0
3194  if (isNullConstant(N1))
3195  return N1;
3196  // fold (mulhu x, 1) -> 0
  // The high half of an unsigned x*1 is always zero.
3197  if (isOneConstant(N1))
3198  return DAG.getConstant(0, DL, N0.getValueType());
3199  // fold (mulhu x, undef) -> 0
3200  if (N0.isUndef() || N1.isUndef())
3201  return DAG.getConstant(0, DL, VT);
3202 
3203  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3204  // plus a shift.
3205  if (VT.isSimple() && !VT.isVector()) {
3206  MVT Simple = VT.getSimpleVT();
3207  unsigned SimpleSize = Simple.getSizeInBits();
3208  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3209  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3210  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3211  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3212  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3213  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3214  DAG.getConstant(SimpleSize, DL,
3215  getShiftAmountTy(N1.getValueType())));
3216  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3217  }
3218  }
3219 
3220  return SDValue();
3221 }
3222 
3223 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3224 /// give the opcodes for the two computations that are being performed. Return
3225 /// true if a simplification was made.
3226 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3227  unsigned HiOp) {
3228  // If the high half is not needed, just compute the low half.
3229  bool HiExists = N->hasAnyUseOfValue(1);
3230  if (!HiExists &&
3231  (!LegalOperations ||
3232  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3233  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3234  return CombineTo(N, Res, Res);
3235  }
3236 
3237  // If the low half is not needed, just compute the high half.
3238  bool LoExists = N->hasAnyUseOfValue(0);
3239  if (!LoExists &&
3240  (!LegalOperations ||
3241  TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
3242  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3243  return CombineTo(N, Res, Res);
3244  }
3245 
3246  // If both halves are used, return as it is.
3247  if (LoExists && HiExists)
3248  return SDValue();
3249 
3250  // If the two computed results can be simplified separately, separate them.
3251  if (LoExists) {
3252  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3253  AddToWorklist(Lo.getNode());
3254  SDValue LoOpt = combine(Lo.getNode());
3255  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3256  (!LegalOperations ||
3257  TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
3258  return CombineTo(N, LoOpt, LoOpt);
3259  }
3260 
3261  if (HiExists) {
3262  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3263  AddToWorklist(Hi.getNode());
3264  SDValue HiOpt = combine(Hi.getNode());
3265  if (HiOpt.getNode() && HiOpt != Hi &&
3266  (!LegalOperations ||
3267  TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
3268  return CombineTo(N, HiOpt, HiOpt);
3269  }
3270 
3271  return SDValue();
3272 }
3273 
3274 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3275  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3276  return Res;
3277 
3278  EVT VT = N->getValueType(0);
3279  SDLoc DL(N);
3280 
3281  // If the type is twice as wide is legal, transform the mulhu to a wider
3282  // multiply plus a shift.
3283  if (VT.isSimple() && !VT.isVector()) {
3284  MVT Simple = VT.getSimpleVT();
3285  unsigned SimpleSize = Simple.getSizeInBits();
3286  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3287  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3288  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3289  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3290  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3291  // Compute the high part as N1.
3292  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3293  DAG.getConstant(SimpleSize, DL,
3294  getShiftAmountTy(Lo.getValueType())));
3295  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3296  // Compute the low part as N0.
3297  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3298  return CombineTo(N, Lo, Hi);
3299  }
3300  }
3301 
3302  return SDValue();
3303 }
3304 
3305 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3306  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3307  return Res;
3308 
3309  EVT VT = N->getValueType(0);
3310  SDLoc DL(N);
3311 
3312  // If the type is twice as wide is legal, transform the mulhu to a wider
3313  // multiply plus a shift.
3314  if (VT.isSimple() && !VT.isVector()) {
3315  MVT Simple = VT.getSimpleVT();
3316  unsigned SimpleSize = Simple.getSizeInBits();
3317  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3318  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3319  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3320  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3321  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3322  // Compute the high part as N1.
3323  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3324  DAG.getConstant(SimpleSize, DL,
3325  getShiftAmountTy(Lo.getValueType())));
3326  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3327  // Compute the low part as N0.
3328  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3329  return CombineTo(N, Lo, Hi);
3330  }
3331  }
3332 
3333  return SDValue();
3334 }
3335 
3336 SDValue DAGCombiner::visitSMULO(SDNode *N) {
3337  // (smulo x, 2) -> (saddo x, x)
3338  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3339  if (C2->getAPIntValue() == 2)
3340  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
3341  N->getOperand(0), N->getOperand(0));
3342 
3343  return SDValue();
3344 }
3345 
3346 SDValue DAGCombiner::visitUMULO(SDNode *N) {
3347  // (umulo x, 2) -> (uaddo x, x)
3348  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
3349  if (C2->getAPIntValue() == 2)
3350  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
3351  N->getOperand(0), N->getOperand(0));
3352 
3353  return SDValue();
3354 }
3355 
// Combine SMIN/SMAX/UMIN/UMAX: constant folding, canonicalization of the
// constant operand to the RHS, and flipping between the signed and unsigned
// flavor when the sign bits are known zero and only the flipped opcode is
// legal.
// NOTE(review): several original lines were lost in extraction; see notes.
3356 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3357  SDValue N0 = N->getOperand(0);
3358  SDValue N1 = N->getOperand(1);
3359  EVT VT = N0.getValueType();
3360 
3361  // fold vector ops
3362  if (VT.isVector())
3363  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3364  return FoldedVOp;
3365 
3366  // fold operation with constant operands.
  // NOTE(review): the declarations of N0C/N1C (orig. 3367-3368) were lost
  // in extraction.
3369  if (N0C && N1C)
3370  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3371 
3372  // canonicalize constant to RHS
  // NOTE(review): the guarding condition lines (orig. 3373-3374) were lost
  // in extraction; this return is the body of the canonicalization fold.
3375  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3376 
3377  // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3378  // Only do this if the current op isn't legal and the flipped is.
3379  unsigned Opcode = N->getOpcode();
3380  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3381  if (!TLI.isOperationLegal(Opcode, VT) &&
3382  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
3383  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
3384  unsigned AltOpcode;
3385  switch (Opcode) {
3386  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
3387  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
3388  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
3389  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
3390  default: llvm_unreachable("Unknown MINMAX opcode");
3391  }
3392  if (TLI.isOperationLegal(AltOpcode, VT))
3393  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
3394  }
3395 
3396  return SDValue();
3397 }
3398 
3399 /// If this is a binary operator with two operands of the same opcode, try to
3400 /// simplify it.
3401 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
3402  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3403  EVT VT = N0.getValueType();
3404  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
3405 
3406  // Bail early if none of these transforms apply.
3407  if (N0.getNumOperands() == 0) return SDValue();
3408 
3409  // For each of OP in AND/OR/XOR:
3410  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
3411  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
3412  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
3413  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
3414  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
3415  //
3416  // do not sink logical op inside of a vector extend, since it may combine
3417  // into a vsetcc.
3418  EVT Op0VT = N0.getOperand(0).getValueType();
3419  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
3420  N0.getOpcode() == ISD::SIGN_EXTEND ||
3421  N0.getOpcode() == ISD::BSWAP ||
3422  // Avoid infinite looping with PromoteIntBinOp.
3423  (N0.getOpcode() == ISD::ANY_EXTEND &&
3424  (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
3425  (N0.getOpcode() == ISD::TRUNCATE &&
3426  (!TLI.isZExtFree(VT, Op0VT) ||
3427  !TLI.isTruncateFree(Op0VT, VT)) &&
3428  TLI.isTypeLegal(Op0VT))) &&
3429  !VT.isVector() &&
3430  Op0VT == N1.getOperand(0).getValueType() &&
3431  (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
3432  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3433  N0.getOperand(0).getValueType(),
3434  N0.getOperand(0), N1.getOperand(0));
3435  AddToWorklist(ORNode.getNode());
3436  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
3437  }
3438 
3439  // For each of OP in SHL/SRL/SRA/AND...
3440  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
3441  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
3442  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
3443  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
3444  N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
3445  N0.getOperand(1) == N1.getOperand(1)) {
3446  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
3447  N0.getOperand(0).getValueType(),
3448  N0.getOperand(0), N1.getOperand(0));
3449  AddToWorklist(ORNode.getNode());
3450  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
3451  ORNode, N0.getOperand(1));
3452  }
3453 
3454  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
3455  // Only perform this optimization up until type legalization, before
3456  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
3457  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
3458  // we don't want to undo this promotion.
3459  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
3460  // on scalars.
3461  if ((N0.getOpcode() == ISD::BITCAST ||
3462  N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
3463  Level <= AfterLegalizeTypes) {
3464  SDValue In0 = N0.getOperand(0);
3465  SDValue In1 = N1.getOperand(0);
3466  EVT In0Ty = In0.getValueType();
3467  EVT In1Ty = In1.getValueType();
3468  SDLoc DL(N);
3469  // If both incoming values are integers, and the original types are the
3470  // same.
3471  if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
3472  SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
3473  SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
3474  AddToWorklist(Op.getNode());
3475  return BC;
3476  }
3477  }
3478 
3479  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
3480  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
3481  // If both shuffles use the same mask, and both shuffle within a single
3482  // vector, then it is worthwhile to move the swizzle after the operation.
3483  // The type-legalizer generates this pattern when loading illegal
3484  // vector types from memory. In many cases this allows additional shuffle
3485  // optimizations.
3486  // There are other cases where moving the shuffle after the xor/and/or
3487  // is profitable even if shuffles don't perform a swizzle.
3488  // If both shuffles use the same mask, and both shuffles have the same first
3489  // or second operand, then it might still be profitable to move the shuffle
3490  // after the xor/and/or operation.
3491  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
3492  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
3493  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
3494 
3495  assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
3496  "Inputs to shuffles are not the same type");
3497 
3498  // Check that both shuffles use the same mask. The masks are known to be of
3499  // the same length because the result vector type is the same.
3500  // Check also that shuffles have only one use to avoid introducing extra
3501  // instructions.
3502  if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
3503  SVN0->getMask().equals(SVN1->getMask())) {
3504  SDValue ShOp = N0->getOperand(1);
3505 
3506  // Don't try to fold this node if it requires introducing a
3507  // build vector of all zeros that might be illegal at this stage.
3508  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3509  if (!LegalTypes)
3510  ShOp = DAG.getConstant(0, SDLoc(N), VT);
3511  else
3512  ShOp = SDValue();
3513  }
3514 
3515  // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
3516  // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
3517  // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
3518  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
3519  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3520  N0->getOperand(0), N1->getOperand(0));
3521  AddToWorklist(NewNode.getNode());
3522  return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
3523  SVN0->getMask());
3524  }
3525 
3526  // Don't try to fold this node if it requires introducing a
3527  // build vector of all zeros that might be illegal at this stage.
3528  ShOp = N0->getOperand(0);
3529  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
3530  if (!LegalTypes)
3531  ShOp = DAG.getConstant(0, SDLoc(N), VT);
3532  else
3533  ShOp = SDValue();
3534  }
3535 
3536  // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
3537  // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
3538  // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
3539  if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
3540  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
3541  N0->getOperand(1), N1->getOperand(1));
3542  AddToWorklist(NewNode.getNode());
3543  return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
3544  SVN0->getMask());
3545  }
3546  }
3547  }
3548 
3549  return SDValue();
3550 }
3551 
/// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                                       const SDLoc &DL) {
  // Decompose each operand into a setcc-equivalent: LL/LR (resp. RL/RR)
  // compared under condition code N0CC (resp. N1CC).
  SDValue LL, LR, RL, RR, N0CC, N1CC;
  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
      !isSetCCEquivalent(N1, RL, RR, N1CC))
    return SDValue();

  assert(N0.getValueType() == N1.getValueType() &&
         "Unexpected operand types for bitwise logic op");
  assert(LL.getValueType() == LR.getValueType() &&
         RL.getValueType() == RR.getValueType() &&
         "Unexpected operand types for setcc");

  // If we're here post-legalization or the logic op type is not i1, the logic
  // op type must match a setcc result type. Also, all folds require new
  // operations on the left and right operands, so those types must match.
  EVT VT = N0.getValueType();
  EVT OpVT = LL.getValueType();
  if (LegalOperations || VT.getScalarType() != MVT::i1)
    if (VT != getSetCCResultType(OpVT))
      return SDValue();
  if (OpVT != RL.getValueType())
    return SDValue();

  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
  bool IsInteger = OpVT.isInteger();
  // Both compares test against the same RHS with the same predicate: several
  // such pairs collapse into a single compare of a combined LHS.
  if (LR == RR && CC0 == CC1 && IsInteger) {
    bool IsZero = isNullConstantOrNullSplatConstant(LR);
    bool IsNeg1 = isAllOnesConstantOrAllOnesSplatConstant(LR);

    // All bits clear?
    bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
    // All sign bits clear?
    bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
    // Any bits set?
    bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
    // Any sign bits set?
    bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;

    // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
    // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
    // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
    // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
    if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
      SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(Or.getNode());
      return DAG.getSetCC(DL, VT, Or, LR, CC1);
    }

    // All bits set?
    bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
    // All sign bits set?
    bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
    // Any bits clear?
    bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
    // Any sign bits clear?
    bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;

    // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
    // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
    // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or (setgt X, -1), (setgt Y -1)) --> (setgt (and X, Y), -1)
    if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
      SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
      AddToWorklist(And.getNode());
      return DAG.getSetCC(DL, VT, And, LR, CC1);
    }
  }

  // TODO: What is the 'or' equivalent of this fold?
  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
  // The >1-bit width check matters: in a 1-bit type, 0 and -1 are the only
  // values and the add/compare rewrite would be meaningless.
  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
      IsInteger && CC0 == ISD::SETNE &&
      ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
       (isAllOnesConstant(LR) && isNullConstant(RR)))) {
    SDValue One = DAG.getConstant(1, DL, OpVT);
    SDValue Two = DAG.getConstant(2, DL, OpVT);
    SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
    AddToWorklist(Add.getNode());
    return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
  }

  // Try more general transforms if the predicates match and the only user of
  // the compares is the 'and' or 'or'.
  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
      N0.hasOneUse() && N1.hasOneUse()) {
    // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
    // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
    if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
      SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
      SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
      SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
      SDValue Zero = DAG.getConstant(0, DL, OpVT);
      return DAG.getSetCC(DL, VT, Or, Zero, CC1);
    }
  }

  // Canonicalize equivalent operands to LL == RL.
  if (LL == RR && LR == RL) {
    CC1 = ISD::getSetCCSwappedOperands(CC1);
    std::swap(RL, RR);
  }

  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
  if (LL == RL && LR == RR) {
    ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
                                : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
    if (NewCC != ISD::SETCC_INVALID &&
        (!LegalOperations ||
         (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
          TLI.isOperationLegal(ISD::SETCC, OpVT))))
      return DAG.getSetCC(DL, VT, LL, LR, NewCC);
  }

  return SDValue();
}
3671 
3672 /// This contains all DAGCombine rules which reduce two values combined by
3673 /// an And operation to a single value. This makes them reusable in the context
3674 /// of visitSELECT(). Rules involving constants are not included as
3675 /// visitSELECT() already handles those cases.
3676 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
3677  EVT VT = N1.getValueType();
3678  SDLoc DL(N);
3679 
3680  // fold (and x, undef) -> 0
3681  if (N0.isUndef() || N1.isUndef())
3682  return DAG.getConstant(0, DL, VT);
3683 
3684  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
3685  return V;
3686 
3687  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3688  VT.getSizeInBits() <= 64) {
3689  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3690  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3691  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
3692  // immediate for an add, but it is legal if its top c2 bits are set,
3693  // transform the ADD so the immediate doesn't need to be materialized
3694  // in a register.
3695  APInt ADDC = ADDI->getAPIntValue();
3696  APInt SRLC = SRLI->getAPIntValue();
3697  if (ADDC.getMinSignedBits() <= 64 &&
3698  SRLC.ult(VT.getSizeInBits()) &&
3699  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3701  SRLC.getZExtValue());
3702  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3703  ADDC |= Mask;
3704  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3705  SDLoc DL0(N0);
3706  SDValue NewAdd =
3707  DAG.getNode(ISD::ADD, DL0, VT,
3708  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3709  CombineTo(N0.getNode(), NewAdd);
3710  // Return N so it doesn't get rechecked!
3711  return SDValue(N, 0);
3712  }
3713  }
3714  }
3715  }
3716  }
3717  }
3718 
3719  // Reduce bit extract of low half of an integer to the narrower type.
3720  // (and (srl i64:x, K), KMask) ->
3721  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
3722  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3723  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3724  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3725  unsigned Size = VT.getSizeInBits();
3726  const APInt &AndMask = CAnd->getAPIntValue();
3727  unsigned ShiftBits = CShift->getZExtValue();
3728 
3729  // Bail out, this node will probably disappear anyway.
3730  if (ShiftBits == 0)
3731  return SDValue();
3732 
3733  unsigned MaskBits = AndMask.countTrailingOnes();
3734  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3735 
3736  if (AndMask.isMask() &&
3737  // Required bits must not span the two halves of the integer and
3738  // must fit in the half size type.
3739  (ShiftBits + MaskBits <= Size / 2) &&
3740  TLI.isNarrowingProfitable(VT, HalfVT) &&
3741  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3742  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3743  TLI.isTruncateFree(VT, HalfVT) &&
3744  TLI.isZExtFree(HalfVT, VT)) {
3745  // The isNarrowingProfitable is to avoid regressions on PPC and
3746  // AArch64 which match a few 64-bit bit insert / bit extract patterns
3747  // on downstream users of this. Those patterns could probably be
3748  // extended to handle extensions mixed in.
3749 
3750  SDValue SL(N0);
3751  assert(MaskBits <= Size);
3752 
3753  // Extracting the highest bit of the low half.
3754  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3755  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3756  N0.getOperand(0));
3757 
3758  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3759  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3760  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3761  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3762  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3763  }
3764  }
3765  }
3766  }
3767 
3768  return SDValue();
3769 }
3770 
3771 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3772  EVT LoadResultTy, EVT &ExtVT) {
3773  if (!AndC->getAPIntValue().isMask())
3774  return false;
3775 
3776  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
3777 
3778  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3779  EVT LoadedVT = LoadN->getMemoryVT();
3780 
3781  if (ExtVT == LoadedVT &&
3782  (!LegalOperations ||
3783  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3784  // ZEXTLOAD will match without needing to change the size of the value being
3785  // loaded.
3786  return true;
3787  }
3788 
3789  // Do not change the width of a volatile load.
3790  if (LoadN->isVolatile())
3791  return false;
3792 
3793  // Do not generate loads of non-round integer types since these can
3794  // be expensive (and would be wrong if the type is not byte sized).
3795  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3796  return false;
3797 
3798  if (LegalOperations &&
3799  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3800  return false;
3801 
3802  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3803  return false;
3804 
3805  return true;
3806 }
3807 
3808 bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
3809  EVT &ExtVT, unsigned ShAmt) {
3810  // Don't transform one with multiple uses, this would require adding a new
3811  // load.
3812  if (!SDValue(LoadN, 0).hasOneUse())
3813  return false;
3814 
3815  if (LegalOperations &&
3816  !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
3817  return false;
3818 
3819  // Do not generate loads of non-round integer types since these can
3820  // be expensive (and would be wrong if the type is not byte sized).
3821  if (!ExtVT.isRound())
3822  return false;
3823 
3824  // Don't change the width of a volatile load.
3825  if (LoadN->isVolatile())
3826  return false;
3827 
3828  // Verify that we are actually reducing a load width here.
3829  if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
3830  return false;
3831 
3832  // For the transform to be legal, the load must produce only two values
3833  // (the value loaded and the chain). Don't transform a pre-increment
3834  // load, for example, which produces an extra value. Otherwise the
3835  // transformation is not equivalent, and the downstream logic to replace
3836  // uses gets things wrong.
3837  if (LoadN->getNumValues() > 2)
3838  return false;
3839 
3840  // Only allow byte offsets.
3841  if (ShAmt % 8)
3842  return false;
3843 
3844  // Ensure that this isn't going to produce an unsupported unaligned access.
3845  if (ShAmt && !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
3846  ExtVT, LoadN->getAddressSpace(),
3847  ShAmt / 8))
3848  return false;
3849 
3850 
3851  // If the load that we're shrinking is an extload and we're not just
3852  // discarding the extension we can't simply shrink the load. Bail.
3853  // TODO: It would be possible to merge the extensions in some cases.
3854  if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
3855  LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
3856  return false;
3857 
3858  if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
3859  return false;
3860 
3861  // It's not possible to generate a constant of extended or untyped type.
3862  EVT PtrType = LoadN->getOperand(1).getValueType();
3863  if (PtrType == MVT::Untyped || PtrType.isExtended())
3864  return false;
3865 
3866  return true;
3867 }
3868 
3869 bool DAGCombiner::SearchForAndLoads(SDNode *N,
3871  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
3873  SDNode *&NodeToMask) {
3874  // Recursively search for the operands, looking for loads which can be
3875  // narrowed.
3876  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
3877  SDValue Op = N->getOperand(i);
3878 
3879  if (Op.getValueType().isVector())
3880  return false;
3881 
3882  // Some constants may need fixing up later if they are too large.
3883  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
3884  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
3885  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
3886  NodesWithConsts.insert(N);
3887  continue;
3888  }
3889 
3890  if (!Op.hasOneUse())
3891  return false;
3892 
3893  switch(Op.getOpcode()) {
3894  case ISD::LOAD: {
3895  auto *Load = cast<LoadSDNode>(Op);
3896  EVT ExtVT;
3897  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
3898  isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
3899 
3900  // ZEXTLOAD is already small enough.
3901  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
3902  ExtVT.bitsGE(Load->getMemoryVT()))
3903  continue;
3904 
3905  // Use LE to convert equal sized loads to zext.
3906  if (ExtVT.bitsLE(Load->getMemoryVT()))
3907  Loads.insert(Load);
3908 
3909  continue;
3910  }
3911  return false;
3912  }
3913  case ISD::ZERO_EXTEND:
3914  case ISD::AssertZext: {
3915  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
3916  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3917  EVT VT = Op.getOpcode() == ISD::AssertZext ?
3918  cast<VTSDNode>(Op.getOperand(1))->getVT() :
3919  Op.getOperand(0).getValueType();
3920 
3921  // We can accept extending nodes if the mask is wider or an equal
3922  // width to the original type.
3923  if (ExtVT.bitsGE(VT))
3924  continue;
3925  break;
3926  }
3927  case ISD::OR:
3928  case ISD::XOR:
3929  case ISD::AND:
3930  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
3931  NodeToMask))
3932  return false;
3933  continue;
3934  }
3935 
3936  // Allow one node which will masked along with any loads found.
3937  if (NodeToMask)
3938  return false;
3939  NodeToMask = Op.getNode();
3940  }
3941  return true;
3942 }
3943 
3944 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
3945  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
3946  if (!Mask)
3947  return false;
3948 
3949  if (!Mask->getAPIntValue().isMask())
3950  return false;
3951 
3952  // No need to do anything if the and directly uses a load.
3953  if (isa<LoadSDNode>(N->getOperand(0)))
3954  return false;
3955 
3957  SmallPtrSet<SDNode*, 2> NodesWithConsts;
3958  SDNode *FixupNode = nullptr;
3959  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
3960  if (Loads.size() == 0)
3961  return false;
3962 
3963  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
3964  SDValue MaskOp = N->getOperand(1);
3965 
3966  // If it exists, fixup the single node we allow in the tree that needs
3967  // masking.
3968  if (FixupNode) {
3969  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
3970  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
3971  FixupNode->getValueType(0),
3972  SDValue(FixupNode, 0), MaskOp);
3973  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
3974  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
3975  MaskOp);
3976  }
3977 
3978  // Narrow any constants that need it.
3979  for (auto *LogicN : NodesWithConsts) {
3980  SDValue Op0 = LogicN->getOperand(0);
3981  SDValue Op1 = LogicN->getOperand(1);
3982 
3983  if (isa<ConstantSDNode>(Op0))
3984  std::swap(Op0, Op1);
3985 
3986  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
3987  Op1, MaskOp);
3988 
3989  DAG.UpdateNodeOperands(LogicN, Op0, And);
3990  }
3991 
3992  // Create narrow loads.
3993  for (auto *Load : Loads) {
3994  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
3995  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
3996  SDValue(Load, 0), MaskOp);
3997  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
3998  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
3999  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4000  assert(NewLoad &&
4001  "Shouldn't be masking the load if it can't be narrowed");
4002  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4003  }
4004  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4005  return true;
4006  }
4007  return false;
4008 }
4009 
// Main combine entry point for ISD::AND nodes.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x & x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    // NOTE(review): the guarding conditions and the heads of the
    // DAG.getConstant(...) calls in this vector section are missing from this
    // rendering; only continuation fragments remain. Confirm against the
    // upstream source before relying on this section.
    // do not return N0, because undef node may exist in N0
    SDLoc(N), N0.getValueType());
    // do not return N1, because undef node may exist in N1
    SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    // NOTE(review): the guards for the two returns below are likewise missing
    // from this rendering.
    return N1;
    return N0;
  }

  // fold (and c1, c2) -> c1&c2
  // NOTE(review): the declarations of N0C/N1C (constant matchers for N0/N1)
  // are not visible in this rendering — confirm against upstream.
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  // NOTE(review): the condition guarding this return is missing here.
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // Fold the AND into a select whose arms are constants.
  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;

  // Try to convert a constant mask AND into a shuffle clear mask.
  if (VT.isVector())
    if (SDValue Shuffle = XformToShuffleWithZero(N))
      return Shuffle;

  // fold (and (or x, C), D) -> D if (C & D) == D
  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
    return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::OR &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
    return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  // NOTE(review): one condition line of this `if` is missing from this
  // rendering — confirm the full condition against the upstream source.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // NOTE(review): the declaration of `Constant` is not visible in this
    // rendering (presumably an APInt declared just above) — confirm upstream.
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getScalarSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());

    // B: is the AND removable for this extension kind?
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);

      // Fold the AND away. NewLoad may get replaced immediately.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
                                (N0.getOpcode() == ISD::ANY_EXTEND &&
                                 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    if (SDValue Res = ReduceLoadWidth(N)) {
      LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
        ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);

      AddToWorklist(N);
      CombineTo(LN0, Res, Res.getValue(1));
      return SDValue(N, 0);
    }
  }

  if (Level >= AfterLegalizeTypes) {
    // Attempt to propagate the AND back up to the leaves which, if they're
    // loads, can be combined to narrow loads and the AND node can be removed.
    // Perform after legalization so that extend nodes will already be
    // combined into the loads.
    if (BackwardsPropagateMask(N, DAG)) {
      return SDValue(N, 0);
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Masking the negated extension of a boolean is just the zero-extended
  // boolean:
  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
  //
  // Note: the SimplifyDemandedBits fold below can make an information-losing
  // transform, and then we have no way to find this better fold.
  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
    // NOTE(review): the inner guard (checking that the SUB's first operand is
    // the zero constant) is missing from this rendering; the extra closing
    // brace below matches it. Confirm against the upstream source.
    SDValue SubRHS = N0.getOperand(1);
    if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
        SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
      return SubRHS;
    if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
        SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
  }
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                              BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getScalarValueSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                              BitWidth - MemVT.getScalarSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  return SDValue();
}
4309 
4310 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
4311 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
4312  bool DemandHighBits) {
4313  if (!LegalOperations)
4314  return SDValue();
4315 
4316  EVT VT = N->getValueType(0);
4317  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
4318  return SDValue();
4319  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4320  return SDValue();
4321 
4322  // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
4323  bool LookPassAnd0 = false;
4324  bool LookPassAnd1 = false;
4325  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
4326  std::swap(N0, N1);
4327  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
4328  std::swap(N0, N1);
4329  if (N0.getOpcode() == ISD::AND) {
4330  if (!N0.getNode()->hasOneUse())
4331  return SDValue();
4333  // Also handle 0xffff since the LHS is guaranteed to have zeros there.
4334  // This is needed for X86.
4335  if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
4336  N01C->getZExtValue() != 0xFFFF))
4337  return SDValue();
4338  N0 = N0.getOperand(0);
4339  LookPassAnd0 = true;
4340  }
4341 
4342  if (N1.getOpcode() == ISD::AND) {
4343  if (!N1.getNode()->hasOneUse())
4344  return SDValue();
4346  if (!N11C || N11C->getZExtValue() != 0xFF)
4347  return SDValue();
4348  N1 = N1.getOperand(0);
4349  LookPassAnd1 = true;
4350  }
4351 
4352  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
4353  std::swap(N0, N1);
4354  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
4355  return SDValue();
4356  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
4357  return SDValue();
4358 
4361  if (!N01C || !N11C)
4362  return SDValue();
4363  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
4364  return SDValue();
4365 
4366  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
4367  SDValue N00 = N0->getOperand(0);
4368  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
4369  if (!N00.getNode()->hasOneUse())
4370  return SDValue();
4372  if (!N001C || N001C->getZExtValue() != 0xFF)
4373  return SDValue();
4374  N00 = N00.getOperand(0);
4375  LookPassAnd0 = true;
4376  }
4377 
4378  SDValue N10 = N1->getOperand(0);
4379  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
4380  if (!N10.getNode()->hasOneUse())
4381  return SDValue();
4383  // Also allow 0xFFFF since the bits will be shifted out. This is needed
4384  // for X86.
4385  if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
4386  N101C->getZExtValue() != 0xFFFF))
4387  return SDValue();
4388  N10 = N10.getOperand(0);
4389  LookPassAnd1 = true;
4390  }
4391 
4392  if (N00 != N10)
4393  return SDValue();
4394 
4395  // Make sure everything beyond the low halfword gets set to zero since the SRL
4396  // 16 will clear the top bits.
4397  unsigned OpSizeInBits = VT.getSizeInBits();
4398  if (DemandHighBits && OpSizeInBits > 16) {
4399  // If the left-shift isn't masked out then the only way this is a bswap is
4400  // if all bits beyond the low 8 are 0. In that case the entire pattern
4401  // reduces to a left shift anyway: leave it for other parts of the combiner.
4402  if (!LookPassAnd0)
4403  return SDValue();
4404 
4405  // However, if the right shift isn't masked out then it might be because
4406  // it's not needed. See if we can spot that too.
4407  if (!LookPassAnd1 &&
4408  !DAG.MaskedValueIsZero(
4409  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
4410  return SDValue();
4411  }
4412 
4413  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
4414  if (OpSizeInBits > 16) {
4415  SDLoc DL(N);
4416  Res = DAG.getNode(ISD::SRL, DL, VT, Res,
4417  DAG.getConstant(OpSizeInBits - 16, DL,
4418  getShiftAmountTy(VT)));
4419  }
4420  return Res;
4421 }
4422 
4423 /// Return true if the specified node is an element that makes up a 32-bit
4424 /// packed halfword byteswap.
4425 /// ((x & 0x000000ff) << 8) |
4426 /// ((x & 0x0000ff00) >> 8) |
4427 /// ((x & 0x00ff0000) << 8) |
4428 /// ((x & 0xff000000) >> 8)
4430  if (!N.getNode()->hasOneUse())
4431  return false;
4432 
4433  unsigned Opc = N.getOpcode();
4434  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
4435  return false;
4436 
4437  SDValue N0 = N.getOperand(0);
4438  unsigned Opc0 = N0.getOpcode();
4439  if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
4440  return false;
4441 
4442  ConstantSDNode *N1C = nullptr;
4443  // SHL or SRL: look upstream for AND mask operand
4444  if (Opc == ISD::AND)
4445  N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
4446  else if (Opc0 == ISD::AND)
4447  N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
4448  if (!N1C)
4449  return false;
4450 
4451  unsigned MaskByteOffset;
4452  switch (N1C->getZExtValue()) {
4453  default:
4454  return false;
4455  case 0xFF: MaskByteOffset = 0; break;
4456  case 0xFF00: MaskByteOffset = 1; break;
4457  case 0xFFFF:
4458  // In case demanded bits didn't clear the bits that will be shifted out.
4459  // This is needed for X86.
4460  if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
4461  MaskByteOffset = 1;
4462  break;
4463  }
4464  return false;
4465  case 0xFF0000: MaskByteOffset = 2; break;
4466  case 0xFF000000: MaskByteOffset = 3; break;
4467  }
4468 
4469  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
4470  if (Opc == ISD::AND) {
4471  if (MaskByteOffset == 0 || MaskByteOffset == 2) {
4472  // (x >> 8) & 0xff
4473  // (x >> 8) & 0xff0000
4474  if (Opc0 != ISD::SRL)
4475  return false;
4477  if (!C || C->getZExtValue() != 8)
4478  return false;
4479  } else {
4480  // (x << 8) & 0xff00
4481  // (x << 8) & 0xff000000
4482  if (Opc0 != ISD::SHL)
4483  return false;
4485  if (!C || C->getZExtValue() != 8)
4486  return false;
4487  }
4488  } else if (Opc == ISD::SHL) {
4489  // (x & 0xff) << 8
4490  // (x & 0xff0000) << 8
4491  if (MaskByteOffset != 0 && MaskByteOffset != 2)
4492  return false;
4494  if (!C || C->getZExtValue() != 8)
4495  return false;
4496  } else { // Opc == ISD::SRL
4497  // (x & 0xff00) >> 8
4498  // (x & 0xff000000) >> 8
4499  if (MaskByteOffset != 1 && MaskByteOffset != 3)
4500  return false;
4502  if (!C || C->getZExtValue() != 8)
4503  return false;
4504  }
4505 
4506  if (Parts[MaskByteOffset])
4507  return false;
4508 
4509  Parts[MaskByteOffset] = N0.getOperand(0).getNode();
4510  return true;
4511 }
4512 
4513 /// Match a 32-bit packed halfword bswap. That is
4514 /// ((x & 0x000000ff) << 8) |
4515 /// ((x & 0x0000ff00) >> 8) |
4516 /// ((x & 0x00ff0000) << 8) |
4517 /// ((x & 0xff000000) >> 8)
4518 /// => (rotl (bswap x), 16)
4519 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
4520  if (!LegalOperations)
4521  return SDValue();
4522 
4523  EVT VT = N->getValueType(0);
4524  if (VT != MVT::i32)
4525  return SDValue();
4526  if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
4527  return SDValue();
4528 
4529  // Look for either
4530  // (or (or (and), (and)), (or (and), (and)))
4531  // (or (or (or (and), (and)), (and)), (and))
4532  if (N0.getOpcode() != ISD::OR)
4533  return SDValue();
4534  SDValue N00 = N0.getOperand(0);
4535  SDValue N01 = N0.getOperand(1);
4536  SDNode *Parts[4] = {};
4537 
4538  if (N1.getOpcode() == ISD::OR &&
4539  N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
4540  // (or (or (and), (and)), (or (and), (and)))
4541  if (!isBSwapHWordElement(N00, Parts))
4542  return SDValue();
4543 
4544  if (!isBSwapHWordElement(N01, Parts))
4545  return SDValue();
4546  SDValue N10 = N1.getOperand(0);
4547  if (!isBSwapHWordElement(N10, Parts))
4548  return SDValue();
4549  SDValue N11 = N1.getOperand(1);
4550  if (!isBSwapHWordElement(N11, Parts))
4551  return SDValue();
4552  } else {
4553  // (or (or (or (and), (and)), (and)), (and))
4554  if (!isBSwapHWordElement(N1, Parts))
4555  return SDValue();
4556  if (!isBSwapHWordElement(N01, Parts))
4557  return SDValue();
4558  if (N00.getOpcode() != ISD::OR)
4559  return SDValue();
4560  SDValue N000 = N00.getOperand(0);
4561  if (!isBSwapHWordElement(N000, Parts))
4562  return SDValue();
4563  SDValue N001 = N00.getOperand(1);
4564  if (!isBSwapHWordElement(N001, Parts))
4565  return SDValue();
4566  }
4567 
4568  // Make sure the parts are all coming from the same node.
4569  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
4570  return SDValue();
4571 
4572  SDLoc DL(N);
4573  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
4574  SDValue(Parts[0], 0));
4575 
4576  // Result of the bswap should be rotated by 16. If it's not legal, then
4577  // do (x << 16) | (x >> 16).
4578  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
4579  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
4580  return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
4581  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
4582  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
4583  return DAG.getNode(ISD::OR, DL, VT,
4584  DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
4585  DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
4586 }
4587 
4588 /// This contains all DAGCombine rules which reduce two values combined by
4589 /// an Or operation to a single value \see visitANDLike().
4590 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
4591  EVT VT = N1.getValueType();
4592  SDLoc DL(N);
4593 
4594  // fold (or x, undef) -> -1
4595  if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
4596  return DAG.getAllOnesConstant(DL, VT);
4597 
4598  if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
4599  return V;
4600 
4601  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
4602  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
4603  // Don't increase # computations.
4604  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4605  // We can only do this xform if we know that bits from X that are set in C2
4606  // but not in C1 are already zero. Likewise for Y.
4607  if (const ConstantSDNode *N0O1C =
4609  if (const ConstantSDNode *N1O1C =
4611  // We can only do this xform if we know that bits from X that are set in
4612  // C2 but not in C1 are already zero. Likewise for Y.
4613  const APInt &LHSMask = N0O1C->getAPIntValue();
4614  const APInt &RHSMask = N1O1C->getAPIntValue();
4615 
4616  if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
4617  DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
4618  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4619  N0.getOperand(0), N1.getOperand(0));
4620  return DAG.getNode(ISD::AND, DL, VT, X,
4621  DAG.getConstant(LHSMask | RHSMask, DL, VT));
4622  }
4623  }
4624  }
4625  }
4626 
4627  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
4628  if (N0.getOpcode() == ISD::AND &&
4629  N1.getOpcode() == ISD::AND &&
4630  N0.getOperand(0) == N1.getOperand(0) &&
4631  // Don't increase # computations.
4632  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
4633  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
4634  N0.getOperand(1), N1.getOperand(1));
4635  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
4636  }
4637 
4638  return SDValue();
4639 }
4640 
4641 SDValue DAGCombiner::visitOR(SDNode *N) {
4642  SDValue N0 = N->getOperand(0);
4643  SDValue N1 = N->getOperand(1);
4644  EVT VT = N1.getValueType();
4645 
4646  // x | x --> x
4647  if (N0 == N1)
4648  return N0;
4649 
4650  // fold vector ops
4651  if (VT.isVector()) {
4652  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4653  return FoldedVOp;
4654 
4655  // fold (or x, 0) -> x, vector edition
4657  return N1;
4659  return N0;
4660 
4661  // fold (or x, -1) -> -1, vector edition
4663  // do not return N0, because undef node may exist in N0
4664  return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
4666  // do not return N1, because undef node may exist in N1
4667  return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
4668 
4669  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
4670  // Do this only if the resulting shuffle is legal.
4671  if (isa<ShuffleVectorSDNode>(N0) &&
4672  isa<ShuffleVectorSDNode>(N1) &&
4673  // Avoid folding a node with illegal type.
4674  TLI.isTypeLegal(VT)) {
4675  bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
4676  bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
4677  bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
4678  bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
4679  // Ensure both shuffles have a zero input.
4680  if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
4681  assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
4682  assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
4683  const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
4684  const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
4685  bool CanFold = true;
4686  int NumElts = VT.getVectorNumElements();
4687  SmallVector<int, 4> Mask(NumElts);
4688 
4689  for (int i = 0; i != NumElts; ++i) {
4690  int M0 = SV0->getMaskElt(i);
4691  int M1 = SV1->getMaskElt(i);
4692 
4693  // Determine if either index is pointing to a zero vector.
4694  bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
4695  bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
4696 
4697  // If one element is zero and the otherside is undef, keep undef.
4698  // This also handles the case that both are undef.
4699  if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
4700  Mask[i] = -1;
4701  continue;
4702  }
4703 
4704  // Make sure only one of the elements is zero.
4705  if (M0Zero == M1Zero) {
4706  CanFold = false;
4707  break;
4708  }
4709 
4710  assert((M0 >= 0 || M1 >= 0) && "Undef index!");
4711 
4712  // We have a zero and non-zero element. If the non-zero came from
4713  // SV0 make the index a LHS index. If it came from SV1, make it
4714  // a RHS index. We need to mod by NumElts because we don't care
4715  // which operand it came from in the original shuffles.
4716  Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
4717  }
4718 
4719  if (CanFold) {
4720  SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
4721  SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
4722 
4723  bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4724  if (!LegalMask) {
4725  std::swap(NewLHS, NewRHS);
4727  LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
4728  }
4729 
4730  if (LegalMask)
4731  return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
4732  }
4733  }
4734  }
4735  }
4736 
4737  // fold (or c1, c2) -> c1|c2
4740  if (N0C && N1C && !N1C->isOpaque())
4741  return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
4742  // canonicalize constant to RHS
4745  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
4746  // fold (or x, 0) -> x
4747  if (isNullConstant(N1))
4748  return N0;
4749  // fold (or x, -1) -> -1
4750  if (isAllOnesConstant(N1))
4751  return N1;
4752 
4753  if (SDValue NewSel = foldBinOpIntoSelect(N))
4754  return NewSel;
4755 
4756  // fold (or x, c) -> c iff (x & ~c) == 0
4757  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
4758  return N1;
4759 
4760  if (SDValue Combined = visitORLike(N0, N1, N))
4761  return Combined;
4762 
4763  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
4764  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
4765  return BSwap;
4766  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
4767  return BSwap;
4768 
4769  // reassociate or
4770  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
4771  return ROR;
4772 
4773  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
4774  // iff (c1 & c2) != 0.
4775  auto MatchIntersect = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4776  return LHS->getAPIntValue().intersects(RHS->getAPIntValue());
4777  };
4778  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4779  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) {
4780  if (SDValue COR = DAG.FoldConstantArithmetic(
4781  ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
4782  SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
4783  AddToWorklist(IOR.getNode());
4784  return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
4785  }
4786  }
4787 
4788  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
4789  if (N0.getOpcode() == N1.getOpcode())
4790  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4791  return Tmp;
4792 
4793  // See if this is some rotate idiom.
4794  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
4795  return SDValue(Rot, 0);
4796 
4797  if (SDValue Load = MatchLoadCombine(N))
4798  return Load;
4799 
4800  // Simplify the operands using demanded-bits information.
4801  if (SimplifyDemandedBits(SDValue(N, 0)))
4802  return SDValue(N, 0);
4803 
4804  return SDValue();
4805 }
4806 
4807 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
4808 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
4809  if (Op.getOpcode() == ISD::AND) {
4811  Mask = Op.getOperand(1);
4812  Op = Op.getOperand(0);
4813  } else {
4814  return false;
4815  }
4816  }
4817 
4818  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4819  Shift = Op;
4820  return true;
4821  }
4822 
4823  return false;
4824 }
4825 
4826 // Return true if we can prove that, whenever Neg and Pos are both in the
4827 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
4828 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4829 //
4830 // (or (shift1 X, Neg), (shift2 X, Pos))
4831 //
4832 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4833 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
4834 // to consider shift amounts with defined behavior.
4835 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
4836  SelectionDAG &DAG) {
4837  // If EltSize is a power of 2 then:
4838  //
4839  // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4840  // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4841  //
4842  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4843  // for the stronger condition:
4844  //
4845  // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
4846  //
4847  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4848  // we can just replace Neg with Neg' for the rest of the function.
4849  //
4850  // In other cases we check for the even stronger condition:
4851  //
4852  // Neg == EltSize - Pos [B]
4853  //
4854  // for all Neg and Pos. Note that the (or ...) then invokes undefined
4855  // behavior if Pos == 0 (and consequently Neg == EltSize).
4856  //
4857  // We could actually use [A] whenever EltSize is a power of 2, but the
4858  // only extra cases that it would match are those uninteresting ones
4859  // where Neg and Pos are never in range at the same time. E.g. for
4860  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4861  // as well as (sub 32, Pos), but:
4862  //
4863  // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4864  //
4865  // always invokes undefined behavior for 32-bit X.
4866  //
4867  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4868  unsigned MaskLoBits = 0;
4869  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4870  if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4871  KnownBits Known;
4872  DAG.computeKnownBits(Neg.getOperand(0), Known);
4873  unsigned Bits = Log2_64(EltSize);
4874  if (NegC->getAPIntValue().getActiveBits() <= Bits &&
4875  ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
4876  Neg = Neg.getOperand(0);
4877  MaskLoBits = Bits;
4878  }
4879  }
4880  }
4881 
4882  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4883  if (Neg.getOpcode() != ISD::SUB)
4884  return false;
4886  if (!NegC)
4887  return false;
4888  SDValue NegOp1 = Neg.getOperand(1);
4889 
4890  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4891  // Pos'. The truncation is redundant for the purpose of the equality.
4892  if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
4893  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
4894  KnownBits Known;
4895  DAG.computeKnownBits(Pos.getOperand(0), Known);
4896  if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
4897  ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
4898  MaskLoBits))
4899  Pos = Pos.getOperand(0);
4900  }
4901  }
4902 
4903  // The condition we need is now:
4904  //
4905