LLVM 9.0.0svn
DAGCombiner.cpp
1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
10 // both before and after the DAG is legalized.
11 //
12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
13 // primarily intended to handle simplification opportunities that are implicit
14 // in the LLVM IR and exposed by the various codegen lowering phases.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/ADT/APFloat.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/IntervalMap.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/STLExtras.h"
26 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/SmallSet.h"
30 #include "llvm/ADT/SmallVector.h"
31 #include "llvm/ADT/Statistic.h"
48 #include "llvm/IR/Attributes.h"
49 #include "llvm/IR/Constant.h"
50 #include "llvm/IR/DataLayout.h"
51 #include "llvm/IR/DerivedTypes.h"
52 #include "llvm/IR/Function.h"
53 #include "llvm/IR/LLVMContext.h"
54 #include "llvm/IR/Metadata.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CodeGen.h"
58 #include "llvm/Support/Compiler.h"
59 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/KnownBits.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstdint>
70 #include <functional>
71 #include <iterator>
72 #include <string>
73 #include <tuple>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 #define DEBUG_TYPE "dagcombine"
79 
80 STATISTIC(NodesCombined , "Number of dag nodes combined");
81 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
82 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
83 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
84 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
85 STATISTIC(SlicedLoads, "Number of loads sliced");
86 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
87 
88 static cl::opt<bool>
89 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
90  cl::desc("Enable DAG combiner's use of IR alias analysis"));
91 
92 static cl::opt<bool>
93 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
94  cl::desc("Enable DAG combiner's use of TBAA"));
95 
96 #ifndef NDEBUG
97 static cl::opt<std::string>
98 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
99  cl::desc("Only use DAG-combiner alias analysis in this"
100  " function"));
101 #endif
102 
103 /// Hidden option to stress test load slicing, i.e., when this option
104 /// is enabled, load slicing bypasses most of its profitability guards.
105 static cl::opt<bool>
106 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
107  cl::desc("Bypass the profitability model of load slicing"),
108  cl::init(false));
109 
110 static cl::opt<bool>
111  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
112  cl::desc("DAG combiner may split indexing from loads"));
113 
114 namespace {
115 
116  class DAGCombiner {
117  SelectionDAG &DAG;
118  const TargetLowering &TLI;
119  CombineLevel Level;
120  CodeGenOpt::Level OptLevel;
121  bool LegalOperations = false;
122  bool LegalTypes = false;
123  bool ForCodeSize;
124 
125  /// Worklist of all of the nodes that need to be simplified.
126  ///
127  /// This must behave as a stack -- new nodes to process are pushed onto the
128  /// back and when processing we pop off of the back.
129  ///
130  /// The worklist will not contain duplicates but may contain null entries
131  /// due to nodes being deleted from the underlying DAG.
132  SmallVector<SDNode *, 64> Worklist;
133 
134  /// Mapping from an SDNode to its position on the worklist.
135  ///
136  /// This is used to find and remove nodes from the worklist (by nulling
137  /// them) when they are deleted from the underlying DAG. It relies on
138  /// stable indices of nodes within the worklist.
139  DenseMap<SDNode *, unsigned> WorklistMap;
140  /// This records all nodes we have attempted to add to the worklist since we
141  /// last considered a new worklist entry. Because we do not add duplicate
142  /// nodes to the worklist, this is different from the tail of the worklist.
143  SmallSetVector<SDNode *, 32> PruningList;
144 
145  /// Set of nodes which have been combined (at least once).
146  ///
147  /// This is used to allow us to reliably add any operands of a DAG node
148  /// which have not yet been combined to the worklist.
149  SmallPtrSet<SDNode *, 32> CombinedNodes;
150 
151  // AA - Used for DAG load/store alias analysis.
152  AliasAnalysis *AA;
153 
154  /// When an instruction is simplified, add all users of the instruction to
155  /// the work lists because they might get more simplified now.
156  void AddUsersToWorklist(SDNode *N) {
157  for (SDNode *Node : N->uses())
158  AddToWorklist(Node);
159  }
160 
161  // Prune potentially dangling nodes. This is called after
162  // any visit to a node, but should also be called during a visit after any
163  // failed combine which may have created a DAG node.
164  void clearAddedDanglingWorklistEntries() {
165  // Check any nodes added to the worklist to see if they are prunable.
166  while (!PruningList.empty()) {
167  auto *N = PruningList.pop_back_val();
168  if (N->use_empty())
169  recursivelyDeleteUnusedNodes(N);
170  }
171  }
172 
173  SDNode *getNextWorklistEntry() {
174  // Before we do any work, remove nodes that are not in use.
175  clearAddedDanglingWorklistEntries();
176  SDNode *N = nullptr;
177  // The Worklist holds the SDNodes in order, but it may contain null
178  // entries.
179  while (!N && !Worklist.empty()) {
180  N = Worklist.pop_back_val();
181  }
182 
183  if (N) {
184  bool GoodWorklistEntry = WorklistMap.erase(N);
185  (void)GoodWorklistEntry;
186  assert(GoodWorklistEntry &&
187  "Found a worklist entry without a corresponding map entry!");
188  }
189  return N;
190  }
191 
192  /// Call the node-specific routine that folds each particular type of node.
193  SDValue visit(SDNode *N);
194 
195  public:
196  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
197  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
198  OptLevel(OL), AA(AA) {
199  ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
200 
201  MaximumLegalStoreInBits = 0;
202  for (MVT VT : MVT::all_valuetypes())
203  if (EVT(VT).isSimple() && VT != MVT::Other &&
204  TLI.isTypeLegal(EVT(VT)) &&
205  VT.getSizeInBits() >= MaximumLegalStoreInBits)
206  MaximumLegalStoreInBits = VT.getSizeInBits();
207  }
208 
209  void ConsiderForPruning(SDNode *N) {
210  // Mark this for potential pruning.
211  PruningList.insert(N);
212  }
213 
214  /// Add to the worklist making sure its instance is at the back (next to be
215  /// processed.)
216  void AddToWorklist(SDNode *N) {
217  assert(N->getOpcode() != ISD::DELETED_NODE &&
218  "Deleted Node added to Worklist");
219 
220  // Skip handle nodes as they can't usefully be combined and confuse the
221  // zero-use deletion strategy.
222  if (N->getOpcode() == ISD::HANDLENODE)
223  return;
224 
225  ConsiderForPruning(N);
226 
227  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
228  Worklist.push_back(N);
229  }
230 
231  /// Remove all instances of N from the worklist.
232  void removeFromWorklist(SDNode *N) {
233  CombinedNodes.erase(N);
234  PruningList.remove(N);
235 
236  auto It = WorklistMap.find(N);
237  if (It == WorklistMap.end())
238  return; // Not in the worklist.
239 
240  // Null out the entry rather than erasing it to avoid a linear operation.
241  Worklist[It->second] = nullptr;
242  WorklistMap.erase(It);
243  }
244 
245  void deleteAndRecombine(SDNode *N);
246  bool recursivelyDeleteUnusedNodes(SDNode *N);
247 
248  /// Replaces all uses of the results of one DAG node with new values.
249  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
250  bool AddTo = true);
251 
252  /// Replaces all uses of the results of one DAG node with new values.
253  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
254  return CombineTo(N, &Res, 1, AddTo);
255  }
256 
257  /// Replaces all uses of the results of one DAG node with new values.
258  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
259  bool AddTo = true) {
260  SDValue To[] = { Res0, Res1 };
261  return CombineTo(N, To, 2, AddTo);
262  }
263 
264  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
265 
266  private:
267  unsigned MaximumLegalStoreInBits;
268 
269  /// Check the specified integer node value to see if it can be simplified or
270  /// if things it uses can be simplified by bit propagation.
271  /// If so, return true.
272  bool SimplifyDemandedBits(SDValue Op) {
273  unsigned BitWidth = Op.getScalarValueSizeInBits();
274  APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
275  return SimplifyDemandedBits(Op, DemandedBits);
276  }
277 
278  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
279  EVT VT = Op.getValueType();
280  unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
281  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
282  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
283  }
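  // For example, given a node such as (and X, 0xFF), only the low 8 bits of X
  // are demanded, so SimplifyDemandedBits may simplify or shrink whatever
  // computation produces the upper bits of X.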
284 
285  /// Check the specified vector node value to see if it can be simplified or
286  /// if things it uses can be simplified as it only uses some of the
287  /// elements. If so, return true.
288  bool SimplifyDemandedVectorElts(SDValue Op) {
289  unsigned NumElts = Op.getValueType().getVectorNumElements();
290  APInt DemandedElts = APInt::getAllOnesValue(NumElts);
291  return SimplifyDemandedVectorElts(Op, DemandedElts);
292  }
293 
294  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
295  const APInt &DemandedElts);
296  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
297  bool AssumeSingleUse = false);
298 
299  bool CombineToPreIndexedLoadStore(SDNode *N);
300  bool CombineToPostIndexedLoadStore(SDNode *N);
301  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
302  bool SliceUpLoad(SDNode *N);
303 
304  // Scalars have size 0 to distinguish from singleton vectors.
305  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
306  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
307  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
308 
309  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
310  /// load.
311  ///
312  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
313  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
314  /// \param EltNo index of the vector element to load.
315  /// \param OriginalLoad load that EVE came from to be replaced.
316  /// \returns EVE on success SDValue() on failure.
317  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
318  SDValue EltNo,
319  LoadSDNode *OriginalLoad);
320  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
321  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
322  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
323  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
324  SDValue PromoteIntBinOp(SDValue Op);
325  SDValue PromoteIntShiftOp(SDValue Op);
326  SDValue PromoteExtend(SDValue Op);
327  bool PromoteLoad(SDValue Op);
328 
329  /// Call the node-specific routine that knows how to fold each
330  /// particular type of node. If that doesn't do anything, try the
331  /// target-specific DAG combines.
332  SDValue combine(SDNode *N);
333 
334  // Visitation implementation - Implement dag node combining for different
335  // node types. The semantics are as follows:
336  // Return Value:
337  // SDValue.getNode() == 0 - No change was made
338  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
339  // otherwise - N should be replaced by the returned Operand.
340  //
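  // For example, a visit routine that folds (add X, 0) returns X (N is
  // replaced by an existing operand), while a routine that rewrites N in
  // place via CombineTo returns SDValue(N, 0) to signal that N has already
  // been handled.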
341  SDValue visitTokenFactor(SDNode *N);
342  SDValue visitMERGE_VALUES(SDNode *N);
343  SDValue visitADD(SDNode *N);
344  SDValue visitADDLike(SDNode *N);
345  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
346  SDValue visitSUB(SDNode *N);
347  SDValue visitADDSAT(SDNode *N);
348  SDValue visitSUBSAT(SDNode *N);
349  SDValue visitADDC(SDNode *N);
350  SDValue visitADDO(SDNode *N);
351  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
352  SDValue visitSUBC(SDNode *N);
353  SDValue visitSUBO(SDNode *N);
354  SDValue visitADDE(SDNode *N);
355  SDValue visitADDCARRY(SDNode *N);
356  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
357  SDValue visitSUBE(SDNode *N);
358  SDValue visitSUBCARRY(SDNode *N);
359  SDValue visitMUL(SDNode *N);
360  SDValue useDivRem(SDNode *N);
361  SDValue visitSDIV(SDNode *N);
362  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
363  SDValue visitUDIV(SDNode *N);
364  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
365  SDValue visitREM(SDNode *N);
366  SDValue visitMULHU(SDNode *N);
367  SDValue visitMULHS(SDNode *N);
368  SDValue visitSMUL_LOHI(SDNode *N);
369  SDValue visitUMUL_LOHI(SDNode *N);
370  SDValue visitMULO(SDNode *N);
371  SDValue visitIMINMAX(SDNode *N);
372  SDValue visitAND(SDNode *N);
373  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
374  SDValue visitOR(SDNode *N);
375  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
376  SDValue visitXOR(SDNode *N);
377  SDValue SimplifyVBinOp(SDNode *N);
378  SDValue visitSHL(SDNode *N);
379  SDValue visitSRA(SDNode *N);
380  SDValue visitSRL(SDNode *N);
381  SDValue visitFunnelShift(SDNode *N);
382  SDValue visitRotate(SDNode *N);
383  SDValue visitABS(SDNode *N);
384  SDValue visitBSWAP(SDNode *N);
385  SDValue visitBITREVERSE(SDNode *N);
386  SDValue visitCTLZ(SDNode *N);
387  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
388  SDValue visitCTTZ(SDNode *N);
389  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
390  SDValue visitCTPOP(SDNode *N);
391  SDValue visitSELECT(SDNode *N);
392  SDValue visitVSELECT(SDNode *N);
393  SDValue visitSELECT_CC(SDNode *N);
394  SDValue visitSETCC(SDNode *N);
395  SDValue visitSETCCCARRY(SDNode *N);
396  SDValue visitSIGN_EXTEND(SDNode *N);
397  SDValue visitZERO_EXTEND(SDNode *N);
398  SDValue visitANY_EXTEND(SDNode *N);
399  SDValue visitAssertExt(SDNode *N);
400  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
401  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
402  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
403  SDValue visitTRUNCATE(SDNode *N);
404  SDValue visitBITCAST(SDNode *N);
405  SDValue visitBUILD_PAIR(SDNode *N);
406  SDValue visitFADD(SDNode *N);
407  SDValue visitFSUB(SDNode *N);
408  SDValue visitFMUL(SDNode *N);
409  SDValue visitFMA(SDNode *N);
410  SDValue visitFDIV(SDNode *N);
411  SDValue visitFREM(SDNode *N);
412  SDValue visitFSQRT(SDNode *N);
413  SDValue visitFCOPYSIGN(SDNode *N);
414  SDValue visitFPOW(SDNode *N);
415  SDValue visitSINT_TO_FP(SDNode *N);
416  SDValue visitUINT_TO_FP(SDNode *N);
417  SDValue visitFP_TO_SINT(SDNode *N);
418  SDValue visitFP_TO_UINT(SDNode *N);
419  SDValue visitFP_ROUND(SDNode *N);
420  SDValue visitFP_ROUND_INREG(SDNode *N);
421  SDValue visitFP_EXTEND(SDNode *N);
422  SDValue visitFNEG(SDNode *N);
423  SDValue visitFABS(SDNode *N);
424  SDValue visitFCEIL(SDNode *N);
425  SDValue visitFTRUNC(SDNode *N);
426  SDValue visitFFLOOR(SDNode *N);
427  SDValue visitFMINNUM(SDNode *N);
428  SDValue visitFMAXNUM(SDNode *N);
429  SDValue visitFMINIMUM(SDNode *N);
430  SDValue visitFMAXIMUM(SDNode *N);
431  SDValue visitBRCOND(SDNode *N);
432  SDValue visitBR_CC(SDNode *N);
433  SDValue visitLOAD(SDNode *N);
434 
435  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
436  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
437 
438  SDValue visitSTORE(SDNode *N);
439  SDValue visitLIFETIME_END(SDNode *N);
440  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
441  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
442  SDValue visitBUILD_VECTOR(SDNode *N);
443  SDValue visitCONCAT_VECTORS(SDNode *N);
444  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
445  SDValue visitVECTOR_SHUFFLE(SDNode *N);
446  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
447  SDValue visitINSERT_SUBVECTOR(SDNode *N);
448  SDValue visitMLOAD(SDNode *N);
449  SDValue visitMSTORE(SDNode *N);
450  SDValue visitMGATHER(SDNode *N);
451  SDValue visitMSCATTER(SDNode *N);
452  SDValue visitFP_TO_FP16(SDNode *N);
453  SDValue visitFP16_TO_FP(SDNode *N);
454  SDValue visitVECREDUCE(SDNode *N);
455 
456  SDValue visitFADDForFMACombine(SDNode *N);
457  SDValue visitFSUBForFMACombine(SDNode *N);
458  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
459 
460  SDValue XformToShuffleWithZero(SDNode *N);
461  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
462  SDValue N1);
463  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
464  SDValue N1, SDNodeFlags Flags);
465 
466  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
467 
468  SDValue foldSelectOfConstants(SDNode *N);
469  SDValue foldVSelectOfConstants(SDNode *N);
470  SDValue foldBinOpIntoSelect(SDNode *BO);
471  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
472  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
473  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
474  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
475  SDValue N2, SDValue N3, ISD::CondCode CC,
476  bool NotExtCompare = false);
477  SDValue convertSelectOfFPConstantsToLoadOffset(
478  const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
479  ISD::CondCode CC);
480  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
481  SDValue N2, SDValue N3, ISD::CondCode CC);
482  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
483  const SDLoc &DL);
484  SDValue unfoldMaskedMerge(SDNode *N);
485  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
486  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
487  const SDLoc &DL, bool foldBooleans);
488  SDValue rebuildSetCC(SDValue N);
489 
490  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
491  SDValue &CC) const;
492  bool isOneUseSetCC(SDValue N) const;
493 
494  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
495  unsigned HiOp);
496  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
497  SDValue CombineExtLoad(SDNode *N);
498  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
499  SDValue combineRepeatedFPDivisors(SDNode *N);
500  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
501  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
502  SDValue BuildSDIV(SDNode *N);
503  SDValue BuildSDIVPow2(SDNode *N);
504  SDValue BuildUDIV(SDNode *N);
505  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
506  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
507  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
508  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
509  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
510  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
511  SDNodeFlags Flags, bool Reciprocal);
512  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
513  SDNodeFlags Flags, bool Reciprocal);
514  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
515  bool DemandHighBits = true);
516  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
517  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
518  SDValue InnerPos, SDValue InnerNeg,
519  unsigned PosOpcode, unsigned NegOpcode,
520  const SDLoc &DL);
521  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
522  SDValue MatchLoadCombine(SDNode *N);
523  SDValue ReduceLoadWidth(SDNode *N);
524  SDValue ReduceLoadOpStoreWidth(SDNode *N);
526  SDValue TransformFPLoadStorePair(SDNode *N);
527  SDValue convertBuildVecZextToZext(SDNode *N);
528  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
529  SDValue reduceBuildVecToShuffle(SDNode *N);
530  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
531  ArrayRef<int> VectorMask, SDValue VecIn1,
532  SDValue VecIn2, unsigned LeftIdx,
533  bool DidSplitVec);
534  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
535 
536  /// Walk up chain skipping non-aliasing memory nodes,
537  /// looking for aliasing nodes and adding them to the Aliases vector.
538  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
539  SmallVectorImpl<SDValue> &Aliases);
540 
541  /// Return true if there is any possibility that the two addresses overlap.
542  bool isAlias(SDNode *Op0, SDNode *Op1) const;
543 
544  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
545  /// chain (aliasing node.)
546  SDValue FindBetterChain(SDNode *N, SDValue Chain);
547 
548  /// Try to replace a store and any possibly adjacent stores on
549  /// consecutive chains with better chains. Return true only if St is
550  /// replaced.
551  ///
552  /// Notice that other chains may still be replaced even if the function
553  /// returns false.
554  bool findBetterNeighborChains(StoreSDNode *St);
555 
556  // Helper for findBetterNeighborChains. Walks up the store chain and adds
557  // additional chained stores that do not overlap and can be parallelized.
558  bool parallelizeChainedStores(StoreSDNode *St);
559 
560  /// Holds a pointer to an LSBaseSDNode as well as information on where it
561  /// is located in a sequence of memory operations connected by a chain.
562  struct MemOpLink {
563  // Ptr to the mem node.
564  LSBaseSDNode *MemNode;
565 
566  // Offset from the base ptr.
567  int64_t OffsetFromBase;
568 
569  MemOpLink(LSBaseSDNode *N, int64_t Offset)
570  : MemNode(N), OffsetFromBase(Offset) {}
571  };
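  // For example, three consecutive stores to Base, Base+4 and Base+8 would be
  // recorded as MemOpLink entries with OffsetFromBase values 0, 4 and 8.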
572 
573  /// This is a helper function for visitMUL to check the profitability
574  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
575  /// MulNode is the original multiply, AddNode is (add x, c1),
576  /// and ConstNode is c2.
577  bool isMulAddWithConstProfitable(SDNode *MulNode,
578  SDValue &AddNode,
579  SDValue &ConstNode);
580 
581  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
582  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
583  /// the type of the loaded value to be extended.
584  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
585  EVT LoadResultTy, EVT &ExtVT);
586 
587  /// Helper function to calculate whether the given Load/Store can have its
588  /// width reduced to ExtVT.
589  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
590  EVT &MemVT, unsigned ShAmt = 0);
591 
592  /// Used by BackwardsPropagateMask to find suitable loads.
593  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
594  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
595  ConstantSDNode *Mask, SDNode *&NodeToMask);
596  /// Attempt to propagate a given AND node back to load leaves so that they
597  /// can be combined into narrow loads.
598  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
599 
600  /// Helper function for MergeConsecutiveStores which merges the
601  /// component store chains.
602  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
603  unsigned NumStores);
604 
605  /// This is a helper function for MergeConsecutiveStores. When the
606  /// source elements of the consecutive stores are all constants or
607  /// all extracted vector elements, try to merge them into one
608  /// larger store introducing bitcasts if necessary. \return True
609  /// if a merged store was created.
610  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
611  EVT MemVT, unsigned NumStores,
612  bool IsConstantSrc, bool UseVector,
613  bool UseTrunc);
614 
615  /// This is a helper function for MergeConsecutiveStores. Stores
616  /// that potentially may be merged with St are placed in
617  /// StoreNodes. RootNode is a chain predecessor to all store
618  /// candidates.
619  void getStoreMergeCandidates(StoreSDNode *St,
620  SmallVectorImpl<MemOpLink> &StoreNodes,
621  SDNode *&Root);
622 
623  /// Helper function for MergeConsecutiveStores. Checks if
624  /// candidate stores have indirect dependency through their
625  /// operands. RootNode is the predecessor to all stores calculated
626  /// by getStoreMergeCandidates and is used to prune the dependency check.
627  /// \return True if safe to merge.
628  bool checkMergeStoreCandidatesForDependencies(
629  SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
630  SDNode *RootNode);
631 
632  /// Merge consecutive store operations into a wide store.
633  /// This optimization uses wide integers or vectors when possible.
634  /// \return True if any stores were merged (the affected nodes are stored
635  /// as a prefix of the internal StoreNodes candidate list).
636  bool MergeConsecutiveStores(StoreSDNode *St);
637 
638  /// Try to transform a truncation where C is a constant:
639  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
640  ///
641  /// \p N needs to be a truncation and its first operand an AND. Other
642  /// requirements are checked by the function (e.g. that the trunc is
643  /// single-use); if they are not met, an empty SDValue is returned.
644  SDValue distributeTruncateThroughAnd(SDNode *N);
645 
646  /// Helper function to determine whether the target supports operation
647  /// given by \p Opcode for type \p VT, that is, whether the operation
648  /// is legal or custom before legalizing operations, and whether it is
649  /// legal (but not custom) after legalization.
650  bool hasOperation(unsigned Opcode, EVT VT) {
651  if (LegalOperations)
652  return TLI.isOperationLegal(Opcode, VT);
653  return TLI.isOperationLegalOrCustom(Opcode, VT);
654  }
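  // For example, hasOperation(ISD::CTPOP, MVT::i64) answers whether an i64
  // population-count node may be created at the current legalization stage.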
655 
656  public:
657  /// Runs the dag combiner on all nodes in the work list
658  void Run(CombineLevel AtLevel);
659 
660  SelectionDAG &getDAG() const { return DAG; }
661 
662  /// Returns a type large enough to hold any valid shift amount - before type
663  /// legalization these can be huge.
664  EVT getShiftAmountTy(EVT LHSTy) {
665  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
666  return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
667  }
668 
669  /// This method returns true if we are running before type legalization or
670  /// if the specified VT is legal.
671  bool isTypeLegal(const EVT &VT) {
672  if (!LegalTypes) return true;
673  return TLI.isTypeLegal(VT);
674  }
675 
676  /// Convenience wrapper around TargetLowering::getSetCCResultType
677  EVT getSetCCResultType(EVT VT) const {
678  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
679  }
680 
681  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
682  SDValue OrigLoad, SDValue ExtLoad,
683  ISD::NodeType ExtType);
684  };
685 
686 /// This class is a DAGUpdateListener that removes any deleted
687 /// nodes from the worklist.
688 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
689  DAGCombiner &DC;
690 
691 public:
692  explicit WorklistRemover(DAGCombiner &dc)
693  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
694 
695  void NodeDeleted(SDNode *N, SDNode *E) override {
696  DC.removeFromWorklist(N);
697  }
698 };
699 
700 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
701  DAGCombiner &DC;
702 
703 public:
704  explicit WorklistInserter(DAGCombiner &dc)
705  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
706 
707  // FIXME: Ideally we could add N to the worklist, but this causes exponential
708  // compile time costs in large DAGs, e.g. Halide.
709  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
710 };
711 
712 } // end anonymous namespace
713 
714 //===----------------------------------------------------------------------===//
715 // TargetLowering::DAGCombinerInfo implementation
716 //===----------------------------------------------------------------------===//
717 
718 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
719  ((DAGCombiner*)DC)->AddToWorklist(N);
720 }
721 
722 SDValue TargetLowering::DAGCombinerInfo::
723 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
724  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
725 }
726 
727 SDValue TargetLowering::DAGCombinerInfo::
728 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
729  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
730 }
731 
732 SDValue TargetLowering::DAGCombinerInfo::
733 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
734  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
735 }
736 
737 void TargetLowering::DAGCombinerInfo::
738 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
739  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
740 }
741 
742 //===----------------------------------------------------------------------===//
743 // Helper Functions
744 //===----------------------------------------------------------------------===//
745 
746 void DAGCombiner::deleteAndRecombine(SDNode *N) {
747  removeFromWorklist(N);
748 
749  // If the operands of this node are only used by the node, they will now be
750  // dead. Make sure to re-visit them and recursively delete dead nodes.
751  for (const SDValue &Op : N->ops())
752  // For an operand generating multiple values, one of the values may
753  // become dead allowing further simplification (e.g. split index
754  // arithmetic from an indexed load).
755  if (Op->hasOneUse() || Op->getNumValues() > 1)
756  AddToWorklist(Op.getNode());
757 
758  DAG.DeleteNode(N);
759 }
760 
761 /// Return 1 if we can compute the negated form of the specified expression for
762 /// the same cost as the expression itself, or 2 if we can compute the negated
763 /// form more cheaply than the expression itself.
764 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
765  const TargetLowering &TLI,
766  const TargetOptions *Options,
767  bool ForCodeSize,
768  unsigned Depth = 0) {
769  // fneg is removable even if it has multiple uses.
770  if (Op.getOpcode() == ISD::FNEG) return 2;
771 
772  // Don't allow anything with multiple uses unless we know it is free.
773  EVT VT = Op.getValueType();
774  const SDNodeFlags Flags = Op->getFlags();
775  if (!Op.hasOneUse())
776  if (!(Op.getOpcode() == ISD::FP_EXTEND &&
777  TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
778  return 0;
779 
780  // Don't recurse exponentially.
781  if (Depth > 6) return 0;
782 
783  switch (Op.getOpcode()) {
784  default: return false;
785  case ISD::ConstantFP: {
786  if (!LegalOperations)
787  return 1;
788 
789  // Don't invert constant FP values after legalization unless the target says
790  // the negated constant is legal.
791  return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
792  TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
793  ForCodeSize);
794  }
795  case ISD::FADD:
796  if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
797  return 0;
798 
799  // After operation legalization, it might not be legal to create new FSUBs.
800  if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
801  return 0;
802 
803  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
804  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
805  Options, ForCodeSize, Depth + 1))
806  return V;
807  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
808  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
809  ForCodeSize, Depth + 1);
810  case ISD::FSUB:
811  // We can't turn -(A-B) into B-A when we honor signed zeros.
812  if (!Options->NoSignedZerosFPMath &&
813  !Flags.hasNoSignedZeros())
814  return 0;
815 
816  // fold (fneg (fsub A, B)) -> (fsub B, A)
817  return 1;
818 
819  case ISD::FMUL:
820  case ISD::FDIV:
821  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
822  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
823  Options, ForCodeSize, Depth + 1))
824  return V;
825 
826  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
827  ForCodeSize, Depth + 1);
828 
829  case ISD::FP_EXTEND:
830  case ISD::FP_ROUND:
831  case ISD::FSIN:
832  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
833  ForCodeSize, Depth + 1);
834  }
835 }
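// For example, (fneg X) is always negatible for free (returning 2, since the
// fneg simply disappears), while (fmul X, Y) is negatible when one of its
// operands is itself negatible, because the negation can be folded into that
// operand as in (fmul (fneg X), Y).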
836 
837 /// If isNegatibleForFree returns true, return the newly negated expression.
838 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
839  bool LegalOperations, bool ForCodeSize,
840  unsigned Depth = 0) {
841  const TargetOptions &Options = DAG.getTarget().Options;
842  // fneg is removable even if it has multiple uses.
843  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
844 
845  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
846 
847  const SDNodeFlags Flags = Op.getNode()->getFlags();
848 
849  switch (Op.getOpcode()) {
850  default: llvm_unreachable("Unknown code");
851  case ISD::ConstantFP: {
852  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
853  V.changeSign();
854  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
855  }
856  case ISD::FADD:
857  assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
858 
859  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
860  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
861  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
862  Depth+1))
863  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
864  GetNegatedExpression(Op.getOperand(0), DAG,
865  LegalOperations, ForCodeSize,
866  Depth+1),
867  Op.getOperand(1), Flags);
868  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
869  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
870  GetNegatedExpression(Op.getOperand(1), DAG,
871  LegalOperations, ForCodeSize,
872  Depth+1),
873  Op.getOperand(0), Flags);
874  case ISD::FSUB:
875  // fold (fneg (fsub 0, B)) -> B
876  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
877  if (N0CFP->isZero())
878  return Op.getOperand(1);
879 
880  // fold (fneg (fsub A, B)) -> (fsub B, A)
881  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
882  Op.getOperand(1), Op.getOperand(0), Flags);
883 
884  case ISD::FMUL:
885  case ISD::FDIV:
886  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
887  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
888  DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
889  Depth+1))
890  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
891  GetNegatedExpression(Op.getOperand(0), DAG,
892  LegalOperations, ForCodeSize,
893  Depth+1),
894  Op.getOperand(1), Flags);
895 
896  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
897  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
898  Op.getOperand(0),
899  GetNegatedExpression(Op.getOperand(1), DAG,
900  LegalOperations, ForCodeSize,
901  Depth+1), Flags);
902 
903  case ISD::FP_EXTEND:
904  case ISD::FSIN:
905  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
906  GetNegatedExpression(Op.getOperand(0), DAG,
907  LegalOperations, ForCodeSize,
908  Depth+1));
909  case ISD::FP_ROUND:
910  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
911  GetNegatedExpression(Op.getOperand(0), DAG,
912  LegalOperations, ForCodeSize,
913  Depth+1),
914  Op.getOperand(1));
915  }
916 }
917 
918 // APInts must be the same size for most operations; this helper
919 // function zero-extends the shorter of the pair so that they match.
920 // We provide an Offset so that we can create bitwidths that won't overflow.
921 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
922  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
923  LHS = LHS.zextOrSelf(Bits);
924  RHS = RHS.zextOrSelf(Bits);
925 }
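// For example, an 8-bit LHS and a 16-bit RHS are both zero-extended to
// 16 bits (or to 16 + Offset bits when a non-zero Offset is supplied).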
926 
927 // Return true if this node is a setcc, or is a select_cc
928 // that selects between the target values used for true and false, making it
929 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
930 // the appropriate nodes based on the type of node we are checking. This
931 // simplifies life a bit for the callers.
932 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
933  SDValue &CC) const {
934  if (N.getOpcode() == ISD::SETCC) {
935  LHS = N.getOperand(0);
936  RHS = N.getOperand(1);
937  CC = N.getOperand(2);
938  return true;
939  }
940 
941  if (N.getOpcode() != ISD::SELECT_CC ||
942  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
943  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
944  return false;
945 
946  if (TLI.getBooleanContents(N.getValueType()) ==
947  TargetLowering::UndefinedBooleanContent)
948  return false;
949 
950  LHS = N.getOperand(0);
951  RHS = N.getOperand(1);
952  CC = N.getOperand(4);
953  return true;
954 }
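// For example, on a target whose boolean true/false values are 1/0,
// (select_cc LHS, RHS, 1, 0, CC) carries the same information as
// (setcc LHS, RHS, CC), so LHS, RHS and CC are taken from operands 0, 1 and 4.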
955 
956 /// Return true if this is a SetCC-equivalent operation with only one use.
957 /// If this is true, it allows the users to invert the operation for free when
958 /// it is profitable to do so.
959 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
960  SDValue N0, N1, N2;
961  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
962  return true;
963  return false;
964 }
965 
966 // Returns the SDNode if it is a constant float BuildVector
967 // or constant float.
968 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
969  if (isa<ConstantFPSDNode>(N))
970  return N.getNode();
971  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
972  return N.getNode();
973  return nullptr;
974 }
975 
976 // Determines if it is a constant integer or a build vector of constant
977 // integers (and undefs).
978 // Do not permit build vector implicit truncation.
979 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
980  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
981  return !(Const->isOpaque() && NoOpaques);
982  if (N.getOpcode() != ISD::BUILD_VECTOR)
983  return false;
984  unsigned BitWidth = N.getScalarValueSizeInBits();
985  for (const SDValue &Op : N->op_values()) {
986  if (Op.isUndef())
987  continue;
988  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
989  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
990  (Const->isOpaque() && NoOpaques))
991  return false;
992  }
993  return true;
994 }
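// For example, a v4i32 (build_vector 1, 2, undef, 4) qualifies, while a
// build_vector with a non-constant element or an implicitly truncated
// constant does not.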
995 
996 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
997 // undef's.
998 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
999  if (V.getOpcode() != ISD::BUILD_VECTOR)
1000  return false;
1001  return isConstantOrConstantVector(V, NoOpaques) ||
1002  ISD::isBuildVectorOfConstantSDNodes(V.getNode());
1003 }
1004 
1005 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1006 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1007 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1008  SDValue N0, SDValue N1) {
1009  EVT VT = N0.getValueType();
1010 
1011  if (N0.getOpcode() != Opc)
1012  return SDValue();
1013 
1014  // Don't reassociate reductions.
1015  if (N0->getFlags().hasVectorReduction())
1016  return SDValue();
1017 
1018  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1019  if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1020  // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1021  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1022  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1023  return SDValue();
1024  }
1025  if (N0.hasOneUse()) {
1026  // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1027  // iff (op x, c1) has one use
1028  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1029  if (!OpNode.getNode())
1030  return SDValue();
1031  AddToWorklist(OpNode.getNode());
1032  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1033  }
1034  }
1035  return SDValue();
1036 }
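// For example, (add (add X, 3), 5) is reassociated to (add X, (add 3, 5)),
// which constant-folds to (add X, 8); with a non-constant N1 and a
// single-use inner node, (add (add X, C1), Y) becomes (add (add X, Y), C1).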
1037 
1038 // Try to reassociate commutative binops.
1039 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1040  SDValue N1, SDNodeFlags Flags) {
1041  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1042  // Don't reassociate reductions.
1043  if (Flags.hasVectorReduction())
1044  return SDValue();
1045 
1046  // Floating-point reassociation is not allowed without loose FP math.
1047  if (N0.getValueType().isFloatingPoint() ||
1048  N1.getValueType().isFloatingPoint())
1049  if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1050  return SDValue();
1051 
1052  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1053  return Combined;
1054  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1055  return Combined;
1056  return SDValue();
1057 }
1058 
1059 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1060  bool AddTo) {
1061  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1062  ++NodesCombined;
1063  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1064  To[0].getNode()->dump(&DAG);
1065  dbgs() << " and " << NumTo - 1 << " other values\n");
1066  for (unsigned i = 0, e = NumTo; i != e; ++i)
1067  assert((!To[i].getNode() ||
1068  N->getValueType(i) == To[i].getValueType()) &&
1069  "Cannot combine value to value of different type!");
1070 
1071  WorklistRemover DeadNodes(*this);
1072  DAG.ReplaceAllUsesWith(N, To);
1073  if (AddTo) {
1074  // Push the new nodes and any users onto the worklist
1075  for (unsigned i = 0, e = NumTo; i != e; ++i) {
1076  if (To[i].getNode()) {
1077  AddToWorklist(To[i].getNode());
1078  AddUsersToWorklist(To[i].getNode());
1079  }
1080  }
1081  }
1082 
1083  // Finally, if the node is now dead, remove it from the graph. The node
1084  // may not be dead if the replacement process recursively simplified to
1085  // something else needing this node.
1086  if (N->use_empty())
1087  deleteAndRecombine(N);
1088  return SDValue(N, 0);
1089 }
1090 
1091 void DAGCombiner::
1092 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1093  // Replace all uses. If any nodes become isomorphic to other nodes and
1094  // are deleted, make sure to remove them from our worklist.
1095  WorklistRemover DeadNodes(*this);
1096  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1097 
1098  // Push the new node and any (possibly new) users onto the worklist.
1099  AddToWorklist(TLO.New.getNode());
1100  AddUsersToWorklist(TLO.New.getNode());
1101 
1102  // Finally, if the node is now dead, remove it from the graph. The node
1103  // may not be dead if the replacement process recursively simplified to
1104  // something else needing this node.
1105  if (TLO.Old.getNode()->use_empty())
1106  deleteAndRecombine(TLO.Old.getNode());
1107 }
1108 
1109 /// Check the specified integer node value to see if it can be simplified or if
1110 /// things it uses can be simplified by bit propagation. If so, return true.
1111 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1112  const APInt &DemandedElts) {
1113  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1114  KnownBits Known;
1115  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1116  return false;
1117 
1118  // Revisit the node.
1119  AddToWorklist(Op.getNode());
1120 
1121  // Replace the old value with the new one.
1122  ++NodesCombined;
1123  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1124  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1125  dbgs() << '\n');
1126 
1127  CommitTargetLoweringOpt(TLO);
1128  return true;
1129 }
1130 
1131 /// Check the specified vector node value to see if it can be simplified or
1132 /// if things it uses can be simplified as it only uses some of the elements.
1133 /// If so, return true.
1134 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1135  const APInt &DemandedElts,
1136  bool AssumeSingleUse) {
1137  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1138  APInt KnownUndef, KnownZero;
1139  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1140  TLO, 0, AssumeSingleUse))
1141  return false;
1142 
1143  // Revisit the node.
1144  AddToWorklist(Op.getNode());
1145 
1146  // Replace the old value with the new one.
1147  ++NodesCombined;
1148  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1149  dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1150  dbgs() << '\n');
1151 
1152  CommitTargetLoweringOpt(TLO);
1153  return true;
1154 }
1155 
1156 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1157  SDLoc DL(Load);
1158  EVT VT = Load->getValueType(0);
1159  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1160 
1161  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1162  Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1163  WorklistRemover DeadNodes(*this);
1164  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1165  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1166  deleteAndRecombine(Load);
1167  AddToWorklist(Trunc.getNode());
1168 }
1169 
1170 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1171  Replace = false;
1172  SDLoc DL(Op);
1173  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1174  LoadSDNode *LD = cast<LoadSDNode>(Op);
1175  EVT MemVT = LD->getMemoryVT();
1176  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1177  : LD->getExtensionType();
1178  Replace = true;
1179  return DAG.getExtLoad(ExtType, DL, PVT,
1180  LD->getChain(), LD->getBasePtr(),
1181  MemVT, LD->getMemOperand());
1182  }
1183 
1184  unsigned Opc = Op.getOpcode();
1185  switch (Opc) {
1186  default: break;
1187  case ISD::AssertSext:
1188  if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1189  return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1190  break;
1191  case ISD::AssertZext:
1192  if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1193  return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1194  break;
1195  case ISD::Constant: {
1196  unsigned ExtOpc =
1197  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1198  return DAG.getNode(ExtOpc, DL, PVT, Op);
1199  }
1200  }
1201 
1202  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1203  return SDValue();
1204  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1205 }
1206 
1207 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1208  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1209  return SDValue();
1210  EVT OldVT = Op.getValueType();
1211  SDLoc DL(Op);
1212  bool Replace = false;
1213  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1214  if (!NewOp.getNode())
1215  return SDValue();
1216  AddToWorklist(NewOp.getNode());
1217 
1218  if (Replace)
1219  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1220  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1221  DAG.getValueType(OldVT));
1222 }
1223 
1224 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1225  EVT OldVT = Op.getValueType();
1226  SDLoc DL(Op);
1227  bool Replace = false;
1228  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1229  if (!NewOp.getNode())
1230  return SDValue();
1231  AddToWorklist(NewOp.getNode());
1232 
1233  if (Replace)
1234  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1235  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1236 }
1237 
1238 /// Promote the specified integer binary operation if the target indicates it is
1239 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1240 /// i32 since i16 instructions are longer.
1241 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1242  if (!LegalOperations)
1243  return SDValue();
1244 
1245  EVT VT = Op.getValueType();
1246  if (VT.isVector() || !VT.isInteger())
1247  return SDValue();
1248 
1249  // If operation type is 'undesirable', e.g. i16 on x86, consider
1250  // promoting it.
1251  unsigned Opc = Op.getOpcode();
1252  if (TLI.isTypeDesirableForOp(Opc, VT))
1253  return SDValue();
1254 
1255  EVT PVT = VT;
1256  // Consult target whether it is a good idea to promote this operation and
1257  // what's the right type to promote it to.
1258  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1259  assert(PVT != VT && "Don't know what type to promote to!");
1260 
1261  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1262 
1263  bool Replace0 = false;
1264  SDValue N0 = Op.getOperand(0);
1265  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1266 
1267  bool Replace1 = false;
1268  SDValue N1 = Op.getOperand(1);
1269  SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1270  SDLoc DL(Op);
1271 
1272  SDValue RV =
1273  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1274 
1275  // We are always replacing N0/N1's use in N and only need
1276  // additional replacements if there are additional uses.
1277  Replace0 &= !N0->hasOneUse();
1278  Replace1 &= (N0 != N1) && !N1->hasOneUse();
1279 
1280  // Combine Op here so it is preserved past replacements.
1281  CombineTo(Op.getNode(), RV);
1282 
1283  // If operands have a use ordering, make sure we deal with
1284  // predecessor first.
1285  if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1286  std::swap(N0, N1);
1287  std::swap(NN0, NN1);
1288  }
1289 
1290  if (Replace0) {
1291  AddToWorklist(NN0.getNode());
1292  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1293  }
1294  if (Replace1) {
1295  AddToWorklist(NN1.getNode());
1296  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1297  }
1298  return Op;
1299  }
1300  return SDValue();
1301 }
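// For example, on a target where i16 arithmetic is undesirable, an i16 add
// may be rewritten as (trunc (add (anyext X), (anyext Y))) in i32, using the
// operands produced by PromoteOperand above.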
1302 
1303 /// Promote the specified integer shift operation if the target indicates it is
1304 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1305 /// i32 since i16 instructions are longer.
1306 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1307  if (!LegalOperations)
1308  return SDValue();
1309 
1310  EVT VT = Op.getValueType();
1311  if (VT.isVector() || !VT.isInteger())
1312  return SDValue();
1313 
1314  // If operation type is 'undesirable', e.g. i16 on x86, consider
1315  // promoting it.
1316  unsigned Opc = Op.getOpcode();
1317  if (TLI.isTypeDesirableForOp(Opc, VT))
1318  return SDValue();
1319 
1320  EVT PVT = VT;
1321  // Consult target whether it is a good idea to promote this operation and
1322  // what's the right type to promote it to.
1323  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1324  assert(PVT != VT && "Don't know what type to promote to!");
1325 
1326  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1327 
1328  bool Replace = false;
1329  SDValue N0 = Op.getOperand(0);
1330  SDValue N1 = Op.getOperand(1);
1331  if (Opc == ISD::SRA)
1332  N0 = SExtPromoteOperand(N0, PVT);
1333  else if (Opc == ISD::SRL)
1334  N0 = ZExtPromoteOperand(N0, PVT);
1335  else
1336  N0 = PromoteOperand(N0, PVT, Replace);
1337 
1338  if (!N0.getNode())
1339  return SDValue();
1340 
1341  SDLoc DL(Op);
1342  SDValue RV =
1343  DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1344 
1345  AddToWorklist(N0.getNode());
1346  if (Replace)
1347  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1348 
1349  // Deal with Op being deleted.
1350  if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1351  return RV;
1352  }
1353  return SDValue();
1354 }
1355 
1356 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1357  if (!LegalOperations)
1358  return SDValue();
1359 
1360  EVT VT = Op.getValueType();
1361  if (VT.isVector() || !VT.isInteger())
1362  return SDValue();
1363 
1364  // If operation type is 'undesirable', e.g. i16 on x86, consider
1365  // promoting it.
1366  unsigned Opc = Op.getOpcode();
1367  if (TLI.isTypeDesirableForOp(Opc, VT))
1368  return SDValue();
1369 
1370  EVT PVT = VT;
1371  // Consult target whether it is a good idea to promote this operation and
1372  // what's the right type to promote it to.
1373  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1374  assert(PVT != VT && "Don't know what type to promote to!");
1375  // fold (aext (aext x)) -> (aext x)
1376  // fold (aext (zext x)) -> (zext x)
1377  // fold (aext (sext x)) -> (sext x)
1378  LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1379  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1380  }
1381  return SDValue();
1382 }
1383 
1384 bool DAGCombiner::PromoteLoad(SDValue Op) {
1385  if (!LegalOperations)
1386  return false;
1387 
1388  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1389  return false;
1390 
1391  EVT VT = Op.getValueType();
1392  if (VT.isVector() || !VT.isInteger())
1393  return false;
1394 
1395  // If operation type is 'undesirable', e.g. i16 on x86, consider
1396  // promoting it.
1397  unsigned Opc = Op.getOpcode();
1398  if (TLI.isTypeDesirableForOp(Opc, VT))
1399  return false;
1400 
1401  EVT PVT = VT;
1402  // Consult target whether it is a good idea to promote this operation and
1403  // what's the right type to promote it to.
1404  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1405  assert(PVT != VT && "Don't know what type to promote to!");
1406 
1407  SDLoc DL(Op);
1408  SDNode *N = Op.getNode();
1409  LoadSDNode *LD = cast<LoadSDNode>(N);
1410  EVT MemVT = LD->getMemoryVT();
1411  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1412  : LD->getExtensionType();
1413  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1414  LD->getChain(), LD->getBasePtr(),
1415  MemVT, LD->getMemOperand());
1416  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1417 
1418  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1419  Result.getNode()->dump(&DAG); dbgs() << '\n');
1420  WorklistRemover DeadNodes(*this);
1421  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1422  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1423  deleteAndRecombine(N);
1424  AddToWorklist(Result.getNode());
1425  return true;
1426  }
1427  return false;
1428 }
1429 
1430 /// Recursively delete a node which has no uses and any operands for
1431 /// which it is the only use.
1432 ///
1433 /// Note that this both deletes the nodes and removes them from the worklist.
1434 /// It also adds any nodes that have had a user deleted to the worklist, as
1435 /// they may now have only one use and be subject to other combines.
1436 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1437  if (!N->use_empty())
1438  return false;
1439 
1440  SmallSetVector<SDNode *, 16> Nodes;
1441  Nodes.insert(N);
1442  do {
1443  N = Nodes.pop_back_val();
1444  if (!N)
1445  continue;
1446 
1447  if (N->use_empty()) {
1448  for (const SDValue &ChildN : N->op_values())
1449  Nodes.insert(ChildN.getNode());
1450 
1451  removeFromWorklist(N);
1452  DAG.DeleteNode(N);
1453  } else {
1454  AddToWorklist(N);
1455  }
1456  } while (!Nodes.empty());
1457  return true;
1458 }
1459 
1460 //===----------------------------------------------------------------------===//
1461 // Main DAG Combiner implementation
1462 //===----------------------------------------------------------------------===//
1463 
1464 void DAGCombiner::Run(CombineLevel AtLevel) {
1465  // Set the instance variables, so that the various visit routines may use them.
1466  Level = AtLevel;
1467  LegalOperations = Level >= AfterLegalizeVectorOps;
1468  LegalTypes = Level >= AfterLegalizeTypes;
1469 
1470  WorklistInserter AddNodes(*this);
1471 
1472  // Add all the dag nodes to the worklist.
1473  for (SDNode &Node : DAG.allnodes())
1474  AddToWorklist(&Node);
1475 
1476  // Create a dummy node (which is not added to allnodes), that adds a reference
1477  // to the root node, preventing it from being deleted, and tracking any
1478  // changes of the root.
1479  HandleSDNode Dummy(DAG.getRoot());
1480 
1481  // While we have a valid worklist entry node, try to combine it.
1482  while (SDNode *N = getNextWorklistEntry()) {
1483  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1484  // N is deleted from the DAG, since they too may now be dead or may have a
1485  // reduced number of uses, allowing other xforms.
1486  if (recursivelyDeleteUnusedNodes(N))
1487  continue;
1488 
1489  WorklistRemover DeadNodes(*this);
1490 
1491  // If this combine is running after legalizing the DAG, re-legalize any
1492  // nodes pulled off the worklist.
1493  if (Level == AfterLegalizeDAG) {
1494  SmallSetVector<SDNode *, 16> UpdatedNodes;
1495  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1496 
1497  for (SDNode *LN : UpdatedNodes) {
1498  AddToWorklist(LN);
1499  AddUsersToWorklist(LN);
1500  }
1501  if (!NIsValid)
1502  continue;
1503  }
1504 
1505  LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1506 
1507  // Add any operands of the new node which have not yet been combined to the
1508  // worklist as well. Because the worklist uniques things already, this
1509  // won't repeatedly process the same operand.
1510  CombinedNodes.insert(N);
1511  for (const SDValue &ChildN : N->op_values())
1512  if (!CombinedNodes.count(ChildN.getNode()))
1513  AddToWorklist(ChildN.getNode());
1514 
1515  SDValue RV = combine(N);
1516 
1517  if (!RV.getNode())
1518  continue;
1519 
1520  ++NodesCombined;
1521 
1522  // If we get back the same node we passed in, rather than a new node or
1523  // zero, we know that the node must have defined multiple values and
1524  // CombineTo was used. Since CombineTo takes care of the worklist
1525  // mechanics for us, we have no work to do in this case.
1526  if (RV.getNode() == N)
1527  continue;
1528 
1529  assert(N->getOpcode() != ISD::DELETED_NODE &&
1530  RV.getOpcode() != ISD::DELETED_NODE &&
1531  "Node was deleted but visit returned new node!");
1532 
1533  LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1534 
1535  if (N->getNumValues() == RV.getNode()->getNumValues())
1536  DAG.ReplaceAllUsesWith(N, RV.getNode());
1537  else {
1538  assert(N->getValueType(0) == RV.getValueType() &&
1539  N->getNumValues() == 1 && "Type mismatch");
1540  DAG.ReplaceAllUsesWith(N, &RV);
1541  }
1542 
1543  // Push the new node and any users onto the worklist
1544  AddToWorklist(RV.getNode());
1545  AddUsersToWorklist(RV.getNode());
1546 
1547  // Finally, if the node is now dead, remove it from the graph. The node
1548  // may not be dead if the replacement process recursively simplified to
1549  // something else needing this node. This will also take care of adding any
1550  // operands which have lost a user to the worklist.
1551  recursivelyDeleteUnusedNodes(N);
1552  }
1553 
1554  // If the root changed (e.g. it was a dead load), update the root.
1555  DAG.setRoot(Dummy.getValue());
1556  DAG.RemoveDeadNodes();
1557 }
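
 Run() above is a classic worklist-to-fixpoint loop: seed the worklist with every node, pop one entry at a time, combine it, and re-queue anything the rewrite touched. A minimal generic sketch of that shape (illustrative only; the real implementation uses the DAGCombiner worklist and SelectionDAG types):

 #include <unordered_set>
 #include <vector>

 // Node and CombineOne are stand-ins for SDNode and DAGCombiner::combine.
 template <typename Node, typename CombineFn>
 void runToFixpoint(const std::vector<Node *> &AllNodes, CombineFn CombineOne) {
   std::vector<Node *> Worklist(AllNodes.begin(), AllNodes.end());
   std::unordered_set<Node *> InList(AllNodes.begin(), AllNodes.end());
   while (!Worklist.empty()) {
     Node *N = Worklist.back();
     Worklist.pop_back();
     InList.erase(N);
     // CombineOne returns the nodes to revisit (the replacement and its users).
     for (Node *Touched : CombineOne(N))
       if (InList.insert(Touched).second)
         Worklist.push_back(Touched);
   }
 }
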
1558 
1559 SDValue DAGCombiner::visit(SDNode *N) {
1560  switch (N->getOpcode()) {
1561  default: break;
1562  case ISD::TokenFactor: return visitTokenFactor(N);
1563  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1564  case ISD::ADD: return visitADD(N);
1565  case ISD::SUB: return visitSUB(N);
1566  case ISD::SADDSAT:
1567  case ISD::UADDSAT: return visitADDSAT(N);
1568  case ISD::SSUBSAT:
1569  case ISD::USUBSAT: return visitSUBSAT(N);
1570  case ISD::ADDC: return visitADDC(N);
1571  case ISD::SADDO:
1572  case ISD::UADDO: return visitADDO(N);
1573  case ISD::SUBC: return visitSUBC(N);
1574  case ISD::SSUBO:
1575  case ISD::USUBO: return visitSUBO(N);
1576  case ISD::ADDE: return visitADDE(N);
1577  case ISD::ADDCARRY: return visitADDCARRY(N);
1578  case ISD::SUBE: return visitSUBE(N);
1579  case ISD::SUBCARRY: return visitSUBCARRY(N);
1580  case ISD::MUL: return visitMUL(N);
1581  case ISD::SDIV: return visitSDIV(N);
1582  case ISD::UDIV: return visitUDIV(N);
1583  case ISD::SREM:
1584  case ISD::UREM: return visitREM(N);
1585  case ISD::MULHU: return visitMULHU(N);
1586  case ISD::MULHS: return visitMULHS(N);
1587  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1588  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1589  case ISD::SMULO:
1590  case ISD::UMULO: return visitMULO(N);
1591  case ISD::SMIN:
1592  case ISD::SMAX:
1593  case ISD::UMIN:
1594  case ISD::UMAX: return visitIMINMAX(N);
1595  case ISD::AND: return visitAND(N);
1596  case ISD::OR: return visitOR(N);
1597  case ISD::XOR: return visitXOR(N);
1598  case ISD::SHL: return visitSHL(N);
1599  case ISD::SRA: return visitSRA(N);
1600  case ISD::SRL: return visitSRL(N);
1601  case ISD::ROTR:
1602  case ISD::ROTL: return visitRotate(N);
1603  case ISD::FSHL:
1604  case ISD::FSHR: return visitFunnelShift(N);
1605  case ISD::ABS: return visitABS(N);
1606  case ISD::BSWAP: return visitBSWAP(N);
1607  case ISD::BITREVERSE: return visitBITREVERSE(N);
1608  case ISD::CTLZ: return visitCTLZ(N);
1609  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1610  case ISD::CTTZ: return visitCTTZ(N);
1611  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1612  case ISD::CTPOP: return visitCTPOP(N);
1613  case ISD::SELECT: return visitSELECT(N);
1614  case ISD::VSELECT: return visitVSELECT(N);
1615  case ISD::SELECT_CC: return visitSELECT_CC(N);
1616  case ISD::SETCC: return visitSETCC(N);
1617  case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1618  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1619  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1620  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1621  case ISD::AssertSext:
1622  case ISD::AssertZext: return visitAssertExt(N);
1623  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1624  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1625  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1626  case ISD::TRUNCATE: return visitTRUNCATE(N);
1627  case ISD::BITCAST: return visitBITCAST(N);
1628  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1629  case ISD::FADD: return visitFADD(N);
1630  case ISD::FSUB: return visitFSUB(N);
1631  case ISD::FMUL: return visitFMUL(N);
1632  case ISD::FMA: return visitFMA(N);
1633  case ISD::FDIV: return visitFDIV(N);
1634  case ISD::FREM: return visitFREM(N);
1635  case ISD::FSQRT: return visitFSQRT(N);
1636  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1637  case ISD::FPOW: return visitFPOW(N);
1638  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1639  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1640  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1641  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1642  case ISD::FP_ROUND: return visitFP_ROUND(N);
1643  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1644  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1645  case ISD::FNEG: return visitFNEG(N);
1646  case ISD::FABS: return visitFABS(N);
1647  case ISD::FFLOOR: return visitFFLOOR(N);
1648  case ISD::FMINNUM: return visitFMINNUM(N);
1649  case ISD::FMAXNUM: return visitFMAXNUM(N);
1650  case ISD::FMINIMUM: return visitFMINIMUM(N);
1651  case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1652  case ISD::FCEIL: return visitFCEIL(N);
1653  case ISD::FTRUNC: return visitFTRUNC(N);
1654  case ISD::BRCOND: return visitBRCOND(N);
1655  case ISD::BR_CC: return visitBR_CC(N);
1656  case ISD::LOAD: return visitLOAD(N);
1657  case ISD::STORE: return visitSTORE(N);
1658  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1659  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1660  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1661  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1662  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1663  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1664  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1665  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1666  case ISD::MGATHER: return visitMGATHER(N);
1667  case ISD::MLOAD: return visitMLOAD(N);
1668  case ISD::MSCATTER: return visitMSCATTER(N);
1669  case ISD::MSTORE: return visitMSTORE(N);
1670  case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1671  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1672  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1673  case ISD::VECREDUCE_FADD:
1674  case ISD::VECREDUCE_FMUL:
1675  case ISD::VECREDUCE_ADD:
1676  case ISD::VECREDUCE_MUL:
1677  case ISD::VECREDUCE_AND:
1678  case ISD::VECREDUCE_OR:
1679  case ISD::VECREDUCE_XOR:
1680  case ISD::VECREDUCE_SMAX:
1681  case ISD::VECREDUCE_SMIN:
1682  case ISD::VECREDUCE_UMAX:
1683  case ISD::VECREDUCE_UMIN:
1684  case ISD::VECREDUCE_FMAX:
1685  case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1686  }
1687  return SDValue();
1688 }
1689 
1690 SDValue DAGCombiner::combine(SDNode *N) {
1691  SDValue RV = visit(N);
1692 
1693  // If nothing happened, try a target-specific DAG combine.
1694  if (!RV.getNode()) {
1696  "Node was deleted but visit returned NULL!");
1697 
1698  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1699  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1700 
1701  // Expose the DAG combiner to the target combiner impls.
1702  TargetLowering::DAGCombinerInfo
1703  DagCombineInfo(DAG, Level, false, this);
1704 
1705  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1706  }
1707  }
1708 
1709  // If nothing happened still, try promoting the operation.
1710  if (!RV.getNode()) {
1711  switch (N->getOpcode()) {
1712  default: break;
1713  case ISD::ADD:
1714  case ISD::SUB:
1715  case ISD::MUL:
1716  case ISD::AND:
1717  case ISD::OR:
1718  case ISD::XOR:
1719  RV = PromoteIntBinOp(SDValue(N, 0));
1720  break;
1721  case ISD::SHL:
1722  case ISD::SRA:
1723  case ISD::SRL:
1724  RV = PromoteIntShiftOp(SDValue(N, 0));
1725  break;
1726  case ISD::SIGN_EXTEND:
1727  case ISD::ZERO_EXTEND:
1728  case ISD::ANY_EXTEND:
1729  RV = PromoteExtend(SDValue(N, 0));
1730  break;
1731  case ISD::LOAD:
1732  if (PromoteLoad(SDValue(N, 0)))
1733  RV = SDValue(N, 0);
1734  break;
1735  }
1736  }
1737 
1738  // If N is a commutative binary node, try to eliminate it if the commuted
1739  // version is already present in the DAG.
1740  if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1741  N->getNumValues() == 1) {
1742  SDValue N0 = N->getOperand(0);
1743  SDValue N1 = N->getOperand(1);
1744 
1745  // Constant operands are canonicalized to RHS.
1746  if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1747  SDValue Ops[] = {N1, N0};
1748  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1749  N->getFlags());
1750  if (CSENode)
1751  return SDValue(CSENode, 0);
1752  }
1753  }
1754 
1755  return RV;
1756 }
1757 
1758 /// Given a node, return its input chain if it has one, otherwise return a null
1759 /// sd operand.
1760 static SDValue getInputChainForNode(SDNode *N) {
1761  if (unsigned NumOps = N->getNumOperands()) {
1762  if (N->getOperand(0).getValueType() == MVT::Other)
1763  return N->getOperand(0);
1764  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1765  return N->getOperand(NumOps-1);
1766  for (unsigned i = 1; i < NumOps-1; ++i)
1767  if (N->getOperand(i).getValueType() == MVT::Other)
1768  return N->getOperand(i);
1769  }
1770  return SDValue();
1771 }
1772 
1773 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1774  // If N has two operands, where one has an input chain equal to the other,
1775  // the 'other' chain is redundant.
1776  if (N->getNumOperands() == 2) {
1777  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1778  return N->getOperand(0);
1779  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1780  return N->getOperand(1);
1781  }
1782 
1783  // Don't simplify token factors if optnone.
1784  if (OptLevel == CodeGenOpt::None)
1785  return SDValue();
1786 
1787  // If the sole user is a token factor, we should make sure we have a
1788  // chance to merge them together. This prevents TF chains from inhibiting
1789  // optimizations.
1790  if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1791  AddToWorklist(*(N->use_begin()));
1792 
1793  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1794  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1795  SmallPtrSet<SDNode*, 16> SeenOps;
1796  bool Changed = false; // If we should replace this token factor.
1797 
1798  // Start out with this token factor.
1799  TFs.push_back(N);
1800 
1801  // Iterate through token factors. The list of TFs grows when new token
1802  // factors are encountered.
1803  for (unsigned i = 0; i < TFs.size(); ++i) {
1804  SDNode *TF = TFs[i];
1805 
1806  // Check each of the operands.
1807  for (const SDValue &Op : TF->op_values()) {
1808  switch (Op.getOpcode()) {
1809  case ISD::EntryToken:
1810  // Entry tokens don't need to be added to the list. They are
1811  // redundant.
1812  Changed = true;
1813  break;
1814 
1815  case ISD::TokenFactor:
1816  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1817  // Queue up for processing.
1818  TFs.push_back(Op.getNode());
1819  // Clean up in case the token factor is removed.
1820  AddToWorklist(Op.getNode());
1821  Changed = true;
1822  break;
1823  }
1824  LLVM_FALLTHROUGH;
1825 
1826  default:
1827  // Only add if it isn't already in the list.
1828  if (SeenOps.insert(Op.getNode()).second)
1829  Ops.push_back(Op);
1830  else
1831  Changed = true;
1832  break;
1833  }
1834  }
1835  }
1836 
1837  // Remove Nodes that are chained to another node in the list. Do so
1838  // by walking up chains breadth-first, stopping when we've seen
1839  // another operand. In general we must climb to the EntryNode, but we can exit
1840  // early if we find all remaining work is associated with just one operand as
1841  // no further pruning is possible.
1842 
1843  // List of nodes to search through and original Ops from which they originate.
1844  SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1845  SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1846  SmallPtrSet<SDNode *, 16> SeenChains;
1847  bool DidPruneOps = false;
1848 
1849  unsigned NumLeftToConsider = 0;
1850  for (const SDValue &Op : Ops) {
1851  Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1852  OpWorkCount.push_back(1);
1853  }
1854 
1855  auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1856  // If this is an Op, we can remove the op from the list. Re-mark any
1857  // search associated with it as coming from the current OpNumber.
1858  if (SeenOps.count(Op) != 0) {
1859  Changed = true;
1860  DidPruneOps = true;
1861  unsigned OrigOpNumber = 0;
1862  while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1863  OrigOpNumber++;
1864  assert((OrigOpNumber != Ops.size()) &&
1865  "expected to find TokenFactor Operand");
1866  // Re-mark worklist from OrigOpNumber to OpNumber
1867  for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1868  if (Worklist[i].second == OrigOpNumber) {
1869  Worklist[i].second = OpNumber;
1870  }
1871  }
1872  OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1873  OpWorkCount[OrigOpNumber] = 0;
1874  NumLeftToConsider--;
1875  }
1876  // Add if it's a new chain
1877  if (SeenChains.insert(Op).second) {
1878  OpWorkCount[OpNumber]++;
1879  Worklist.push_back(std::make_pair(Op, OpNumber));
1880  }
1881  };
1882 
1883  for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1884  // We need to consider at least 2 Ops to prune.
1885  if (NumLeftToConsider <= 1)
1886  break;
1887  auto CurNode = Worklist[i].first;
1888  auto CurOpNumber = Worklist[i].second;
1889  assert((OpWorkCount[CurOpNumber] > 0) &&
1890  "Node should not appear in worklist");
1891  switch (CurNode->getOpcode()) {
1892  case ISD::EntryToken:
1893  // Hitting EntryToken is the only way for the search to terminate without
1894  // hitting another operand's search. Prevent us from marking this operand
1895  // as considered.
1897  NumLeftToConsider++;
1898  break;
1899  case ISD::TokenFactor:
1900  for (const SDValue &Op : CurNode->op_values())
1901  AddToWorklist(i, Op.getNode(), CurOpNumber);
1902  break;
1903  case ISD::LIFETIME_START:
1904  case ISD::LIFETIME_END:
1905  case ISD::CopyFromReg:
1906  case ISD::CopyToReg:
1907  AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1908  break;
1909  default:
1910  if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1911  AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1912  break;
1913  }
1914  OpWorkCount[CurOpNumber]--;
1915  if (OpWorkCount[CurOpNumber] == 0)
1916  NumLeftToConsider--;
1917  }
1918 
1919  // If we've changed things around then replace token factor.
1920  if (Changed) {
1921  SDValue Result;
1922  if (Ops.empty()) {
1923  // The entry token is the only possible outcome.
1924  Result = DAG.getEntryNode();
1925  } else {
1926  if (DidPruneOps) {
1927  SmallVector<SDValue, 8> PrunedOps;
1928  //
1929  for (const SDValue &Op : Ops) {
1930  if (SeenChains.count(Op.getNode()) == 0)
1931  PrunedOps.push_back(Op);
1932  }
1933  Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1934  } else {
1935  Result = DAG.getTokenFactor(SDLoc(N), Ops);
1936  }
1937  }
1938  return Result;
1939  }
1940  return SDValue();
1941 }
1942 
1943 /// MERGE_VALUES can always be eliminated.
1944 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1945  WorklistRemover DeadNodes(*this);
1946  // Replacing results may cause a different MERGE_VALUES to suddenly
1947  // be CSE'd with N, and carry its uses with it. Iterate until no
1948  // uses remain, to ensure that the node can be safely deleted.
1949  // First add the users of this node to the work list so that they
1950  // can be tried again once they have new operands.
1951  AddUsersToWorklist(N);
1952  do {
1953  // Do as a single replacement to avoid rewalking use lists.
1954  SmallVector<SDValue, 8> Ops;
1955  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1956  Ops.push_back(N->getOperand(i));
1957  DAG.ReplaceAllUsesWith(N, Ops.data());
1958  } while (!N->use_empty());
1959  deleteAndRecombine(N);
1960  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1961 }
1962 
1963 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1964 /// ConstantSDNode pointer; otherwise return nullptr.
1965 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1966  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1967  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1968 }
1969 
1970 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1971  assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
1972  "Unexpected binary operator");
1973 
1974  // Don't do this unless the old select is going away. We want to eliminate the
1975  // binary operator, not replace a binop with a select.
1976  // TODO: Handle ISD::SELECT_CC.
1977  unsigned SelOpNo = 0;
1978  SDValue Sel = BO->getOperand(0);
1979  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1980  SelOpNo = 1;
1981  Sel = BO->getOperand(1);
1982  }
1983 
1984  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1985  return SDValue();
1986 
1987  SDValue CT = Sel.getOperand(1);
1988  if (!isConstantOrConstantVector(CT, true) &&
1989  !isConstantFPBuildVectorOrConstantFP(CT))
1990  return SDValue();
1991 
1992  SDValue CF = Sel.getOperand(2);
1993  if (!isConstantOrConstantVector(CF, true) &&
1994  !isConstantFPBuildVectorOrConstantFP(CF))
1995  return SDValue();
1996 
1997  // Bail out if any constants are opaque because we can't constant fold those.
1998  // The exception is "and" and "or" with either 0 or -1 in which case we can
1999  // propagate non constant operands into select. I.e.:
2000  // and (select Cond, 0, -1), X --> select Cond, 0, X
2001  // or X, (select Cond, -1, 0) --> select Cond, -1, X
2002  auto BinOpcode = BO->getOpcode();
2003  bool CanFoldNonConst =
2004  (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2005  (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2006  (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2007 
2008  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2009  if (!CanFoldNonConst &&
2010  !isConstantOrConstantVector(CBO, true) &&
2011  !isConstantFPBuildVectorOrConstantFP(CBO))
2012  return SDValue();
2013 
2014  EVT VT = Sel.getValueType();
2015 
2016  // In the case of a shift, the value and the shift amount may have different
2017  // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS type.
2018  // Bail out if we have swapped operands and the value types do not match.
2019  // NB: x86 is fine if the operands are not swapped and the shift amount VT is
2020  // no bigger than the shifted value. TODO: it is possible to check for a shift
2021  // operation, correct the VTs and still perform the optimization on x86 if needed.
2022  if (SelOpNo && VT != CBO.getValueType())
2023  return SDValue();
2024 
2025  // We have a select-of-constants followed by a binary operator with a
2026  // constant. Eliminate the binop by pulling the constant math into the select.
2027  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2028  SDLoc DL(Sel);
2029  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2030  : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2031  if (!CanFoldNonConst && !NewCT.isUndef() &&
2032  !isConstantOrConstantVector(NewCT, true) &&
2033  !isConstantFPBuildVectorOrConstantFP(NewCT))
2034  return SDValue();
2035 
2036  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2037  : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2038  if (!CanFoldNonConst && !NewCF.isUndef() &&
2039  !isConstantOrConstantVector(NewCF, true) &&
2040  !isConstantFPBuildVectorOrConstantFP(NewCF))
2041  return SDValue();
2042 
2043  return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2044 }
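
 The fold above hinges on the identity (Cond ? CT : CF) op CBO == Cond ? (CT op CBO) : (CF op CBO), so the binary operator disappears into the select arms. A standalone check with sample constants (illustrative only, independent of the SelectionDAG APIs):

 #include <cassert>

 int main() {
   const int CT = 3, CF = 7, CBO = 5;
   for (bool Cond : {false, true})
     // add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
     assert((Cond ? CT : CF) + CBO == (Cond ? CT + CBO : CF + CBO));
   return 0;
 }
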
2045 
2046 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2047  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2048  "Expecting add or sub");
2049 
2050  // Match a constant operand and a zext operand for the math instruction:
2051  // add Z, C
2052  // sub C, Z
2053  bool IsAdd = N->getOpcode() == ISD::ADD;
2054  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2055  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2056  auto *CN = dyn_cast<ConstantSDNode>(C);
2057  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2058  return SDValue();
2059 
2060  // Match the zext operand as a setcc of a boolean.
2061  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2062  Z.getOperand(0).getValueType() != MVT::i1)
2063  return SDValue();
2064 
2065  // Match the compare as: setcc (X & 1), 0, eq.
2066  SDValue SetCC = Z.getOperand(0);
2067  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2068  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2069  SetCC.getOperand(0).getOpcode() != ISD::AND ||
2070  !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2071  return SDValue();
2072 
2073  // We are adding/subtracting a constant and an inverted low bit. Turn that
2074  // into a subtract/add of the low bit with incremented/decremented constant:
2075  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2076  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2077  EVT VT = C.getValueType();
2078  SDLoc DL(N);
2079  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2080  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2081  DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2082  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2083 }
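
 A standalone check of the two identities documented above, using unsigned 32-bit arithmetic and a sample constant (illustrative only, independent of the SelectionDAG APIs):

 #include <cassert>
 #include <cstdint>

 int main() {
   const uint32_t C = 42;
   for (uint32_t X = 0; X < 8; ++X) {
     uint32_t ZextInvLowBit = ((X & 1) == 0) ? 1 : 0; // zext i1 (seteq (X & 1), 0)
     // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
     assert(ZextInvLowBit + C == (C + 1) - (X & 1));
     // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
     assert(C - ZextInvLowBit == (C - 1) + (X & 1));
   }
   return 0;
 }
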
2084 
2085 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2086 /// a shift and add with a different constant.
2087 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2088  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2089  "Expecting add or sub");
2090 
2091  // We need a constant operand for the add/sub, and the other operand is a
2092  // logical shift right: add (srl), C or sub C, (srl).
2093  bool IsAdd = N->getOpcode() == ISD::ADD;
2094  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2095  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2096  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2097  if (!C || ShiftOp.getOpcode() != ISD::SRL)
2098  return SDValue();
2099 
2100  // The shift must be of a 'not' value.
2101  SDValue Not = ShiftOp.getOperand(0);
2102  if (!Not.hasOneUse() || !isBitwiseNot(Not))
2103  return SDValue();
2104 
2105  // The shift must be moving the sign bit to the least-significant-bit.
2106  EVT VT = ShiftOp.getValueType();
2107  SDValue ShAmt = ShiftOp.getOperand(1);
2108  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2109  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2110  return SDValue();
2111 
2112  // Eliminate the 'not' by adjusting the shift and add/sub constant:
2113  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2114  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2115  SDLoc DL(N);
2116  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2117  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2118  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2119  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2120 }
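
 A standalone check of the same rewrite on i32 values (illustrative only; the >> on int32_t is assumed to behave as an arithmetic shift, as it does on common host targets):

 #include <cassert>
 #include <cstdint>

 int main() {
   const uint32_t C = 100;
   for (uint32_t X : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xdeadbeefu}) {
     uint32_t SrlNot = ~X >> 31;                  // srl (not X), 31
     uint32_t Sra = (uint32_t)((int32_t)X >> 31); // sra X, 31
     // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
     assert(SrlNot + C == Sra + (C + 1));
     // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
     assert(C - SrlNot == (X >> 31) + (C - 1));
   }
   return 0;
 }
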
2121 
2122 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2123 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2124 /// are no common bits set in the operands).
2125 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2126  SDValue N0 = N->getOperand(0);
2127  SDValue N1 = N->getOperand(1);
2128  EVT VT = N0.getValueType();
2129  SDLoc DL(N);
2130 
2131  // fold vector ops
2132  if (VT.isVector()) {
2133  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2134  return FoldedVOp;
2135 
2136  // fold (add x, 0) -> x, vector edition
2137  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2138  return N0;
2139  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2140  return N1;
2141  }
2142 
2143  // fold (add x, undef) -> undef
2144  if (N0.isUndef())
2145  return N0;
2146 
2147  if (N1.isUndef())
2148  return N1;
2149 
2150  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2151  // canonicalize constant to RHS
2152  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2153  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2154  // fold (add c1, c2) -> c1+c2
2155  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2156  N1.getNode());
2157  }
2158 
2159  // fold (add x, 0) -> x
2160  if (isNullConstant(N1))
2161  return N0;
2162 
2163  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2164  // fold ((c1-A)+c2) -> (c1+c2)-A
2165  if (N0.getOpcode() == ISD::SUB &&
2166  isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2167  // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2168  return DAG.getNode(ISD::SUB, DL, VT,
2169  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2170  N0.getOperand(1));
2171  }
2172 
2173  // add (sext i1 X), 1 -> zext (not i1 X)
2174  // We don't transform this pattern:
2175  // add (zext i1 X), -1 -> sext (not i1 X)
2176  // because most (?) targets generate better code for the zext form.
2177  if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2178  isOneOrOneSplat(N1)) {
2179  SDValue X = N0.getOperand(0);
2180  if ((!LegalOperations ||
2181  (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2182  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2183  X.getScalarValueSizeInBits() == 1) {
2184  SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2185  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2186  }
2187  }
2188 
2189  // Undo the add -> or combine to merge constant offsets from a frame index.
2190  if (N0.getOpcode() == ISD::OR &&
2191  isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2192  isa<ConstantSDNode>(N0.getOperand(1)) &&
2193  DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2194  SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2195  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2196  }
2197  }
2198 
2199  if (SDValue NewSel = foldBinOpIntoSelect(N))
2200  return NewSel;
2201 
2202  // reassociate add
2203  if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2204  return RADD;
2205 
2206  // fold ((0-A) + B) -> B-A
2207  if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2208  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2209 
2210  // fold (A + (0-B)) -> A-B
2211  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2212  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2213 
2214  // fold (A+(B-A)) -> B
2215  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2216  return N1.getOperand(0);
2217 
2218  // fold ((B-A)+A) -> B
2219  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2220  return N0.getOperand(0);
2221 
2222  // fold ((A-B)+(C-A)) -> (C-B)
2223  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2224  N0.getOperand(0) == N1.getOperand(1))
2225  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2226  N0.getOperand(1));
2227 
2228  // fold ((A-B)+(B-C)) -> (A-C)
2229  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2230  N0.getOperand(1) == N1.getOperand(0))
2231  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2232  N1.getOperand(1));
2233 
2234  // fold (A+(B-(A+C))) to (B-C)
2235  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2236  N0 == N1.getOperand(1).getOperand(0))
2237  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2238  N1.getOperand(1).getOperand(1));
2239 
2240  // fold (A+(B-(C+A))) to (B-C)
2241  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2242  N0 == N1.getOperand(1).getOperand(1))
2243  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2244  N1.getOperand(1).getOperand(0));
2245 
2246  // fold (A+((B-A)+or-C)) to (B+or-C)
2247  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2248  N1.getOperand(0).getOpcode() == ISD::SUB &&
2249  N0 == N1.getOperand(0).getOperand(1))
2250  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2251  N1.getOperand(1));
2252 
2253  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2254  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2255  SDValue N00 = N0.getOperand(0);
2256  SDValue N01 = N0.getOperand(1);
2257  SDValue N10 = N1.getOperand(0);
2258  SDValue N11 = N1.getOperand(1);
2259 
2260  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2261  return DAG.getNode(ISD::SUB, DL, VT,
2262  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2263  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2264  }
2265 
2266  // fold (add (umax X, C), -C) --> (usubsat X, C)
2267  if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2268  auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2269  return (!Max && !Op) ||
2270  (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2271  };
2272  if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2273  /*AllowUndefs*/ true))
2274  return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2275  N0.getOperand(1));
2276  }
2277 
2278  if (SimplifyDemandedBits(SDValue(N, 0)))
2279  return SDValue(N, 0);
2280 
2281  if (isOneOrOneSplat(N1)) {
2282  // fold (add (xor a, -1), 1) -> (sub 0, a)
2283  if (isBitwiseNot(N0))
2284  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2285  N0.getOperand(0));
2286 
2287  // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2288  if (N0.getOpcode() == ISD::ADD ||
2289  N0.getOpcode() == ISD::UADDO ||
2290  N0.getOpcode() == ISD::SADDO) {
2291  SDValue A, Xor;
2292 
2293  if (isBitwiseNot(N0.getOperand(0))) {
2294  A = N0.getOperand(1);
2295  Xor = N0.getOperand(0);
2296  } else if (isBitwiseNot(N0.getOperand(1))) {
2297  A = N0.getOperand(0);
2298  Xor = N0.getOperand(1);
2299  }
2300 
2301  if (Xor)
2302  return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2303  }
2304  }
2305 
2306  if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2307  return Combined;
2308 
2309  if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2310  return Combined;
2311 
2312  return SDValue();
2313 }
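
 One of the folds above, add (umax X, C), -C --> usubsat X, C, is just the observation that max(X, C) - C is X - C clamped at zero. A standalone check (illustrative only, independent of the SelectionDAG APIs):

 #include <algorithm>
 #include <cassert>
 #include <cstdint>

 static uint32_t usubsat(uint32_t X, uint32_t C) { return X > C ? X - C : 0; }

 int main() {
   const uint32_t C = 1000;
   for (uint32_t X : {0u, 999u, 1000u, 1001u, 0xffffffffu})
     // add (umax X, C), -C --> usubsat X, C   (arithmetic is modulo 2^32)
     assert(std::max(X, C) + (0u - C) == usubsat(X, C));
   return 0;
 }
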
2314 
2315 SDValue DAGCombiner::visitADD(SDNode *N) {
2316  SDValue N0 = N->getOperand(0);
2317  SDValue N1 = N->getOperand(1);
2318  EVT VT = N0.getValueType();
2319  SDLoc DL(N);
2320 
2321  if (SDValue Combined = visitADDLike(N))
2322  return Combined;
2323 
2324  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2325  return V;
2326 
2327  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2328  return V;
2329 
2330  // fold (a+b) -> (a|b) iff a and b share no bits.
2331  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2332  DAG.haveNoCommonBitsSet(N0, N1))
2333  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2334 
2335  return SDValue();
2336 }
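
 The last fold in visitADD rests on the fact that addition cannot produce carries when the operands have no bits in common, so it degenerates to a bitwise OR. A standalone check (illustrative only):

 #include <cassert>
 #include <cstdint>

 int main() {
   const uint32_t A = 0xf0f0f0f0u, B = 0x01010101u;
   assert((A & B) == 0);     // no common bits set
   assert(A + B == (A | B)); // fold (a+b) -> (a|b) iff a and b share no bits
   return 0;
 }
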
2337 
2338 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2339  unsigned Opcode = N->getOpcode();
2340  SDValue N0 = N->getOperand(0);
2341  SDValue N1 = N->getOperand(1);
2342  EVT VT = N0.getValueType();
2343  SDLoc DL(N);
2344 
2345  // fold vector ops
2346  if (VT.isVector()) {
2347  // TODO SimplifyVBinOp
2348 
2349  // fold (add_sat x, 0) -> x, vector edition
2350  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2351  return N0;
2352  if (ISD::isBuildVectorAllZeros(N0.getNode()))
2353  return N1;
2354  }
2355 
2356  // fold (add_sat x, undef) -> -1
2357  if (N0.isUndef() || N1.isUndef())
2358  return DAG.getAllOnesConstant(DL, VT);
2359 
2360  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2361  // canonicalize constant to RHS
2362  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2363  return DAG.getNode(Opcode, DL, VT, N1, N0);
2364  // fold (add_sat c1, c2) -> c3
2365  return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2366  N1.getNode());
2367  }
2368 
2369  // fold (add_sat x, 0) -> x
2370  if (isNullConstant(N1))
2371  return N0;
2372 
2373  // If it cannot overflow, transform into an add.
2374  if (Opcode == ISD::UADDSAT)
2375  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2376  return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2377 
2378  return SDValue();
2379 }
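
 For reference, unsigned saturating add clamps to the maximum value on wrap-around, which is why UADDSAT can be relaxed to a plain ADD whenever overflow is provably impossible. A standalone model (illustrative only):

 #include <cassert>
 #include <cstdint>

 static uint32_t uaddsat(uint32_t X, uint32_t Y) {
   uint32_t Sum = X + Y;
   return Sum < X ? 0xffffffffu : Sum; // clamp when the add wrapped
 }

 int main() {
   assert(uaddsat(10u, 20u) == 30u);                // no overflow: same as add
   assert(uaddsat(0xffffffffu, 1u) == 0xffffffffu); // saturating case
   return 0;
 }
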
2380 
2381 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2382  bool Masked = false;
2383 
2384  // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2385  while (true) {
2386  if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2387  V = V.getOperand(0);
2388  continue;
2389  }
2390 
2391  if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2392  Masked = true;
2393  V = V.getOperand(0);
2394  continue;
2395  }
2396 
2397  break;
2398  }
2399 
2400  // If this is not a carry, return.
2401  if (V.getResNo() != 1)
2402  return SDValue();
2403 
2404  if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2405  V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2406  return SDValue();
2407 
2408  EVT VT = V.getNode()->getValueType(0);
2409  if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2410  return SDValue();
2411 
2412  // If the result is masked, then no matter what kind of bool it is we can
2413  // return. If it isn't, then we need to make sure the bool type is either 0 or
2414  // 1 and not other values.
2415  if (Masked ||
2416  TLI.getBooleanContents(V.getValueType()) ==
2417  TargetLowering::ZeroOrOneBooleanContent)
2418  return V;
2419 
2420  return SDValue();
2421 }
2422 
2423 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2424 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2425 /// the opcode and bypass the mask operation.
2426 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2427  SelectionDAG &DAG, const SDLoc &DL) {
2428  if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2429  return SDValue();
2430 
2431  EVT VT = N0.getValueType();
2432  if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2433  return SDValue();
2434 
2435  // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2436  // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2437  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2438 }
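
 The helper above applies when the masked operand is known to be all sign bits (0 or -1): in that case adding its low bit is the same as subtracting the whole value, and vice versa. A standalone check (illustrative only):

 #include <cassert>
 #include <cstdint>

 int main() {
   const uint32_t N0 = 123;
   for (uint32_t Y : {0u, 0xffffffffu}) { // Y known to be 0 or -1
     assert(N0 + (Y & 1) == N0 - Y); // add N0, (and Y, 1) --> sub N0, Y
     assert(N0 - (Y & 1) == N0 + Y); // sub N0, (and Y, 1) --> add N0, Y
   }
   return 0;
 }
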
2439 
2440 /// Helper for doing combines based on N0 and N1 being added to each other.
2441 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2442  SDNode *LocReference) {
2443  EVT VT = N0.getValueType();
2444  SDLoc DL(LocReference);
2445 
2446  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2447  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2448  isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2449  return DAG.getNode(ISD::SUB, DL, VT, N0,
2450  DAG.getNode(ISD::SHL, DL, VT,
2451  N1.getOperand(0).getOperand(1),
2452  N1.getOperand(1)));
2453 
2454  if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2455  return V;
2456 
2457  // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2458  // rather than 'add 0/-1' (the zext should get folded).
2459  // add (sext i1 Y), X --> sub X, (zext i1 Y)
2460  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2461  N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2462  TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2463  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2464  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2465  }
2466 
2467  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2468  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2469  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2470  if (TN->getVT() == MVT::i1) {
2471  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2472  DAG.getConstant(1, DL, VT));
2473  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2474  }
2475  }
2476 
2477  // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2478  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2479  N1.getResNo() == 0)
2480  return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2481  N0, N1.getOperand(0), N1.getOperand(2));
2482 
2483  // (add X, Carry) -> (addcarry X, 0, Carry)
2484  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2485  if (SDValue Carry = getAsCarry(TLI, N1))
2486  return DAG.getNode(ISD::ADDCARRY, DL,
2487  DAG.getVTList(VT, Carry.getValueType()), N0,
2488  DAG.getConstant(0, DL, VT), Carry);
2489 
2490  return SDValue();
2491 }
2492 
2493 SDValue DAGCombiner::visitADDC(SDNode *N) {
2494  SDValue N0 = N->getOperand(0);
2495  SDValue N1 = N->getOperand(1);
2496  EVT VT = N0.getValueType();
2497  SDLoc DL(N);
2498 
2499  // If the flag result is dead, turn this into an ADD.
2500  if (!N->hasAnyUseOfValue(1))
2501  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2502  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2503 
2504  // canonicalize constant to RHS.
2505  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2506  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2507  if (N0C && !N1C)
2508  return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2509 
2510  // fold (addc x, 0) -> x + no carry out
2511  if (isNullConstant(N1))
2512  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2513  DL, MVT::Glue));
2514 
2515  // If it cannot overflow, transform into an add.
2516  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2517  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2518  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2519 
2520  return SDValue();
2521 }
2522 
2523 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2524  SelectionDAG &DAG, const TargetLowering &TLI) {
2525  EVT VT = V.getValueType();
2526 
2527  SDValue Cst;
2528  switch (TLI.getBooleanContents(VT)) {
2529  case TargetLowering::ZeroOrOneBooleanContent:
2530  case TargetLowering::UndefinedBooleanContent:
2531  Cst = DAG.getConstant(1, DL, VT);
2532  break;
2533  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2534  Cst = DAG.getConstant(-1, DL, VT);
2535  break;
2536  }
2537 
2538  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2539 }
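
 flipBoolean inverts a boolean by XOR-ing with the value that represents "true" for the target's boolean contents: 1 for 0/1 booleans, -1 (all ones) for 0/-1 booleans. A standalone check (illustrative only):

 #include <cassert>
 #include <cstdint>

 int main() {
   // ZeroOrOne booleans flip under XOR with 1.
   assert((0u ^ 1u) == 1u && (1u ^ 1u) == 0u);
   // ZeroOrNegativeOne booleans flip under XOR with -1 (all ones).
   assert((0u ^ 0xffffffffu) == 0xffffffffu && (0xffffffffu ^ 0xffffffffu) == 0u);
   return 0;
 }
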
2540 
2541 static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
2542  if (V.getOpcode() != ISD::XOR)
2543  return SDValue();
2544 
2545  ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2546  if (!Const)
2547  return SDValue();
2548 
2549  EVT VT = V.getValueType();
2550 
2551  bool IsFlip = false;
2552  switch(TLI.getBooleanContents(VT)) {
2553  case TargetLowering::ZeroOrOneBooleanContent:
2554  IsFlip = Const->isOne();
2555  break;
2556  case TargetLowering::ZeroOrNegativeOneBooleanContent:
2557  IsFlip = Const->isAllOnesValue();
2558  break;
2559  case TargetLowering::UndefinedBooleanContent:
2560  IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2561  break;
2562  }
2563 
2564  if (IsFlip)
2565  return V.getOperand(0);
2566  return SDValue();
2567 }
2568 
2569 SDValue DAGCombiner::visitADDO(SDNode *N) {
2570  SDValue N0 = N->getOperand(0);
2571  SDValue N1 = N->getOperand(1);
2572  EVT VT = N0.getValueType();
2573  bool IsSigned = (ISD::SADDO == N->getOpcode());
2574 
2575  EVT CarryVT = N->getValueType(1);
2576  SDLoc DL(N);
2577 
2578  // If the flag result is dead, turn this into an ADD.
2579  if (!N->hasAnyUseOfValue(1))
2580  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2581  DAG.getUNDEF(CarryVT));
2582 
2583  // canonicalize constant to RHS.
2584  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2585  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2586  return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2587 
2588  // fold (addo x, 0) -> x + no carry out
2589  if (isNullOrNullSplat(N1))
2590  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2591 
2592  if (!IsSigned) {
2593  // If it cannot overflow, transform into an add.
2594  if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2595  return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2596  DAG.getConstant(0, DL, CarryVT));
2597 
2598  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2599  if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2600  SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2601  DAG.getConstant(0, DL, VT), N0.getOperand(0));
2602  return CombineTo(N, Sub,
2603  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2604  }
2605 
2606  if (SDValue Combined = visitUADDOLike(N0, N1, N))
2607  return Combined;
2608 
2609  if (SDValue Combined = visitUADDOLike(N1, N0, N))
2610  return Combined;
2611  }
2612 
2613  return SDValue();
2614 }
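
 The uaddo fold above uses the identity ~a + 1 == 0 - a; the unsigned add overflows exactly when a == 0, which is precisely when the subtraction does not borrow, hence the flipped carry. A standalone check (illustrative only):

 #include <cassert>
 #include <cstdint>

 int main() {
   for (uint32_t A : {0u, 1u, 0x80000000u, 0xffffffffu}) {
     uint32_t NotA = ~A;
     uint32_t Add = NotA + 1;        // uaddo (xor a, -1), 1
     bool AddCarry = Add < NotA;     // unsigned overflow of the add
     uint32_t Sub = 0u - A;          // usubo 0, a
     bool SubBorrow = A != 0;        // unsigned overflow of the sub
     assert(Add == Sub);
     assert(AddCarry == !SubBorrow); // carry result is flipped
   }
   return 0;
 }
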
2615 
2616 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2617  EVT VT = N0.getValueType();
2618  if (VT.isVector())
2619  return SDValue();
2620 
2621  // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2622  // If Y + 1 cannot overflow.
2623  if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2624  SDValue Y = N1.getOperand(0);
2625  SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2626  if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2627  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2628  N1.getOperand(2));
2629  }
2630 
2631  // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2632  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2633  if (SDValue Carry = getAsCarry(TLI, N1))
2634  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2635  DAG.getConstant(0, SDLoc(N), VT), Carry);
2636 
2637  return SDValue();
2638 }
2639 
2640 SDValue DAGCombiner::visitADDE(SDNode *N) {
2641  SDValue N0 = N->getOperand(0);
2642  SDValue N1 = N->getOperand(1);
2643  SDValue CarryIn = N->getOperand(2);
2644 
2645  // canonicalize constant to RHS
2646  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2647  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2648  if (N0C && !N1C)
2649  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2650  N1, N0, CarryIn);
2651 
2652  // fold (adde x, y, false) -> (addc x, y)
2653  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2654  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2655 
2656  return SDValue();
2657 }
2658 
2659 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2660  SDValue N0 = N->getOperand(0);
2661  SDValue N1 = N->getOperand(1);
2662  SDValue CarryIn = N->getOperand(2);
2663  SDLoc DL(N);
2664 
2665  // canonicalize constant to RHS
2666  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2667  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2668  if (N0C && !N1C)
2669  return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2670 
2671  // fold (addcarry x, y, false) -> (uaddo x, y)
2672  if (isNullConstant(CarryIn)) {
2673  if (!LegalOperations ||
2674  TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2675  return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2676  }
2677 
2678  EVT CarryVT = CarryIn.getValueType();
2679 
2680  // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2681  if (isNullConstant(N0) && isNullConstant(N1)) {
2682  EVT VT = N0.getValueType();
2683  SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2684  AddToWorklist(CarryExt.getNode());
2685  return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2686  DAG.getConstant(1, DL, VT)),
2687  DAG.getConstant(0, DL, CarryVT));
2688  }
2689 
2690  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2691  if (isBitwiseNot(N0) && isNullConstant(N1)) {
2692  if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
2693  SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2694  DAG.getConstant(0, DL, N0.getValueType()),
2695  N0.getOperand(0), B);
2696  return CombineTo(N, Sub,
2697  flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2698  }
2699  }
2700 
2701  if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2702  return Combined;
2703 
2704  if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2705  return Combined;
2706 
2707  return SDValue();
2708 }
2709 
2710 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2711  SDNode *N) {
2712  // Iff the flag result is dead:
2713  // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2714  if ((N0.getOpcode() == ISD::ADD ||
2715  (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2716  isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2717  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2718  N0.getOperand(0), N0.getOperand(1), CarryIn);
2719 
2720  /**
2721  * When one of the addcarry arguments is itself a carry, we may be facing
2722  * a diamond carry propagation. In that case we try to transform the DAG
2723  * to ensure linear carry propagation if that is possible.
2724  *
2725  * We are trying to get:
2726  * (addcarry X, 0, (addcarry A, B, Z):Carry)
2727  */
2728  if (auto Y = getAsCarry(TLI, N1)) {
2729  /**
2730  * (uaddo A, B)
2731  * / \
2732  * Carry Sum
2733  * | \
2734  * | (addcarry *, 0, Z)
2735  * | /
2736  * \ Carry
2737  * | /
2738  * (addcarry X, *, *)
2739  */
2740  if (Y.getOpcode() == ISD::UADDO &&
2741  CarryIn.getResNo() == 1 &&
2742  CarryIn.getOpcode() == ISD::ADDCARRY &&
2743  isNullConstant(CarryIn.getOperand(1)) &&
2744  CarryIn.getOperand(0) == Y.getValue(0)) {
2745  auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2746  Y.getOperand(0), Y.getOperand(1),
2747  CarryIn.getOperand(2));
2748  AddToWorklist(NewY.getNode());
2749  return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2750  DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2751  NewY.getValue(1));
2752  }
2753  }
2754 
2755  return SDValue();
2756 }
2757 
2758 // Since it may not be valid to emit a fold to zero for vector initializers
2759 // check if we can before folding.
2760 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2761  SelectionDAG &DAG, bool LegalOperations) {
2762  if (!VT.isVector())
2763  return DAG.getConstant(0, DL, VT);
2764  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2765  return DAG.getConstant(0, DL, VT);
2766  return SDValue();
2767 }
2768 
2769 SDValue DAGCombiner::visitSUB(SDNode *N) {
2770  SDValue N0 = N->getOperand(0);
2771  SDValue N1 = N->getOperand(1);
2772  EVT VT = N0.getValueType();
2773  SDLoc DL(N);
2774 
2775  // fold vector ops
2776  if (VT.isVector()) {
2777  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2778  return FoldedVOp;
2779 
2780  // fold (sub x, 0) -> x, vector edition
2781  if (ISD::isBuildVectorAllZeros(N1.getNode()))
2782  return N0;
2783  }
2784 
2785  // fold (sub x, x) -> 0
2786  // FIXME: Refactor this and xor and other similar operations together.
2787  if (N0 == N1)
2788  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2789  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2790  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2791  // fold (sub c1, c2) -> c1-c2
2792  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2793  N1.getNode());
2794  }
2795 
2796  if (SDValue NewSel = foldBinOpIntoSelect(N))
2797  return NewSel;
2798 
2799  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2800 
2801  // fold (sub x, c) -> (add x, -c)
2802  if (N1C) {
2803  return DAG.getNode(ISD::ADD, DL, VT, N0,
2804  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2805  }
2806 
2807  if (isNullOrNullSplat(N0)) {
2808  unsigned BitWidth = VT.getScalarSizeInBits();
2809  // Right-shifting everything out but the sign bit followed by negation is
2810  // the same as flipping arithmetic/logical shift type without the negation:
2811  // -(X >>u 31) -> (X >>s 31)
2812  // -(X >>s 31) -> (X >>u 31)
2813  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2814  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2815  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2816  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2817  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2818  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2819  }
2820  }
2821 
2822  // 0 - X --> 0 if the sub is NUW.
2823  if (N->getFlags().hasNoUnsignedWrap())
2824  return N0;
2825 
2826  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2827  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2828  // N1 must be 0 because negating the minimum signed value is undefined.
2829  if (N->getFlags().hasNoSignedWrap())
2830  return N0;
2831 
2832  // 0 - X --> X if X is 0 or the minimum signed value.
2833  return N1;
2834  }
2835  }
2836 
2837  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2838  if (isAllOnesOrAllOnesSplat(N0))
2839  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2840 
2841  // fold (A - (0-B)) -> A+B
2842  if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2843  return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2844 
2845  // fold A-(A-B) -> B
2846  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2847  return N1.getOperand(1);
2848 
2849  // fold (A+B)-A -> B
2850  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2851  return N0.getOperand(1);
2852 
2853  // fold (A+B)-B -> A
2854  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2855  return N0.getOperand(0);
2856 
2857  // fold C2-(A+C1) -> (C2-C1)-A
2858  if (N1.getOpcode() == ISD::ADD) {
2859  SDValue N11 = N1.getOperand(1);
2860  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2861  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2862  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2863  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2864  }
2865  }
2866 
2867  // fold ((A+(B+or-C))-B) -> A+or-C
2868  if (N0.getOpcode() == ISD::ADD &&
2869  (N0.getOperand(1).getOpcode() == ISD::SUB ||
2870  N0.getOperand(1).getOpcode() == ISD::ADD) &&
2871  N0.getOperand(1).getOperand(0) == N1)
2872  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2873  N0.getOperand(1).getOperand(1));
2874 
2875  // fold ((A+(C+B))-B) -> A+C
2876  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2877  N0.getOperand(1).getOperand(1) == N1)
2878  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2879  N0.getOperand(1).getOperand(0));
2880 
2881  // fold ((A-(B-C))-C) -> A-B
2882  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2883  N0.getOperand(1).getOperand(1) == N1)
2884  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2885  N0.getOperand(1).getOperand(0));
2886 
2887  // fold (A-(B-C)) -> A+(C-B)
2888  if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2889  return DAG.getNode(ISD::ADD, DL, VT, N0,
2890  DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2891  N1.getOperand(0)));
2892 
2893  // fold (X - (-Y * Z)) -> (X + (Y * Z))
2894  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2895  if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2896  isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2897  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2898  N1.getOperand(0).getOperand(1),
2899  N1.getOperand(1));
2900  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2901  }
2902  if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2903  isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2904  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2905  N1.getOperand(0),
2906  N1.getOperand(1).getOperand(1));
2907  return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2908  }
2909  }
2910 
2911  // If either operand of a sub is undef, the result is undef
2912  if (N0.isUndef())
2913  return N0;
2914  if (N1.isUndef())
2915  return N1;
2916 
2917  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2918  return V;
2919 
2920  if (SDValue V = foldAddSubOfSignBit(N, DAG))
2921  return V;
2922 
2923  if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2924  return V;
2925 
2926  // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
2927  // rather than 'sub 0/1' (the sext should get folded).
2928  // sub X, (zext i1 Y) --> add X, (sext i1 Y)
2929  if (N1.getOpcode() == ISD::ZERO_EXTEND &&
2930  N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
2931  TLI.getBooleanContents(VT) ==
2932  TargetLowering::ZeroOrNegativeOneBooleanContent) {
2933  SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
2934  return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
2935  }
2936 
2937  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
2938  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2939  if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2940  SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2941  SDValue S0 = N1.getOperand(0);
2942  if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2943  unsigned OpSizeInBits = VT.getScalarSizeInBits();
2944  if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2945  if (C->getAPIntValue() == (OpSizeInBits - 1))
2946  return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2947  }
2948  }
2949  }
2950 
2951  // If the relocation model supports it, consider symbol offsets.
2952  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2953  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2954  // fold (sub Sym, c) -> Sym-c
2955  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2956  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2957  GA->getOffset() -
2958  (uint64_t)N1C->getSExtValue());
2959  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2960  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2961  if (GA->getGlobal() == GB->getGlobal())
2962  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2963  DL, VT);
2964  }
2965 
2966  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2967  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2968  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2969  if (TN->getVT() == MVT::i1) {
2970  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2971  DAG.getConstant(1, DL, VT));
2972  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2973  }
2974  }
2975 
2976  // Prefer an add for more folding potential and possibly better codegen:
2977  // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
2978  if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2979  SDValue ShAmt = N1.getOperand(1);
2980  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2981  if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2982  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2983  return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2984  }
2985  }
2986 
2987  return SDValue();
2988 }
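
 One of the folds above observes that negating a logical shift of the sign bit is the same as an arithmetic shift of it (and vice versa), because the shifted value is always 0 or 1. A standalone check on i32 (illustrative only; >> on int32_t is assumed to behave as an arithmetic shift):

 #include <cassert>
 #include <cstdint>

 int main() {
   for (uint32_t X : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu}) {
     // -(X >>u 31) -> (X >>s 31)
     assert(0u - (X >> 31) == (uint32_t)((int32_t)X >> 31));
     // -(X >>s 31) -> (X >>u 31)
     assert(0u - (uint32_t)((int32_t)X >> 31) == (X >> 31));
   }
   return 0;
 }
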
2989 
2990 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2991  SDValue N0 = N->getOperand(0);
2992  SDValue N1 = N->getOperand(1);
2993  EVT VT = N0.getValueType();
2994  SDLoc DL(N);
2995 
2996  // fold vector ops
2997  if (VT.isVector()) {
2998  // TODO SimplifyVBinOp
2999 
3000  // fold (sub_sat x, 0) -> x, vector edition
3001  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3002  return N0;
3003  }
3004 
3005  // fold (sub_sat x, undef) -> 0
3006  if (N0.isUndef() || N1.isUndef())
3007  return DAG.getConstant(0, DL, VT);
3008 
3009  // fold (sub_sat x, x) -> 0
3010  if (N0 == N1)
3011  return DAG.getConstant(0, DL, VT);
3012 
3013  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3014  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3015  // fold (sub_sat c1, c2) -> c3
3016  return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3017  N1.getNode());
3018  }
3019 
3020  // fold (sub_sat x, 0) -> x
3021  if (isNullConstant(N1))
3022  return N0;
3023 
3024  return SDValue();
3025 }
3026 
3027 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3028  SDValue N0 = N->getOperand(0);
3029  SDValue N1 = N->getOperand(1);
3030  EVT VT = N0.getValueType();
3031  SDLoc DL(N);
3032 
3033  // If the flag result is dead, turn this into an SUB.
3034  if (!N->hasAnyUseOfValue(1))
3035  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3036  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3037 
3038  // fold (subc x, x) -> 0 + no borrow
3039  if (N0 == N1)
3040  return CombineTo(N, DAG.getConstant(0, DL, VT),
3041  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3042 
3043  // fold (subc x, 0) -> x + no borrow
3044  if (isNullConstant(N1))
3045  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3046 
3047  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3048  if (isAllOnesConstant(N0))
3049  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3050  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3051 
3052  return SDValue();
3053 }
3054 
3055 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3056  SDValue N0 = N->getOperand(0);
3057  SDValue N1 = N->getOperand(1);
3058  EVT VT = N0.getValueType();
3059  bool IsSigned = (ISD::SSUBO == N->getOpcode());
3060 
3061  EVT CarryVT = N->getValueType(1);
3062  SDLoc DL(N);
3063 
3064  // If the flag result is dead, turn this into an SUB.
3065  if (!N->hasAnyUseOfValue(1))
3066  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3067  DAG.getUNDEF(CarryVT));
3068 
3069  // fold (subo x, x) -> 0 + no borrow
3070  if (N0 == N1)
3071  return CombineTo(N, DAG.getConstant(0, DL, VT),
3072  DAG.getConstant(0, DL, CarryVT));
3073 
3074  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3075 
3076  // fold (subo x, c) -> (addo x, -c)
3077  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3078  return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3079  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3080  }
3081 
3082  // fold (subo x, 0) -> x + no borrow
3083  if (isNullOrNullSplat(N1))
3084  return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3085 
3086  // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3087  if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3088  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3089  DAG.getConstant(0, DL, CarryVT));
3090 
3091  return SDValue();
3092 }
3093 
3094 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3095  SDValue N0 = N->getOperand(0);
3096  SDValue N1 = N->getOperand(1);
3097  SDValue CarryIn = N->getOperand(2);
3098 
3099  // fold (sube x, y, false) -> (subc x, y)
3100  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3101  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3102 
3103  return SDValue();
3104 }
3105 
3106 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3107  SDValue N0 = N->getOperand(0);
3108  SDValue N1 = N->getOperand(1);
3109  SDValue CarryIn = N->getOperand(2);
3110 
3111  // fold (subcarry x, y, false) -> (usubo x, y)
3112  if (isNullConstant(CarryIn)) {
3113  if (!LegalOperations ||
3114  TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3115  return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3116  }
3117 
3118  return SDValue();
3119 }
3120 
3121 SDValue DAGCombiner::visitMUL(SDNode *N) {
3122  SDValue N0 = N->getOperand(0);
3123  SDValue N1 = N->getOperand(1);
3124  EVT VT = N0.getValueType();
3125 
3126  // fold (mul x, undef) -> 0
3127  if (N0.isUndef() || N1.isUndef())
3128  return DAG.getConstant(0, SDLoc(N), VT);
3129 
3130  bool N0IsConst = false;
3131  bool N1IsConst = false;
3132  bool N1IsOpaqueConst = false;
3133  bool N0IsOpaqueConst = false;
3134  APInt ConstValue0, ConstValue1;
3135  // fold vector ops
3136  if (VT.isVector()) {
3137  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3138  return FoldedVOp;
3139 
3140  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3141  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3142  assert((!N0IsConst ||
3143  ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3144  "Splat APInt should be element width");
3145  assert((!N1IsConst ||
3146  ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3147  "Splat APInt should be element width");
3148  } else {
3149  N0IsConst = isa<ConstantSDNode>(N0);
3150  if (N0IsConst) {
3151  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3152  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3153  }
3154  N1IsConst = isa<ConstantSDNode>(N1);
3155  if (N1IsConst) {
3156  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3157  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3158  }
3159  }
3160 
3161  // fold (mul c1, c2) -> c1*c2
3162  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3163  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3164  N0.getNode(), N1.getNode());
3165 
3166  // canonicalize constant to RHS (vector doesn't have to splat)
3167  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3168  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3169  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3170  // fold (mul x, 0) -> 0
3171  if (N1IsConst && ConstValue1.isNullValue())
3172  return N1;
3173  // fold (mul x, 1) -> x
3174  if (N1IsConst && ConstValue1.isOneValue())
3175  return N0;
3176 
3177  if (SDValue NewSel = foldBinOpIntoSelect(N))
3178  return NewSel;
3179 
3180  // fold (mul x, -1) -> 0-x
3181  if (N1IsConst && ConstValue1.isAllOnesValue()) {
3182  SDLoc DL(N);
3183  return DAG.getNode(ISD::SUB, DL, VT,
3184  DAG.getConstant(0, DL, VT), N0);
3185  }
3186  // fold (mul x, (1 << c)) -> x << c
3187  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3188  DAG.isKnownToBeAPowerOfTwo(N1) &&
3189  (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3190  SDLoc DL(N);
3191  SDValue LogBase2 = BuildLogBase2(N1, DL);
3192  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3193  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3194  return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3195  }
3196  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3197  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3198  unsigned Log2Val = (-ConstValue1).logBase2();
3199  SDLoc DL(N);
3200  // FIXME: If the input is something that is easily negated (e.g. a
3201  // single-use add), we should put the negate there.
3202  return DAG.getNode(ISD::SUB, DL, VT,
3203  DAG.getConstant(0, DL, VT),
3204  DAG.getNode(ISD::SHL, DL, VT, N0,
3205  DAG.getConstant(Log2Val, DL,
3206  getShiftAmountTy(N0.getValueType()))));
3207  }
3208 
3209  // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3210  // mul x, (2^N + 1) --> add (shl x, N), x
3211  // mul x, (2^N - 1) --> sub (shl x, N), x
3212  // Examples: x * 33 --> (x << 5) + x
3213  // x * 15 --> (x << 4) - x
3214  // x * -33 --> -((x << 5) + x)
3215  // x * -15 --> -((x << 4) - x) ; this reduces to x - (x << 4)
3216  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3217  // TODO: We could handle more general decomposition of any constant by
3218  // having the target set a limit on number of ops and making a
3219  // callback to determine that sequence (similar to sqrt expansion).
3220  unsigned MathOp = ISD::DELETED_NODE;
3221  APInt MulC = ConstValue1.abs();
3222  if ((MulC - 1).isPowerOf2())
3223  MathOp = ISD::ADD;
3224  else if ((MulC + 1).isPowerOf2())
3225  MathOp = ISD::SUB;
3226 
3227  if (MathOp != ISD::DELETED_NODE) {
3228  unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3229  : (MulC + 1).logBase2();
3230  assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3231  "Not expecting multiply-by-constant that could have simplified");
3232  SDLoc DL(N);
3233  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3234  DAG.getConstant(ShAmt, DL, VT));
3235  SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3236  if (ConstValue1.isNegative())
3237  R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3238  return R;
3239  }
3240  }
3241 
3242  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3243  if (N0.getOpcode() == ISD::SHL &&
3244  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3245  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3246  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3247  AddToWorklist(C3.getNode());
3248  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3249  }
3250 
3251  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3252  // use.
3253  {
3254  SDValue Sh(nullptr, 0), Y(nullptr, 0);
3255 
3256  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3257  if (N0.getOpcode() == ISD::SHL &&
3258  isConstantOrConstantVector(N0.getOperand(1)) &&
3259  N0.getNode()->hasOneUse()) {
3260  Sh = N0; Y = N1;
3261  } else if (N1.getOpcode() == ISD::SHL &&
3262  isConstantOrConstantVector(N1.getOperand(1)) &&
3263  N1.getNode()->hasOneUse()) {
3264  Sh = N1; Y = N0;
3265  }
3266 
3267  if (Sh.getNode()) {
3268  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3269  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3270  }
3271  }
3272 
3273  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3274  if (isConstantOrConstantVector(N1) &&
3275  N0.getOpcode() == ISD::ADD &&
3276  isConstantOrConstantVector(N0.getOperand(1)) &&
3277  isMulAddWithConstProfitable(N, N0, N1))
3278  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3279  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3280  N0.getOperand(0), N1),
3281  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3282  N0.getOperand(1), N1));
3283 
3284  // reassociate mul
3285  if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3286  return RMUL;
3287 
3288  return SDValue();
3289 }
3290 
3291 /// Return true if divmod libcall is available.
3292 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3293  const TargetLowering &TLI) {
3294  RTLIB::Libcall LC;
3295  EVT NodeType = Node->getValueType(0);
3296  if (!NodeType.isSimple())
3297  return false;
3298  switch (NodeType.getSimpleVT().SimpleTy) {
3299  default: return false; // No libcall for vector types.
3300  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3301  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3302  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3303  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3304  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3305  }
3306 
3307  return TLI.getLibcallName(LC) != nullptr;
3308 }
3309 
3310 /// Issue divrem if both quotient and remainder are needed.
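/// For example, if both (sdiv x, y) and (srem x, y) are live, they can be
/// replaced by the two results of a single (sdivrem x, y) node.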
3311 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3312  if (Node->use_empty())
3313  return SDValue(); // This is a dead node, leave it alone.
3314 
3315  unsigned Opcode = Node->getOpcode();
3316  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3317  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3318 
3319  // DivMod lib calls can still work on non-legal types if using lib-calls.
3320  EVT VT = Node->getValueType(0);
3321  if (VT.isVector() || !VT.isInteger())
3322  return SDValue();
3323 
3324  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3325  return SDValue();
3326 
3327  // If DIVREM is going to get expanded into a libcall,
3328  // but there is no libcall available, then don't combine.
3329  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3330  !isDivRemLibcallAvailable(Node, isSigned, TLI))
3331  return SDValue();
3332 
3333  // If div is legal, it's better to do the normal expansion
3334  unsigned OtherOpcode = 0;
3335  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3336  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3337  if (TLI.isOperationLegalOrCustom(Opcode, VT))
3338  return SDValue();
3339  } else {
3340  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3341  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3342  return SDValue();
3343  }
3344 
3345  SDValue Op0 = Node->getOperand(0);
3346  SDValue Op1 = Node->getOperand(1);
3347  SDValue combined;
3348  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3349  UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3350  SDNode *User = *UI;
3351  if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3352  User->use_empty())
3353  continue;
3354  // Convert the other matching node(s), too;
3355  // otherwise, the DIVREM may get target-legalized into something
3356  // target-specific that we won't be able to recognize.
3357  unsigned UserOpc = User->getOpcode();
3358  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3359  User->getOperand(0) == Op0 &&
3360  User->getOperand(1) == Op1) {
3361  if (!combined) {
3362  if (UserOpc == OtherOpcode) {
3363  SDVTList VTs = DAG.getVTList(VT, VT);
3364  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3365  } else if (UserOpc == DivRemOpc) {
3366  combined = SDValue(User, 0);
3367  } else {
3368  assert(UserOpc == Opcode);
3369  continue;
3370  }
3371  }
3372  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3373  CombineTo(User, combined);
3374  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3375  CombineTo(User, combined.getValue(1));
3376  }
3377  }
3378  return combined;
3379 }
3380 
3381 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3382  SDValue N0 = N->getOperand(0);
3383  SDValue N1 = N->getOperand(1);
3384  EVT VT = N->getValueType(0);
3385  SDLoc DL(N);
3386 
3387  unsigned Opc = N->getOpcode();
3388  bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3389  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3390 
3391  // X / undef -> undef
3392  // X % undef -> undef
3393  // X / 0 -> undef
3394  // X % 0 -> undef
3395  // NOTE: This includes vectors where any divisor element is zero/undef.
3396  if (DAG.isUndef(Opc, {N0, N1}))
3397  return DAG.getUNDEF(VT);
3398 
3399  // undef / X -> 0
3400  // undef % X -> 0
3401  if (N0.isUndef())
3402  return DAG.getConstant(0, DL, VT);
3403 
3404  // 0 / X -> 0
3405  // 0 % X -> 0
3406  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3407  if (N0C && N0C->isNullValue())
3408  return N0;
3409 
3410  // X / X -> 1
3411  // X % X -> 0
3412  if (N0 == N1)
3413  return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3414 
3415  // X / 1 -> X
3416  // X % 1 -> 0
3417  // If this is a boolean op (single-bit element type), we can't have
3418  // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3419  // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3420  // it's a 1.
3421  if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3422  return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3423 
3424  return SDValue();
3425 }
3426 
3427 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3428  SDValue N0 = N->getOperand(0);
3429  SDValue N1 = N->getOperand(1);
3430  EVT VT = N->getValueType(0);
3431  EVT CCVT = getSetCCResultType(VT);
3432 
3433  // fold vector ops
3434  if (VT.isVector())
3435  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3436  return FoldedVOp;
3437 
3438  SDLoc DL(N);
3439 
3440  // fold (sdiv c1, c2) -> c1/c2
3441  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3442  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3443  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3444  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3445  // fold (sdiv X, -1) -> 0-X
3446  if (N1C && N1C->isAllOnesValue())
3447  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3448  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3449  if (N1C && N1C->getAPIntValue().isMinSignedValue())
3450  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3451  DAG.getConstant(1, DL, VT),
3452  DAG.getConstant(0, DL, VT));
3453 
3454  if (SDValue V = simplifyDivRem(N, DAG))
3455  return V;
3456 
3457  if (SDValue NewSel = foldBinOpIntoSelect(N))
3458  return NewSel;
3459 
3460  // If we know the sign bits of both operands are zero, strength reduce to a
3461  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
3462  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3463  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3464 
3465  if (SDValue V = visitSDIVLike(N0, N1, N)) {
3466  // If the corresponding remainder node exists, update its users with
3467  // (Dividend - (Quotient * Divisor)).
3468  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3469  { N0, N1 })) {
3470  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3471  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3472  AddToWorklist(Mul.getNode());
3473  AddToWorklist(Sub.getNode());
3474  CombineTo(RemNode, Sub);
3475  }
3476  return V;
3477  }
3478 
3479  // sdiv, srem -> sdivrem
3480  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3481  // true. Otherwise, we break the simplification logic in visitREM().
3482  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3483  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3484  if (SDValue DivRem = useDivRem(N))
3485  return DivRem;
3486 
3487  return SDValue();
3488 }
3489 
3490 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3491  SDLoc DL(N);
3492  EVT VT = N->getValueType(0);
3493  EVT CCVT = getSetCCResultType(VT);
3494  unsigned BitWidth = VT.getScalarSizeInBits();
3495 
3496  // Helper for determining whether a value is a power-2 constant scalar or a
3497  // vector of such elements.
3498  auto IsPowerOfTwo = [](ConstantSDNode *C) {
3499  if (C->isNullValue() || C->isOpaque())
3500  return false;
3501  if (C->getAPIntValue().isPowerOf2())
3502  return true;
3503  if ((-C->getAPIntValue()).isPowerOf2())
3504  return true;
3505  return false;
3506  };
3507 
3508  // fold (sdiv X, pow2) -> simple ops after legalize
3509  // FIXME: We check for the exact bit here because the generic lowering gives
3510  // better results in that case. The target-specific lowering should learn how
3511  // to handle exact sdivs efficiently.
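 // For example, for (sdiv i32 x, 8) the generic sequence below amounts to:
 //   sra (add x, (srl (sra x, 31), 29)), 3
 // i.e. 7 is added to x only when x is negative, before the arithmetic shift.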
3512  if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3513  // Target-specific implementation of sdiv x, pow2.
3514  if (SDValue Res = BuildSDIVPow2(N))
3515  return Res;
3516 
3517  // Create constants that are functions of the shift amount value.
3518  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3519  SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3520  SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3521  C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3522  SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3523  if (!isConstantOrConstantVector(Inexact))
3524  return SDValue();
3525 
3526  // Splat the sign bit into the register
3527  SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3528  DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3529  AddToWorklist(Sign.getNode());
3530 
3531  // Add (N0 < 0) ? abs2 - 1 : 0;
3532  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3533  AddToWorklist(Srl.getNode());
3534  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3535  AddToWorklist(Add.getNode());
3536  SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3537  AddToWorklist(Sra.getNode());
3538 
3539  // Special case: (sdiv X, 1) -> X
3540  // Special Case: (sdiv X, -1) -> 0-X
3541  SDValue One = DAG.getConstant(1, DL, VT);
3542  SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3543  SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3544  SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3545  SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3546  Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3547 
3548  // If dividing by a positive value, we're done. Otherwise, the result must
3549  // be negated.
3550  SDValue Zero = DAG.getConstant(0, DL, VT);
3551  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3552 
3553  // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3554  SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3555  SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3556  return Res;
3557  }
3558 
3559  // If integer divide is expensive and we satisfy the requirements, emit an
3560  // alternate sequence. Targets may check function attributes for size/speed
3561  // trade-offs.
3562  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3563  if (isConstantOrConstantVector(N1) &&
3564  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3565  if (SDValue Op = BuildSDIV(N))
3566  return Op;
3567 
3568  return SDValue();
3569 }
3570 
3571 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3572  SDValue N0 = N->getOperand(0);
3573  SDValue N1 = N->getOperand(1);
3574  EVT VT = N->getValueType(0);
3575  EVT CCVT = getSetCCResultType(VT);
3576 
3577  // fold vector ops
3578  if (VT.isVector())
3579  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3580  return FoldedVOp;
3581 
3582  SDLoc DL(N);
3583 
3584  // fold (udiv c1, c2) -> c1/c2
3585  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3586  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3587  if (N0C && N1C)
3588  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3589  N0C, N1C))
3590  return Folded;
3591  // fold (udiv X, -1) -> select(X == -1, 1, 0)
3592  if (N1C && N1C->getAPIntValue().isAllOnesValue())
3593  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3594  DAG.getConstant(1, DL, VT),
3595  DAG.getConstant(0, DL, VT));
3596 
3597  if (SDValue V = simplifyDivRem(N, DAG))
3598  return V;
3599 
3600  if (SDValue NewSel = foldBinOpIntoSelect(N))
3601  return NewSel;
3602 
3603  if (SDValue V = visitUDIVLike(N0, N1, N)) {
3604  // If the corresponding remainder node exists, update its users with
3605  // (Dividend - (Quotient * Divisor)).
3606  if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3607  { N0, N1 })) {
3608  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3609  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3610  AddToWorklist(Mul.getNode());
3611  AddToWorklist(Sub.getNode());
3612  CombineTo(RemNode, Sub);
3613  }
3614  return V;
3615  }
3616 
3617  // udiv, urem -> udivrem
3618  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3619  // true. Otherwise, we break the simplification logic in visitREM().
3620  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3621  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3622  if (SDValue DivRem = useDivRem(N))
3623  return DivRem;
3624 
3625  return SDValue();
3626 }
3627 
3628 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3629  SDLoc DL(N);
3630  EVT VT = N->getValueType(0);
3631 
3632  // fold (udiv x, (1 << c)) -> x >>u c
3633  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3634  DAG.isKnownToBeAPowerOfTwo(N1)) {
3635  SDValue LogBase2 = BuildLogBase2(N1, DL);
3636  AddToWorklist(LogBase2.getNode());
3637 
3638  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3639  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3640  AddToWorklist(Trunc.getNode());
3641  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3642  }
3643 
3644  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3645  if (N1.getOpcode() == ISD::SHL) {
3646  SDValue N10 = N1.getOperand(0);
3647  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3648  DAG.isKnownToBeAPowerOfTwo(N10)) {
3649  SDValue LogBase2 = BuildLogBase2(N10, DL);
3650  AddToWorklist(LogBase2.getNode());
3651 
3652  EVT ADDVT = N1.getOperand(1).getValueType();
3653  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3654  AddToWorklist(Trunc.getNode());
3655  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3656  AddToWorklist(Add.getNode());
3657  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3658  }
3659  }
3660 
3661  // fold (udiv x, c) -> alternate
3662  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3663  if (isConstantOrConstantVector(N1) &&
3664  !TLI.isIntDivCheap(N->getValueType(0), Attr))
3665  if (SDValue Op = BuildUDIV(N))
3666  return Op;
3667 
3668  return SDValue();
3669 }
3670 
3671 // handles ISD::SREM and ISD::UREM
3672 SDValue DAGCombiner::visitREM(SDNode *N) {
3673  unsigned Opcode = N->getOpcode();
3674  SDValue N0 = N->getOperand(0);
3675  SDValue N1 = N->getOperand(1);
3676  EVT VT = N->getValueType(0);
3677  EVT CCVT = getSetCCResultType(VT);
3678 
3679  bool isSigned = (Opcode == ISD::SREM);
3680  SDLoc DL(N);
3681 
3682  // fold (rem c1, c2) -> c1%c2
3683  ConstantSDNode *N0C = isConstOrConstSplat(N0);
3684  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3685  if (N0C && N1C)
3686  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3687  return Folded;
3688  // fold (urem X, -1) -> select(X == -1, 0, x)
3689  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3690  return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3691  DAG.getConstant(0, DL, VT), N0);
3692 
3693  if (SDValue V = simplifyDivRem(N, DAG))
3694  return V;
3695 
3696  if (SDValue NewSel = foldBinOpIntoSelect(N))
3697  return NewSel;
3698 
3699  if (isSigned) {
3700  // If we know the sign bits of both operands are zero, strength reduce to a
3701  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3702  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3703  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3704  } else {
3705  SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3706  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3707  // fold (urem x, pow2) -> (and x, pow2-1)
3708  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3709  AddToWorklist(Add.getNode());
3710  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3711  }
3712  if (N1.getOpcode() == ISD::SHL &&
3713  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3714  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3715  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3716  AddToWorklist(Add.getNode());
3717  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3718  }
3719  }
3720 
3721  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3722 
3723  // If X/C can be simplified by the division-by-constant logic, lower
3724  // X%C to the equivalent of X-X/C*C.
3725  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3726  // speculative DIV must not cause a DIVREM conversion. We guard against this
3727  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3728  // combine will not return a DIVREM. Regardless, checking cheapness here
3729  // makes sense since the simplification results in fatter code.
3730  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3731  SDValue OptimizedDiv =
3732  isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3733  if (OptimizedDiv.getNode()) {
3734  // If the equivalent Div node also exists, update its users.
3735  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3736  if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3737  { N0, N1 }))
3738  CombineTo(DivNode, OptimizedDiv);
3739  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3740  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3741  AddToWorklist(OptimizedDiv.getNode());
3742  AddToWorklist(Mul.getNode());
3743  return Sub;
3744  }
3745  }
3746 
3747  // sdiv, srem -> sdivrem
3748  if (SDValue DivRem = useDivRem(N))
3749  return DivRem.getValue(1);
3750 
3751  return SDValue();
3752 }
3753 
3754 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3755  SDValue N0 = N->getOperand(0);
3756  SDValue N1 = N->getOperand(1);
3757  EVT VT = N->getValueType(0);
3758  SDLoc DL(N);
3759 
3760  if (VT.isVector()) {
3761  // fold (mulhs x, 0) -> 0
3762  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3763  return N1;
3764  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3765  return N0;
3766  }
3767 
3768  // fold (mulhs x, 0) -> 0
3769  if (isNullConstant(N1))
3770  return N1;
3771  // fold (mulhs x, 1) -> (sra x, size(x)-1)
3772  if (isOneConstant(N1))
3773  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3774  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3775  getShiftAmountTy(N0.getValueType())));
3776 
3777  // fold (mulhs x, undef) -> 0
3778  if (N0.isUndef() || N1.isUndef())
3779  return DAG.getConstant(0, DL, VT);
3780 
3781  // If the type twice as wide is legal, transform the mulhs to a wider multiply
3782  // plus a shift.
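 // For example, if the i32 multiply is legal, (mulhs i16 a, b) becomes
 // (trunc (srl (mul (sext a), (sext b)), 16)).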
3783  if (VT.isSimple() && !VT.isVector()) {
3784  MVT Simple = VT.getSimpleVT();
3785  unsigned SimpleSize = Simple.getSizeInBits();
3786  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3787  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3788  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3789  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3790  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3791  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3792  DAG.getConstant(SimpleSize, DL,
3793  getShiftAmountTy(N1.getValueType())));
3794  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3795  }
3796  }
3797 
3798  return SDValue();
3799 }
3800 
3801 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3802  SDValue N0 = N->getOperand(0);
3803  SDValue N1 = N->getOperand(1);
3804  EVT VT = N->getValueType(0);
3805  SDLoc DL(N);
3806 
3807  if (VT.isVector()) {
3808  // fold (mulhu x, 0) -> 0
3809  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3810  return N1;
3811  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3812  return N0;
3813  }
3814 
3815  // fold (mulhu x, 0) -> 0
3816  if (isNullConstant(N1))
3817  return N1;
3818  // fold (mulhu x, 1) -> 0
3819  if (isOneConstant(N1))
3820  return DAG.getConstant(0, DL, N0.getValueType());
3821  // fold (mulhu x, undef) -> 0
3822  if (N0.isUndef() || N1.isUndef())
3823  return DAG.getConstant(0, DL, VT);
3824 
3825  // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
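 // For example, with i32 operands, (mulhu x, 16) becomes (srl x, 28).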
3826  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3827  DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3828  unsigned NumEltBits = VT.getScalarSizeInBits();
3829  SDValue LogBase2 = BuildLogBase2(N1, DL);
3830  SDValue SRLAmt = DAG.getNode(
3831  ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3832  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3833  SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3834  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3835  }
3836 
3837  // If the type twice as wide is legal, transform the mulhu to a wider multiply
3838  // plus a shift.
3839  if (VT.isSimple() && !VT.isVector()) {
3840  MVT Simple = VT.getSimpleVT();
3841  unsigned SimpleSize = Simple.getSizeInBits();
3842  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3843  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3844  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3845  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3846  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3847  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3848  DAG.getConstant(SimpleSize, DL,
3849  getShiftAmountTy(N1.getValueType())));
3850  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3851  }
3852  }
3853 
3854  return SDValue();
3855 }
3856 
3857 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3858 /// give the opcodes for the two computations that are being performed. Return
3859 /// the combined result if a simplification was made.
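/// For example, an SMUL_LOHI whose high result is unused can be replaced by a
/// plain MUL of the same operands.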
3860 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3861  unsigned HiOp) {
3862  // If the high half is not needed, just compute the low half.
3863  bool HiExists = N->hasAnyUseOfValue(1);
3864  if (!HiExists && (!LegalOperations ||
3865  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3866  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3867  return CombineTo(N, Res, Res);
3868  }
3869 
3870  // If the low half is not needed, just compute the high half.
3871  bool LoExists = N->hasAnyUseOfValue(0);
3872  if (!LoExists && (!LegalOperations ||
3873  TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3874  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3875  return CombineTo(N, Res, Res);
3876  }
3877 
3878  // If both halves are used, return as it is.
3879  if (LoExists && HiExists)
3880  return SDValue();
3881 
3882  // If the two computed results can be simplified separately, separate them.
3883  if (LoExists) {
3884  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3885  AddToWorklist(Lo.getNode());
3886  SDValue LoOpt = combine(Lo.getNode());
3887  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3888  (!LegalOperations ||
3889  TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3890  return CombineTo(N, LoOpt, LoOpt);
3891  }
3892 
3893  if (HiExists) {
3894  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3895  AddToWorklist(Hi.getNode());
3896  SDValue HiOpt = combine(Hi.getNode());
3897  if (HiOpt.getNode() && HiOpt != Hi &&
3898  (!LegalOperations ||
3899  TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3900  return CombineTo(N, HiOpt, HiOpt);
3901  }
3902 
3903  return SDValue();
3904 }
3905 
3906 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3907  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3908  return Res;
3909 
3910  EVT VT = N->getValueType(0);
3911  SDLoc DL(N);
3912 
3913  // If the type twice as wide is legal, transform the multiply to a wider
3914  // multiply plus a shift.
3915  if (VT.isSimple() && !VT.isVector()) {
3916  MVT Simple = VT.getSimpleVT();
3917  unsigned SimpleSize = Simple.getSizeInBits();
3918  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3919  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3920  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3921  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3922  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3923  // Compute the high part as N1.
3924  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3925  DAG.getConstant(SimpleSize, DL,
3926  getShiftAmountTy(Lo.getValueType())));
3927  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3928  // Compute the low part as N0.
3929  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3930  return CombineTo(N, Lo, Hi);
3931  }
3932  }
3933 
3934  return SDValue();
3935 }
3936 
3937 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3938  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3939  return Res;
3940 
3941  EVT VT = N->getValueType(0);
3942  SDLoc DL(N);
3943 
3944  // If the type twice as wide is legal, transform the multiply to a wider
3945  // multiply plus a shift.
3946  if (VT.isSimple() && !VT.isVector()) {
3947  MVT Simple = VT.getSimpleVT();
3948  unsigned SimpleSize = Simple.getSizeInBits();
3949  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3950  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3951  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3952  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3953  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3954  // Compute the high part as N1.
3955  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3956  DAG.getConstant(SimpleSize, DL,
3957  getShiftAmountTy(Lo.getValueType())));
3958  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3959  // Compute the low part as N0.
3960  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3961  return CombineTo(N, Lo, Hi);
3962  }
3963  }
3964 
3965  return SDValue();
3966 }
3967 
3968 SDValue DAGCombiner::visitMULO(SDNode *N) {
3969  bool IsSigned = (ISD::SMULO == N->getOpcode());
3970 
3971  // (mulo x, 2) -> (addo x, x)
3972  if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
3973  if (C2->getAPIntValue() == 2)
3974  return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
3975  N->getVTList(), N->getOperand(0), N->getOperand(0));
3976 
3977  return SDValue();
3978 }
3979 
3980 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3981  SDValue N0 = N->getOperand(0);
3982  SDValue N1 = N->getOperand(1);
3983  EVT VT = N0.getValueType();
3984 
3985  // fold vector ops
3986  if (VT.isVector())
3987  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3988  return FoldedVOp;
3989 
3990  // fold operation with constant operands.
3991  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3992  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3993  if (N0C && N1C)
3994  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3995 
3996  // canonicalize constant to RHS
3997  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3998  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3999  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4000 
4001  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4002  // Only do this if the current op isn't legal and the flipped is.
4003  unsigned Opcode = N->getOpcode();
4004  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4005  if (!TLI.isOperationLegal(Opcode, VT) &&
4006  (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4007  (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4008  unsigned AltOpcode;
4009  switch (Opcode) {
4010  case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4011  case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4012  case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4013  case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4014  default: llvm_unreachable("Unknown MINMAX opcode");
4015  }
4016  if (TLI.isOperationLegal(AltOpcode, VT))
4017  return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4018  }
4019 
4020  return SDValue();
4021 }
4022 
4023 /// If this is a bitwise logic instruction and both operands have the same
4024 /// opcode, try to sink the other opcode after the logic instruction.
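/// For example, (and (zext x), (zext y)) can become (zext (and x, y)), subject
/// to the use-count and legality checks below.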
4025 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4026  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4027  EVT VT = N0.getValueType();
4028  unsigned LogicOpcode = N->getOpcode();
4029  unsigned HandOpcode = N0.getOpcode();
4030  assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4031  LogicOpcode == ISD::XOR) && "Expected logic opcode");
4032  assert(HandOpcode == N1.getOpcode() && "Bad input!");
4033 
4034  // Bail early if none of these transforms apply.
4035  if (N0.getNumOperands() == 0)
4036  return SDValue();
4037 
4038  // FIXME: We should check number of uses of the operands to not increase
4039  // the instruction count for all transforms.
4040 
4041  // Handle size-changing casts.
4042  SDValue X = N0.getOperand(0);
4043  SDValue Y = N1.getOperand(0);
4044  EVT XVT = X.getValueType();
4045  SDLoc DL(N);
4046  if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4047  HandOpcode == ISD::SIGN_EXTEND) {
4048  // If both operands have other uses, this transform would create extra
4049  // instructions without eliminating anything.
4050  if (!N0.hasOneUse() && !N1.hasOneUse())
4051  return SDValue();
4052  // We need matching integer source types.
4053  if (XVT != Y.getValueType())
4054  return SDValue();
4055  // Don't create an illegal op during or after legalization. Don't ever
4056  // create an unsupported vector op.
4057  if ((VT.isVector() || LegalOperations) &&
4058  !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4059  return SDValue();
4060  // Avoid infinite looping with PromoteIntBinOp.
4061  // TODO: Should we apply desirable/legal constraints to all opcodes?
4062  if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4063  !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4064  return SDValue();
4065  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4066  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4067  return DAG.getNode(HandOpcode, DL, VT, Logic);
4068  }
4069 
4070  // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4071  if (HandOpcode == ISD::TRUNCATE) {
4072  // If both operands have other uses, this transform would create extra
4073  // instructions without eliminating anything.
4074  if (!N0.hasOneUse() && !N1.hasOneUse())
4075  return SDValue();
4076  // We need matching source types.
4077  if (XVT != Y.getValueType())
4078  return SDValue();
4079  // Don't create an illegal op during or after legalization.
4080  if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4081  return SDValue();
4082  // Be extra careful sinking truncate. If it's free, there's no benefit in
4083  // widening a binop. Also, don't create a logic op on an illegal type.
4084  if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4085  return SDValue();
4086  if (!TLI.isTypeLegal(XVT))
4087  return SDValue();
4088  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4089  return DAG.getNode(HandOpcode, DL, VT, Logic);
4090  }
4091 
4092  // For binops SHL/SRL/SRA/AND:
4093  // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4094  if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4095  HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4096  N0.getOperand(1) == N1.getOperand(1)) {
4097  // If either operand has other uses, this transform is not an improvement.
4098  if (!N0.hasOneUse() || !N1.hasOneUse())
4099  return SDValue();
4100  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4101  return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4102  }
4103 
4104  // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4105  if (HandOpcode == ISD::BSWAP) {
4106  // If either operand has other uses, this transform is not an improvement.
4107  if (!N0.hasOneUse() || !N1.hasOneUse())
4108  return SDValue();
4109  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4110  return DAG.getNode(HandOpcode, DL, VT, Logic);
4111  }
4112 
4113  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4114  // Only perform this optimization up until type legalization, before
4115  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4116  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4117  // we don't want to undo this promotion.
4118  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4119  // on scalars.
4120  if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4121  Level <= AfterLegalizeTypes) {
4122  // Input types must be integer and the same.
4123  if (XVT.isInteger() && XVT == Y.getValueType()) {
4124  SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4125  return DAG.getNode(HandOpcode, DL, VT, Logic);
4126  }
4127  }
4128 
4129  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4130  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4131  // If both shuffles use the same mask, and both shuffle within a single
4132  // vector, then it is worthwhile to move the swizzle after the operation.
4133  // The type-legalizer generates this pattern when loading illegal
4134  // vector types from memory. In many cases this allows additional shuffle
4135  // optimizations.
4136  // There are other cases where moving the shuffle after the xor/and/or
4137  // is profitable even if shuffles don't perform a swizzle.
4138  // If both shuffles use the same mask, and both shuffles have the same first
4139  // or second operand, then it might still be profitable to move the shuffle
4140  // after the xor/and/or operation.
4141  if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4142  auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4143  auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4144  assert(X.getValueType() == Y.getValueType() &&
4145  "Inputs to shuffles are not the same type");
4146 
4147  // Check that both shuffles use the same mask. The masks are known to be of
4148  // the same length because the result vector type is the same.
4149  // Check also that shuffles have only one use to avoid introducing extra
4150  // instructions.
4151  if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4152  !SVN0->getMask().equals(SVN1->getMask()))
4153  return SDValue();
4154 
4155  // Don't try to fold this node if it requires introducing a
4156  // build vector of all zeros that might be illegal at this stage.
4157  SDValue ShOp = N0.getOperand(1);
4158  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4159  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4160 
4161  // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4162  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4163  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4164  N0.getOperand(0), N1.getOperand(0));
4165  return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4166  }
4167 
4168  // Don't try to fold this node if it requires introducing a
4169  // build vector of all zeros that might be illegal at this stage.
4170  ShOp = N0.getOperand(0);
4171  if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4172  ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4173 
4174  // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4175  if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4176  SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4177  N1.getOperand(1));
4178  return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4179  }
4180  }
4181 
4182  return SDValue();
4183 }
4184 
4185 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4186 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4187  const SDLoc &DL) {
4188  SDValue LL, LR, RL, RR, N0CC, N1CC;
4189  if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4190  !isSetCCEquivalent(N1, RL, RR, N1CC))
4191  return SDValue();
4192 
4193  assert(N0.getValueType() == N1.getValueType() &&
4194  "Unexpected operand types for bitwise logic op");
4195  assert(LL.getValueType() == LR.getValueType() &&
4196  RL.getValueType() == RR.getValueType() &&
4197  "Unexpected operand types for setcc");
4198 
4199  // If we're here post-legalization or the logic op type is not i1, the logic
4200  // op type must match a setcc result type. Also, all folds require new
4201  // operations on the left and right operands, so those types must match.
4202  EVT VT = N0.getValueType();
4203  EVT OpVT = LL.getValueType();
4204  if (LegalOperations || VT.getScalarType() != MVT::i1)
4205  if (VT != getSetCCResultType(OpVT))
4206  return SDValue();
4207  if (OpVT != RL.getValueType())
4208  return SDValue();
4209 
4210  ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4211  ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4212  bool IsInteger = OpVT.isInteger();
4213  if (LR == RR && CC0 == CC1 && IsInteger) {
4214  bool IsZero = isNullOrNullSplat(LR);
4215  bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4216 
4217  // All bits clear?
4218  bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4219  // All sign bits clear?
4220  bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4221  // Any bits set?
4222  bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4223  // Any sign bits set?
4224  bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4225 
4226  // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4227  // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4228  // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4229  // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4230  if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4231  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4232  AddToWorklist(Or.getNode());
4233  return DAG.getSetCC(DL, VT, Or, LR, CC1);
4234  }
4235 
4236  // All bits set?
4237  bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4238  // All sign bits set?
4239  bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4240  // Any bits clear?
4241  bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4242  // Any sign bits clear?
4243  bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4244 
4245  // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4246  // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4247  // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4248  // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4249  if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4250  SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4251  AddToWorklist(And.getNode());
4252  return DAG.getSetCC(DL, VT, And, LR, CC1);
4253  }
4254  }
4255 
4256  // TODO: What is the 'or' equivalent of this fold?
4257  // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4258  if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4259  IsInteger && CC0 == ISD::SETNE &&
4260  ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4261  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4262  SDValue One = DAG.getConstant(1, DL, OpVT);
4263  SDValue Two = DAG.getConstant(2, DL, OpVT);
4264  SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4265  AddToWorklist(Add.getNode());
4266  return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4267  }
4268 
4269  // Try more general transforms if the predicates match and the only user of
4270  // the compares is the 'and' or 'or'.
4271  if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4272  N0.hasOneUse() && N1.hasOneUse()) {
4273  // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4274  // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4275  if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4276  SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4277  SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4278  SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4279  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4280  return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4281  }
4282 
4283  // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4284  if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4285  // Match a shared variable operand and 2 non-opaque constant operands.
4286  ConstantSDNode *C0 = isConstOrConstSplat(LR);
4287  ConstantSDNode *C1 = isConstOrConstSplat(RR);
4288  if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4289  // Canonicalize larger constant as C0.
4290  if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4291  std::swap(C0, C1);
4292 
4293  // The difference of the constants must be a single bit.
4294  const APInt &C0Val = C0->getAPIntValue();
4295  const APInt &C1Val = C1->getAPIntValue();
4296  if ((C0Val - C1Val).isPowerOf2()) {
4297  // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4298  // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
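 // For example: (and (setne X, 2), (setne X, 3)) -->
 //              (setne (and (add X, -2), ~1), 0)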
4299  SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4300  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4301  SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4302  SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4303  SDValue Zero = DAG.getConstant(0, DL, OpVT);
4304  return DAG.getSetCC(DL, VT, And, Zero, CC0);
4305  }
4306  }
4307  }
4308  }
4309 
4310  // Canonicalize equivalent operands to LL == RL.
4311  if (LL == RR && LR == RL) {
4312  CC1 = ISD::getSetCCSwappedOperands(CC1);
4313  std::swap(RL, RR);
4314  }
4315 
4316  // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4317  // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4318  if (LL == RL && LR == RR) {
4319  ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4320  : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4321  if (NewCC != ISD::SETCC_INVALID &&
4322  (!LegalOperations ||
4323  (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4324  TLI.isOperationLegal(ISD::SETCC, OpVT))))
4325  return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4326  }
4327 
4328  return SDValue();
4329 }
4330 
4331 /// This contains all DAGCombine rules which reduce two values combined by
4332 /// an And operation to a single value. This makes them reusable in the context
4333 /// of visitSELECT(). Rules involving constants are not included as
4334 /// visitSELECT() already handles those cases.
4335 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4336  EVT VT = N1.getValueType();
4337  SDLoc DL(N);
4338 
4339  // fold (and x, undef) -> 0
4340  if (N0.isUndef() || N1.isUndef())
4341  return DAG.getConstant(0, DL, VT);
4342 
4343  if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4344  return V;
4345 
4346  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4347  VT.getSizeInBits() <= 64) {
4348  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4349  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4350  // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4351  // immediate for an add, but it is legal if its top c2 bits are set,
4352  // transform the ADD so the immediate doesn't need to be materialized
4353  // in a register.
4354  APInt ADDC = ADDI->getAPIntValue();
4355  APInt SRLC = SRLI->getAPIntValue();
4356  if (ADDC.getMinSignedBits() <= 64 &&
4357  SRLC.ult(VT.getSizeInBits()) &&
4358  !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4359  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4360  SRLC.getZExtValue());
4361  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4362  ADDC |= Mask;
4363  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4364  SDLoc DL0(N0);
4365  SDValue NewAdd =
4366  DAG.getNode(ISD::ADD, DL0, VT,
4367  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4368  CombineTo(N0.getNode(), NewAdd);
4369  // Return N so it doesn't get rechecked!
4370  return SDValue(N, 0);
4371  }
4372  }
4373  }
4374  }
4375  }
4376  }
4377 
4378  // Reduce bit extract of low half of an integer to the narrower type.
4379  // (and (srl i64:x, K), KMask) ->
4380  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
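 // For example, (and (srl i64:x, 8), 255) ->
 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), 8), 255)), when profitable.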
4381  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4382  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4383  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4384  unsigned Size = VT.getSizeInBits();
4385  const APInt &AndMask = CAnd->getAPIntValue();
4386  unsigned ShiftBits = CShift->getZExtValue();
4387 
4388  // Bail out, this node will probably disappear anyway.
4389  if (ShiftBits == 0)
4390  return SDValue();
4391 
4392  unsigned MaskBits = AndMask.countTrailingOnes();
4393  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4394 
4395  if (AndMask.isMask() &&
4396  // Required bits must not span the two halves of the integer and
4397  // must fit in the half size type.
4398  (ShiftBits + MaskBits <= Size / 2) &&
4399  TLI.isNarrowingProfitable(VT, HalfVT) &&
4400  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4401  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4402  TLI.isTruncateFree(VT, HalfVT) &&
4403  TLI.isZExtFree(HalfVT, VT)) {
4404  // The isNarrowingProfitable is to avoid regressions on PPC and
4405  // AArch64 which match a few 64-bit bit insert / bit extract patterns
4406  // on downstream users of this. Those patterns could probably be
4407  // extended to handle extensions mixed in.
4408 
4409  SDLoc SL(N0);
4410  assert(MaskBits <= Size);
4411 
4412  // Extracting the highest bit of the low half.
4413  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4414  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4415  N0.getOperand(0));
4416 
4417  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4418  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4419  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4420  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4421  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4422  }
4423  }
4424  }
4425  }
4426 
4427  return SDValue();
4428 }
4429 
4430 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4431  EVT LoadResultTy, EVT &ExtVT) {
4432  if (!AndC->getAPIntValue().isMask())
4433  return false;
4434 
4435  unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4436 
4437  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4438  EVT LoadedVT = LoadN->getMemoryVT();
4439 
4440  if (ExtVT == LoadedVT &&
4441  (!LegalOperations ||
4442  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4443  // ZEXTLOAD will match without needing to change the size of the value being
4444  // loaded.
4445  return true;
4446  }
4447 
4448  // Do not change the width of a volatile load.
4449  if (LoadN->isVolatile())
4450  return false;
4451 
4452  // Do not generate loads of non-round integer types since these can
4453  // be expensive (and would be wrong if the type is not byte sized).
4454  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4455  return false;
4456 
4457  if (LegalOperations &&
4458  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4459  return false;
4460 
4461  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4462  return false;
4463 
4464  return true;
4465 }
4466 
4467 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4468  ISD::LoadExtType ExtType, EVT &MemVT,
4469  unsigned ShAmt) {
4470  if (!LDST)
4471  return false;
4472  // Only allow byte offsets.
4473  if (ShAmt % 8)
4474  return false;
4475 
4476  // Do not generate loads of non-round integer types since these can
4477  // be expensive (and would be wrong if the type is not byte sized).
4478  if (!MemVT.isRound())
4479  return false;
4480 
4481  // Don't change the width of a volatile load.
4482  if (LDST->isVolatile())
4483  return false;
4484 
4485  // Verify that we are actually reducing a load width here.
4486  if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4487  return false;
4488 
4489  // Ensure that this isn't going to produce an unsupported unaligned access.
4490  if (ShAmt &&
4491  !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4492  LDST->getAddressSpace(), ShAmt / 8))
4493  return false;
4494 
4495  // It's not possible to generate a constant of extended or untyped type.
4496  EVT PtrType = LDST->getBasePtr().getValueType();
4497  if (PtrType == MVT::Untyped || PtrType.isExtended())
4498  return false;
4499 
4500  if (isa<LoadSDNode>(LDST)) {
4501  LoadSDNode *Load = cast<LoadSDNode>(LDST);
4502  // Don't transform one with multiple uses, this would require adding a new
4503  // load.
4504  if (!SDValue(Load, 0).hasOneUse())
4505  return false;
4506 
4507  if (LegalOperations &&
4508  !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4509  return false;
4510 
4511  // For the transform to be legal, the load must produce only two values
4512  // (the value loaded and the chain). Don't transform a pre-increment
4513  // load, for example, which produces an extra value. Otherwise the
4514  // transformation is not equivalent, and the downstream logic to replace
4515  // uses gets things wrong.
4516  if (Load->getNumValues() > 2)
4517  return false;
4518 
4519  // If the load that we're shrinking is an extload and we're not just
4520  // discarding the extension we can't simply shrink the load. Bail.
4521  // TODO: It would be possible to merge the extensions in some cases.
4522  if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4523  Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4524  return false;
4525 
4526  if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4527  return false;
4528  } else {
4529  assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4530  StoreSDNode *Store = cast<StoreSDNode>(LDST);
4531  // Can't write outside the original store
4532  if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4533  return false;
4534 
4535  if (LegalOperations &&
4536  !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4537  return false;
4538  }
4539  return true;
4540 }
4541 
4542 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4543  SmallVectorImpl<LoadSDNode*> &Loads,
4544  SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4545  ConstantSDNode *Mask,
4546  SDNode *&NodeToMask) {
4547  // Recursively search for the operands, looking for loads which can be
4548  // narrowed.
4549  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4550  SDValue Op = N->getOperand(i);
4551 
4552  if (Op.getValueType().isVector())
4553  return false;
4554 
4555  // Some constants may need fixing up later if they are too large.
4556  if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4557  if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4558  (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4559  NodesWithConsts.insert(N);
4560  continue;
4561  }
4562 
4563  if (!Op.hasOneUse())
4564  return false;
4565 
4566  switch(Op.getOpcode()) {
4567  case ISD::LOAD: {
4568  auto *Load = cast<LoadSDNode>(Op);
4569  EVT ExtVT;
4570  if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4571  isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4572 
4573  // ZEXTLOAD is already small enough.
4574  if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4575  ExtVT.bitsGE(Load->getMemoryVT()))
4576  continue;
4577 
4578  // Use LE to convert equal sized loads to zext.
4579  if (ExtVT.bitsLE(Load->getMemoryVT()))
4580  Loads.push_back(Load);
4581 
4582  continue;
4583  }
4584  return false;
4585  }
4586  case ISD::ZERO_EXTEND:
4587  case ISD::AssertZext: {
4588  unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4589  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4590  EVT VT = Op.getOpcode() == ISD::AssertZext ?
4591  cast<VTSDNode>(Op.getOperand(1))->getVT() :
4592  Op.getOperand(0).getValueType();
4593 
4594  // We can accept extending nodes if the mask is wider than, or equal in
4595  // width to, the original type.
4596  if (ExtVT.bitsGE(VT))
4597  continue;
4598  break;
4599  }
4600  case ISD::OR:
4601  case ISD::XOR:
4602  case ISD::AND:
4603  if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4604  NodeToMask))
4605  return false;
4606  continue;
4607  }
4608 
4609  // Allow one node which will be masked along with any loads found.
4610  if (NodeToMask)
4611  return false;
4612 
4613  // Also ensure that the node to be masked only produces one data result.
4614  NodeToMask = Op.getNode();
4615  if (NodeToMask->getNumValues() > 1) {
4616  bool HasValue = false;
4617  for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4618  MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4619  if (VT != MVT::Glue && VT != MVT::Other) {
4620  if (HasValue) {
4621  NodeToMask = nullptr;
4622  return false;
4623  }
4624  HasValue = true;
4625  }
4626  }
4627  assert(HasValue && "Node to be masked has no data result?");
4628  }
4629  }
4630  return true;
4631 }
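// Illustrative shape (a sketch, not from a specific test) of a tree this
// search accepts for Mask = 0xFF, assuming each interior node has a single
// use and the target allows narrowing the i32 load to an i8 zextload:
//
//   (and (xor (i32 (load [p])), (zero_extend (i8 (load [q])))), 0xFF)
//
// The i32 load is collected in 'Loads' so it can be shrunk, while the
// zero_extend is accepted as-is because it already produces zeros above the
// mask width.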
4632 
4633 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4634  auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4635  if (!Mask)
4636  return false;
4637 
4638  if (!Mask->getAPIntValue().isMask())
4639  return false;
4640 
4641  // No need to do anything if the AND directly uses a load.
4642  if (isa<LoadSDNode>(N->getOperand(0)))
4643  return false;
4644 
4645  SmallVector<LoadSDNode*, 8> Loads;
4646  SmallPtrSet<SDNode*, 2> NodesWithConsts;
4647  SDNode *FixupNode = nullptr;
4648  if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4649  if (Loads.size() == 0)
4650  return false;
4651 
4652  LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4653  SDValue MaskOp = N->getOperand(1);
4654 
4655  // If it exists, fix up the single node we allow in the tree that needs
4656  // masking.
4657  if (FixupNode) {
4658  LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4659  SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4660  FixupNode->getValueType(0),
4661  SDValue(FixupNode, 0), MaskOp);
4662  DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4663  if (And.getOpcode() == ISD::AND)
4664  DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4665  }
4666 
4667  // Narrow any constants that need it.
4668  for (auto *LogicN : NodesWithConsts) {
4669  SDValue Op0 = LogicN->getOperand(0);
4670  SDValue Op1 = LogicN->getOperand(1);
4671 
4672  if (isa<ConstantSDNode>(Op0))
4673  std::swap(Op0, Op1);
4674 
4675  SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4676  Op1, MaskOp);
4677 
4678  DAG.UpdateNodeOperands(LogicN, Op0, And);
4679  }
4680 
4681  // Create narrow loads.
4682  for (auto *Load : Loads) {
4683  LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4684  SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4685  SDValue(Load, 0), MaskOp);
4686  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4687  if (And.getOpcode() == ISD::AND)
4688  And = SDValue(
4689  DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4690  SDValue NewLoad = ReduceLoadWidth(And.getNode());
4691  assert(NewLoad &&
4692  "Shouldn't be masking the load if it can't be narrowed");
4693  CombineTo(Load, NewLoad, NewLoad.getValue(1));
4694  }
4695  DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4696  return true;
4697  }
4698  return false;
4699 }
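// The rewrite above is sound because AND distributes over the bitwise
// operators it walks through, so pushing the mask onto the leaves and then
// dropping the outer AND preserves the value. A minimal standalone sketch of
// the identity, assuming plain 32-bit unsigned values (not part of the
// combiner itself):
//
//   #include <cassert>
//   #include <cstdint>
//
//   void checkMaskDistributes(uint32_t a, uint32_t b, uint32_t m) {
//     assert((m & (a | b)) == ((m & a) | (m & b)));
//     assert((m & (a ^ b)) == ((m & a) ^ (m & b)));
//     assert((m & (a & b)) == ((m & a) & (m & b)));
//   }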
4700 
4701 // Unfold
4702 // x & (-1 'logical shift' y)
4703 // To
4704 // (x 'opposite logical shift' y) 'logical shift' y
4705 // if it is better for performance.
4706 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4707  assert(N->getOpcode() == ISD::AND);
4708 
4709  SDValue N0 = N->getOperand(0);
4710  SDValue N1 = N->getOperand(1);
4711 
4712  // Do we actually prefer shifts over a mask?
4713  if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4714  return SDValue();
4715 
4716  // Try to match (-1 '[outer] logical shift' y)
4717  unsigned OuterShift;
4718  unsigned InnerShift; // The opposite direction to the OuterShift.
4719  SDValue Y; // Shift amount.
4720  auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4721  if (!M.hasOneUse())
4722  return false;
4723  OuterShift = M->getOpcode();
4724  if (OuterShift == ISD::SHL)
4725  InnerShift = ISD::SRL;
4726  else if (OuterShift == ISD::SRL)
4727  InnerShift = ISD::SHL;
4728  else
4729  return false;
4730  if (!isAllOnesConstant(M->getOperand(0)))
4731  return false;
4732  Y = M->getOperand(1);
4733  return true;
4734  };
4735 
4736  SDValue X;
4737  if (matchMask(N1))
4738  X = N0;
4739  else if (matchMask(N0))
4740  X = N1;
4741  else
4742  return SDValue();
4743 
4744  SDLoc DL(N);
4745  EVT VT = N->getValueType(0);
4746 
4747  // tmp = x 'opposite logical shift' y
4748  SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4749  // ret = tmp 'logical shift' y
4750  SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4751 
4752  return T1;
4753 }
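// A minimal standalone sketch of the identities the unfold above relies on,
// assuming 32-bit unsigned values and a shift amount y < 32 (not part of the
// combiner itself):
//
//   #include <cassert>
//   #include <cstdint>
//
//   void checkUnfold(uint32_t x, uint32_t y) {
//     // Clearing the y low bits:  x & (-1 << y) == (x >> y) << y
//     assert((x & (~0u << y)) == ((x >> y) << y));
//     // Clearing the y high bits: x & (-1 >> y) == (x << y) >> y
//     assert((x & (~0u >> y)) == ((x << y) >> y));
//   }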
4754 
4755 SDValue DAGCombiner::visitAND(SDNode *N) {
4756  SDValue N0 = N->getOperand(0);
4757  SDValue N1 = N->getOperand(1);
4758  EVT VT = N1.getValueType();
4759 
4760  // x & x --> x
4761  if (N0 == N1)
4762  return N0;
4763 
4764  // fold vector ops
4765  if (VT.isVector()) {
4766  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4767  return FoldedVOp;
4768 
4769  // fold (and x, 0) -> 0, vector edition
4770  if (ISD::isBuildVectorAllZeros(N0.getNode()))
4771  // do not return N0, because an undef node may exist in N0
4772  return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4773  SDLoc(N), N0.getValueType());
4774  if (ISD::isBuildVectorAllZeros(N1.getNode()))
4775  // do not return N1, because an undef node may exist in N1
4776  return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4777  SDLoc(N), N1.getValueType());
4778 
4779  // fold (and x, -1) -> x, vector edition
4780  if (ISD::isBuildVectorAllOnes(N0.getNode()))
4781  return N1;
4782  if (ISD::isBuildVectorAllOnes(N1.getNode()))
4783  return N0;
4784  }
4785 
4786  // fold (and c1, c2) -> c1&c2
4787  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4788  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4789  if (N0C && N1C && !N1C->isOpaque())
4790  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4791  // canonicalize constant to RHS
4792  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4793  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4794  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4795  // fold (and x, -1) -> x
4796  if (isAllOnesConstant(N1))
4797  return N0;
4798  // if (and x, c) is known to be zero, return 0
4799  unsigned BitWidth = VT.getScalarSizeInBits();
4800  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4801  APInt::getAllOnesValue(BitWidth)))
4802  return DAG.getConstant(0, SDLoc(N), VT);
4803 
4804  if (SDValue NewSel = foldBinOpIntoSelect(N))
4805  return NewSel;
4806 
4807  // reassociate and
4808  if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4809  return RAND;
4810 
4811  // Try to convert a constant mask AND into a shuffle clear mask.
4812  if (VT.isVector())
4813  if (SDValue Shuffle = XformToShuffleWithZero(N))
4814  return Shuffle;
4815 
4816  // fold (and (or x, C), D) -> D if (C & D) == D
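 // e.g. (and (or x, 0xF0), 0x30) --> 0x30: the OR guarantees bits 4-5 are set
 // regardless of x, and 0x30 & 0xF0 == 0x30.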
4817  auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4818  return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4819  };
4820  if (N0.getOpcode() == ISD::OR &&
4821  ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4822  return N1;
4823  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
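 // e.g. if V is an i8 value whose bits above bit 1 are known to be zero, then
 // (and (any_extend V to i32), 3) can become (zero_extend V to i32): the mask
 // clears nothing that is not already zero in V, and choosing zeros for the
 // undefined extended bits is always permitted.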
4824  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4825  SDValue N0Op0 = N0.getOperand(0);
4826  APInt Mask = ~N1C->getAPIntValue();
4827  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4828  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4829  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4830  N0.getValueType(), N0Op0);
4831 
4832  // Replace uses of the AND with uses of the Zero extend node.
4833  CombineTo(N, Zext);
4834 
4835  // We actually want to replace all uses of the any_extend with the
4836  // zero_extend, to avoid duplicating things. This will later cause this
4837  // AND to be folded.
4838  CombineTo(N0.getNode(), Zext);
4839  return SDValue(N, 0); // Return N so it doesn't get rechecked!
4840  }
4841  }
4842  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4843  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4844  // already be zero by virtue of the width of the base type of the load.
4845  //
4846  // the 'X' node here can either be nothing or an extract_vector_elt to catch
4847  // more cases.
4848  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4849  N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4850  N0.getOperand(0).getOpcode() == ISD::LOAD &&
4851  N0.getOperand(0).getResNo() == 0) ||
4852  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4853  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4854  N0 : N0.getOperand(0) );
4855 
4856  // Get the constant (if applicable) the zero'th operand is being ANDed with.
4857  // This can be a pure constant or a vector splat, in which case we treat the
4858  // vector as a scalar and use the splat value.
4859  APInt Constant = APInt::getNullValue(1);
4860  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4861  Constant = C->getAPIntValue();
4862  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4863  APInt SplatValue, SplatUndef;
4864  unsigned SplatBitSize;
4865  bool HasAnyUndefs;
4866  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4867  SplatBitSize, HasAnyUndefs);
4868  if (IsSplat) {
4869  // Undef bits can contribute to a possible optimisation if set, so
4870  // set them.
4871  SplatValue |= SplatUndef;
4872 
4873  // The splat value may be something like "0x00FFFFFF", which means 0 for
4874  // the first vector value and FF for the rest, repeating. We need a mask
4875  // that will apply equally to all members of the vector, so AND all the
4876  // lanes of the constant together.
4877  EVT VT = Vector->getValueType(0);
4878  unsigned BitWidth = VT.getScalarSizeInBits();
4879 
4880  // If the splat value has been compressed to a bitlength lower
4881  // than the size of the vector lane, we need to re-expand it to
4882  // the lane size.
4883  if (BitWidth > SplatBitSize)
4884  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4885  SplatBitSize < BitWidth;
4886  SplatBitSize = SplatBitSize * 2)
4887  SplatValue |= SplatValue.shl(SplatBitSize);
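 // e.g. with SplatValue = 0xAB, SplatBitSize = 8 and BitWidth = 32, the loop
 // above produces 0x0000ABAB and then 0xABABABAB, i.e. the 8-bit splat value
 // replicated across the full 32-bit lane.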
4888 
4889  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4890  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.