LLVM 4.0.0
DAGCombiner.cpp
1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallBitVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/Statistic.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/SelectionDAG.h"
28 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
29 #include "llvm/IR/DataLayout.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/Target/TargetLowering.h"
39 #include "llvm/Target/TargetOptions.h"
40 #include "llvm/Target/TargetRegisterInfo.h"
41 #include "llvm/Target/TargetSubtargetInfo.h"
42 #include <algorithm>
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "dagcombine"
46 
47 STATISTIC(NodesCombined , "Number of dag nodes combined");
48 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
49 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
50 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
51 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
52 STATISTIC(SlicedLoads, "Number of loads sliced");
53 
54 namespace {
55  static cl::opt<bool>
56  CombinerAA("combiner-alias-analysis", cl::Hidden,
57  cl::desc("Enable DAG combiner alias-analysis heuristics"));
58 
59  static cl::opt<bool>
60  CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
61  cl::desc("Enable DAG combiner's use of IR alias analysis"));
62 
63  static cl::opt<bool>
64  UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
65  cl::desc("Enable DAG combiner's use of TBAA"));
66 
67 #ifndef NDEBUG
68  static cl::opt<std::string>
69  CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
70  cl::desc("Only use DAG-combiner alias analysis in this"
71  " function"));
72 #endif
73 
74  /// Hidden option to stress test load slicing, i.e., when this option
75  /// is enabled, load slicing bypasses most of its profitability guards.
76  static cl::opt<bool>
77  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
78  cl::desc("Bypass the profitability model of load "
79  "slicing"),
80  cl::init(false));
81 
82  static cl::opt<bool>
83  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
84  cl::desc("DAG combiner may split indexing from loads"));
85 
86 //------------------------------ DAGCombiner ---------------------------------//
87 
88  class DAGCombiner {
89  SelectionDAG &DAG;
90  const TargetLowering &TLI;
91  CombineLevel Level;
92  CodeGenOpt::Level OptLevel;
93  bool LegalOperations;
94  bool LegalTypes;
95  bool ForCodeSize;
96 
97  /// \brief Worklist of all of the nodes that need to be simplified.
98  ///
99  /// This must behave as a stack -- new nodes to process are pushed onto the
100  /// back and when processing we pop off of the back.
101  ///
102  /// The worklist will not contain duplicates but may contain null entries
103  /// due to nodes being deleted from the underlying DAG.
104  SmallVector<SDNode *, 64> Worklist;
105 
106  /// \brief Mapping from an SDNode to its position on the worklist.
107  ///
108  /// This is used to find and remove nodes from the worklist (by nulling
109  /// them) when they are deleted from the underlying DAG. It relies on
110  /// stable indices of nodes within the worklist.
111  DenseMap<SDNode *, unsigned> WorklistMap;
112 
113  /// \brief Set of nodes which have been combined (at least once).
114  ///
115  /// This is used to allow us to reliably add any operands of a DAG node
116  /// which have not yet been combined to the worklist.
117  SmallPtrSet<SDNode *, 32> CombinedNodes;
118 
119  // AA - Used for DAG load/store alias analysis.
120  AliasAnalysis &AA;
121 
122  /// When an instruction is simplified, add all users of the instruction to
123  /// the worklist, because they might now be simplified further.
124  void AddUsersToWorklist(SDNode *N) {
125  for (SDNode *Node : N->uses())
126  AddToWorklist(Node);
127  }
128 
129  /// Call the node-specific routine that folds each particular type of node.
130  SDValue visit(SDNode *N);
131 
132  public:
133  /// Add to the worklist making sure its instance is at the back (next to be
134  /// processed).
135  void AddToWorklist(SDNode *N) {
136  // Skip handle nodes as they can't usefully be combined and confuse the
137  // zero-use deletion strategy.
138  if (N->getOpcode() == ISD::HANDLENODE)
139  return;
140 
141  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
142  Worklist.push_back(N);
143  }
144 
145  /// Remove all instances of N from the worklist.
146  void removeFromWorklist(SDNode *N) {
147  CombinedNodes.erase(N);
148 
149  auto It = WorklistMap.find(N);
150  if (It == WorklistMap.end())
151  return; // Not in the worklist.
152 
153  // Null out the entry rather than erasing it to avoid a linear operation.
154  Worklist[It->second] = nullptr;
155  WorklistMap.erase(It);
156  }
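 // A note on the containers above: nulling out the vector slot keeps removal
 // O(1); erasing would shift later entries and invalidate the indices cached
 // in WorklistMap. The consumer compensates by skipping null entries when it
 // pops, as Run() does below:
 //
 //   do {
 //     N = Worklist.pop_back_val();
 //   } while (!N); // Skip entries nulled out by removeFromWorklist.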
157 
158  void deleteAndRecombine(SDNode *N);
159  bool recursivelyDeleteUnusedNodes(SDNode *N);
160 
161  /// Replaces all uses of the results of one DAG node with new values.
162  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
163  bool AddTo = true);
164 
165  /// Replaces all uses of the results of one DAG node with new values.
166  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
167  return CombineTo(N, &Res, 1, AddTo);
168  }
169 
170  /// Replaces all uses of the results of one DAG node with new values.
171  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
172  bool AddTo = true) {
173  SDValue To[] = { Res0, Res1 };
174  return CombineTo(N, To, 2, AddTo);
175  }
176 
177  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
178 
179  private:
180 
181  /// Check the specified integer node value to see if it can be simplified or
182  /// if things it uses can be simplified by bit propagation.
183  /// If so, return true.
184  bool SimplifyDemandedBits(SDValue Op) {
185  unsigned BitWidth = Op.getScalarValueSizeInBits();
186  APInt Demanded = APInt::getAllOnesValue(BitWidth);
187  return SimplifyDemandedBits(Op, Demanded);
188  }
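 // For example, for an i32 value this wrapper passes
 // Demanded = APInt::getAllOnesValue(32), i.e. it asks whether Op can be
 // simplified assuming every bit of the result may be observed.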
189 
190  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
191 
192  bool CombineToPreIndexedLoadStore(SDNode *N);
193  bool CombineToPostIndexedLoadStore(SDNode *N);
194  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
195  bool SliceUpLoad(SDNode *N);
196 
197  /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
198  /// load.
199  ///
200  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
201  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
202  /// \param EltNo index of the vector element to load.
203  /// \param OriginalLoad load that EVE came from to be replaced.
204  /// \returns EVE on success, SDValue() on failure.
205  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
206  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
207  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
208  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
209  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
210  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
211  SDValue PromoteIntBinOp(SDValue Op);
212  SDValue PromoteIntShiftOp(SDValue Op);
213  SDValue PromoteExtend(SDValue Op);
214  bool PromoteLoad(SDValue Op);
215 
216  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
217  SDValue ExtLoad, const SDLoc &DL,
218  ISD::NodeType ExtType);
219 
220  /// Call the node-specific routine that knows how to fold each
221  /// particular type of node. If that doesn't do anything, try the
222  /// target-specific DAG combines.
223  SDValue combine(SDNode *N);
224 
225  // Visitation implementation - Implement dag node combining for different
226  // node types. The semantics are as follows:
227  // Return Value:
228  // SDValue.getNode() == 0 - No change was made
229  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
230  // otherwise - N should be replaced by the returned Operand.
231  //
232  SDValue visitTokenFactor(SDNode *N);
233  SDValue visitMERGE_VALUES(SDNode *N);
234  SDValue visitADD(SDNode *N);
235  SDValue visitSUB(SDNode *N);
236  SDValue visitADDC(SDNode *N);
237  SDValue visitSUBC(SDNode *N);
238  SDValue visitADDE(SDNode *N);
239  SDValue visitSUBE(SDNode *N);
240  SDValue visitMUL(SDNode *N);
241  SDValue useDivRem(SDNode *N);
242  SDValue visitSDIV(SDNode *N);
243  SDValue visitUDIV(SDNode *N);
244  SDValue visitREM(SDNode *N);
245  SDValue visitMULHU(SDNode *N);
246  SDValue visitMULHS(SDNode *N);
247  SDValue visitSMUL_LOHI(SDNode *N);
248  SDValue visitUMUL_LOHI(SDNode *N);
249  SDValue visitSMULO(SDNode *N);
250  SDValue visitUMULO(SDNode *N);
251  SDValue visitIMINMAX(SDNode *N);
252  SDValue visitAND(SDNode *N);
253  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
254  SDValue visitOR(SDNode *N);
255  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
256  SDValue visitXOR(SDNode *N);
257  SDValue SimplifyVBinOp(SDNode *N);
258  SDValue visitSHL(SDNode *N);
259  SDValue visitSRA(SDNode *N);
260  SDValue visitSRL(SDNode *N);
261  SDValue visitRotate(SDNode *N);
262  SDValue visitBSWAP(SDNode *N);
263  SDValue visitBITREVERSE(SDNode *N);
264  SDValue visitCTLZ(SDNode *N);
265  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
266  SDValue visitCTTZ(SDNode *N);
267  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
268  SDValue visitCTPOP(SDNode *N);
269  SDValue visitSELECT(SDNode *N);
270  SDValue visitVSELECT(SDNode *N);
271  SDValue visitSELECT_CC(SDNode *N);
272  SDValue visitSETCC(SDNode *N);
273  SDValue visitSETCCE(SDNode *N);
274  SDValue visitSIGN_EXTEND(SDNode *N);
275  SDValue visitZERO_EXTEND(SDNode *N);
276  SDValue visitANY_EXTEND(SDNode *N);
277  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
278  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
279  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
280  SDValue visitTRUNCATE(SDNode *N);
281  SDValue visitBITCAST(SDNode *N);
282  SDValue visitBUILD_PAIR(SDNode *N);
283  SDValue visitFADD(SDNode *N);
284  SDValue visitFSUB(SDNode *N);
285  SDValue visitFMUL(SDNode *N);
286  SDValue visitFMA(SDNode *N);
287  SDValue visitFDIV(SDNode *N);
288  SDValue visitFREM(SDNode *N);
289  SDValue visitFSQRT(SDNode *N);
290  SDValue visitFCOPYSIGN(SDNode *N);
291  SDValue visitSINT_TO_FP(SDNode *N);
292  SDValue visitUINT_TO_FP(SDNode *N);
293  SDValue visitFP_TO_SINT(SDNode *N);
294  SDValue visitFP_TO_UINT(SDNode *N);
295  SDValue visitFP_ROUND(SDNode *N);
296  SDValue visitFP_ROUND_INREG(SDNode *N);
297  SDValue visitFP_EXTEND(SDNode *N);
298  SDValue visitFNEG(SDNode *N);
299  SDValue visitFABS(SDNode *N);
300  SDValue visitFCEIL(SDNode *N);
301  SDValue visitFTRUNC(SDNode *N);
302  SDValue visitFFLOOR(SDNode *N);
303  SDValue visitFMINNUM(SDNode *N);
304  SDValue visitFMAXNUM(SDNode *N);
305  SDValue visitBRCOND(SDNode *N);
306  SDValue visitBR_CC(SDNode *N);
307  SDValue visitLOAD(SDNode *N);
308 
309  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
310  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
311 
312  SDValue visitSTORE(SDNode *N);
313  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
314  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
315  SDValue visitBUILD_VECTOR(SDNode *N);
316  SDValue visitCONCAT_VECTORS(SDNode *N);
317  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
318  SDValue visitVECTOR_SHUFFLE(SDNode *N);
319  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
320  SDValue visitINSERT_SUBVECTOR(SDNode *N);
321  SDValue visitMLOAD(SDNode *N);
322  SDValue visitMSTORE(SDNode *N);
323  SDValue visitMGATHER(SDNode *N);
324  SDValue visitMSCATTER(SDNode *N);
325  SDValue visitFP_TO_FP16(SDNode *N);
326  SDValue visitFP16_TO_FP(SDNode *N);
327 
328  SDValue visitFADDForFMACombine(SDNode *N);
329  SDValue visitFSUBForFMACombine(SDNode *N);
330  SDValue visitFMULForFMADistributiveCombine(SDNode *N);
331 
332  SDValue XformToShuffleWithZero(SDNode *N);
333  SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
334  SDValue RHS);
335 
336  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
337 
338  SDValue foldSelectOfConstants(SDNode *N);
339  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
340  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
341  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
342  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
343  SDValue N2, SDValue N3, ISD::CondCode CC,
344  bool NotExtCompare = false);
345  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
346  SDValue N2, SDValue N3, ISD::CondCode CC);
347  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
348  const SDLoc &DL, bool foldBooleans = true);
349 
350  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
351  SDValue &CC) const;
352  bool isOneUseSetCC(SDValue N) const;
353 
354  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
355  unsigned HiOp);
356  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
357  SDValue CombineExtLoad(SDNode *N);
358  SDValue combineRepeatedFPDivisors(SDNode *N);
359  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
360  SDValue BuildSDIV(SDNode *N);
361  SDValue BuildSDIVPow2(SDNode *N);
362  SDValue BuildUDIV(SDNode *N);
363  SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
364  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
365  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
366  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
367  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
368  SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
369  SDNodeFlags *Flags, bool Reciprocal);
370  SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
371  SDNodeFlags *Flags, bool Reciprocal);
372  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
373  bool DemandHighBits = true);
374  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
375  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
376  SDValue InnerPos, SDValue InnerNeg,
377  unsigned PosOpcode, unsigned NegOpcode,
378  const SDLoc &DL);
379  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
380  SDValue ReduceLoadWidth(SDNode *N);
381  SDValue ReduceLoadOpStoreWidth(SDNode *N);
382  SDValue splitMergedValStore(StoreSDNode *ST);
383  SDValue TransformFPLoadStorePair(SDNode *N);
384  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
385  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
386  SDValue reduceBuildVecToShuffle(SDNode *N);
387  SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
388  SDValue VecIn1, SDValue VecIn2,
389  unsigned LeftIdx);
390 
391  SDValue GetDemandedBits(SDValue V, const APInt &Mask);
392 
393  /// Walk up chain skipping non-aliasing memory nodes,
394  /// looking for aliasing nodes and adding them to the Aliases vector.
395  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
396  SmallVectorImpl<SDValue> &Aliases);
397 
398  /// Return true if there is any possibility that the two addresses overlap.
399  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
400 
401  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
402  /// chain (aliasing node.)
403  SDValue FindBetterChain(SDNode *N, SDValue Chain);
404 
405  /// Try to replace a store and any possibly adjacent stores on
406  /// consecutive chains with better chains. Return true only if St is
407  /// replaced.
408  ///
409  /// Notice that other chains may still be replaced even if the function
410  /// returns false.
411  bool findBetterNeighborChains(StoreSDNode *St);
412 
413  /// Match "(X shl/srl V1) & V2" where V2 may not be present.
414  bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);
415 
416  /// Holds a pointer to an LSBaseSDNode as well as information on where it
417  /// is located in a sequence of memory operations connected by a chain.
418  struct MemOpLink {
419  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
420  MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
421  // Ptr to the mem node.
422  LSBaseSDNode *MemNode;
423  // Offset from the base ptr.
424  int64_t OffsetFromBase;
425  // The sequence number of this mem node:
426  // the lowest mem operand in the DAG starts at zero.
427  unsigned SequenceNum;
428  };
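 // An illustrative instance: three consecutive i32 stores to Base, Base+4
 // and Base+8 would typically be recorded as
 //   MemOpLink(St0, 0, 0), MemOpLink(St1, 4, 1), MemOpLink(St2, 8, 2),
 // giving the store-merging code both the memory layout (OffsetFromBase)
 // and the DAG order (SequenceNum) of the candidates.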
429 
430  /// This is a helper function for visitMUL to check the profitability
431  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
432  /// MulNode is the original multiply, AddNode is (add x, c1),
433  /// and ConstNode is c2.
434  bool isMulAddWithConstProfitable(SDNode *MulNode,
435  SDValue &AddNode,
436  SDValue &ConstNode);
437 
438  /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
439  /// constant build_vector of the stored constant values in Stores.
440  SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
441  ArrayRef<MemOpLink> Stores,
442  SmallVectorImpl<SDValue> &Chains,
443  EVT Ty) const;
444 
445  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
446  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
447  /// the type of the loaded value to be extended. LoadedVT returns the type
448  /// of the original loaded value. NarrowLoad returns whether the load would
449  /// need to be narrowed in order to match.
450  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
451  EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
452  bool &NarrowLoad);
453 
454  /// This is a helper function for MergeConsecutiveStores. When the source
455  /// elements of the consecutive stores are all constants or all extracted
456  /// vector elements, try to merge them into one larger store.
457  /// \return true if stores were merged (the merged stores always form
458  /// a prefix of \p StoreNodes).
459  bool MergeStoresOfConstantsOrVecElts(
460  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
461  bool IsConstantSrc, bool UseVector);
462 
463  /// This is a helper function for MergeConsecutiveStores.
464  /// Stores that may be merged are placed in StoreNodes.
465  /// Loads that may alias with those stores are placed in AliasLoadNodes.
466  void getStoreMergeAndAliasCandidates(
467  StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
468  SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
469 
470  /// Helper function for MergeConsecutiveStores. Checks if
471  /// Candidate stores have indirect dependency through their
472  /// operands. \return True if safe to merge
473  bool checkMergeStoreCandidatesForDependencies(
474  SmallVectorImpl<MemOpLink> &StoreNodes);
475 
476  /// Merge consecutive store operations into a wide store.
477  /// This optimization uses wide integers or vectors when possible.
478  /// \return true if stores were merged (the
479  /// affected nodes are stored as a prefix in \p StoreNodes).
480  bool MergeConsecutiveStores(StoreSDNode *N,
481  SmallVectorImpl<MemOpLink> &StoreNodes);
482 
483  /// \brief Try to transform a truncation where C is a constant:
484  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
485  ///
486  /// \p N needs to be a truncation and its first operand an AND. Other
487  /// requirements are checked by the function (e.g. that trunc is
488  /// single-use) and if missed an empty SDValue is returned.
489  SDValue distributeTruncateThroughAnd(SDNode *N);
490 
491  public:
492  DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
493  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
494  OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
495  ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
496  }
497 
498  /// Runs the dag combiner on all nodes in the work list
499  void Run(CombineLevel AtLevel);
500 
501  SelectionDAG &getDAG() const { return DAG; }
502 
503  /// Returns a type large enough to hold any valid shift amount - before type
504  /// legalization these can be huge.
505  EVT getShiftAmountTy(EVT LHSTy) {
506  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
507  if (LHSTy.isVector())
508  return LHSTy;
509  auto &DL = DAG.getDataLayout();
510  return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
511  : TLI.getPointerTy(DL);
512  }
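 // For instance, before type legalization an i8 shift may carry its shift
 // amount in an arbitrarily wide integer, so the target pointer type is used
 // as a conservatively large shift-amount type until types are legalized.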
513 
514  /// This method returns true if we are running before type legalization or
515  /// if the specified VT is legal.
516  bool isTypeLegal(const EVT &VT) {
517  if (!LegalTypes) return true;
518  return TLI.isTypeLegal(VT);
519  }
520 
521  /// Convenience wrapper around TargetLowering::getSetCCResultType
522  EVT getSetCCResultType(EVT VT) const {
523  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
524  }
525  };
526 }
527 
528 
529 namespace {
530 /// This class is a DAGUpdateListener that removes any deleted
531 /// nodes from the worklist.
532 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
533  DAGCombiner &DC;
534 public:
535  explicit WorklistRemover(DAGCombiner &dc)
536  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
537 
538  void NodeDeleted(SDNode *N, SDNode *E) override {
539  DC.removeFromWorklist(N);
540  }
541 };
542 }
543 
544 //===----------------------------------------------------------------------===//
545 // TargetLowering::DAGCombinerInfo implementation
546 //===----------------------------------------------------------------------===//
547 
548 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
549  ((DAGCombiner*)DC)->AddToWorklist(N);
550 }
551 
552 SDValue TargetLowering::DAGCombinerInfo::
553 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
554  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
555 }
556 
557 SDValue TargetLowering::DAGCombinerInfo::
558 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
559  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
560 }
561 
562 
563 SDValue TargetLowering::DAGCombinerInfo::
564 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
565  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
566 }
567 
568 void TargetLowering::DAGCombinerInfo::
569 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
570  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
571 }
572 
573 //===----------------------------------------------------------------------===//
574 // Helper Functions
575 //===----------------------------------------------------------------------===//
576 
577 void DAGCombiner::deleteAndRecombine(SDNode *N) {
578  removeFromWorklist(N);
579 
580  // If the operands of this node are only used by the node, they will now be
581  // dead. Make sure to re-visit them and recursively delete dead nodes.
582  for (const SDValue &Op : N->ops())
583  // For an operand generating multiple values, one of the values may
584  // become dead allowing further simplification (e.g. split index
585  // arithmetic from an indexed load).
586  if (Op->hasOneUse() || Op->getNumValues() > 1)
587  AddToWorklist(Op.getNode());
588 
589  DAG.DeleteNode(N);
590 }
591 
592 /// Return 1 if we can compute the negated form of the specified expression for
593 /// the same cost as the expression itself, or 2 if we can compute the negated
594 /// form more cheaply than the expression itself.
595 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
596  const TargetLowering &TLI,
597  const TargetOptions *Options,
598  unsigned Depth = 0) {
599  // fneg is removable even if it has multiple uses.
600  if (Op.getOpcode() == ISD::FNEG) return 2;
601 
602  // Don't allow anything with multiple uses.
603  if (!Op.hasOneUse()) return 0;
604 
605  // Don't recurse exponentially.
606  if (Depth > 6) return 0;
607 
608  switch (Op.getOpcode()) {
609  default: return 0;
610  case ISD::ConstantFP:
611  // Don't invert constant FP values after legalize. The negated constant
612  // isn't necessarily legal.
613  return LegalOperations ? 0 : 1;
614  case ISD::FADD:
615  // FIXME: determine better conditions for this xform.
616  if (!Options->UnsafeFPMath) return 0;
617 
618  // After operation legalization, it might not be legal to create new FSUBs.
619  if (LegalOperations &&
620  !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
621  return 0;
622 
623  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
624  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
625  Options, Depth + 1))
626  return V;
627  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
628  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
629  Depth + 1);
630  case ISD::FSUB:
631  // We can't turn -(A-B) into B-A when we honor signed zeros.
632  if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
633  return 0;
634 
635  // fold (fneg (fsub A, B)) -> (fsub B, A)
636  return 1;
637 
638  case ISD::FMUL:
639  case ISD::FDIV:
640  if (Options->HonorSignDependentRoundingFPMath()) return 0;
641 
642  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
643  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
644  Options, Depth + 1))
645  return V;
646 
647  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
648  Depth + 1);
649 
650  case ISD::FP_EXTEND:
651  case ISD::FP_ROUND:
652  case ISD::FSIN:
653  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
654  Depth + 1);
655  }
656 }
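// Worked example: for Op = (fmul (fneg X), Y), assuming sign-dependent
// rounding is not honored, the recursive query on operand 0 hits the FNEG
// case and returns 2, so the fmul is negatible more cheaply than it can be
// computed: GetNegatedExpression below then rewrites
// (fneg (fmul (fneg X), Y)) into (fmul X, Y), removing both fnegs.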
657 
658 /// If isNegatibleForFree returns true, return the newly negated expression.
659 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
660  bool LegalOperations, unsigned Depth = 0) {
661  const TargetOptions &Options = DAG.getTarget().Options;
662  // fneg is removable even if it has multiple uses.
663  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
664 
665  // Don't allow anything with multiple uses.
666  assert(Op.hasOneUse() && "Unknown reuse!");
667 
668  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
669 
670  const SDNodeFlags *Flags = Op.getNode()->getFlags();
671 
672  switch (Op.getOpcode()) {
673  default: llvm_unreachable("Unknown code");
674  case ISD::ConstantFP: {
675  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
676  V.changeSign();
677  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
678  }
679  case ISD::FADD:
680  // FIXME: determine better conditions for this xform.
681  assert(Options.UnsafeFPMath);
682 
683  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
684  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
685  DAG.getTargetLoweringInfo(), &Options, Depth+1))
686  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
687  GetNegatedExpression(Op.getOperand(0), DAG,
688  LegalOperations, Depth+1),
689  Op.getOperand(1), Flags);
690  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
691  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
692  GetNegatedExpression(Op.getOperand(1), DAG,
693  LegalOperations, Depth+1),
694  Op.getOperand(0), Flags);
695  case ISD::FSUB:
696  // fold (fneg (fsub 0, B)) -> B
697  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
698  if (N0CFP->isZero())
699  return Op.getOperand(1);
700 
701  // fold (fneg (fsub A, B)) -> (fsub B, A)
702  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
703  Op.getOperand(1), Op.getOperand(0), Flags);
704 
705  case ISD::FMUL:
706  case ISD::FDIV:
707  assert(!Options.HonorSignDependentRoundingFPMath());
708 
709  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
710  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
711  DAG.getTargetLoweringInfo(), &Options, Depth+1))
712  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
713  GetNegatedExpression(Op.getOperand(0), DAG,
714  LegalOperations, Depth+1),
715  Op.getOperand(1), Flags);
716 
717  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
718  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
719  Op.getOperand(0),
720  GetNegatedExpression(Op.getOperand(1), DAG,
721  LegalOperations, Depth+1), Flags);
722 
723  case ISD::FP_EXTEND:
724  case ISD::FSIN:
725  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
726  GetNegatedExpression(Op.getOperand(0), DAG,
727  LegalOperations, Depth+1));
728  case ISD::FP_ROUND:
729  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
730  GetNegatedExpression(Op.getOperand(0), DAG,
731  LegalOperations, Depth+1),
732  Op.getOperand(1));
733  }
734 }
735 
736 // APInts must be the same size for most operations, this helper
737 // function zero extends the shorter of the pair so that they match.
738 // We provide an Offset so that we can create bitwidths that won't overflow.
739 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
740  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
741  LHS = LHS.zextOrSelf(Bits);
742  RHS = RHS.zextOrSelf(Bits);
743 }
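// E.g. with LHS 8 bits wide, RHS 16 bits wide and Offset = 1, both values
// are zero-extended to 17 bits, leaving a spare bit of headroom for a
// subsequent operation on the pair.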
744 
745 // Return true if this node is a setcc, or is a select_cc
746 // that selects between the target values used for true and false, making it
747 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
748 // the appropriate nodes based on the type of node we are checking. This
749 // simplifies life a bit for the callers.
750 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
751  SDValue &CC) const {
752  if (N.getOpcode() == ISD::SETCC) {
753  LHS = N.getOperand(0);
754  RHS = N.getOperand(1);
755  CC = N.getOperand(2);
756  return true;
757  }
758 
759  if (N.getOpcode() != ISD::SELECT_CC ||
760  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
761  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
762  return false;
763 
764  if (TLI.getBooleanContents(N.getValueType()) ==
765  TargetLowering::UndefinedBooleanContent)
766  return false;
767 
768  LHS = N.getOperand(0);
769  RHS = N.getOperand(1);
770  CC = N.getOperand(4);
771  return true;
772 }
773 
774 /// Return true if this is a SetCC-equivalent operation with only one use.
775 /// If this is true, it allows the users to invert the operation for free when
776 /// it is profitable to do so.
777 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
778  SDValue N0, N1, N2;
779  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
780  return true;
781  return false;
782 }
783 
784 // \brief Returns the SDNode if it is a constant float BuildVector
785 // or constant float.
786 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
787  if (isa<ConstantFPSDNode>(N))
788  return N.getNode();
789  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
790  return N.getNode();
791  return nullptr;
792 }
793 
794 // Determines if it is a constant integer or a build vector of constant
795 // integers (and undefs).
796 // Do not permit build vector implicit truncation.
797 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
798  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
799  return !(Const->isOpaque() && NoOpaques);
800  if (N.getOpcode() != ISD::BUILD_VECTOR)
801  return false;
802  unsigned BitWidth = N.getScalarValueSizeInBits();
803  for (const SDValue &Op : N->op_values()) {
804  if (Op.isUndef())
805  continue;
806  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
807  if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
808  (Const->isOpaque() && NoOpaques))
809  return false;
810  }
811  return true;
812 }
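// For example, (BUILD_VECTOR v4i32 0, 1, undef, 3) satisfies this predicate,
// while a build vector whose constant operands are wider than the vector's
// element type fails the bitwidth check above (implicit truncation).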
813 
814 // Determines if it is a constant null integer or a splatted vector of a
815 // constant null integer (with no undefs).
816 // Build vector implicit truncation is not an issue for null values.
817 static bool isNullConstantOrNullSplatConstant(SDValue N) {
818  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
819  return Splat->isNullValue();
820  return false;
821 }
822 
823 // Determines if it is a constant integer of one or a splatted vector of a
824 // constant integer of one (with no undefs).
825 // Do not permit build vector implicit truncation.
826 static bool isOneConstantOrOneSplatConstant(SDValue N) {
827  unsigned BitWidth = N.getScalarValueSizeInBits();
828  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
829  return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
830  return false;
831 }
832 
833 // Determines if it is a constant integer of all ones or a splatted vector of a
834 // constant integer of all ones (with no undefs).
835 // Do not permit build vector implicit truncation.
836 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
837  unsigned BitWidth = N.getScalarValueSizeInBits();
838  if (ConstantSDNode *Splat = isConstOrConstSplat(N))
839  return Splat->isAllOnesValue() &&
840  Splat->getAPIntValue().getBitWidth() == BitWidth;
841  return false;
842 }
843 
844 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
845 // undefs.
846 static bool isAnyConstantBuildVector(const SDNode *N) {
847  return ISD::isBuildVectorOfConstantSDNodes(N) ||
848  ISD::isBuildVectorOfConstantFPSDNodes(N);
849 }
850 
851 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
852  SDValue N1) {
853  EVT VT = N0.getValueType();
854  if (N0.getOpcode() == Opc) {
855  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
856  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
857  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
858  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
859  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
860  return SDValue();
861  }
862  if (N0.hasOneUse()) {
863  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
864  // use
865  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
866  if (!OpNode.getNode())
867  return SDValue();
868  AddToWorklist(OpNode.getNode());
869  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
870  }
871  }
872  }
873 
874  if (N1.getOpcode() == Opc) {
875  if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
876  if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
877  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
878  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
879  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
880  return SDValue();
881  }
882  if (N1.hasOneUse()) {
883  // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
884  // use
885  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
886  if (!OpNode.getNode())
887  return SDValue();
888  AddToWorklist(OpNode.getNode());
889  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
890  }
891  }
892  }
893 
894  return SDValue();
895 }
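// Worked example of the first pattern: for (add (add x, 1), 2) the two
// constants fold via FoldConstantArithmetic, yielding (add x, 3); for
// (add (add x, c1), y) with a single-use inner add, the constant is instead
// pulled outward, yielding (add (add x, y), c1).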
896 
897 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
898  bool AddTo) {
899  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
900  ++NodesCombined;
901  DEBUG(dbgs() << "\nReplacing.1 ";
902  N->dump(&DAG);
903  dbgs() << "\nWith: ";
904  To[0].getNode()->dump(&DAG);
905  dbgs() << " and " << NumTo-1 << " other values\n");
906  for (unsigned i = 0, e = NumTo; i != e; ++i)
907  assert((!To[i].getNode() ||
908  N->getValueType(i) == To[i].getValueType()) &&
909  "Cannot combine value to value of different type!");
910 
911  WorklistRemover DeadNodes(*this);
912  DAG.ReplaceAllUsesWith(N, To);
913  if (AddTo) {
914  // Push the new nodes and any users onto the worklist
915  for (unsigned i = 0, e = NumTo; i != e; ++i) {
916  if (To[i].getNode()) {
917  AddToWorklist(To[i].getNode());
918  AddUsersToWorklist(To[i].getNode());
919  }
920  }
921  }
922 
923  // Finally, if the node is now dead, remove it from the graph. The node
924  // may not be dead if the replacement process recursively simplified to
925  // something else needing this node.
926  if (N->use_empty())
927  deleteAndRecombine(N);
928  return SDValue(N, 0);
929 }
930 
931 void DAGCombiner::
932 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
933  // Replace all uses. If any nodes become isomorphic to other nodes and
934  // are deleted, make sure to remove them from our worklist.
935  WorklistRemover DeadNodes(*this);
936  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
937 
938  // Push the new node and any (possibly new) users onto the worklist.
939  AddToWorklist(TLO.New.getNode());
940  AddUsersToWorklist(TLO.New.getNode());
941 
942  // Finally, if the node is now dead, remove it from the graph. The node
943  // may not be dead if the replacement process recursively simplified to
944  // something else needing this node.
945  if (TLO.Old.getNode()->use_empty())
946  deleteAndRecombine(TLO.Old.getNode());
947 }
948 
949 /// Check the specified integer node value to see if it can be simplified or if
950 /// things it uses can be simplified by bit propagation. If so, return true.
951 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
952  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
953  APInt KnownZero, KnownOne;
954  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
955  return false;
956 
957  // Revisit the node.
958  AddToWorklist(Op.getNode());
959 
960  // Replace the old value with the new one.
961  ++NodesCombined;
962  DEBUG(dbgs() << "\nReplacing.2 ";
963  TLO.Old.getNode()->dump(&DAG);
964  dbgs() << "\nWith: ";
965  TLO.New.getNode()->dump(&DAG);
966  dbgs() << '\n');
967 
968  CommitTargetLoweringOpt(TLO);
969  return true;
970 }
971 
972 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
973  SDLoc DL(Load);
974  EVT VT = Load->getValueType(0);
975  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
976 
977  DEBUG(dbgs() << "\nReplacing.9 ";
978  Load->dump(&DAG);
979  dbgs() << "\nWith: ";
980  Trunc.getNode()->dump(&DAG);
981  dbgs() << '\n');
982  WorklistRemover DeadNodes(*this);
983  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
984  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
985  deleteAndRecombine(Load);
986  AddToWorklist(Trunc.getNode());
987 }
988 
989 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
990  Replace = false;
991  SDLoc DL(Op);
992  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
993  LoadSDNode *LD = cast<LoadSDNode>(Op);
994  EVT MemVT = LD->getMemoryVT();
995  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
996  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
997  : ISD::EXTLOAD)
998  : LD->getExtensionType();
999  Replace = true;
1000  return DAG.getExtLoad(ExtType, DL, PVT,
1001  LD->getChain(), LD->getBasePtr(),
1002  MemVT, LD->getMemOperand());
1003  }
1004 
1005  unsigned Opc = Op.getOpcode();
1006  switch (Opc) {
1007  default: break;
1008  case ISD::AssertSext:
1009  return DAG.getNode(ISD::AssertSext, DL, PVT,
1010  SExtPromoteOperand(Op.getOperand(0), PVT),
1011  Op.getOperand(1));
1012  case ISD::AssertZext:
1013  return DAG.getNode(ISD::AssertZext, DL, PVT,
1014  ZExtPromoteOperand(Op.getOperand(0), PVT),
1015  Op.getOperand(1));
1016  case ISD::Constant: {
1017  unsigned ExtOpc =
1018  Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1019  return DAG.getNode(ExtOpc, DL, PVT, Op);
1020  }
1021  }
1022 
1023  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1024  return SDValue();
1025  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1026 }
1027 
1028 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1029  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1030  return SDValue();
1031  EVT OldVT = Op.getValueType();
1032  SDLoc DL(Op);
1033  bool Replace = false;
1034  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1035  if (!NewOp.getNode())
1036  return SDValue();
1037  AddToWorklist(NewOp.getNode());
1038 
1039  if (Replace)
1040  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1041  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1042  DAG.getValueType(OldVT));
1043 }
1044 
1045 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1046  EVT OldVT = Op.getValueType();
1047  SDLoc DL(Op);
1048  bool Replace = false;
1049  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1050  if (!NewOp.getNode())
1051  return SDValue();
1052  AddToWorklist(NewOp.getNode());
1053 
1054  if (Replace)
1055  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1056  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1057 }
1058 
1059 /// Promote the specified integer binary operation if the target indicates it is
1060 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1061 /// i32 since i16 instructions are longer.
1062 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1063  if (!LegalOperations)
1064  return SDValue();
1065 
1066  EVT VT = Op.getValueType();
1067  if (VT.isVector() || !VT.isInteger())
1068  return SDValue();
1069 
1070  // If operation type is 'undesirable', e.g. i16 on x86, consider
1071  // promoting it.
1072  unsigned Opc = Op.getOpcode();
1073  if (TLI.isTypeDesirableForOp(Opc, VT))
1074  return SDValue();
1075 
1076  EVT PVT = VT;
1077  // Consult target whether it is a good idea to promote this operation and
1078  // what's the right type to promote it to.
1079  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1080  assert(PVT != VT && "Don't know what type to promote to!");
1081 
1082  bool Replace0 = false;
1083  SDValue N0 = Op.getOperand(0);
1084  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1085  if (!NN0.getNode())
1086  return SDValue();
1087 
1088  bool Replace1 = false;
1089  SDValue N1 = Op.getOperand(1);
1090  SDValue NN1;
1091  if (N0 == N1)
1092  NN1 = NN0;
1093  else {
1094  NN1 = PromoteOperand(N1, PVT, Replace1);
1095  if (!NN1.getNode())
1096  return SDValue();
1097  }
1098 
1099  AddToWorklist(NN0.getNode());
1100  if (NN1.getNode())
1101  AddToWorklist(NN1.getNode());
1102 
1103  if (Replace0)
1104  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1105  if (Replace1)
1106  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1107 
1108  DEBUG(dbgs() << "\nPromoting ";
1109  Op.getNode()->dump(&DAG));
1110  SDLoc DL(Op);
1111  return DAG.getNode(ISD::TRUNCATE, DL, VT,
1112  DAG.getNode(Opc, DL, PVT, NN0, NN1));
1113  }
1114  return SDValue();
1115 }
1116 
1117 /// Promote the specified integer shift operation if the target indicates it is
1118 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1119 /// i32 since i16 instructions are longer.
1120 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1121  if (!LegalOperations)
1122  return SDValue();
1123 
1124  EVT VT = Op.getValueType();
1125  if (VT.isVector() || !VT.isInteger())
1126  return SDValue();
1127 
1128  // If operation type is 'undesirable', e.g. i16 on x86, consider
1129  // promoting it.
1130  unsigned Opc = Op.getOpcode();
1131  if (TLI.isTypeDesirableForOp(Opc, VT))
1132  return SDValue();
1133 
1134  EVT PVT = VT;
1135  // Consult target whether it is a good idea to promote this operation and
1136  // what's the right type to promote it to.
1137  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1138  assert(PVT != VT && "Don't know what type to promote to!");
1139 
1140  bool Replace = false;
1141  SDValue N0 = Op.getOperand(0);
1142  if (Opc == ISD::SRA)
1143  N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
1144  else if (Opc == ISD::SRL)
1145  N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
1146  else
1147  N0 = PromoteOperand(N0, PVT, Replace);
1148  if (!N0.getNode())
1149  return SDValue();
1150 
1151  AddToWorklist(N0.getNode());
1152  if (Replace)
1153  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1154 
1155  DEBUG(dbgs() << "\nPromoting ";
1156  Op.getNode()->dump(&DAG));
1157  SDLoc DL(Op);
1158  return DAG.getNode(ISD::TRUNCATE, DL, VT,
1159  DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
1160  }
1161  return SDValue();
1162 }
1163 
1164 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1165  if (!LegalOperations)
1166  return SDValue();
1167 
1168  EVT VT = Op.getValueType();
1169  if (VT.isVector() || !VT.isInteger())
1170  return SDValue();
1171 
1172  // If operation type is 'undesirable', e.g. i16 on x86, consider
1173  // promoting it.
1174  unsigned Opc = Op.getOpcode();
1175  if (TLI.isTypeDesirableForOp(Opc, VT))
1176  return SDValue();
1177 
1178  EVT PVT = VT;
1179  // Consult target whether it is a good idea to promote this operation and
1180  // what's the right type to promote it to.
1181  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1182  assert(PVT != VT && "Don't know what type to promote to!");
1183  // fold (aext (aext x)) -> (aext x)
1184  // fold (aext (zext x)) -> (zext x)
1185  // fold (aext (sext x)) -> (sext x)
1186  DEBUG(dbgs() << "\nPromoting ";
1187  Op.getNode()->dump(&DAG));
1188  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1189  }
1190  return SDValue();
1191 }
1192 
1193 bool DAGCombiner::PromoteLoad(SDValue Op) {
1194  if (!LegalOperations)
1195  return false;
1196 
1197  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1198  return false;
1199 
1200  EVT VT = Op.getValueType();
1201  if (VT.isVector() || !VT.isInteger())
1202  return false;
1203 
1204  // If operation type is 'undesirable', e.g. i16 on x86, consider
1205  // promoting it.
1206  unsigned Opc = Op.getOpcode();
1207  if (TLI.isTypeDesirableForOp(Opc, VT))
1208  return false;
1209 
1210  EVT PVT = VT;
1211  // Consult target whether it is a good idea to promote this operation and
1212  // what's the right type to promote it to.
1213  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1214  assert(PVT != VT && "Don't know what type to promote to!");
1215 
1216  SDLoc DL(Op);
1217  SDNode *N = Op.getNode();
1218  LoadSDNode *LD = cast<LoadSDNode>(N);
1219  EVT MemVT = LD->getMemoryVT();
1220  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1221  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1222  : ISD::EXTLOAD)
1223  : LD->getExtensionType();
1224  SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1225  LD->getChain(), LD->getBasePtr(),
1226  MemVT, LD->getMemOperand());
1227  SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1228 
1229  DEBUG(dbgs() << "\nPromoting ";
1230  N->dump(&DAG);
1231  dbgs() << "\nTo: ";
1232  Result.getNode()->dump(&DAG);
1233  dbgs() << '\n');
1234  WorklistRemover DeadNodes(*this);
1235  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1236  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1237  deleteAndRecombine(N);
1238  AddToWorklist(Result.getNode());
1239  return true;
1240  }
1241  return false;
1242 }
1243 
1244 /// \brief Recursively delete a node which has no uses and any operands for
1245 /// which it is the only use.
1246 ///
1247 /// Note that this both deletes the nodes and removes them from the worklist.
1248 /// It also adds any nodes that have had a user deleted to the worklist, as
1249 /// they may now have only one use and be subject to other combines.
1250 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1251  if (!N->use_empty())
1252  return false;
1253 
1254  SmallSetVector<SDNode *, 16> Nodes;
1255  Nodes.insert(N);
1256  do {
1257  N = Nodes.pop_back_val();
1258  if (!N)
1259  continue;
1260 
1261  if (N->use_empty()) {
1262  for (const SDValue &ChildN : N->op_values())
1263  Nodes.insert(ChildN.getNode());
1264 
1265  removeFromWorklist(N);
1266  DAG.DeleteNode(N);
1267  } else {
1268  AddToWorklist(N);
1269  }
1270  } while (!Nodes.empty());
1271  return true;
1272 }
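// For example, when a node deleted here was the sole user of one of its
// operands, that operand becomes dead as well; the set-vector walk above
// deletes such operand chains in the same call rather than leaving them for
// a later combine.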
1273 
1274 //===----------------------------------------------------------------------===//
1275 // Main DAG Combiner implementation
1276 //===----------------------------------------------------------------------===//
1277 
1278 void DAGCombiner::Run(CombineLevel AtLevel) {
1279  // Set the instance variables, so that the various visit routines may use them.
1280  Level = AtLevel;
1281  LegalOperations = Level >= AfterLegalizeVectorOps;
1282  LegalTypes = Level >= AfterLegalizeTypes;
1283 
1284  // Add all the dag nodes to the worklist.
1285  for (SDNode &Node : DAG.allnodes())
1286  AddToWorklist(&Node);
1287 
1288  // Create a dummy node (which is not added to allnodes), that adds a reference
1289  // to the root node, preventing it from being deleted, and tracking any
1290  // changes of the root.
1291  HandleSDNode Dummy(DAG.getRoot());
1292 
1293  // While the worklist isn't empty, find a node and try to combine it.
1294  while (!WorklistMap.empty()) {
1295  SDNode *N;
1296  // The Worklist holds the SDNodes in order, but it may contain null entries.
1297  do {
1298  N = Worklist.pop_back_val();
1299  } while (!N);
1300 
1301  bool GoodWorklistEntry = WorklistMap.erase(N);
1302  (void)GoodWorklistEntry;
1303  assert(GoodWorklistEntry &&
1304  "Found a worklist entry without a corresponding map entry!");
1305 
1306  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1307  // N is deleted from the DAG, since they too may now be dead or may have a
1308  // reduced number of uses, allowing other xforms.
1309  if (recursivelyDeleteUnusedNodes(N))
1310  continue;
1311 
1312  WorklistRemover DeadNodes(*this);
1313 
1314  // If this combine is running after legalizing the DAG, re-legalize any
1315  // nodes pulled off the worklist.
1316  if (Level == AfterLegalizeDAG) {
1317  SmallSetVector<SDNode *, 16> UpdatedNodes;
1318  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1319 
1320  for (SDNode *LN : UpdatedNodes) {
1321  AddToWorklist(LN);
1322  AddUsersToWorklist(LN);
1323  }
1324  if (!NIsValid)
1325  continue;
1326  }
1327 
1328  DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1329 
1330  // Add any operands of the new node which have not yet been combined to the
1331  // worklist as well. Because the worklist uniques things already, this
1332  // won't repeatedly process the same operand.
1333  CombinedNodes.insert(N);
1334  for (const SDValue &ChildN : N->op_values())
1335  if (!CombinedNodes.count(ChildN.getNode()))
1336  AddToWorklist(ChildN.getNode());
1337 
1338  SDValue RV = combine(N);
1339 
1340  if (!RV.getNode())
1341  continue;
1342 
1343  ++NodesCombined;
1344 
1345  // If we get back the same node we passed in, rather than a new node or
1346  // zero, we know that the node must have defined multiple values and
1347  // CombineTo was used. Since CombineTo takes care of the worklist
1348  // mechanics for us, we have no work to do in this case.
1349  if (RV.getNode() == N)
1350  continue;
1351 
1352  assert(N->getOpcode() != ISD::DELETED_NODE &&
1353  RV.getOpcode() != ISD::DELETED_NODE &&
1354  "Node was deleted but visit returned new node!");
1355 
1356  DEBUG(dbgs() << " ... into: ";
1357  RV.getNode()->dump(&DAG));
1358 
1359  if (N->getNumValues() == RV.getNode()->getNumValues())
1360  DAG.ReplaceAllUsesWith(N, RV.getNode());
1361  else {
1362  assert(N->getValueType(0) == RV.getValueType() &&
1363  N->getNumValues() == 1 && "Type mismatch");
1364  SDValue OpV = RV;
1365  DAG.ReplaceAllUsesWith(N, &OpV);
1366  }
1367 
1368  // Push the new node and any users onto the worklist
1369  AddToWorklist(RV.getNode());
1370  AddUsersToWorklist(RV.getNode());
1371 
1372  // Finally, if the node is now dead, remove it from the graph. The node
1373  // may not be dead if the replacement process recursively simplified to
1374  // something else needing this node. This will also take care of adding any
1375  // operands which have lost a user to the worklist.
1376  recursivelyDeleteUnusedNodes(N);
1377  }
1378 
1379  // If the root changed (e.g. it was a dead load), update the root.
1380  DAG.setRoot(Dummy.getValue());
1381  DAG.RemoveDeadNodes();
1382 }
1383 
1384 SDValue DAGCombiner::visit(SDNode *N) {
1385  switch (N->getOpcode()) {
1386  default: break;
1387  case ISD::TokenFactor: return visitTokenFactor(N);
1388  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1389  case ISD::ADD: return visitADD(N);
1390  case ISD::SUB: return visitSUB(N);
1391  case ISD::ADDC: return visitADDC(N);
1392  case ISD::SUBC: return visitSUBC(N);
1393  case ISD::ADDE: return visitADDE(N);
1394  case ISD::SUBE: return visitSUBE(N);
1395  case ISD::MUL: return visitMUL(N);
1396  case ISD::SDIV: return visitSDIV(N);
1397  case ISD::UDIV: return visitUDIV(N);
1398  case ISD::SREM:
1399  case ISD::UREM: return visitREM(N);
1400  case ISD::MULHU: return visitMULHU(N);
1401  case ISD::MULHS: return visitMULHS(N);
1402  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1403  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1404  case ISD::SMULO: return visitSMULO(N);
1405  case ISD::UMULO: return visitUMULO(N);
1406  case ISD::SMIN:
1407  case ISD::SMAX:
1408  case ISD::UMIN:
1409  case ISD::UMAX: return visitIMINMAX(N);
1410  case ISD::AND: return visitAND(N);
1411  case ISD::OR: return visitOR(N);
1412  case ISD::XOR: return visitXOR(N);
1413  case ISD::SHL: return visitSHL(N);
1414  case ISD::SRA: return visitSRA(N);
1415  case ISD::SRL: return visitSRL(N);
1416  case ISD::ROTR:
1417  case ISD::ROTL: return visitRotate(N);
1418  case ISD::BSWAP: return visitBSWAP(N);
1419  case ISD::BITREVERSE: return visitBITREVERSE(N);
1420  case ISD::CTLZ: return visitCTLZ(N);
1421  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1422  case ISD::CTTZ: return visitCTTZ(N);
1423  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1424  case ISD::CTPOP: return visitCTPOP(N);
1425  case ISD::SELECT: return visitSELECT(N);
1426  case ISD::VSELECT: return visitVSELECT(N);
1427  case ISD::SELECT_CC: return visitSELECT_CC(N);
1428  case ISD::SETCC: return visitSETCC(N);
1429  case ISD::SETCCE: return visitSETCCE(N);
1430  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1431  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1432  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1433  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1434  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1435  case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1436  case ISD::TRUNCATE: return visitTRUNCATE(N);
1437  case ISD::BITCAST: return visitBITCAST(N);
1438  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1439  case ISD::FADD: return visitFADD(N);
1440  case ISD::FSUB: return visitFSUB(N);
1441  case ISD::FMUL: return visitFMUL(N);
1442  case ISD::FMA: return visitFMA(N);
1443  case ISD::FDIV: return visitFDIV(N);
1444  case ISD::FREM: return visitFREM(N);
1445  case ISD::FSQRT: return visitFSQRT(N);
1446  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1447  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1448  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1449  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1450  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1451  case ISD::FP_ROUND: return visitFP_ROUND(N);
1452  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1453  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1454  case ISD::FNEG: return visitFNEG(N);
1455  case ISD::FABS: return visitFABS(N);
1456  case ISD::FFLOOR: return visitFFLOOR(N);
1457  case ISD::FMINNUM: return visitFMINNUM(N);
1458  case ISD::FMAXNUM: return visitFMAXNUM(N);
1459  case ISD::FCEIL: return visitFCEIL(N);
1460  case ISD::FTRUNC: return visitFTRUNC(N);
1461  case ISD::BRCOND: return visitBRCOND(N);
1462  case ISD::BR_CC: return visitBR_CC(N);
1463  case ISD::LOAD: return visitLOAD(N);
1464  case ISD::STORE: return visitSTORE(N);
1465  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1466  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1467  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1468  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1469  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1470  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1471  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1472  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1473  case ISD::MGATHER: return visitMGATHER(N);
1474  case ISD::MLOAD: return visitMLOAD(N);
1475  case ISD::MSCATTER: return visitMSCATTER(N);
1476  case ISD::MSTORE: return visitMSTORE(N);
1477  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1478  case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1479  }
1480  return SDValue();
1481 }
1482 
1483 SDValue DAGCombiner::combine(SDNode *N) {
1484  SDValue RV = visit(N);
1485 
1486  // If nothing happened, try a target-specific DAG combine.
1487  if (!RV.getNode()) {
1488  assert(N->getOpcode() != ISD::DELETED_NODE &&
1489  "Node was deleted but visit returned NULL!");
1490 
1491  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1492  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1493 
1494  // Expose the DAG combiner to the target combiner impls.
1495  TargetLowering::DAGCombinerInfo
1496  DagCombineInfo(DAG, Level, false, this);
1497 
1498  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1499  }
1500  }
1501 
1502  // If nothing happened still, try promoting the operation.
1503  if (!RV.getNode()) {
1504  switch (N->getOpcode()) {
1505  default: break;
1506  case ISD::ADD:
1507  case ISD::SUB:
1508  case ISD::MUL:
1509  case ISD::AND:
1510  case ISD::OR:
1511  case ISD::XOR:
1512  RV = PromoteIntBinOp(SDValue(N, 0));
1513  break;
1514  case ISD::SHL:
1515  case ISD::SRA:
1516  case ISD::SRL:
1517  RV = PromoteIntShiftOp(SDValue(N, 0));
1518  break;
1519  case ISD::SIGN_EXTEND:
1520  case ISD::ZERO_EXTEND:
1521  case ISD::ANY_EXTEND:
1522  RV = PromoteExtend(SDValue(N, 0));
1523  break;
1524  case ISD::LOAD:
1525  if (PromoteLoad(SDValue(N, 0)))
1526  RV = SDValue(N, 0);
1527  break;
1528  }
1529  }
1530 
1531  // If N is a commutative binary node, try commuting it to enable more
1532  // sdisel CSE.
1533  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1534  N->getNumValues() == 1) {
1535  SDValue N0 = N->getOperand(0);
1536  SDValue N1 = N->getOperand(1);
1537 
1538  // Constant operands are canonicalized to RHS.
1539  if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1540  SDValue Ops[] = {N1, N0};
1541  SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1542  N->getFlags());
1543  if (CSENode)
1544  return SDValue(CSENode, 0);
1545  }
1546  }
1547 
1548  return RV;
1549 }
1550 
1551 /// Given a node, return its input chain if it has one, otherwise return a null
1552 /// SDValue.
1553 static SDValue getInputChainForNode(SDNode *N) {
1554  if (unsigned NumOps = N->getNumOperands()) {
1555  if (N->getOperand(0).getValueType() == MVT::Other)
1556  return N->getOperand(0);
1557  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1558  return N->getOperand(NumOps-1);
1559  for (unsigned i = 1; i < NumOps-1; ++i)
1560  if (N->getOperand(i).getValueType() == MVT::Other)
1561  return N->getOperand(i);
1562  }
1563  return SDValue();
1564 }
1565 
1566 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1567  // If N has two operands, where one has an input chain equal to the other,
1568  // the 'other' chain is redundant.
1569  if (N->getNumOperands() == 2) {
1570  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1571  return N->getOperand(0);
1572  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1573  return N->getOperand(1);
1574  }
1575 
1576  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1577  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1578  SmallPtrSet<SDNode*, 16> SeenOps;
1579  bool Changed = false; // If we should replace this token factor.
1580 
1581  // Start out with this token factor.
1582  TFs.push_back(N);
1583 
1584  // Iterate through token factors. The TFs list grows as new token factors
1585  // are encountered.
1586  for (unsigned i = 0; i < TFs.size(); ++i) {
1587  SDNode *TF = TFs[i];
1588 
1589  // Check each of the operands.
1590  for (const SDValue &Op : TF->op_values()) {
1591 
1592  switch (Op.getOpcode()) {
1593  case ISD::EntryToken:
1594  // Entry tokens don't need to be added to the list. They are
1595  // redundant.
1596  Changed = true;
1597  break;
1598 
1599  case ISD::TokenFactor:
1600  if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1601  // Queue up for processing.
1602  TFs.push_back(Op.getNode());
1603  // Clean up in case the token factor is removed.
1604  AddToWorklist(Op.getNode());
1605  Changed = true;
1606  break;
1607  }
1608  LLVM_FALLTHROUGH;
1609 
1610  default:
1611  // Only add if it isn't already in the list.
1612  if (SeenOps.insert(Op.getNode()).second)
1613  Ops.push_back(Op);
1614  else
1615  Changed = true;
1616  break;
1617  }
1618  }
1619  }
1620 
1621  SDValue Result;
1622 
1623  // If we've changed things around then replace token factor.
1624  if (Changed) {
1625  if (Ops.empty()) {
1626  // The entry token is the only possible outcome.
1627  Result = DAG.getEntryNode();
1628  } else {
1629  // New and improved token factor.
1630  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1631  }
1632 
1633  // Add users to worklist if AA is enabled, since it may introduce
1634  // a lot of new chained token factors while removing memory deps.
1635  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
1636  : DAG.getSubtarget().useAA();
1637  return CombineTo(N, Result, UseAA /*add to worklist*/);
1638  }
1639 
1640  return Result;
1641 }
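
// A minimal standalone sketch of the flattening loop above (illustration
// only; SketchNode and sketchFlatten are hypothetical and unused by the
// pass). Nested factors are queued onto a growing worklist, each is visited
// once, and unique leaf operands are collected, mirroring TFs/Ops/SeenOps:
struct SketchNode {
  bool IsFactor;                          // true: flatten; false: leaf operand
  SmallVector<SketchNode *, 4> Children;
};

static void sketchFlatten(SketchNode *Root,
                          SmallVectorImpl<SketchNode *> &Leaves) {
  SmallVector<SketchNode *, 8> Work;
  SmallPtrSet<SketchNode *, 16> Seen;
  Work.push_back(Root);
  for (unsigned i = 0; i < Work.size(); ++i) {   // Work grows as factors nest
    for (SketchNode *Op : Work[i]->Children) {
      if (Op->IsFactor) {
        if (!is_contained(Work, Op))             // queue each factor once
          Work.push_back(Op);
      } else if (Seen.insert(Op).second) {
        Leaves.push_back(Op);                    // keep first occurrence only
      }
    }
  }
}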
1642 
1643 /// MERGE_VALUES can always be eliminated.
1644 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1645  WorklistRemover DeadNodes(*this);
1646  // Replacing results may cause a different MERGE_VALUES to suddenly
1647  // be CSE'd with N, and carry its uses with it. Iterate until no
1648  // uses remain, to ensure that the node can be safely deleted.
1649  // First add the users of this node to the work list so that they
1650  // can be tried again once they have new operands.
1651  AddUsersToWorklist(N);
1652  do {
1653  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1654  DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1655  } while (!N->use_empty());
1656  deleteAndRecombine(N);
1657  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1658 }
1659 
1660 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1661 /// ConstantSDNode pointer; otherwise return nullptr.
1662 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1663  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1664  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1665 }
1666 
1667 SDValue DAGCombiner::visitADD(SDNode *N) {
1668  SDValue N0 = N->getOperand(0);
1669  SDValue N1 = N->getOperand(1);
1670  EVT VT = N0.getValueType();
1671  SDLoc DL(N);
1672 
1673  // fold vector ops
1674  if (VT.isVector()) {
1675  if (SDValue FoldedVOp = SimplifyVBinOp(N))
1676  return FoldedVOp;
1677 
1678  // fold (add x, 0) -> x, vector edition
1679  if (ISD::isBuildVectorAllZeros(N1.getNode()))
1680  return N0;
1681  if (ISD::isBuildVectorAllZeros(N0.getNode()))
1682  return N1;
1683  }
1684 
1685  // fold (add x, undef) -> undef
1686  if (N0.isUndef())
1687  return N0;
1688 
1689  if (N1.isUndef())
1690  return N1;
1691 
1692  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
1693  // canonicalize constant to RHS
1694  if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
1695  return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
1696  // fold (add c1, c2) -> c1+c2
1697  return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
1698  N1.getNode());
1699  }
1700 
1701  // fold (add x, 0) -> x
1702  if (isNullConstant(N1))
1703  return N0;
1704 
1705  // fold ((c1-A)+c2) -> (c1+c2)-A
1706  if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
1707  if (N0.getOpcode() == ISD::SUB)
1708  if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
1709  return DAG.getNode(ISD::SUB, DL, VT,
1710  DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
1711  N0.getOperand(1));
1712  }
1713  }
1714 
1715  // reassociate add
1716  if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
1717  return RADD;
1718 
1719  // fold ((0-A) + B) -> B-A
1720  if (N0.getOpcode() == ISD::SUB &&
1721  isNullConstantOrNullSplatConstant(N0.getOperand(0)))
1722  return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
1723 
1724  // fold (A + (0-B)) -> A-B
1725  if (N1.getOpcode() == ISD::SUB &&
1726  isNullConstantOrNullSplatConstant(N1.getOperand(0)))
1727  return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
1728 
1729  // fold (A+(B-A)) -> B
1730  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1731  return N1.getOperand(0);
1732 
1733  // fold ((B-A)+A) -> B
1734  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1735  return N0.getOperand(0);
1736 
1737  // fold (A+(B-(A+C))) to (B-C)
1738  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1739  N0 == N1.getOperand(1).getOperand(0))
1740  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1741  N1.getOperand(1).getOperand(1));
1742 
1743  // fold (A+(B-(C+A))) to (B-C)
1744  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1745  N0 == N1.getOperand(1).getOperand(1))
1746  return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
1747  N1.getOperand(1).getOperand(0));
1748 
1749  // fold (A+((B-A)+or-C)) to (B+or-C)
1750  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1751  N1.getOperand(0).getOpcode() == ISD::SUB &&
1752  N0 == N1.getOperand(0).getOperand(1))
1753  return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
1754  N1.getOperand(1));
1755 
1756  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1757  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1758  SDValue N00 = N0.getOperand(0);
1759  SDValue N01 = N0.getOperand(1);
1760  SDValue N10 = N1.getOperand(0);
1761  SDValue N11 = N1.getOperand(1);
1762 
1763  if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
1764  return DAG.getNode(ISD::SUB, DL, VT,
1765  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1766  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1767  }
1768 
1769  if (SimplifyDemandedBits(SDValue(N, 0)))
1770  return SDValue(N, 0);
1771 
1772  // fold (a+b) -> (a|b) iff a and b share no bits.
1773  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
1774  VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
1775  return DAG.getNode(ISD::OR, DL, VT, N0, N1);
1776 
1777  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1778  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
1779  isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
1780  return DAG.getNode(ISD::SUB, DL, VT, N0,
1781  DAG.getNode(ISD::SHL, DL, VT,
1782  N1.getOperand(0).getOperand(1),
1783  N1.getOperand(1)));
1784  if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
1785  isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
1786  return DAG.getNode(ISD::SUB, DL, VT, N1,
1787  DAG.getNode(ISD::SHL, DL, VT,
1788  N0.getOperand(0).getOperand(1),
1789  N0.getOperand(1)));
1790 
1791  if (N1.getOpcode() == ISD::AND) {
1792  SDValue AndOp0 = N1.getOperand(0);
1793  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1794  unsigned DestBits = VT.getScalarSizeInBits();
1795 
1796  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1797  // and similar xforms where the inner op is either ~0 or 0.
1798  if (NumSignBits == DestBits &&
1799  isOneConstantOrOneSplatConstant(N1->getOperand(1)))
1800  return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
1801  }
1802 
1803  // add (sext i1), X -> sub X, (zext i1)
1804  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1805  N0.getOperand(0).getValueType() == MVT::i1 &&
1806  !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1807  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1808  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1809  }
1810 
1811  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
1812  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1813  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1814  if (TN->getVT() == MVT::i1) {
1815  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1816  DAG.getConstant(1, DL, VT));
1817  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
1818  }
1819  }
1820 
1821  return SDValue();
1822 }
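
// The "(a+b) -> (a|b) iff a and b share no bits" fold above rests on the
// fact that addition without carries is bitwise OR. A runnable check of
// that identity on plain unsigned values (illustration only; the helper
// name is hypothetical and unused by the pass):
static bool sketchAddIsOrWhenDisjoint(unsigned A, unsigned B) {
  if ((A & B) != 0)
    return true; // shared bits: precondition fails, the fold would not fire
  // No column has two set bits, so no carries propagate and A + B == A | B.
  return (A + B) == (A | B);
}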
1823 
1824 SDValue DAGCombiner::visitADDC(SDNode *N) {
1825  SDValue N0 = N->getOperand(0);
1826  SDValue N1 = N->getOperand(1);
1827  EVT VT = N0.getValueType();
1828 
1829  // If the flag result is dead, turn this into an ADD.
1830  if (!N->hasAnyUseOfValue(1))
1831  return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
1832  DAG.getNode(ISD::CARRY_FALSE,
1833  SDLoc(N), MVT::Glue));
1834 
1835  // canonicalize constant to RHS.
1836  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1837  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1838  if (N0C && !N1C)
1839  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
1840 
1841  // fold (addc x, 0) -> x + no carry out
1842  if (isNullConstant(N1))
1843  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1844  SDLoc(N), MVT::Glue));
1845 
1846  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1847  APInt LHSZero, LHSOne;
1848  APInt RHSZero, RHSOne;
1849  DAG.computeKnownBits(N0, LHSZero, LHSOne);
1850 
1851  if (LHSZero.getBoolValue()) {
1852  DAG.computeKnownBits(N1, RHSZero, RHSOne);
1853 
1854  // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1855  // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1856  if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1857  return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
1858  DAG.getNode(ISD::CARRY_FALSE,
1859  SDLoc(N), MVT::Glue));
1860  }
1861 
1862  return SDValue();
1863 }
1864 
1865 SDValue DAGCombiner::visitADDE(SDNode *N) {
1866  SDValue N0 = N->getOperand(0);
1867  SDValue N1 = N->getOperand(1);
1868  SDValue CarryIn = N->getOperand(2);
1869 
1870  // canonicalize constant to RHS
1871  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1872  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1873  if (N0C && !N1C)
1874  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1875  N1, N0, CarryIn);
1876 
1877  // fold (adde x, y, false) -> (addc x, y)
1878  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1879  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1880 
1881  return SDValue();
1882 }
1883 
1884 // Since it may not be valid to emit a fold to zero for vector initializers,
1885 // check whether we can before folding.
1886 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
1887  SelectionDAG &DAG, bool LegalOperations,
1888  bool LegalTypes) {
1889  if (!VT.isVector())
1890  return DAG.getConstant(0, DL, VT);
1891  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1892  return DAG.getConstant(0, DL, VT);
1893  return SDValue();
1894 }
1895 
1896 SDValue DAGCombiner::visitSUB(SDNode *N) {
1897  SDValue N0 = N->getOperand(0);
1898  SDValue N1 = N->getOperand(1);
1899  EVT VT = N0.getValueType();
1900  SDLoc DL(N);
1901 
1902  // fold vector ops
1903  if (VT.isVector()) {
1904  if (SDValue FoldedVOp = SimplifyVBinOp(N))
1905  return FoldedVOp;
1906 
1907  // fold (sub x, 0) -> x, vector edition
1908  if (ISD::isBuildVectorAllZeros(N1.getNode()))
1909  return N0;
1910  }
1911 
1912  // fold (sub x, x) -> 0
1913  // FIXME: Refactor this and xor and other similar operations together.
1914  if (N0 == N1)
1915  return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
1916  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
1917  DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1918  // fold (sub c1, c2) -> c1-c2
1919  return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
1920  N1.getNode());
1921  }
1922 
1923  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
1924 
1925  // fold (sub x, c) -> (add x, -c)
1926  if (N1C) {
1927  return DAG.getNode(ISD::ADD, DL, VT, N0,
1928  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
1929  }
1930 
1931  if (isNullConstantOrNullSplatConstant(N0)) {
1932  unsigned BitWidth = VT.getScalarSizeInBits();
1933  // Right-shifting everything out but the sign bit followed by negation is
1934  // the same as flipping arithmetic/logical shift type without the negation:
1935  // -(X >>u 31) -> (X >>s 31)
1936  // -(X >>s 31) -> (X >>u 31)
1937  if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
1938  ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
1939  if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
1940  auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
1941  if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
1942  return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
1943  }
1944  }
1945 
1946  // 0 - X --> 0 if the sub is NUW.
1947  if (N->getFlags()->hasNoUnsignedWrap())
1948  return N0;
1949 
1950  if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
1951  // N1 is either 0 or the minimum signed value. If the sub is NSW, then
1952  // N1 must be 0 because negating the minimum signed value is undefined.
1953  if (N->getFlags()->hasNoSignedWrap())
1954  return N0;
1955 
1956  // 0 - X --> X if X is 0 or the minimum signed value.
1957  return N1;
1958  }
1959  }
1960 
1961  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
1962  if (isAllOnesConstantOrAllOnesSplatConstant(N0))
1963  return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
1964 
1965  // fold A-(A-B) -> B
1966  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
1967  return N1.getOperand(1);
1968 
1969  // fold (A+B)-A -> B
1970  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
1971  return N0.getOperand(1);
1972 
1973  // fold (A+B)-B -> A
1974  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
1975  return N0.getOperand(0);
1976 
1977  // fold C2-(A+C1) -> (C2-C1)-A
1978  if (N1.getOpcode() == ISD::ADD) {
1979  SDValue N11 = N1.getOperand(1);
1980  if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
1981  isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
1982  SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
1983  return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
1984  }
1985  }
1986 
1987  // fold ((A+(B+or-C))-B) -> A+or-C
1988  if (N0.getOpcode() == ISD::ADD &&
1989  (N0.getOperand(1).getOpcode() == ISD::SUB ||
1990  N0.getOperand(1).getOpcode() == ISD::ADD) &&
1991  N0.getOperand(1).getOperand(0) == N1)
1992  return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
1993  N0.getOperand(1).getOperand(1));
1994 
1995  // fold ((A+(C+B))-B) -> A+C
1996  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
1997  N0.getOperand(1).getOperand(1) == N1)
1998  return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
1999  N0.getOperand(1).getOperand(0));
2000 
2001  // fold ((A-(B-C))-C) -> A-B
2002  if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2003  N0.getOperand(1).getOperand(1) == N1)
2004  return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2005  N0.getOperand(1).getOperand(0));
2006 
2007  // If either operand of a sub is undef, the result is undef
2008  if (N0.isUndef())
2009  return N0;
2010  if (N1.isUndef())
2011  return N1;
2012 
2013  // If the relocation model supports it, consider symbol offsets.
2014  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2015  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2016  // fold (sub Sym, c) -> Sym-c
2017  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2018  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2019  GA->getOffset() -
2020  (uint64_t)N1C->getSExtValue());
2021  // fold (sub Sym+c1, Sym+c2) -> c1-c2
2022  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2023  if (GA->getGlobal() == GB->getGlobal())
2024  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2025  DL, VT);
2026  }
2027 
2028  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2029  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2030  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2031  if (TN->getVT() == MVT::i1) {
2032  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2033  DAG.getConstant(1, DL, VT));
2034  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2035  }
2036  }
2037 
2038  return SDValue();
2039 }
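
// The -(X >>u 31) <-> (X >>s 31) rewrite in visitSUB above, spelled out for
// 32-bit values (illustration only; hypothetical helper). Shifting the sign
// bit down logically yields 0 or 1, arithmetically 0 or -1, so each result
// is the negation of the other:
static bool sketchNegatedSignShiftsAgree(int X) {
  int Logical = (int)((unsigned)X >> 31); // 0 or 1
  int Arith = X >> 31;                    // 0 or -1 on the arithmetic-shift
                                          // hosts LLVM runs on
  return -Logical == Arith && -Arith == Logical;
}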
2040 
2041 SDValue DAGCombiner::visitSUBC(SDNode *N) {
2042  SDValue N0 = N->getOperand(0);
2043  SDValue N1 = N->getOperand(1);
2044  EVT VT = N0.getValueType();
2045  SDLoc DL(N);
2046 
2047  // If the flag result is dead, turn this into an SUB.
2048  if (!N->hasAnyUseOfValue(1))
2049  return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
2050  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2051 
2052  // fold (subc x, x) -> 0 + no borrow
2053  if (N0 == N1)
2054  return CombineTo(N, DAG.getConstant(0, DL, VT),
2055  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2056 
2057  // fold (subc x, 0) -> x + no borrow
2058  if (isNullConstant(N1))
2059  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2060 
2061  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
2062  if (isAllOnesConstant(N0))
2063  return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
2064  DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2065 
2066  return SDValue();
2067 }
2068 
2069 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2070  SDValue N0 = N->getOperand(0);
2071  SDValue N1 = N->getOperand(1);
2072  SDValue CarryIn = N->getOperand(2);
2073 
2074  // fold (sube x, y, false) -> (subc x, y)
2075  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2076  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2077 
2078  return SDValue();
2079 }
2080 
2081 SDValue DAGCombiner::visitMUL(SDNode *N) {
2082  SDValue N0 = N->getOperand(0);
2083  SDValue N1 = N->getOperand(1);
2084  EVT VT = N0.getValueType();
2085 
2086  // fold (mul x, undef) -> 0
2087  if (N0.isUndef() || N1.isUndef())
2088  return DAG.getConstant(0, SDLoc(N), VT);
2089 
2090  bool N0IsConst = false;
2091  bool N1IsConst = false;
2092  bool N1IsOpaqueConst = false;
2093  bool N0IsOpaqueConst = false;
2094  APInt ConstValue0, ConstValue1;
2095  // fold vector ops
2096  if (VT.isVector()) {
2097  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2098  return FoldedVOp;
2099 
2100  N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
2101  N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
2102  } else {
2103  N0IsConst = isa<ConstantSDNode>(N0);
2104  if (N0IsConst) {
2105  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2106  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2107  }
2108  N1IsConst = isa<ConstantSDNode>(N1);
2109  if (N1IsConst) {
2110  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2111  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2112  }
2113  }
2114 
2115  // fold (mul c1, c2) -> c1*c2
2116  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2117  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2118  N0.getNode(), N1.getNode());
2119 
2120  // canonicalize constant to RHS (vector doesn't have to splat)
2121  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2122  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2123  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2124  // fold (mul x, 0) -> 0
2125  if (N1IsConst && ConstValue1 == 0)
2126  return N1;
2127  // We require a splat of the entire scalar bit width for non-contiguous
2128  // bit patterns.
2129  bool IsFullSplat =
2130  ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
2131  // fold (mul x, 1) -> x
2132  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
2133  return N0;
2134  // fold (mul x, -1) -> 0-x
2135  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2136  SDLoc DL(N);
2137  return DAG.getNode(ISD::SUB, DL, VT,
2138  DAG.getConstant(0, DL, VT), N0);
2139  }
2140  // fold (mul x, (1 << c)) -> x << c
2141  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2142  IsFullSplat) {
2143  SDLoc DL(N);
2144  return DAG.getNode(ISD::SHL, DL, VT, N0,
2145  DAG.getConstant(ConstValue1.logBase2(), DL,
2146  getShiftAmountTy(N0.getValueType())));
2147  }
2148  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2149  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2150  IsFullSplat) {
2151  unsigned Log2Val = (-ConstValue1).logBase2();
2152  SDLoc DL(N);
2153  // FIXME: If the input is something that is easily negated (e.g. a
2154  // single-use add), we should put the negate there.
2155  return DAG.getNode(ISD::SUB, DL, VT,
2156  DAG.getConstant(0, DL, VT),
2157  DAG.getNode(ISD::SHL, DL, VT, N0,
2158  DAG.getConstant(Log2Val, DL,
2159  getShiftAmountTy(N0.getValueType()))));
2160  }
2161 
2162  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2163  if (N0.getOpcode() == ISD::SHL &&
2164  isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2165  isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2166  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
2167  if (isConstantOrConstantVector(C3))
2168  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
2169  }
2170 
2171  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2172  // use.
2173  {
2174  SDValue Sh(nullptr, 0), Y(nullptr, 0);
2175 
2176  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2177  if (N0.getOpcode() == ISD::SHL &&
2178  isConstantOrConstantVector(N0.getOperand(1)) &&
2179  N0.getNode()->hasOneUse()) {
2180  Sh = N0; Y = N1;
2181  } else if (N1.getOpcode() == ISD::SHL &&
2182  isConstantOrConstantVector(N1.getOperand(1)) &&
2183  N1.getNode()->hasOneUse()) {
2184  Sh = N1; Y = N0;
2185  }
2186 
2187  if (Sh.getNode()) {
2188  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
2189  return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
2190  }
2191  }
2192 
2193  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2194  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
2195  N0.getOpcode() == ISD::ADD &&
2196  DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
2197  isMulAddWithConstProfitable(N, N0, N1))
2198  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2199  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2200  N0.getOperand(0), N1),
2201  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2202  N0.getOperand(1), N1));
2203 
2204  // reassociate mul
2205  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2206  return RMUL;
2207 
2208  return SDValue();
2209 }
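
// The multiply-by-power-of-two strength reductions above, checked in plain
// unsigned 32-bit arithmetic (illustration only; hypothetical helper). For
// C = 1 << Shift, x * C == x << Shift and x * -C == -(x << Shift), both
// modulo 2^32:
static bool sketchMulByPow2(unsigned X, unsigned Shift) {
  if (Shift >= 32)
    return true; // out of range for the sketch; logBase2 is always in range
  unsigned C = 1u << Shift;
  bool PosOK = X * C == (X << Shift);
  bool NegOK = X * (0u - C) == (0u - (X << Shift));
  return PosOK && NegOK;
}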
2210 
2211 /// Return true if a divmod libcall is available for this node's type.
2212 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
2213  const TargetLowering &TLI) {
2214  RTLIB::Libcall LC;
2215  EVT NodeType = Node->getValueType(0);
2216  if (!NodeType.isSimple())
2217  return false;
2218  switch (NodeType.getSimpleVT().SimpleTy) {
2219  default: return false; // No libcall for vector types.
2220  case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
2221  case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
2222  case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
2223  case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
2224  case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
2225  }
2226 
2227  return TLI.getLibcallName(LC) != nullptr;
2228 }
2229 
2230 /// Issue divrem if both quotient and remainder are needed.
2231 SDValue DAGCombiner::useDivRem(SDNode *Node) {
2232  if (Node->use_empty())
2233  return SDValue(); // This is a dead node, leave it alone.
2234 
2235  unsigned Opcode = Node->getOpcode();
2236  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
2237  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
2238 
2239  // DivMod lib calls can still work on non-legal types if using lib-calls.
2240  EVT VT = Node->getValueType(0);
2241  if (VT.isVector() || !VT.isInteger())
2242  return SDValue();
2243 
2244  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
2245  return SDValue();
2246 
2247  // If DIVREM is going to get expanded into a libcall,
2248  // but there is no libcall available, then don't combine.
2249  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
2250  !isDivRemLibcallAvailable(Node, isSigned, TLI))
2251  return SDValue();
2252 
2253  // If div is legal, it's better to do the normal expansion
2254  unsigned OtherOpcode = 0;
2255  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
2256  OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
2257  if (TLI.isOperationLegalOrCustom(Opcode, VT))
2258  return SDValue();
2259  } else {
2260  OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2261  if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
2262  return SDValue();
2263  }
2264 
2265  SDValue Op0 = Node->getOperand(0);
2266  SDValue Op1 = Node->getOperand(1);
2267  SDValue combined;
2268  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
2269  UE = Op0.getNode()->use_end(); UI != UE;) {
2270  SDNode *User = *UI++;
2271  if (User == Node || User->use_empty())
2272  continue;
2273  // Convert the other matching node(s), too;
2274  // otherwise, the DIVREM may get target-legalized into something
2275  // target-specific that we won't be able to recognize.
2276  unsigned UserOpc = User->getOpcode();
2277  if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
2278  User->getOperand(0) == Op0 &&
2279  User->getOperand(1) == Op1) {
2280  if (!combined) {
2281  if (UserOpc == OtherOpcode) {
2282  SDVTList VTs = DAG.getVTList(VT, VT);
2283  combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
2284  } else if (UserOpc == DivRemOpc) {
2285  combined = SDValue(User, 0);
2286  } else {
2287  assert(UserOpc == Opcode);
2288  continue;
2289  }
2290  }
2291  if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
2292  CombineTo(User, combined);
2293  else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
2294  CombineTo(User, combined.getValue(1));
2295  }
2296  }
2297  return combined;
2298 }
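
// What useDivRem buys in scalar terms: a div and a rem over the same
// operands become one division plus a multiply-subtract rather than two
// divisions (illustration only; hypothetical helper):
static void sketchDivRemPair(int A, int B, int &Quot, int &Rem) {
  Quot = A / B;       // the single division (B assumed nonzero)
  Rem = A - Quot * B; // equals A % B, recovered without a second divide
}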
2299 
2300 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2301  SDValue N0 = N->getOperand(0);
2302  SDValue N1 = N->getOperand(1);
2303  EVT VT = N->getValueType(0);
2304 
2305  // fold vector ops
2306  if (VT.isVector())
2307  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2308  return FoldedVOp;
2309 
2310  SDLoc DL(N);
2311 
2312  // fold (sdiv c1, c2) -> c1/c2
2313  ConstantSDNode *N0C = isConstOrConstSplat(N0);
2314  ConstantSDNode *N1C = isConstOrConstSplat(N1);
2315  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2316  return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
2317  // fold (sdiv X, 1) -> X
2318  if (N1C && N1C->isOne())
2319  return N0;
2320  // fold (sdiv X, -1) -> 0-X
2321  if (N1C && N1C->isAllOnesValue())
2322  return DAG.getNode(ISD::SUB, DL, VT,
2323  DAG.getConstant(0, DL, VT), N0);
2324 
2325  // If we know the sign bits of both operands are zero, strength reduce to a
2326  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
2327  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2328  return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
2329 
2330  // fold (sdiv X, pow2) -> simple ops after legalize
2331  // FIXME: We check for the exact bit here because the generic lowering gives
2332  // better results in that case. The target-specific lowering should learn how
2333  // to handle exact sdivs efficiently.
2334  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2335  !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
2336  (N1C->getAPIntValue().isPowerOf2() ||
2337  (-N1C->getAPIntValue()).isPowerOf2())) {
2338  // Target-specific implementation of sdiv x, pow2.
2339  if (SDValue Res = BuildSDIVPow2(N))
2340  return Res;
2341 
2342  unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2343 
2344  // Splat the sign bit into the register
2345  SDValue SGN =
2346  DAG.getNode(ISD::SRA, DL, VT, N0,
2347  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2348  getShiftAmountTy(N0.getValueType())));
2349  AddToWorklist(SGN.getNode());
2350 
2351  // Add (N0 < 0) ? abs2 - 1 : 0;
2352  SDValue SRL =
2353  DAG.getNode(ISD::SRL, DL, VT, SGN,
2354  DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2355  getShiftAmountTy(SGN.getValueType())));
2356  SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2357  AddToWorklist(SRL.getNode());
2358  AddToWorklist(ADD.getNode()); // Divide by pow2
2359  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2360  DAG.getConstant(lg2, DL,
2361  getShiftAmountTy(ADD.getValueType())));
2362 
2363  // If we're dividing by a positive value, we're done. Otherwise, we must
2364  // negate the result.
2365  if (N1C->getAPIntValue().isNonNegative())
2366  return SRA;
2367 
2368  AddToWorklist(SRA.getNode());
2369  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2370  }
2371 
2372  // If integer divide is expensive and we satisfy the requirements, emit an
2373  // alternate sequence. Targets may check function attributes for size/speed
2374  // trade-offs.
2375  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2376  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2377  if (SDValue Op = BuildSDIV(N))
2378  return Op;
2379 
2380  // sdiv, srem -> sdivrem
2381  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2382  // true. Otherwise, we break the simplification logic in visitREM().
2383  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2384  if (SDValue DivRem = useDivRem(N))
2385  return DivRem;
2386 
2387  // undef / X -> 0
2388  if (N0.isUndef())
2389  return DAG.getConstant(0, DL, VT);
2390  // X / undef -> undef
2391  if (N1.isUndef())
2392  return N1;
2393 
2394  return SDValue();
2395 }
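
// The SRA/SRL/ADD/SRA sequence built above for sdiv by a power of two,
// written out for 32-bit X and 0 < Lg2 < 32 (illustration only;
// hypothetical helper). The bias rounds negative dividends toward zero,
// matching signed division semantics:
static int sketchSDivByPow2(int X, unsigned Lg2) {
  int Sgn = X >> 31;                           // splat the sign bit: 0 or -1
  unsigned Bias = (unsigned)Sgn >> (32 - Lg2); // (1 << Lg2) - 1 if X < 0
  int Add = X + (int)Bias;                     // bias toward zero
  return Add >> Lg2;                           // arithmetic shift divides
}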
2396 
2397 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2398  SDValue N0 = N->getOperand(0);
2399  SDValue N1 = N->getOperand(1);
2400  EVT VT = N->getValueType(0);
2401 
2402  // fold vector ops
2403  if (VT.isVector())
2404  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2405  return FoldedVOp;
2406 
2407  SDLoc DL(N);
2408 
2409  // fold (udiv c1, c2) -> c1/c2
2410  ConstantSDNode *N0C = isConstOrConstSplat(N0);
2411  ConstantSDNode *N1C = isConstOrConstSplat(N1);
2412  if (N0C && N1C)
2413  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
2414  N0C, N1C))
2415  return Folded;
2416 
2417  // fold (udiv x, (1 << c)) -> x >>u c
2418  if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
2419  DAG.isKnownToBeAPowerOfTwo(N1)) {
2420  SDValue LogBase2 = BuildLogBase2(N1, DL);
2421  AddToWorklist(LogBase2.getNode());
2422 
2423  EVT ShiftVT = getShiftAmountTy(N0.getValueType());
2424  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
2425  AddToWorklist(Trunc.getNode());
2426  return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
2427  }
2428 
2429  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2430  if (N1.getOpcode() == ISD::SHL) {
2431  SDValue N10 = N1.getOperand(0);
2432  if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
2433  DAG.isKnownToBeAPowerOfTwo(N10)) {
2434  SDValue LogBase2 = BuildLogBase2(N10, DL);
2435  AddToWorklist(LogBase2.getNode());
2436 
2437  EVT ADDVT = N1.getOperand(1).getValueType();
2438  SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
2439  AddToWorklist(Trunc.getNode());
2440  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
2441  AddToWorklist(Add.getNode());
2442  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2443  }
2444  }
2445 
2446  // fold (udiv x, c) -> alternate
2447  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2448  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
2449  if (SDValue Op = BuildUDIV(N))
2450  return Op;
2451 
2452  // udiv, urem -> udivrem
2453  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
2454  // true. Otherwise, we break the simplification logic in visitREM().
2455  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
2456  if (SDValue DivRem = useDivRem(N))
2457  return DivRem;
2458 
2459  // undef / X -> 0
2460  if (N0.isUndef())
2461  return DAG.getConstant(0, DL, VT);
2462  // X / undef -> undef
2463  if (N1.isUndef())
2464  return N1;
2465 
2466  return SDValue();
2467 }
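
// Both udiv folds above in one runnable check (illustration only;
// hypothetical helper): dividing by 1 << C is a logical right shift, and
// dividing by (1 << C) << Y shifts by C + Y:
static bool sketchUDivByPow2(unsigned X, unsigned C, unsigned Y) {
  if (C + Y >= 32)
    return true; // out of range for the 32-bit sketch
  unsigned Divisor = (1u << C) << Y;
  return X / Divisor == (X >> (C + Y));
}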
2468 
2469 // Handles both ISD::SREM and ISD::UREM.
2470 SDValue DAGCombiner::visitREM(SDNode *N) {
2471  unsigned Opcode = N->getOpcode();
2472  SDValue N0 = N->getOperand(0);
2473  SDValue N1 = N->getOperand(1);
2474  EVT VT = N->getValueType(0);
2475  bool isSigned = (Opcode == ISD::SREM);
2476  SDLoc DL(N);
2477 
2478  // fold (rem c1, c2) -> c1%c2
2479  ConstantSDNode *N0C = isConstOrConstSplat(N0);
2480  ConstantSDNode *N1C = isConstOrConstSplat(N1);
2481  if (N0C && N1C)
2482  if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
2483  return Folded;
2484 
2485  if (isSigned) {
2486  // If we know the sign bits of both operands are zero, strength reduce to a
2487  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2488  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2489  return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
2490  } else {
2491  // fold (urem x, pow2) -> (and x, pow2-1)
2492  if (DAG.isKnownToBeAPowerOfTwo(N1)) {
2493  APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
2494  SDValue Add =
2495  DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
2496  AddToWorklist(Add.getNode());
2497  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2498  }
2499  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2500  if (N1.getOpcode() == ISD::SHL &&
2501  DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
2502  APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
2503  SDValue Add =
2504  DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
2505  AddToWorklist(Add.getNode());
2506  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2507  }
2508  }
2509 
2510  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
2511 
2512  // If X/C can be simplified by the division-by-constant logic, lower
2513  // X%C to the equivalent of X-X/C*C.
2514  // To avoid mangling nodes, this simplification requires that the combine()
2515  // call for the speculative DIV must not cause a DIVREM conversion. We guard
2516  // against this by skipping the simplification if isIntDivCheap(). When
2517  // div is not cheap, combine will not return a DIVREM. Regardless,
2518  // checking cheapness here makes sense since the simplification results in
2519  // fatter code.
2520  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
2521  unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
2522  SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
2523  AddToWorklist(Div.getNode());
2524  SDValue OptimizedDiv = combine(Div.getNode());
2525  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2526  assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
2527  (OptimizedDiv.getOpcode() != ISD::SDIVREM));
2528  SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
2529  SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
2530  AddToWorklist(Mul.getNode());
2531  return Sub;
2532  }
2533  }
2534 
2535  // sdiv/udiv, srem/urem -> sdivrem/udivrem
2536  if (SDValue DivRem = useDivRem(N))
2537  return DivRem.getValue(1);
2538 
2539  // undef % X -> 0
2540  if (N0.isUndef())
2541  return DAG.getConstant(0, DL, VT);
2542  // X % undef -> undef
2543  if (N1.isUndef())
2544  return N1;
2545 
2546  return SDValue();
2547 }
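
// The two rem simplifications above on 32-bit unsigned values (illustration
// only; hypothetical helper): a power-of-two modulus is a mask, and in
// general x % c == x - (x / c) * c, which is the X-X/C*C expansion used
// when the speculative division simplifies:
static bool sketchRemIdentities(unsigned X, unsigned Shift) {
  if (Shift >= 32)
    return true; // out of range for the sketch
  unsigned Pow2 = 1u << Shift;
  bool MaskOK = X % Pow2 == (X & (Pow2 - 1));
  bool ExpandOK = X % Pow2 == X - (X / Pow2) * Pow2;
  return MaskOK && ExpandOK;
}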
2548 
2549 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2550  SDValue N0 = N->getOperand(0);
2551  SDValue N1 = N->getOperand(1);
2552  EVT VT = N->getValueType(0);
2553  SDLoc DL(N);
2554 
2555  // fold (mulhs x, 0) -> 0
2556  if (isNullConstant(N1))
2557  return N1;
2558  // fold (mulhs x, 1) -> (sra x, size(x)-1)
2559  if (isOneConstant(N1)) {
2560  SDLoc DL(N);
2561  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2562  DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
2563  getShiftAmountTy(N0.getValueType())));
2564  }
2565  // fold (mulhs x, undef) -> 0
2566  if (N0.isUndef() || N1.isUndef())
2567  return DAG.getConstant(0, SDLoc(N), VT);
2568 
2569  // If the type twice as wide is legal, transform the mulhs to a wider multiply
2570  // plus a shift.
2571  if (VT.isSimple() && !VT.isVector()) {
2572  MVT Simple = VT.getSimpleVT();
2573  unsigned SimpleSize = Simple.getSizeInBits();
2574  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2575  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2576  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2577  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2578  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2579  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2580  DAG.getConstant(SimpleSize, DL,
2581  getShiftAmountTy(N1.getValueType())));
2582  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2583  }
2584  }
2585 
2586  return SDValue();
2587 }
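
// The widen-multiply-then-shift form of MULHS above, for i32 on a host with
// 64-bit arithmetic (illustration only; hypothetical helper). Sign-extend
// both operands, multiply in the wide type, and the high half is the top
// 32 bits:
static int sketchMulHiSigned(int A, int B) {
  long long Wide = (long long)A * (long long)B; // sign-extended 64-bit product
  return (int)(Wide >> 32);                     // truncate to the high half
}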
2588 
2589 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2590  SDValue N0 = N->getOperand(0);
2591  SDValue N1 = N->getOperand(1);
2592  EVT VT = N->getValueType(0);
2593  SDLoc DL(N);
2594 
2595  // fold (mulhu x, 0) -> 0
2596  if (isNullConstant(N1))
2597  return N1;
2598  // fold (mulhu x, 1) -> 0
2599  if (isOneConstant(N1))
2600  return DAG.getConstant(0, DL, N0.getValueType());
2601  // fold (mulhu x, undef) -> 0
2602  if (N0.isUndef() || N1.isUndef())
2603  return DAG.getConstant(0, DL, VT);
2604 
2605  // If the type twice as wide is legal, transform the mulhu to a wider multiply
2606  // plus a shift.
2607  if (VT.isSimple() && !VT.isVector()) {
2608  MVT Simple = VT.getSimpleVT();
2609  unsigned SimpleSize = Simple.getSizeInBits();
2610  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2611  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2612  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2613  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2614  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2615  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2616  DAG.getConstant(SimpleSize, DL,
2617  getShiftAmountTy(N1.getValueType())));
2618  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2619  }
2620  }
2621 
2622  return SDValue();
2623 }
2624 
2625 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
2626 /// give the opcodes for the two computations that are being performed. Return
2627 /// the simplified value, or a null SDValue if no simplification was made.
2628 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2629  unsigned HiOp) {
2630  // If the high half is not needed, just compute the low half.
2631  bool HiExists = N->hasAnyUseOfValue(1);
2632  if (!HiExists &&
2633  (!LegalOperations ||
2634  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
2635  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2636  return CombineTo(N, Res, Res);
2637  }
2638 
2639  // If the low half is not needed, just compute the high half.
2640  bool LoExists = N->hasAnyUseOfValue(0);
2641  if (!LoExists &&
2642  (!LegalOperations ||
2643  TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2644  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2645  return CombineTo(N, Res, Res);
2646  }
2647 
2648  // If both halves are used, return as it is.
2649  if (LoExists && HiExists)
2650  return SDValue();
2651 
2652  // If the two computed results can be simplified separately, separate them.
2653  if (LoExists) {
2654  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2655  AddToWorklist(Lo.getNode());
2656  SDValue LoOpt = combine(Lo.getNode());
2657  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2658  (!LegalOperations ||
2659  TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2660  return CombineTo(N, LoOpt, LoOpt);
2661  }
2662 
2663  if (HiExists) {
2664  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2665  AddToWorklist(Hi.getNode());
2666  SDValue HiOpt = combine(Hi.getNode());
2667  if (HiOpt.getNode() && HiOpt != Hi &&
2668  (!LegalOperations ||
2669  TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2670  return CombineTo(N, HiOpt, HiOpt);
2671  }
2672 
2673  return SDValue();
2674 }
2675 
2676 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2677  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
2678  return Res;
2679 
2680  EVT VT = N->getValueType(0);
2681  SDLoc DL(N);
2682 
2683  // If the type twice as wide is legal, transform this to a wider multiply
2684  // plus a shift.
2685  if (VT.isSimple() && !VT.isVector()) {
2686  MVT Simple = VT.getSimpleVT();
2687  unsigned SimpleSize = Simple.getSizeInBits();
2688  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2689  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2690  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2691  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2692  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2693  // Compute the high part (the second result).
2694  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2695  DAG.getConstant(SimpleSize, DL,
2696  getShiftAmountTy(Lo.getValueType())));
2697  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2698  // Compute the low part (the first result).
2699  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2700  return CombineTo(N, Lo, Hi);
2701  }
2702  }
2703 
2704  return SDValue();
2705 }
2706 
2707 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2708  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
2709  return Res;
2710 
2711  EVT VT = N->getValueType(0);
2712  SDLoc DL(N);
2713 
2714  // If the type twice as wide is legal, transform this to a wider multiply
2715  // plus a shift.
2716  if (VT.isSimple() && !VT.isVector()) {
2717  MVT Simple = VT.getSimpleVT();
2718  unsigned SimpleSize = Simple.getSizeInBits();
2719  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2720  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2721  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2722  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2723  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2724  // Compute the high part (the second result).
2725  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2726  DAG.getConstant(SimpleSize, DL,
2727  getShiftAmountTy(Lo.getValueType())));
2728  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2729  // Compute the low part (the first result).
2730  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2731  return CombineTo(N, Lo, Hi);
2732  }
2733  }
2734 
2735  return SDValue();
2736 }
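
// The UMUL_LOHI expansion above for i32: one zero-extended wide multiply
// yields both halves (illustration only; hypothetical helper):
static void sketchUMulLoHi(unsigned A, unsigned B, unsigned &Lo, unsigned &Hi) {
  unsigned long long Wide = (unsigned long long)A * B; // zero-extended product
  Lo = (unsigned)Wide;         // low half: plain truncation
  Hi = (unsigned)(Wide >> 32); // high half: logical shift, then truncate
}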
2737 
2738 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2739  // (smulo x, 2) -> (saddo x, x)
2740  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2741  if (C2->getAPIntValue() == 2)
2742  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2743  N->getOperand(0), N->getOperand(0));
2744 
2745  return SDValue();
2746 }
2747 
2748 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2749  // (umulo x, 2) -> (uaddo x, x)
2750  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2751  if (C2->getAPIntValue() == 2)
2752  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2753  N->getOperand(0), N->getOperand(0));
2754 
2755  return SDValue();
2756 }
2757 
2758 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
2759  SDValue N0 = N->getOperand(0);
2760  SDValue N1 = N->getOperand(1);
2761  EVT VT = N0.getValueType();
2762 
2763  // fold vector ops
2764  if (VT.isVector())
2765  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2766  return FoldedVOp;
2767 
2768  // fold (minmax c1, c2) -> c1 op c2 (constant fold)
2769  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
2770  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2771  if (N0C && N1C)
2772  return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
2773 
2774  // canonicalize constant to RHS
2775  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2776  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2777  return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
2778 
2779  return SDValue();
2780 }
2781 
2782 /// If this is a binary operator with two operands of the same opcode, try to
2783 /// simplify it.
2784 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
2785  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2786  EVT VT = N0.getValueType();
2787  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
2788 
2789  // Bail early if none of these transforms apply.
2790  if (N0.getNumOperands() == 0) return SDValue();
2791 
2792  // For each of OP in AND/OR/XOR:
2793  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
2794  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
2795  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
2796  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
2797  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
2798  //
2799  // do not sink logical op inside of a vector extend, since it may combine
2800  // into a vsetcc.
2801  EVT Op0VT = N0.getOperand(0).getValueType();
2802  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
2803  N0.getOpcode() == ISD::SIGN_EXTEND ||
2804  N0.getOpcode() == ISD::BSWAP ||
2805  // Avoid infinite looping with PromoteIntBinOp.
2806  (N0.getOpcode() == ISD::ANY_EXTEND &&
2807  (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
2808  (N0.getOpcode() == ISD::TRUNCATE &&
2809  (!TLI.isZExtFree(VT, Op0VT) ||
2810  !TLI.isTruncateFree(Op0VT, VT)) &&
2811  TLI.isTypeLegal(Op0VT))) &&
2812  !VT.isVector() &&
2813  Op0VT == N1.getOperand(0).getValueType() &&
2814  (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
2815  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2816  N0.getOperand(0).getValueType(),
2817  N0.getOperand(0), N1.getOperand(0));
2818  AddToWorklist(ORNode.getNode());
2819  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
2820  }
2821 
2822  // For each of OP in SHL/SRL/SRA/AND...
2823  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
2824  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
2825  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
2826  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
2827  N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
2828  N0.getOperand(1) == N1.getOperand(1)) {
2829  SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
2830  N0.getOperand(0).getValueType(),
2831  N0.getOperand(0), N1.getOperand(0));
2832  AddToWorklist(ORNode.getNode());
2833  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
2834  ORNode, N0.getOperand(1));
2835  }
2836 
2837  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
2838  // Only perform this optimization up until type legalization, before
2839  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
2840  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
2841  // we don't want to undo this promotion.
2842  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
2843  // on scalars.
2844  if ((N0.getOpcode() == ISD::BITCAST ||
2845  N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
2846  Level <= AfterLegalizeTypes) {
2847  SDValue In0 = N0.getOperand(0);
2848  SDValue In1 = N1.getOperand(0);
2849  EVT In0Ty = In0.getValueType();
2850  EVT In1Ty = In1.getValueType();
2851  SDLoc DL(N);
2852  // If both incoming values are integers, and the original types are the
2853  // same.
2854  if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
2855  SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
2856  SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
2857  AddToWorklist(Op.getNode());
2858  return BC;
2859  }
2860  }
2861 
2862  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
2863  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
2864  // If both shuffles use the same mask, and both shuffle within a single
2865  // vector, then it is worthwhile to move the swizzle after the operation.
2866  // The type-legalizer generates this pattern when loading illegal
2867  // vector types from memory. In many cases this allows additional shuffle
2868  // optimizations.
2869  // There are other cases where moving the shuffle after the xor/and/or
2870  // is profitable even if shuffles don't perform a swizzle.
2871  // If both shuffles use the same mask, and both shuffles have the same first
2872  // or second operand, then it might still be profitable to move the shuffle
2873  // after the xor/and/or operation.
2874  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
2875  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
2876  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
2877 
2878  assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
2879  "Inputs to shuffles are not the same type");
2880 
2881  // Check that both shuffles use the same mask. The masks are known to be of
2882  // the same length because the result vector type is the same.
2883  // Check also that shuffles have only one use to avoid introducing extra
2884  // instructions.
2885  if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
2886  SVN0->getMask().equals(SVN1->getMask())) {
2887  SDValue ShOp = N0->getOperand(1);
2888 
2889  // Don't try to fold this node if it requires introducing a
2890  // build vector of all zeros that might be illegal at this stage.
2891  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
2892  if (!LegalTypes)
2893  ShOp = DAG.getConstant(0, SDLoc(N), VT);
2894  else
2895  ShOp = SDValue();
2896  }
2897 
2898  // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
2899  // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
2900  // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
2901  if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
2902  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
2903  N0->getOperand(0), N1->getOperand(0));
2904  AddToWorklist(NewNode.getNode());
2905  return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
2906  SVN0->getMask());
2907  }
2908 
2909  // Don't try to fold this node if it requires introducing a
2910  // build vector of all zeros that might be illegal at this stage.
2911  ShOp = N0->getOperand(0);
2912  if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
2913  if (!LegalTypes)
2914  ShOp = DAG.getConstant(0, SDLoc(N), VT);
2915  else
2916  ShOp = SDValue();
2917  }
2918 
2919  // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
2920  // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
2921  // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
2922  if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
2923  SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
2924  N0->getOperand(1), N1->getOperand(1));
2925  AddToWorklist(NewNode.getNode());
2926  return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
2927  SVN0->getMask());
2928  }
2929  }
2930  }
2931 
2932  return SDValue();
2933 }
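
// The "(OP (zext x), (zext y)) -> (zext (OP x, y))" family above, verified
// for AND/OR/XOR across an 8-bit to 32-bit zero extension (illustration
// only; hypothetical helper). Bitwise ops act per bit, so extending before
// or after the operation gives the same value:
static bool sketchLogicOpThroughZext(unsigned char X, unsigned char Y) {
  unsigned A = X, B = Y; // zero-extend first
  bool AndOK = (A & B) == (unsigned)(unsigned char)(X & Y);
  bool OrOK  = (A | B) == (unsigned)(unsigned char)(X | Y);
  bool XorOK = (A ^ B) == (unsigned)(unsigned char)(X ^ Y);
  return AndOK && OrOK && XorOK;
}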
2934 
2935 /// This contains all DAGCombine rules which reduce two values combined by
2936 /// an And operation to a single value. This makes them reusable in the context
2937 /// of visitSELECT(). Rules involving constants are not included as
2938 /// visitSELECT() already handles those cases.
2939 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
2940  SDNode *LocReference) {
2941  EVT VT = N1.getValueType();
2942 
2943  // fold (and x, undef) -> 0
2944  if (N0.isUndef() || N1.isUndef())
2945  return DAG.getConstant(0, SDLoc(LocReference), VT);
2946  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
2947  SDValue LL, LR, RL, RR, CC0, CC1;
2948  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2949  ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2950  ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2951 
2952  if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2953  LL.getValueType().isInteger()) {
2954  // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
2955  if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
2956  EVT CCVT = getSetCCResultType(LR.getValueType());
2957  if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2958  SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2959  LR.getValueType(), LL, RL);
2960  AddToWorklist(ORNode.getNode());
2961  return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2962  }
2963  }
2964  if (isAllOnesConstant(LR)) {
2965  // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
2966  if (Op1 == ISD::SETEQ) {
2967  EVT CCVT = getSetCCResultType(LR.getValueType());
2968  if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2969  SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
2970  LR.getValueType(), LL, RL);
2971  AddToWorklist(ANDNode.getNode());
2972  return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
2973  }
2974  }
2975  // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
2976  if (Op1 == ISD::SETGT) {
2977  EVT CCVT = getSetCCResultType(LR.getValueType());
2978  if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2979  SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
2980  LR.getValueType(), LL, RL);
2981  AddToWorklist(ORNode.getNode());
2982  return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
2983  }
2984  }
2985  }
2986  }
2987  // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
2988  if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
2989  Op0 == Op1 && LL.getValueType().isInteger() &&
2990  Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
2991  (isAllOnesConstant(LR) && isNullConstant(RR)))) {
2992  EVT CCVT = getSetCCResultType(LL.getValueType());
2993  if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
2994  SDLoc DL(N0);
2995  SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
2996  LL, DAG.getConstant(1, DL,
2997  LL.getValueType()));
2998  AddToWorklist(ADDNode.getNode());
2999  return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
3000  DAG.getConstant(2, DL, LL.getValueType()),
3001  ISD::SETUGE);
3002  }
3003  }
3004  // canonicalize equivalent to ll == rl
3005  if (LL == RR && LR == RL) {
3006  Op1 = ISD::getSetCCSwappedOperands(Op1);
3007  std::swap(RL, RR);
3008  }
3009  if (LL == RL && LR == RR) {
3010  bool isInteger = LL.getValueType().isInteger();
3011  ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
3012  if (Result != ISD::SETCC_INVALID &&
3013  (!LegalOperations ||
3014  (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3015  TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
3016  EVT CCVT = getSetCCResultType(LL.getValueType());
3017  if (N0.getValueType() == CCVT ||
3018  (!LegalOperations && N0.getValueType() == MVT::i1))
3019  return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3020  LL, LR, Result);
3021  }
3022  }
3023  }
3024 
3025  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
3026  VT.getSizeInBits() <= 64) {
3027  if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3028  APInt ADDC = ADDI->getAPIntValue();
3029  if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3030  // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
3031  // immediate for an add, but it becomes one once its top c2 bits are
3032  // set, transform the ADD so the immediate doesn't need to be
3033  // materialized in a register.
3034  if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
3035  APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
3036  SRLI->getZExtValue());
3037  if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
3038  ADDC |= Mask;
3039  if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
3040  SDLoc DL(N0);
3041  SDValue NewAdd =
3042  DAG.getNode(ISD::ADD, DL, VT,
3043  N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
3044  CombineTo(N0.getNode(), NewAdd);
3045  // Return N so it doesn't get rechecked!
3046  return SDValue(LocReference, 0);
3047  }
3048  }
3049  }
3050  }
3051  }
3052  }
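  // For example, with a 32-bit type and c2 == 16, the (lshr y, 16) operand
  // has its top 16 bits known zero, so the AND discards the top 16 bits of
  // the add result anyway. An unencodable c1 such as 0x0000FFFF may then be
  // widened to 0xFFFFFFFF (i.e. -1), which many targets encode directly;
  // since carries only propagate upward, the masked low bits are unchanged.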
3053 
3054  // Reduce bit extract of low half of an integer to the narrower type.
3055  // (and (srl i64:x, K), KMask) ->
3056  // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
3057  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
3058  if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
3059  if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3060  unsigned Size = VT.getSizeInBits();
3061  const APInt &AndMask = CAnd->getAPIntValue();
3062  unsigned ShiftBits = CShift->getZExtValue();
3063 
3064  // Bail out, this node will probably disappear anyway.
3065  if (ShiftBits == 0)
3066  return SDValue();
3067 
3068  unsigned MaskBits = AndMask.countTrailingOnes();
3069  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
3070 
3071  if (APIntOps::isMask(AndMask) &&
3072  // Required bits must not span the two halves of the integer and
3073  // must fit in the half size type.
3074  (ShiftBits + MaskBits <= Size / 2) &&
3075  TLI.isNarrowingProfitable(VT, HalfVT) &&
3076  TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
3077  TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
3078  TLI.isTruncateFree(VT, HalfVT) &&
3079  TLI.isZExtFree(HalfVT, VT)) {
3080  // The isNarrowingProfitable is to avoid regressions on PPC and
3081  // AArch64 which match a few 64-bit bit insert / bit extract patterns
3082  // on downstream users of this. Those patterns could probably be
3083  // extended to handle extensions mixed in.
3084 
3085  SDLoc SL(N0);
3086  assert(MaskBits <= Size);
3087 
3088  // Extracting the highest bit of the low half.
3089  EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
3090  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
3091  N0.getOperand(0));
3092 
3093  SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
3094  SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
3095  SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
3096  SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
3097  return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
3098  }
3099  }
3100  }
3101  }
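  // For example, with i64 x, K == 8 and KMask == 0xffff, the required bits
  // are bits [8, 24) of x, which lie entirely in the low half; truncating x
  // to i32, shifting and masking there, and zero-extending the result back
  // to i64 therefore produces the same value.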
3102 
3103  return SDValue();
3104 }
3105 
3106 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
3107  EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
3108  bool &NarrowLoad) {
3109  uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
3110 
3111  if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
3112  return false;
3113 
3114  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
3115  LoadedVT = LoadN->getMemoryVT();
3116 
3117  if (ExtVT == LoadedVT &&
3118  (!LegalOperations ||
3119  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
3120  // ZEXTLOAD will match without needing to change the size of the value being
3121  // loaded.
3122  NarrowLoad = false;
3123  return true;
3124  }
3125 
3126  // Do not change the width of a volatile load.
3127  if (LoadN->isVolatile())
3128  return false;
3129 
3130  // Do not generate loads of non-round integer types since these can
3131  // be expensive (and would be wrong if the type is not byte sized).
3132  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
3133  return false;
3134 
3135  if (LegalOperations &&
3136  !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
3137  return false;
3138 
3139  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
3140  return false;
3141 
3142  NarrowLoad = true;
3143  return true;
3144 }
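
A minimal standalone sketch of the mask test above (plain C++, not part of this
file; maskedLoadBits is a hypothetical helper): the AND constant qualifies only
when it is a low-bit mask, and its active-bit count selects the narrower
memory type.

#include <cassert>
#include <cstdint>

// Returns the zextload width implied by an AND constant, or 0 when the
// constant is not of the form 2^n - 1 (no narrowing rewrite applies).
static unsigned maskedLoadBits(uint64_t AndC) {
  if (AndC == 0 || (AndC & (AndC + 1)) != 0)
    return 0;                            // not a low-bit mask
  unsigned Bits = 0;
  while (Bits < 64 && (AndC >> Bits))
    ++Bits;                              // mirrors getActiveBits() on a mask
  return Bits;
}

int main() {
  assert(maskedLoadBits(255) == 8);      // (and (load x), 255) -> (zextload x, i8)
  assert(maskedLoadBits(0xFFFF) == 16);  // -> (zextload x, i16)
  assert(maskedLoadBits(0xF0) == 0);     // 0xF0 is not a low-bit mask
}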
3145 
3146 SDValue DAGCombiner::visitAND(SDNode *N) {
3147  SDValue N0 = N->getOperand(0);
3148  SDValue N1 = N->getOperand(1);
3149  EVT VT = N1.getValueType();
3150 
3151  // x & x --> x
3152  if (N0 == N1)
3153  return N0;
3154 
3155  // fold vector ops
3156  if (VT.isVector()) {
3157  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3158  return FoldedVOp;
3159 
3160  // fold (and x, 0) -> 0, vector edition
3161  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3162  // do not return N0, because undef node may exist in N0
3163  return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
3164  SDLoc(N), N0.getValueType());
3165  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3166  // do not return N1, because undef node may exist in N1
3167  return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
3168  SDLoc(N), N1.getValueType());
3169 
3170  // fold (and x, -1) -> x, vector edition
3171  if (ISD::isBuildVectorAllOnes(N0.getNode()))
3172  return N1;
3173  if (ISD::isBuildVectorAllOnes(N1.getNode()))
3174  return N0;
3175  }
3176 
3177  // fold (and c1, c2) -> c1&c2
3178  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3179  ConstantSDNode *N1C = isConstOrConstSplat(N1);
3180  if (N0C && N1C && !N1C->isOpaque())
3181  return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
3182  // canonicalize constant to RHS
3183  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3184  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3185  return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
3186  // fold (and x, -1) -> x
3187  if (isAllOnesConstant(N1))
3188  return N0;
3189  // if (and x, c) is known to be zero, return 0
3190  unsigned BitWidth = VT.getScalarSizeInBits();
3191  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3192  APInt::getAllOnesValue(BitWidth)))
3193  return DAG.getConstant(0, SDLoc(N), VT);
3194  // reassociate and
3195  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
3196  return RAND;
3197  // fold (and (or x, C), D) -> D if (C & D) == D
3198  if (N1C && N0.getOpcode() == ISD::OR)
3199  if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
3200  if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
3201  return N1;
3202  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
3203  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3204  SDValue N0Op0 = N0.getOperand(0);
3205  APInt Mask = ~N1C->getAPIntValue();
3206  Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
3207  if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
3208  SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
3209  N0.getValueType(), N0Op0);
3210 
3211  // Replace uses of the AND with uses of the Zero extend node.
3212  CombineTo(N, Zext);
3213 
3214  // We actually want to replace all uses of the any_extend with the
3215  // zero_extend, to avoid duplicating things. This will later cause this
3216  // AND to be folded.
3217  CombineTo(N0.getNode(), Zext);
3218  return SDValue(N, 0); // Return N so it doesn't get rechecked!
3219  }
3220  }
3221  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
3222  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
3223  // already be zero by virtue of the width of the base type of the load.
3224  //
3225  // the 'X' node here can either be nothing or an extract_vector_elt to catch
3226  // more cases.
3227  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
3228  N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
3229  N0.getOperand(0).getOpcode() == ISD::LOAD &&
3230  N0.getOperand(0).getResNo() == 0) ||
3231  (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
3232  LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
3233  N0 : N0.getOperand(0) );
3234 
3235  // Get the constant (if applicable) the zero'th operand is being ANDed with.
3236  // This can be a pure constant or a vector splat, in which case we treat the
3237  // vector as a scalar and use the splat value.
3238  APInt Constant = APInt::getNullValue(1);
3239  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
3240  Constant = C->getAPIntValue();
3241  } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
3242  APInt SplatValue, SplatUndef;
3243  unsigned SplatBitSize;
3244  bool HasAnyUndefs;
3245  bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
3246  SplatBitSize, HasAnyUndefs);
3247  if (IsSplat) {
3248  // Undef bits can contribute to a possible optimisation if set, so
3249  // set them.
3250  SplatValue |= SplatUndef;
3251 
3252  // The splat value may be something like "0x00FFFFFF", which means 0 for
3253  // the first vector value and FF for the rest, repeating. We need a mask
3254  // that will apply equally to all members of the vector, so AND all the
3255  // lanes of the constant together.
3256  EVT VT = Vector->getValueType(0);
3257  unsigned BitWidth = VT.getScalarSizeInBits();
3258 
3259  // If the splat value has been compressed to a bitlength lower
3260  // than the size of the vector lane, we need to re-expand it to
3261  // the lane size.
3262  if (BitWidth > SplatBitSize)
3263  for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
3264  SplatBitSize < BitWidth;
3265  SplatBitSize = SplatBitSize * 2)
3266  SplatValue |= SplatValue.shl(SplatBitSize);
3267 
3268  // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
3269  // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
3270  if (SplatBitSize % BitWidth == 0) {
3271  Constant = APInt::getAllOnesValue(BitWidth);
3272  for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
3273  Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
3274  }
3275  }
3276  }
3277 
3278  // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
3279  // actually legal and isn't going to get expanded, else this is a false
3280  // optimisation.
3281  bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
3282  Load->getValueType(0),
3283  Load->getMemoryVT());
3284 
3285  // Resize the constant to the same size as the original memory access before
3286  // extension. If it is still the AllOnesValue then this AND is completely
3287  // unneeded.
3288  Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
3289 
3290  bool B;
3291  switch (Load->getExtensionType()) {
3292  default: B = false; break;
3293  case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
3294  case ISD::ZEXTLOAD:
3295  case ISD::NON_EXTLOAD: B = true; break;
3296  }
3297 
3298  if (B && Constant.isAllOnesValue()) {
3299  // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
3300  // preserve semantics once we get rid of the AND.
3301  SDValue NewLoad(Load, 0);
3302  if (Load->getExtensionType() == ISD::EXTLOAD) {
3303  NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
3304  Load->getValueType(0), SDLoc(Load),
3305  Load->getChain(), Load->getBasePtr(),
3306  Load->getOffset(), Load->getMemoryVT(),
3307  Load->getMemOperand());
3308  // Replace uses of the EXTLOAD with the new ZEXTLOAD.
3309  if (Load->getNumValues() == 3) {
3310  // PRE/POST_INC loads have 3 values.
3311  SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
3312  NewLoad.getValue(2) };
3313  CombineTo(Load, To, 3, true);
3314  } else {
3315  CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
3316  }
3317  }
3318 
3319  // Fold the AND away, taking care not to fold to the old load node if we
3320  // replaced it.
3321  CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
3322 
3323  return SDValue(N, 0); // Return N so it doesn't get rechecked!
3324  }
3325  }
3326 
3327  // fold (and (load x), 255) -> (zextload x, i8)
3328  // fold (and (extload x, i16), 255) -> (zextload x, i8)
3329  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
3330  if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
3331  (N0.getOpcode() == ISD::ANY_EXTEND &&
3332  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
3333  bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
3334  LoadSDNode *LN0 = HasAnyExt
3335  ? cast<LoadSDNode>(N0.getOperand(0))
3336  : cast<LoadSDNode>(N0);
3337  if (LN0->getExtensionType() != ISD::SEXTLOAD &&
3338  LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
3339  auto NarrowLoad = false;
3340  EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
3341  EVT ExtVT, LoadedVT;
3342  if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
3343  NarrowLoad)) {
3344  if (!NarrowLoad) {
3345  SDValue NewLoad =
3346  DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
3347  LN0->getChain(), LN0->getBasePtr(), ExtVT,
3348  LN0->getMemOperand());
3349  AddToWorklist(N);
3350  CombineTo(LN0, NewLoad, NewLoad.getValue(1));
3351  return SDValue(N, 0); // Return N so it doesn't get rechecked!
3352  } else {
3353  EVT PtrType = LN0->getOperand(1).getValueType();
3354 
3355  unsigned Alignment = LN0->getAlignment();
3356  SDValue NewPtr = LN0->getBasePtr();
3357 
3358  // For big endian targets, we need to add an offset to the pointer
3359  // to load the correct bytes. For little endian systems, we merely
3360  // need to read fewer bytes from the same pointer.
3361  if (DAG.getDataLayout().isBigEndian()) {
3362  unsigned LVTStoreBytes = LoadedVT.getStoreSize();
3363  unsigned EVTStoreBytes = ExtVT.getStoreSize();
3364  unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
3365  SDLoc DL(LN0);
3366  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
3367  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
3368  Alignment = MinAlign(Alignment, PtrOff);
3369  }
3370 
3371  AddToWorklist(NewPtr.getNode());
3372 
3373  SDValue Load = DAG.getExtLoad(
3374  ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
3375  LN0->getPointerInfo(), ExtVT, Alignment,
3376  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
3377  AddToWorklist(N);
3378  CombineTo(LN0, Load, Load.getValue(1));
3379  return SDValue(N, 0); // Return N so it doesn't get rechecked!
3380  }
3381  }
3382  }
3383  }
3384 
3385  if (SDValue Combined = visitANDLike(N0, N1, N))
3386  return Combined;
3387 
3388  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
3389  if (N0.getOpcode() == N1.getOpcode())
3390  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3391  return Tmp;
3392 
3393  // Masking the negated extension of a boolean is just the zero-extended
3394  // boolean:
3395  // and (sub 0, zext(bool X)), 1 --> zext(bool X)
3396  // and (sub 0, sext(bool X)), 1 --> zext(bool X)
3397  //
3398  // Note: the SimplifyDemandedBits fold below can make an information-losing
3399  // transform, and then we have no way to find this better fold.
3400  if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
3401  ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
3402  SDValue SubRHS = N0.getOperand(1);
3403  if (SubLHS && SubLHS->isNullValue()) {
3404  if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
3405  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3406  return SubRHS;
3407  if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
3408  SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
3409  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
3410  }
3411  }
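  // For example, with bool X == true: (sub 0, zext X) is all-ones, and
  // AND-ing with 1 yields 1 == zext X; with X == false both sides are 0.
  // The sext case matches too, since (sub 0, sext true) == (sub 0, -1) == 1.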
3412 
3413  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
3414  // fold (and (sra)) -> (and (srl)) when possible.
3415  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
3416  return SDValue(N, 0);
3417 
3418  // fold (zext_inreg (extload x)) -> (zextload x)
3419  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
3420  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3421  EVT MemVT = LN0->getMemoryVT();
3422  // If we zero all the possible extended bits, then we can turn this into
3423  // a zextload if we are running before legalize or the operation is legal.
3424  unsigned BitWidth = N1.getScalarValueSizeInBits();
3425  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3426  BitWidth - MemVT.getScalarSizeInBits())) &&
3427  ((!LegalOperations && !LN0->isVolatile()) ||
3428  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3429  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3430  LN0->getChain(), LN0->getBasePtr(),
3431  MemVT, LN0->getMemOperand());
3432  AddToWorklist(N);
3433  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3434  return SDValue(N, 0); // Return N so it doesn't get rechecked!
3435  }
3436  }
3437  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
3438  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3439  N0.hasOneUse()) {
3440  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3441  EVT MemVT = LN0->getMemoryVT();
3442  // If we zero all the possible extended bits, then we can turn this into
3443  // a zextload if we are running before legalize or the operation is legal.
3444  unsigned BitWidth = N1.getScalarValueSizeInBits();
3445  if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
3446  BitWidth - MemVT.getScalarSizeInBits())) &&
3447  ((!LegalOperations && !LN0->isVolatile()) ||
3448  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
3449  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
3450  LN0->getChain(), LN0->getBasePtr(),
3451  MemVT, LN0->getMemOperand());
3452  AddToWorklist(N);
3453  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
3454  return SDValue(N, 0); // Return N so it doesn't get rechecked!
3455  }
3456  }
3457  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
3458  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
3459  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
3460  N0.getOperand(1), false))
3461  return BSwap;
3462  }
3463 
3464  return SDValue();
3465 }
3466 
3467 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
3468 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
3469  bool DemandHighBits) {
3470  if (!LegalOperations)
3471  return SDValue();
3472 
3473  EVT VT = N->getValueType(0);
3474  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
3475  return SDValue();
3476  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3477  return SDValue();
3478 
3479  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
3480  bool LookPassAnd0 = false;
3481  bool LookPassAnd1 = false;
3482  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
3483  std::swap(N0, N1);
3484  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
3485  std::swap(N0, N1);
3486  if (N0.getOpcode() == ISD::AND) {
3487  if (!N0.getNode()->hasOneUse())
3488  return SDValue();
3489  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3490  if (!N01C || N01C->getZExtValue() != 0xFF00)
3491  return SDValue();
3492  N0 = N0.getOperand(0);
3493  LookPassAnd0 = true;
3494  }
3495 
3496  if (N1.getOpcode() == ISD::AND) {
3497  if (!N1.getNode()->hasOneUse())
3498  return SDValue();
3499  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3500  if (!N11C || N11C->getZExtValue() != 0xFF)
3501  return SDValue();
3502  N1 = N1.getOperand(0);
3503  LookPassAnd1 = true;
3504  }
3505 
3506  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
3507  std::swap(N0, N1);
3508  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
3509  return SDValue();
3510  if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
3511  return SDValue();
3512 
3513  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3514  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
3515  if (!N01C || !N11C)
3516  return SDValue();
3517  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
3518  return SDValue();
3519 
3520  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
3521  SDValue N00 = N0->getOperand(0);
3522  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
3523  if (!N00.getNode()->hasOneUse())
3524  return SDValue();
3525  ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
3526  if (!N001C || N001C->getZExtValue() != 0xFF)
3527  return SDValue();
3528  N00 = N00.getOperand(0);
3529  LookPassAnd0 = true;
3530  }
3531 
3532  SDValue N10 = N1->getOperand(0);
3533  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
3534  if (!N10.getNode()->hasOneUse())
3535  return SDValue();
3536  ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
3537  if (!N101C || N101C->getZExtValue() != 0xFF00)
3538  return SDValue();
3539  N10 = N10.getOperand(0);
3540  LookPassAnd1 = true;
3541  }
3542 
3543  if (N00 != N10)
3544  return SDValue();
3545 
3546  // Make sure everything beyond the low halfword gets set to zero since the SRL
3547  // 16 will clear the top bits.
3548  unsigned OpSizeInBits = VT.getSizeInBits();
3549  if (DemandHighBits && OpSizeInBits > 16) {
3550  // If the left-shift isn't masked out then the only way this is a bswap is
3551  // if all bits beyond the low 8 are 0. In that case the entire pattern
3552  // reduces to a left shift anyway: leave it for other parts of the combiner.
3553  if (!LookPassAnd0)
3554  return SDValue();
3555 
3556  // However, if the right shift isn't masked out then it might be because
3557  // it's not needed. See if we can spot that too.
3558  if (!LookPassAnd1 &&
3559  !DAG.MaskedValueIsZero(
3560  N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
3561  return SDValue();
3562  }
3563 
3564  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
3565  if (OpSizeInBits > 16) {
3566  SDLoc DL(N);
3567  Res = DAG.getNode(ISD::SRL, DL, VT, Res,
3568  DAG.getConstant(OpSizeInBits - 16, DL,
3569  getShiftAmountTy(VT)));
3570  }
3571  return Res;
3572 }
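
A standalone check (plain C++, not from this file) of the i32 identity this
matcher relies on: keeping just the two byte-swapped low bytes equals taking
the full bswap and shifting it right by 16.

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t A) {
  return (A >> 24) | ((A >> 8) & 0xFF00) | ((A << 8) & 0xFF0000) | (A << 24);
}

int main() {
  for (uint32_t A : {0x12345678u, 0xDEADBEEFu, 0u, 0xFFFFFFFFu}) {
    uint32_t LowSwap = ((A & 0xFF) << 8) | ((A >> 8) & 0xFF); // low-half (a << 8) | (a >> 8)
    assert(LowSwap == (bswap32(A) >> 16));
  }
}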
3573 
3574 /// Return true if the specified node is an element that makes up a 32-bit
3575 /// packed halfword byteswap.
3576 /// ((x & 0x000000ff) << 8) |
3577 /// ((x & 0x0000ff00) >> 8) |
3578 /// ((x & 0x00ff0000) << 8) |
3579 /// ((x & 0xff000000) >> 8)
3580 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
3581  if (!N.getNode()->hasOneUse())
3582  return false;
3583 
3584  unsigned Opc = N.getOpcode();
3585  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
3586  return false;
3587 
3588  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3589  if (!N1C)
3590  return false;
3591 
3592  unsigned Num;
3593  switch (N1C->getZExtValue()) {
3594  default:
3595  return false;
3596  case 0xFF: Num = 0; break;
3597  case 0xFF00: Num = 1; break;
3598  case 0xFF0000: Num = 2; break;
3599  case 0xFF000000: Num = 3; break;
3600  }
3601 
3602  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
3603  SDValue N0 = N.getOperand(0);
3604  if (Opc == ISD::AND) {
3605  if (Num == 0 || Num == 2) {
3606  // (x >> 8) & 0xff
3607  // (x >> 8) & 0xff0000
3608  if (N0.getOpcode() != ISD::SRL)
3609  return false;
3610  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3611  if (!C || C->getZExtValue() != 8)
3612  return false;
3613  } else {
3614  // (x << 8) & 0xff00
3615  // (x << 8) & 0xff000000
3616  if (N0.getOpcode() != ISD::SHL)
3617  return false;
3618  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3619  if (!C || C->getZExtValue() != 8)
3620  return false;
3621  }
3622  } else if (Opc == ISD::SHL) {
3623  // (x & 0xff) << 8
3624  // (x & 0xff0000) << 8
3625  if (Num != 0 && Num != 2)
3626  return false;
3627  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3628  if (!C || C->getZExtValue() != 8)
3629  return false;
3630  } else { // Opc == ISD::SRL
3631  // (x & 0xff00) >> 8
3632  // (x & 0xff000000) >> 8
3633  if (Num != 1 && Num != 3)
3634  return false;
3635  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
3636  if (!C || C->getZExtValue() != 8)
3637  return false;
3638  }
3639 
3640  if (Parts[Num])
3641  return false;
3642 
3643  Parts[Num] = N0.getOperand(0).getNode();
3644  return true;
3645 }
3646 
3647 /// Match a 32-bit packed halfword bswap. That is
3648 /// ((x & 0x000000ff) << 8) |
3649 /// ((x & 0x0000ff00) >> 8) |
3650 /// ((x & 0x00ff0000) << 8) |
3651 /// ((x & 0xff000000) >> 8)
3652 /// => (rotl (bswap x), 16)
3653 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3654  if (!LegalOperations)
3655  return SDValue();
3656 
3657  EVT VT = N->getValueType(0);
3658  if (VT != MVT::i32)
3659  return SDValue();
3660  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3661  return SDValue();
3662 
3663  // Look for either
3664  // (or (or (and), (and)), (or (and), (and)))
3665  // (or (or (or (and), (and)), (and)), (and))
3666  if (N0.getOpcode() != ISD::OR)
3667  return SDValue();
3668  SDValue N00 = N0.getOperand(0);
3669  SDValue N01 = N0.getOperand(1);
3670  SDNode *Parts[4] = {};
3671 
3672  if (N1.getOpcode() == ISD::OR &&
3673  N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3674  // (or (or (and), (and)), (or (and), (and)))
3675  SDValue N000 = N00.getOperand(0);
3676  if (!isBSwapHWordElement(N000, Parts))
3677  return SDValue();
3678 
3679  SDValue N001 = N00.getOperand(1);
3680  if (!isBSwapHWordElement(N001, Parts))
3681  return SDValue();
3682  SDValue N010 = N01.getOperand(0);
3683  if (!isBSwapHWordElement(N010, Parts))
3684  return SDValue();
3685  SDValue N011 = N01.getOperand(1);
3686  if (!isBSwapHWordElement(N011, Parts))
3687  return SDValue();
3688  } else {
3689  // (or (or (or (and), (and)), (and)), (and))
3690  if (!isBSwapHWordElement(N1, Parts))
3691  return SDValue();
3692  if (!isBSwapHWordElement(N01, Parts))
3693  return SDValue();
3694  if (N00.getOpcode() != ISD::OR)
3695  return SDValue();
3696  SDValue N000 = N00.getOperand(0);
3697  if (!isBSwapHWordElement(N000, Parts))
3698  return SDValue();
3699  SDValue N001 = N00.getOperand(1);
3700  if (!isBSwapHWordElement(N001, Parts))
3701  return SDValue();
3702  }
3703 
3704  // Make sure the parts are all coming from the same node.
3705  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3706  return SDValue();
3707 
3708  SDLoc DL(N);
3709  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
3710  SDValue(Parts[0], 0));
3711 
3712  // Result of the bswap should be rotated by 16. If it's not legal, then
3713  // do (x << 16) | (x >> 16).
3714  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
3715  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3716  return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
3717  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3718  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
3719  return DAG.getNode(ISD::OR, DL, VT,
3720  DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
3721  DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
3722 }
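
A standalone check (plain C++, not from this file) of the equivalence stated
above: the four masked byte moves amount to rotating the full bswap left by 16.

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xFF00) | ((V << 8) & 0xFF0000) | (V << 24);
}

static uint32_t rotl32(uint32_t V, unsigned R) {
  return (V << R) | (V >> (32 - R)); // valid for R in (0, 32)
}

int main() {
  for (uint32_t X : {0x11223344u, 0xA5C3E718u}) {
    uint32_t P = ((X & 0x000000FF) << 8) | ((X & 0x0000FF00) >> 8) |
                 ((X & 0x00FF0000) << 8) | ((X & 0xFF000000) >> 8);
    assert(P == rotl32(bswap32(X), 16));
  }
}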
3723 
3724 /// This contains all DAGCombine rules which reduce two values combined by
3725 /// an Or operation to a single value \see visitANDLike().
3726 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
3727  EVT VT = N1.getValueType();
3728  // fold (or x, undef) -> -1
3729  if (!LegalOperations &&
3730  (N0.isUndef() || N1.isUndef())) {
3731  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
3732  return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
3733  SDLoc(LocReference), VT);
3734  }
3735  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3736  SDValue LL, LR, RL, RR, CC0, CC1;
3737  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3738  ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3739  ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3740 
3741  if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
3742  // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3743  // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
3744  if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3745  EVT CCVT = getSetCCResultType(LR.getValueType());
3746  if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3747  SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3748  LR.getValueType(), LL, RL);
3749  AddToWorklist(ORNode.getNode());
3750  return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3751  }
3752  }
3753  // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3754  // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
3755  if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3756  EVT CCVT = getSetCCResultType(LR.getValueType());
3757  if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
3758  SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3759  LR.getValueType(), LL, RL);
3760  AddToWorklist(ANDNode.getNode());
3761  return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
3762  }
3763  }
3764  }
3765  // canonicalize equivalent to ll == rl
3766  if (LL == RR && LR == RL) {
3767  Op1 = ISD::getSetCCSwappedOperands(Op1);
3768  std::swap(RL, RR);
3769  }
3770  if (LL == RL && LR == RR) {
3771  bool isInteger = LL.getValueType().isInteger();
3772  ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
3773  if (Result != ISD::SETCC_INVALID &&
3774  (!LegalOperations ||
3775  (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3776  TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
3777  EVT CCVT = getSetCCResultType(LL.getValueType());
3778  if (N0.getValueType() == CCVT ||
3779  (!LegalOperations && N0.getValueType() == MVT::i1))
3780  return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3781  LL, LR, Result);
3782  }
3783  }
3784  }
3785 
3786  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
3787  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3788  // Don't increase # computations.
3789  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3790  // We can only do this xform if we know that bits from X that are set in C2
3791  // but not in C1 are already zero. Likewise for Y.
3792  if (const ConstantSDNode *N0O1C =
3793  getAsNonOpaqueConstant(N0.getOperand(1)))
3794  if (const ConstantSDNode *N1O1C =
3795  getAsNonOpaqueConstant(N1.getOperand(1))) {
3796  // We can only do this xform if we know that bits from X that are set in
3797  // C2 but not in C1 are already zero. Likewise for Y.
3798  const APInt &LHSMask = N0O1C->getAPIntValue();
3799  const APInt &RHSMask = N1O1C->getAPIntValue();
3800 
3801  if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
3802  DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
3803  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3804  N0.getOperand(0), N1.getOperand(0));
3805  SDLoc DL(LocReference);
3806  return DAG.getNode(ISD::AND, DL, VT, X,
3807  DAG.getConstant(LHSMask | RHSMask, DL, VT));
3808  }
3809  }
3810  }
3811  }
3812 
3813  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
3814  if (N0.getOpcode() == ISD::AND &&
3815  N1.getOpcode() == ISD::AND &&
3816  N0.getOperand(0) == N1.getOperand(0) &&
3817  // Don't increase # computations.
3818  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3819  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3820  N0.getOperand(1), N1.getOperand(1));
3821  return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
3822  }
3823 
3824  return SDValue();
3825 }
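
A minimal sketch (plain C++, not from this file) of the mask-merging fold in
visitORLike: when the bits of X set in C2 but not C1 are known zero, and
symmetrically for Y, the two ANDs collapse into one.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 0x0FF0, C2 = 0x00FF;  // overlapping masks
  for (uint32_t X : {0x0A50u, 0x0AF0u})     // no X bits in C2 & ~C1
    for (uint32_t Y : {0x003Cu, 0x00C3u}) { // no Y bits in C1 & ~C2
      assert((X & C2 & ~C1) == 0 && (Y & C1 & ~C2) == 0); // preconditions
      assert(((X & C1) | (Y & C2)) == ((X | Y) & (C1 | C2)));
    }
}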
3826 
3827 SDValue DAGCombiner::visitOR(SDNode *N) {
3828  SDValue N0 = N->getOperand(0);
3829  SDValue N1 = N->getOperand(1);
3830  EVT VT = N1.getValueType();
3831 
3832  // x | x --> x
3833  if (N0 == N1)
3834  return N0;
3835 
3836  // fold vector ops
3837  if (VT.isVector()) {
3838  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3839  return FoldedVOp;
3840 
3841  // fold (or x, 0) -> x, vector edition
3842  if (ISD::isBuildVectorAllZeros(N0.getNode()))
3843  return N1;
3844  if (ISD::isBuildVectorAllZeros(N1.getNode()))
3845  return N0;
3846 
3847  // fold (or x, -1) -> -1, vector edition
3848  if (ISD::isBuildVectorAllOnes(N0.getNode()))
3849  // do not return N0, because undef node may exist in N0
3850  return DAG.getConstant(
3851  APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
3852  N0.getValueType());
3853  if (ISD::isBuildVectorAllOnes(N1.getNode()))
3854  // do not return N1, because undef node may exist in N1
3855  return DAG.getConstant(
3856  APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
3857  N1.getValueType());
3858 
3859  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
3860  // Do this only if the resulting shuffle is legal.
3861  if (isa<ShuffleVectorSDNode>(N0) &&
3862  isa<ShuffleVectorSDNode>(N1) &&
3863  // Avoid folding a node with illegal type.
3864  TLI.isTypeLegal(VT)) {
3865  bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
3866  bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
3867  bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
3868  bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
3869  // Ensure both shuffles have a zero input.
3870  if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
3871  assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
3872  assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
3873  const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
3874  const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
3875  bool CanFold = true;
3876  int NumElts = VT.getVectorNumElements();
3877  SmallVector<int, 4> Mask(NumElts);
3878 
3879  for (int i = 0; i != NumElts; ++i) {
3880  int M0 = SV0->getMaskElt(i);
3881  int M1 = SV1->getMaskElt(i);
3882 
3883  // Determine if either index is pointing to a zero vector.
3884  bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
3885  bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
3886 
3887  // If one element is zero and the other side is undef, keep undef.
3888  // This also handles the case that both are undef.
3889  if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
3890  Mask[i] = -1;
3891  continue;
3892  }
3893 
3894  // Make sure only one of the elements is zero.
3895  if (M0Zero == M1Zero) {
3896  CanFold = false;
3897  break;
3898  }
3899 
3900  assert((M0 >= 0 || M1 >= 0) && "Undef index!");
3901 
3902  // We have a zero and non-zero element. If the non-zero came from
3903  // SV0 make the index a LHS index. If it came from SV1, make it
3904  // a RHS index. We need to mod by NumElts because we don't care
3905  // which operand it came from in the original shuffles.
3906  Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
3907  }
3908 
3909  if (CanFold) {
3910  SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
3911  SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
3912 
3913  bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
3914  if (!LegalMask) {
3915  std::swap(NewLHS, NewRHS);
3916  ShuffleVectorSDNode::commuteMask(Mask);
3917  LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
3918  }
3919 
3920  if (LegalMask)
3921  return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
3922  }
3923  }
3924  }
3925  }
3926 
3927  // fold (or c1, c2) -> c1|c2
3928  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3929  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3930  if (N0C && N1C && !N1C->isOpaque())
3931  return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
3932  // canonicalize constant to RHS
3933  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3934  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3935  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
3936  // fold (or x, 0) -> x
3937  if (isNullConstant(N1))
3938  return N0;
3939  // fold (or x, -1) -> -1
3940  if (isAllOnesConstant(N1))
3941  return N1;
3942  // fold (or x, c) -> c iff (x & ~c) == 0
3943  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
3944  return N1;
3945 
3946  if (SDValue Combined = visitORLike(N0, N1, N))
3947  return Combined;
3948 
3949  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
3950  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
3951  return BSwap;
3952  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
3953  return BSwap;
3954 
3955  // reassociate or
3956  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
3957  return ROR;
3958  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
3959  // iff (c1 & c2) == 0.
3960  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
3961  isa<ConstantSDNode>(N0.getOperand(1))) {
3962  ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
3963  if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
3964  if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
3965  N1C, C1))
3966  return DAG.getNode(
3967  ISD::AND, SDLoc(N), VT,
3968  DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
3969  return SDValue();
3970  }
3971  }
3972  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
3973  if (N0.getOpcode() == N1.getOpcode())
3974  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
3975  return Tmp;
3976 
3977  // See if this is some rotate idiom.
3978  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
3979  return SDValue(Rot, 0);
3980 
3981  // Simplify the operands using demanded-bits information.
3982  if (!VT.isVector() &&
3983  SimplifyDemandedBits(SDValue(N, 0)))
3984  return SDValue(N, 0);
3985 
3986  return SDValue();
3987 }
3988 
3989 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
3990 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3991  if (Op.getOpcode() == ISD::AND) {
3992  if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
3993  Mask = Op.getOperand(1);
3994  Op = Op.getOperand(0);
3995  } else {
3996  return false;
3997  }
3998  }
3999 
4000  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
4001  Shift = Op;
4002  return true;
4003  }
4004 
4005  return false;
4006 }
4007 
4008 // Return true if we can prove that, whenever Neg and Pos are both in the
4009 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
4010 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
4011 //
4012 // (or (shift1 X, Neg), (shift2 X, Pos))
4013 //
4014 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
4015 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
4016 // to consider shift amounts with defined behavior.
4017 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
4018  // If EltSize is a power of 2 then:
4019  //
4020  // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
4021  // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
4022  //
4023  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
4024  // for the stronger condition:
4025  //
4026  // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
4027  //
4028  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
4029  // we can just replace Neg with Neg' for the rest of the function.
4030  //
4031  // In other cases we check for the even stronger condition:
4032  //
4033  // Neg == EltSize - Pos [B]
4034  //
4035  // for all Neg and Pos. Note that the (or ...) then invokes undefined
4036  // behavior if Pos == 0 (and consequently Neg == EltSize).
4037  //
4038  // We could actually use [A] whenever EltSize is a power of 2, but the
4039  // only extra cases that it would match are those uninteresting ones
4040  // where Neg and Pos are never in range at the same time. E.g. for
4041  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
4042  // as well as (sub 32, Pos), but:
4043  //
4044  // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
4045  //
4046  // always invokes undefined behavior for 32-bit X.
4047  //
4048  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
4049  unsigned MaskLoBits = 0;
4050  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
4051  if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
4052  if (NegC->getAPIntValue() == EltSize - 1) {
4053  Neg = Neg.getOperand(0);
4054  MaskLoBits = Log2_64(EltSize);
4055  }
4056  }
4057  }
4058 
4059  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
4060  if (Neg.getOpcode() != ISD::SUB)
4061  return false;
4062  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
4063  if (!NegC)
4064  return false;
4065  SDValue NegOp1 = Neg.getOperand(1);
4066 
4067  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
4068  // Pos'. The truncation is redundant for the purpose of the equality.
4069  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
4070  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4071  if (PosC->getAPIntValue() == EltSize - 1)
4072  Pos = Pos.getOperand(0);
4073 
4074  // The condition we need is now:
4075  //
4076  // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
4077  //
4078  // If NegOp1 == Pos then we need:
4079  //
4080  // EltSize & Mask == NegC & Mask
4081  //
4082  // (because "x & Mask" is a truncation and distributes through subtraction).
4083  APInt Width;
4084  if (Pos == NegOp1)
4085  Width = NegC->getAPIntValue();
4086 
4087  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
4088  // Then the condition we want to prove becomes:
4089  //
4090  // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
4091  //
4092  // which, again because "x & Mask" is a truncation, becomes:
4093  //
4094  // NegC & Mask == (EltSize - PosC) & Mask
4095  // EltSize & Mask == (NegC + PosC) & Mask
4096  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
4097  if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
4098  Width = PosC->getAPIntValue() + NegC->getAPIntValue();
4099  else
4100  return false;
4101  } else
4102  return false;
4103 
4104  // Now we just need to check that EltSize & Mask == Width & Mask.
4105  if (MaskLoBits)
4106  // EltSize & Mask is 0 since Mask is EltSize - 1.
4107  return Width.getLoBits(MaskLoBits) == 0;
4108  return Width == EltSize;
4109 }
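
A standalone sketch (plain C++, assuming 32-bit elements, not from this file)
of condition [A]: with Neg computed as (sub 32, Pos) masked by 31, the
or-of-opposing-shifts equals a rotate for every Pos in [0, 32).

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned R) {
  return R == 0 ? X : (X << R) | (X >> (32 - R));
}

int main() {
  const uint32_t X = 0x80C0A0E1;
  for (unsigned Pos = 0; Pos < 32; ++Pos) {
    unsigned Neg = (32 - Pos) & 31; // the masked (sub 32, Pos) form
    assert(((X << Pos) | (X >> Neg)) == rotl32(X, Pos));
  }
}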
4110 
4111 // A subroutine of MatchRotate used once we have found an OR of two opposite
4112 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
4113 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
4114 // former being preferred if supported. InnerPos and InnerNeg are Pos and
4115 // Neg with outer conversions stripped away.
4116 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
4117  SDValue Neg, SDValue InnerPos,
4118  SDValue InnerNeg, unsigned PosOpcode,
4119  unsigned NegOpcode, const SDLoc &DL) {
4120  // fold (or (shl x, (*ext y)),
4121  // (srl x, (*ext (sub 32, y)))) ->
4122  // (rotl x, y) or (rotr x, (sub 32, y))
4123  //
4124  // fold (or (shl x, (*ext (sub 32, y))),
4125  // (srl x, (*ext y))) ->
4126  // (rotr x, y) or (rotl x, (sub 32, y))
4127  EVT VT = Shifted.getValueType();
4128  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
4129  bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
4130  return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
4131  HasPos ? Pos : Neg).getNode();
4132  }
4133 
4134  return nullptr;
4135 }
4136 
4137 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
4138 // idioms for rotate, and if the target supports rotation instructions, generate
4139 // a rot[lr].
4140 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
4141  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
4142  EVT VT = LHS.getValueType();
4143  if (!TLI.isTypeLegal(VT)) return nullptr;
4144 
4145  // The target must have at least one rotate flavor.
4146  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
4147  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
4148  if (!HasROTL && !HasROTR) return nullptr;
4149 
4150  // Match "(X shl/srl V1) & V2" where V2 may not be present.
4151  SDValue LHSShift; // The shift.
4152  SDValue LHSMask; // AND value if any.
4153  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
4154  return nullptr; // Not part of a rotate.
4155 
4156  SDValue RHSShift; // The shift.
4157  SDValue RHSMask; // AND value if any.
4158  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
4159  return nullptr; // Not part of a rotate.
4160 
4161  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
4162  return nullptr; // Not shifting the same value.
4163 
4164  if (LHSShift.getOpcode() == RHSShift.getOpcode())
4165  return nullptr; // Shifts must disagree.
4166 
4167  // Canonicalize shl to left side in a shl/srl pair.
4168  if (RHSShift.getOpcode() == ISD::SHL) {
4169  std::swap(LHS, RHS);
4170  std::swap(LHSShift, RHSShift);
4171  std::swap(LHSMask, RHSMask);
4172  }
4173 
4174  unsigned EltSizeInBits = VT.getScalarSizeInBits();
4175  SDValue LHSShiftArg = LHSShift.getOperand(0);
4176  SDValue LHSShiftAmt = LHSShift.getOperand(1);
4177  SDValue RHSShiftArg = RHSShift.getOperand(0);
4178  SDValue RHSShiftAmt = RHSShift.getOperand(1);
4179 
4180  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
4181  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
4182  if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
4183  uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
4184  uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
4185  if ((LShVal + RShVal) != EltSizeInBits)
4186  return nullptr;
4187 
4188  SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
4189  LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
4190 
4191  // If there is an AND of either shifted operand, apply it to the result.
4192  if (LHSMask.getNode() || RHSMask.getNode()) {
4193  APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
4194  SDValue Mask = DAG.getConstant(AllBits, DL, VT);
4195 
4196  if (LHSMask.getNode()) {
4197  APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
4198  Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4199  DAG.getNode(ISD::OR, DL, VT, LHSMask,
4200  DAG.getConstant(RHSBits, DL, VT)));
4201  }
4202  if (RHSMask.getNode()) {
4203  APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
4204  Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
4205  DAG.getNode(ISD::OR, DL, VT, RHSMask,
4206  DAG.getConstant(LHSBits, DL, VT)));
4207  }
4208 
4209  Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
4210  }
4211 
4212  return Rot.getNode();
4213  }
4214 
4215  // If there is a mask here, and we have a variable shift, we can't be sure
4216  // that we're masking out the right stuff.
4217  if (LHSMask.getNode() || RHSMask.getNode())
4218  return nullptr;
4219 
4220  // If the shift amount is sign/zext/any-extended just peel it off.
4221  SDValue LExtOp0 = LHSShiftAmt;
4222  SDValue RExtOp0 = RHSShiftAmt;
4223  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4224  LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4225  LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4226  LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
4227  (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
4228  RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
4229  RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
4230  RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
4231  LExtOp0 = LHSShiftAmt.getOperand(0);
4232  RExtOp0 = RHSShiftAmt.getOperand(0);
4233  }
4234 
4235  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
4236  LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
4237  if (TryL)
4238  return TryL;
4239 
4240  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
4241  RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
4242  if (TryR)
4243  return TryR;
4244 
4245  return nullptr;
4246 }
4247 
4248 namespace {
4249 /// Helper struct to parse and store a memory address as base + index + offset.
4250 /// We ignore sign extensions when it is safe to do so.
4251 /// The following two expressions are not equivalent. To differentiate we need
4252 /// to store whether there was a sign extension involved in the index
4253 /// computation.
4254 /// (load (i64 add (i64 copyfromreg %c)
4255 /// (i64 signextend (add (i8 load %index)
4256 /// (i8 1))))
4257 /// vs
4258 ///
4259 /// (load (i64 add (i64 copyfromreg %c)
4260 /// (i64 signextend (i32 add (i32 signextend (i8 load %index))
4261 /// (i32 1)))))
4262 struct BaseIndexOffset {
4263  SDValue Base;
4264  SDValue Index;
4265  int64_t Offset;
4266  bool IsIndexSignExt;
4267 
4268  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
4269 
4270  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
4271  bool IsIndexSignExt) :
4272  Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
4273 
4274  bool equalBaseIndex(const BaseIndexOffset &Other) {
4275  return Other.Base == Base && Other.Index == Index &&
4276  Other.IsIndexSignExt == IsIndexSignExt;
4277  }
4278 
4279  /// Parses tree in Ptr for base, index, offset addresses.
4280  static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG,
4281  int64_t PartialOffset = 0) {
4282  bool IsIndexSignExt = false;
4283 
4284  // Split up a folded GlobalAddress+Offset into its component parts.
4285  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
4286  if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
4287  return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
4288  SDLoc(GA),
4289  GA->getValueType(0),
4290  /*Offset=*/PartialOffset,
4291  /*isTargetGA=*/false,
4292  GA->getTargetFlags()),
4293  SDValue(),
4294  GA->getOffset(),
4295  IsIndexSignExt);
4296  }
4297 
4298  // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
4299  // instruction, then it could be just the BASE or everything else we don't
4300  // know how to handle. Just use Ptr as BASE and give up.
4301  if (Ptr->getOpcode() != ISD::ADD)
4302  return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4303 
4304  // We know that we have at least an ADD instruction. Try to pattern match
4305  // the simple case of BASE + OFFSET.
4306  if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
4307  int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
4308  return match(Ptr->getOperand(0), DAG, Offset + PartialOffset);
4309  }
4310 
4311  // Inside a loop the current BASE pointer is calculated using an ADD and a
4312  // MUL instruction. In this case Ptr is the actual BASE pointer.
4313  // (i64 add (i64 %array_ptr)
4314  // (i64 mul (i64 %induction_var)
4315  // (i64 %element_size)))
4316  if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
4317  return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4318 
4319  // Look at Base + Index + Offset cases.
4320  SDValue Base = Ptr->getOperand(0);
4321  SDValue IndexOffset = Ptr->getOperand(1);
4322 
4323  // Skip signextends.
4324  if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
4325  IndexOffset = IndexOffset->getOperand(0);
4326  IsIndexSignExt = true;
4327  }
4328 
4329  // Either the case of Base + Index (no offset) or something else.
4330  if (IndexOffset->getOpcode() != ISD::ADD)
4331  return BaseIndexOffset(Base, IndexOffset, PartialOffset, IsIndexSignExt);
4332 
4333  // Now we have the case of Base + Index + offset.
4334  SDValue Index = IndexOffset->getOperand(0);
4335  SDValue Offset = IndexOffset->getOperand(1);
4336 
4337  if (!isa<ConstantSDNode>(Offset))
4338  return BaseIndexOffset(Ptr, SDValue(), PartialOffset, IsIndexSignExt);
4339 
4340  // Ignore signextends.
4341  if (Index->getOpcode() == ISD::SIGN_EXTEND) {
4342  Index = Index->getOperand(0);
4343  IsIndexSignExt = true;
4344  } else IsIndexSignExt = false;
4345 
4346  int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
4347  return BaseIndexOffset(Base, Index, Off + PartialOffset, IsIndexSignExt);
4348  }
4349 };
4350 } // namespace
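
A minimal sketch (plain C++, not from this file) of why the struct records
IsIndexSignExt: extending before or after a narrow add changes the computed
address whenever the narrow add wraps. The int8_t conversion below assumes
the usual two's-complement wraparound (implementation-defined before C++20).

#include <cassert>
#include <cstdint>

int main() {
  int8_t Index = 127;
  int64_t ExtendThenAdd = (int64_t)Index + 1;           // 128
  int64_t AddThenExtend = (int64_t)(int8_t)(Index + 1); // i8 add wraps to -128
  assert(ExtendThenAdd != AddThenExtend);
}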
4351 
4352 SDValue DAGCombiner::visitXOR(SDNode *N) {
4353  SDValue N0 = N->getOperand(0);
4354  SDValue N1 = N->getOperand(1);
4355  EVT VT = N0.getValueType();
4356 
4357  // fold vector ops
4358  if (VT.isVector()) {
4359  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4360  return FoldedVOp;
4361 
4362  // fold (xor x, 0) -> x, vector edition
4363  if (ISD::isBuildVectorAllZeros(N0.getNode()))
4364  return N1;
4365  if (ISD::isBuildVectorAllZeros(N1.getNode()))
4366  return N0;
4367  }
4368 
4369  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
4370  if (N0.isUndef() && N1.isUndef())
4371  return DAG.getConstant(0, SDLoc(N), VT);
4372  // fold (xor x, undef) -> undef
4373  if (N0.isUndef())
4374  return N0;
4375  if (N1.isUndef())
4376  return N1;
4377  // fold (xor c1, c2) -> c1^c2
4378  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4379  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4380  if (N0C && N1C)
4381  return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
4382  // canonicalize constant to RHS
4383  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4384  !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4385  return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
4386  // fold (xor x, 0) -> x
4387  if (isNullConstant(N1))
4388  return N0;
4389  // reassociate xor
4390  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
4391  return RXOR;
4392 
4393  // fold !(x cc y) -> (x !cc y)
4394  SDValue LHS, RHS, CC;
4395  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
4396  bool isInt = LHS.getValueType().isInteger();
4397  ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
4398  isInt);
4399 
4400  if (!LegalOperations ||
4401  TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
4402  switch (N0.getOpcode()) {
4403  default:
4404  llvm_unreachable("Unhandled SetCC Equivalent!");
4405  case ISD::SETCC:
4406  return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
4407  case ISD::SELECT_CC:
4408  return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
4409  N0.getOperand(3), NotCC);
4410  }
4411  }
4412  }
4413 
4414  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
4415  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
4416  N0.getNode()->hasOneUse() &&
4417  isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
4418  SDValue V = N0.getOperand(0);
4419  SDLoc DL(N0);
4420  V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
4421  DAG.getConstant(1, DL, V.getValueType()));
4422  AddToWorklist(V.getNode());
4423  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
4424  }
4425 
4426  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
4427  if (isOneConstant(N1) && VT == MVT::i1 &&
4428  (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4429  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4430  if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
4431  unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4432  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4433  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4434  AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4435  return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4436  }
4437  }
4438  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
4439  if (isAllOnesConstant(N1) &&
4440  (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4441  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4442  if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
4443  unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4444  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4445  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4446  AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4447  return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4448  }
4449  }
4450  // fold (xor (and x, y), y) -> (and (not x), y)
4451  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4452  N0->getOperand(1) == N1) {
4453  SDValue X = N0->getOperand(0);
4454  SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
4455  AddToWorklist(NotX.getNode());
4456  return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
4457  }
4458  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
4459  if (N1C && N0.getOpcode() == ISD::XOR) {
4460  if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
4461  SDLoc DL(N);
4462  return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4463  DAG.getConstant(N1C->getAPIntValue() ^
4464  N00C->getAPIntValue(), DL, VT));
4465  }
4466  if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
4467  SDLoc DL(N);
4468  return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
4469  DAG.getConstant(N1C->getAPIntValue() ^
4470  N01C->getAPIntValue(), DL, VT));
4471  }
4472  }
4473  // fold (xor x, x) -> 0
4474  if (N0 == N1)
4475  return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
4476 
4477  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
4478  // Here is a concrete example of this equivalence:
4479  // i16 x == 14
4480  // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
4481  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
4482  //
4483  // =>
4484  //
4485  // i16 ~1 == 0b1111111111111110
4486  // i16 rol(~1, 14) == 0b1011111111111111
4487  //
4488  // Some additional tips to help conceptualize this transform:
4489  // - Try to see the operation as placing a single zero in a value of all ones.
4490  // - There exists no value for x which would allow the result to contain zero.
4491  // - Values of x larger than the bitwidth are undefined and do not require a
4492  // consistent result.
4493  // - Pushing the zero left requires shifting one-bits in from the right.
4494  // A rotate left of ~1 is a nice way of achieving the desired result.
4495  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
4496  && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
4497  SDLoc DL(N);
4498  return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
4499  N0.getOperand(1));
4500  }
4501 
4502  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
4503  if (N0.getOpcode() == N1.getOpcode())
4504  if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
4505  return Tmp;
4506 
4507  // Simplify the expression using non-local knowledge.
4508  if (!VT.isVector() &&
4509  SimplifyDemandedBits(SDValue(N, 0)))
4510  return SDValue(N, 0);
4511 
4512  return SDValue();
4513 }
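
A standalone check (plain C++, i16, not from this file) of the
(xor (shl 1, x), -1) -> (rotl ~1, x) fold in visitXOR above: complementing a
single set bit equals rotating the all-ones-but-bit-0 pattern.

#include <cassert>
#include <cstdint>

static uint16_t rotl16(uint16_t V, unsigned R) {
  return R == 0 ? V : (uint16_t)((V << R) | (V >> (16 - R)));
}

int main() {
  for (unsigned X = 0; X < 16; ++X)
    assert((uint16_t)~(1u << X) == rotl16((uint16_t)~1u, X));
}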
4514 
4515 /// Handle transforms common to the three shifts, when the shift amount is a
4516 /// constant.
4517 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
4518  SDNode *LHS = N->getOperand(0).getNode();
4519  if (!LHS->hasOneUse()) return SDValue();
4520 
4521  // We want to pull some binops through shifts, so that we have (and (shift))
4522  // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
4523  // thing happens with address calculations, so it's important to canonicalize
4524  // it.
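  // For example, with i32 values (illustrative):
  //   (shl (add x, 16), 2) -> (add (shl x, 2), 64)
  // which exposes the (base + constant-offset) shape that addressing modes
  // and later folds expect.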
4525  bool HighBitSet = false; // Can we transform this if the high bit is set?
4526 
4527  switch (LHS->getOpcode()) {
4528  default: return SDValue();
4529  case ISD::OR:
4530  case ISD::XOR:
4531  HighBitSet = false; // We can only transform sra if the high bit is clear.
4532  break;
4533  case ISD::AND:
4534  HighBitSet = true; // We can only transform sra if the high bit is set.
4535  break;
4536  case ISD::ADD:
4537  if (N->getOpcode() != ISD::SHL)
4538  return SDValue(); // only shl(add) not sr[al](add).
4539  HighBitSet = false; // We can only transform sra if the high bit is clear.
4540  break;
4541  }
4542 
4543  // We require the RHS of the binop to be a constant and not opaque as well.
4544  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
4545  if (!BinOpCst) return SDValue();
4546 
4547  // FIXME: disable this unless the input to the binop is a shift by a constant
4548  // or is a copy/select. Enable this in other cases when we figure out it is
4549  // exactly profitable.
4549  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
4550  bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
4551  BinOpLHSVal->getOpcode() == ISD::SRA ||
4552  BinOpLHSVal->getOpcode() == ISD::SRL;
4553  bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
4554  BinOpLHSVal->getOpcode() == ISD::SELECT;
4555 
4556  if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
4557  !isCopyOrSelect)
4558  return SDValue();
4559 
4560  if (isCopyOrSelect && N->hasOneUse())
4561  return SDValue();
4562 
4563  EVT VT = N->getValueType(0);
4564 
4565  // If this is a signed shift right, and the high bit is modified by the
4566  // logical operation, do not perform the transformation. The HighBitSet
4567  // boolean indicates the value of the high bit of the constant which would
4568  // cause it to be modified for this operation.
4569  if (N->getOpcode() == ISD::SRA) {
4570  bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
4571  if (BinOpRHSSignSet != HighBitSet)
4572  return SDValue();
4573  }
4574 
4575  if (!TLI.isDesirableToCommuteWithShift(LHS))
4576  return SDValue();
4577 
4578  // Fold the constants, shifting the binop RHS by the shift amount.
4579  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
4580  N->getValueType(0),
4581  LHS->getOperand(1), N->getOperand(1));
4582  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
4583 
4584  // Create the new shift.
4585  SDValue NewShift = DAG.getNode(N->getOpcode(),
4586  SDLoc(LHS->getOperand(0)),
4587  VT, LHS->getOperand(0), N->getOperand(1));
4588 
4589  // Create the new binop.
4590  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
4591 }
4592 
4593 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
4594  assert(N->getOpcode() == ISD::TRUNCATE);
4595  assert(N->getOperand(0).getOpcode() == ISD::AND);
4596 
4597  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
4598  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
4599  SDValue N01 = N->getOperand(0).getOperand(1);
4600  if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
4601  SDLoc DL(N);
4602  EVT TruncVT = N->getValueType(0);
4603  SDValue N00 = N->getOperand(0).getOperand(0);
4604  SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
4605  SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
4606  AddToWorklist(Trunc00.getNode());
4607  AddToWorklist(Trunc01.getNode());
4608  return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
4609  }
4610  }
4611 
4612  return SDValue();
4613 }
4614 
4615 SDValue DAGCombiner::visitRotate(SDNode *N) {
4616  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
4617  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
4618  N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
4619  if (SDValue NewOp1 =
4620  distributeTruncateThroughAnd(N->getOperand(1).getNode()))
4621  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
4622  N->getOperand(0), NewOp1);
4623  }
4624  return SDValue();
4625 }
4626 
4627 SDValue DAGCombiner::visitSHL(SDNode *N) {
4628  SDValue N0 = N->getOperand(0);
4629  SDValue N1 = N->getOperand(1);
4630  EVT VT = N0.getValueType();
4631  unsigned OpSizeInBits = VT.getScalarSizeInBits();
4632 
4633  // fold vector ops
4634  if (VT.isVector()) {
4635  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4636  return FoldedVOp;
4637 
4638  BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
4639  // If setcc produces an all-ones true value then:
4640  // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
4641  if (N1CV && N1CV->isConstant()) {
4642  if (N0.getOpcode() == ISD::AND) {
4643  SDValue N00 = N0->getOperand(0);
4644  SDValue N01 = N0->getOperand(1);
4645  BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
4646 
4647  if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
4648  TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
4649  TargetLowering::ZeroOrNegativeOneBooleanContent) {
4650  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
4651  N01CV, N1CV))
4652  return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
4653  }
4654  }
4655  }
4656  }
4657 
4658  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4659 
4660  // fold (shl c1, c2) -> c1<<c2
4661  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4662  if (N0C && N1C && !N1C->isOpaque())
4663  return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
4664  // fold (shl 0, x) -> 0
4665  if (isNullConstant(N0))
4666  return N0;
4667  // fold (shl x, c >= size(x)) -> undef
4668  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4669  return DAG.getUNDEF(VT);
4670  // fold (shl x, 0) -> x
4671  if (N1C && N1C->isNullValue())
4672  return N0;
4673  // fold (shl undef, x) -> 0
4674  if (N0.isUndef())
4675  return DAG.getConstant(0, SDLoc(N), VT);
4676  // if (shl x, c) is known to be zero, return 0
4677  if (DAG.MaskedValueIsZero(SDValue(N, 0),
4678  APInt::getAllOnesValue(OpSizeInBits)))
4679  return DAG.getConstant(0, SDLoc(N), VT);
4680  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
4681  if (N1.getOpcode() == ISD::TRUNCATE &&
4682  N1.getOperand(0).getOpcode() == ISD::AND) {
4683  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
4684  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
4685  }
4686 
4687  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4688  return SDValue(N, 0);
4689 
4690  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
4691  if (N1C && N0.getOpcode() == ISD::SHL) {
4692  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4693  SDLoc DL(N);
4694  APInt c1 = N0C1->getAPIntValue();
4695  APInt c2 = N1C->getAPIntValue();
4696  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4697 
4698  APInt Sum = c1 + c2;
4699  if (Sum.uge(OpSizeInBits))
4700  return DAG.getConstant(0, DL, VT);
4701 
4702  return DAG.getNode(
4703  ISD::SHL, DL, VT, N0.getOperand(0),
4704  DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4705  }
4706  }
4707 
4708  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
4709  // For this to be valid, the second form must not preserve any of the bits
4710  // that are shifted out by the inner shift in the first form. This means
4711  // the outer shift size must be >= the number of bits added by the ext.
4712  // As a corollary, we don't care what kind of ext it is.
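  // As an illustration (not exhaustive), with i8 extended to i16, c1 = 3,
  // c2 = 10:
  //   (shl (zext:i16 (shl:i8 x, 3)), 10) -> (shl (zext:i16 x), 13)
  // Bits of x that the inner i8 shl would discard (bits 5..7) end up past
  // bit 15 in the combined form, so they are discarded either way.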
4713  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
4714  N0.getOpcode() == ISD::ANY_EXTEND ||
4715  N0.getOpcode() == ISD::SIGN_EXTEND) &&
4716  N0.getOperand(0).getOpcode() == ISD::SHL) {
4717  SDValue N0Op0 = N0.getOperand(0);
4718  if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4719  APInt c1 = N0Op0C1->getAPIntValue();
4720  APInt c2 = N1C->getAPIntValue();
4721  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4722 
4723  EVT InnerShiftVT = N0Op0.getValueType();
4724  uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
4725  if (c2.uge(OpSizeInBits - InnerShiftSize)) {
4726  SDLoc DL(N0);
4727  APInt Sum = c1 + c2;
4728  if (Sum.uge(OpSizeInBits))
4729  return DAG.getConstant(0, DL, VT);
4730 
4731  return DAG.getNode(
4732  ISD::SHL, DL, VT,
4733  DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
4734  DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4735  }
4736  }
4737  }
4738 
4739  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
4740  // Only fold this if the inner zext has no other uses to avoid increasing
4741  // the total number of instructions.
4742  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
4743  N0.getOperand(0).getOpcode() == ISD::SRL) {
4744  SDValue N0Op0 = N0.getOperand(0);
4745  if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4746  if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
4747  uint64_t c1 = N0Op0C1->getZExtValue();
4748  uint64_t c2 = N1C->getZExtValue();
4749  if (c1 == c2) {
4750  SDValue NewOp0 = N0.getOperand(0);
4751  EVT CountVT = NewOp0.getOperand(1).getValueType();
4752  SDLoc DL(N);
4753  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
4754  NewOp0,
4755  DAG.getConstant(c2, DL, CountVT));
4756  AddToWorklist(NewSHL.getNode());
4757  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
4758  }
4759  }
4760  }
4761  }
4762 
4763  // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
4764  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
4765  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
4766  cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
4767  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4768  uint64_t C1 = N0C1->getZExtValue();
4769  uint64_t C2 = N1C->getZExtValue();
4770  SDLoc DL(N);
4771  if (C1 <= C2)
4772  return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4773  DAG.getConstant(C2 - C1, DL, N1.getValueType()));
4774  return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
4775  DAG.getConstant(C1 - C2, DL, N1.getValueType()));
4776  }
4777  }
4778 
4779  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
4780  // (and (srl x, (sub c1, c2)), MASK)
4781  // Only fold this if the inner shift has no other uses -- if it does, folding
4782  // this will increase the total number of instructions.
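  // A worked i8 example (illustrative), c1 = 3, c2 = 5:
  //   (shl (srl x, 3), 5) -> (and (shl x, 2), 0xE0)
  // Both forms place bits 3..5 of x at positions 5..7 and clear the rest.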
4783  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4784  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4785  uint64_t c1 = N0C1->getZExtValue();
4786  if (c1 < OpSizeInBits) {
4787  uint64_t c2 = N1C->getZExtValue();
4788  APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
4789  SDValue Shift;
4790  if (c2 > c1) {
4791  Mask = Mask.shl(c2 - c1);
4792  SDLoc DL(N);
4793  Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4794  DAG.getConstant(c2 - c1, DL, N1.getValueType()));
4795  } else {
4796  Mask = Mask.lshr(c1 - c2);
4797  SDLoc DL(N);
4798  Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4799  DAG.getConstant(c1 - c2, DL, N1.getValueType()));
4800  }
4801  SDLoc DL(N0);
4802  return DAG.getNode(ISD::AND, DL, VT, Shift,
4803  DAG.getConstant(Mask, DL, VT));
4804  }
4805  }
4806  }
4807 
4808  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
4809  if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
4810  isConstantOrConstantVector(N1, /* No Opaques */ true)) {
4811  unsigned BitSize = VT.getScalarSizeInBits();
4812  SDLoc DL(N);
4813  SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
4814  SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
4815  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
4816  }
4817 
4818  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
4819  // Variant of version done on multiply, except mul by a power of 2 is turned
4820  // into a shift.
4821  if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
4822  isConstantOrConstantVector(N1, /* No Opaques */ true) &&
4823  isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
4824  SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
4825  SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
4826  AddToWorklist(Shl0.getNode());
4827  AddToWorklist(Shl1.getNode());
4828  return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
4829  }
4830 
4831  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
4832  if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
4833  isConstantOrConstantVector(N1, /* No Opaques */ true) &&
4834  isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
4835  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
4836  if (isConstantOrConstantVector(Shl))
4837  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
4838  }
4839 
4840  if (N1C && !N1C->isOpaque())
4841  if (SDValue NewSHL = visitShiftByConstant(N, N1C))
4842  return NewSHL;
4843 
4844  return SDValue();
4845 }
4846 
4847 SDValue DAGCombiner::visitSRA(SDNode *N) {
4848  SDValue N0 = N->getOperand(0);
4849  SDValue N1 = N->getOperand(1);
4850  EVT VT = N0.getValueType();
4851  unsigned OpSizeInBits = VT.getScalarSizeInBits();
4852 
4853  // Arithmetic shifting an all-sign-bit value is a no-op.
4854  if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
4855  return N0;
4856 
4857  // fold vector ops
4858  if (VT.isVector())
4859  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4860  return FoldedVOp;
4861 
4862  ConstantSDNode *N1C = isConstOrConstSplat(N1);
4863 
4864  // fold (sra c1, c2) -> c1 >>s c2
4865  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4866  if (N0C && N1C && !N1C->isOpaque())
4867  return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
4868  // fold (sra 0, x) -> 0
4869  if (isNullConstant(N0))
4870  return N0;
4871  // fold (sra -1, x) -> -1
4872  if (isAllOnesConstant(N0))
4873  return N0;
4874  // fold (sra x, c >= size(x)) -> undef
4875  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4876  return DAG.getUNDEF(VT);
4877  // fold (sra x, 0) -> x
4878  if (N1C && N1C->isNullValue())
4879  return N0;
4880  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1, if the target
4881  // supports sext_inreg.
4882  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
4883  unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
4884  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
4885  if (VT.isVector())
4886  ExtVT = EVT::getVectorVT(*DAG.getContext(),
4887  ExtVT, VT.getVectorNumElements());
4888  if ((!LegalOperations ||
4889  TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
4890  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
4891  N0.getOperand(0), DAG.getValueType(ExtVT));
4892  }
4893 
4894  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
4895  if (N1C && N0.getOpcode() == ISD::SRA) {
4896  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4897  SDLoc DL(N);
4898  APInt c1 = N0C1->getAPIntValue();
4899  APInt c2 = N1C->getAPIntValue();
4900  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
4901 
4902  APInt Sum = c1 + c2;
4903  if (Sum.uge(OpSizeInBits))
4904  Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
4905 
4906  return DAG.getNode(
4907  ISD::SRA, DL, VT, N0.getOperand(0),
4908  DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
4909  }
4910  }
4911 
4912  // fold (sra (shl X, m), (sub result_size, n))
4913  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
4914  // result_size - n != m.
4915  // If truncate is free for the target, the sext(trunc(srl)) form is likely
4916  // to result in better code.
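  // For instance (illustrative), with i32, m = 4, n = 16:
  //   (sra (shl X, 4), 16) -> (sext:i32 (trunc:i16 (srl X, 12)))
  // Both forms sign-extend bits 12..27 of X into the 32-bit result.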
4917  if (N0.getOpcode() == ISD::SHL && N1C) {
4918  // Get the two constants of the shifts, CN0 = m, CN = n.
4919  const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
4920  if (N01C) {
4921  LLVMContext &Ctx = *DAG.getContext();
4922  // Determine what the truncate's result bitsize and type would be.
4923  EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
4924 
4925  if (VT.isVector())
4926  TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
4927 
4928  // Determine the residual right-shift amount.
4929  int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
4930 
4931  // If the shift is not a no-op (in which case this should be just a sign
4932  // extend already), the truncated-to type is legal, sign_extend is legal
4933  // on that type, and the truncate to that type is both legal and free,
4934  // perform the transform.
4935  if ((ShiftAmt > 0) &&
4936  TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
4937  TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
4938  TLI.isTruncateFree(VT, TruncVT)) {
4939 
4940  SDLoc DL(N);
4941  SDValue Amt = DAG.getConstant(ShiftAmt, DL,
4942  getShiftAmountTy(N0.getOperand(0).getValueType()));
4943  SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
4944  N0.getOperand(0), Amt);
4945  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
4946  Shift);
4947  return DAG.getNode(ISD::SIGN_EXTEND, DL,
4948  N->getValueType(0), Trunc);
4949  }
4950  }
4951  }
4952 
4953  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
4954  if (N1.getOpcode() == ISD::TRUNCATE &&
4955  N1.getOperand(0).getOpcode() == ISD::AND) {
4956  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
4957  return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
4958  }
4959 
4960  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
4961  // if c1 is equal to the number of bits the trunc removes
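  // For instance (illustrative), with x: i64 truncated to i32 and c1 = 32:
  //   (sra (trunc:i32 (srl:i64 x, 32)), 5) -> (trunc:i32 (sra:i64 x, 37))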
4962  if (N0.getOpcode() == ISD::TRUNCATE &&
4963  (N0.getOperand(0).getOpcode() == ISD::SRL ||
4964  N0.getOperand(0).getOpcode() == ISD::SRA) &&
4965  N0.getOperand(0).hasOneUse() &&
4966  N0.getOperand(0).getOperand(1).hasOneUse() &&
4967  N1C) {
4968  SDValue N0Op0 = N0.getOperand(0);
4969  if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
4970  unsigned LargeShiftVal = LargeShift->getZExtValue();
4971  EVT LargeVT = N0Op0.getValueType();
4972 
4973  if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
4974  SDLoc DL(N);
4975  SDValue Amt =
4976  DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
4977  getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
4978  SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
4979  N0Op0.getOperand(0), Amt);
4980  return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
4981  }
4982  }
4983  }
4984 
4985  // Simplify, based on bits shifted out of the LHS.
4986  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4987  return SDValue(N, 0);
4988 
4989 
4990  // If the sign bit is known to be zero, switch this to a SRL.
4991  if (DAG.SignBitIsZero(N0))
4992  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
4993 
4994  if (N1C && !N1C->isOpaque())
4995  if (SDValue NewSRA = visitShiftByConstant(N, N1C))
4996  return NewSRA;
4997 
4998  return SDValue();
4999 }
5000 
5001 SDValue DAGCombiner::visitSRL(SDNode *N) {
5002  SDValue N0 = N->getOperand(0);
5003  SDValue N1 = N->getOperand(1);
5004  EVT VT = N0.getValueType();
5005  unsigned OpSizeInBits = VT.getScalarSizeInBits();
5006 
5007  // fold vector ops
5008  if (VT.isVector())
5009  if (SDValue FoldedVOp = SimplifyVBinOp(N))
5010  return FoldedVOp;
5011 
5012  ConstantSDNode *N1C = isConstOrConstSplat(N1);
5013 
5014  // fold (srl c1, c2) -> c1 >>u c2
5015  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5016  if (N0C && N1C && !N1C->isOpaque())
5017  return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
5018  // fold (srl 0, x) -> 0
5019  if (isNullConstant(N0))
5020  return N0;
5021  // fold (srl x, c >= size(x)) -> undef
5022  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
5023  return DAG.getUNDEF(VT);
5024  // fold (srl x, 0) -> x
5025  if (N1C && N1C->isNullValue())
5026  return N0;
5027  // if (srl x, c) is known to be zero, return 0
5028  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5029  APInt::getAllOnesValue(OpSizeInBits)))
5030  return DAG.getConstant(0, SDLoc(N), VT);
5031 
5032  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
5033  if (N1C && N0.getOpcode() == ISD::SRL) {
5034  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
5035  SDLoc DL(N);
5036  APInt c1 = N0C1->getAPIntValue();
5037  APInt c2 = N1C->getAPIntValue();
5038  zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
5039 
5040  APInt Sum = c1 + c2;
5041  if (Sum.uge(OpSizeInBits))
5042  return DAG.getConstant(0, DL, VT);
5043 
5044  return DAG.getNode(
5045  ISD::SRL, DL, VT, N0.getOperand(0),
5046  DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
5047  }
5048  }
5049 
5050  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
5051  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
5052  N0.getOperand(0).getOpcode() == ISD::SRL &&
5053  isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
5054  uint64_t c1 =
5055  cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
5056  uint64_t c2 = N1C->getZExtValue();
5057  EVT InnerShiftVT = N0.getOperand(0).getValueType();
5058  EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
5059  uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
5060  // This is only valid if the OpSizeInBits + c1 = size of inner shift.
5061  if (c1 + OpSizeInBits == InnerShiftSize) {
5062  SDLoc DL(N0);
5063  if (c1 + c2 >= InnerShiftSize)
5064  return DAG.getConstant(0, DL, VT);
5065  return DAG.getNode(ISD::TRUNCATE, DL, VT,
5066  DAG.getNode(ISD::SRL, DL, InnerShiftVT,
5067  N0.getOperand(0)->getOperand(0),
5068  DAG.getConstant(c1 + c2, DL,
5069  ShiftCountVT)));
5070  }
5071  }
5072 
5073  // fold (srl (shl x, c), c) -> (and x, cst2)
5074  if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
5075  isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
5076  SDLoc DL(N);
5077  APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
5078  SDValue Mask =
5079  DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
5080  AddToWorklist(Mask.getNode());
5081  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
5082  }
5083 
5084  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
5085  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5086  // Shifting in all undef bits?
5087  EVT SmallVT = N0.getOperand(0).getValueType();
5088  unsigned BitSize = SmallVT.getScalarSizeInBits();
5089  if (N1C->getZExtValue() >= BitSize)
5090  return DAG.getUNDEF(VT);
5091 
5092  if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
5093  uint64_t ShiftAmt = N1C->getZExtValue();
5094  SDLoc DL0(N0);
5095  SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
5096  N0.getOperand(0),
5097  DAG.getConstant(ShiftAmt, DL0,
5098  getShiftAmountTy(SmallVT)));
5099  AddToWorklist(SmallShift.getNode());
5100  APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
5101  SDLoc DL(N);
5102  return DAG.getNode(ISD::AND, DL, VT,
5103  DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
5104  DAG.getConstant(Mask, DL, VT));
5105  }
5106  }
5107 
5108  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
5109  // bit, which is unmodified by sra.
5110  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
5111  if (N0.getOpcode() == ISD::SRA)
5112  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
5113  }
5114 
5115  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
5116  if (N1C && N0.getOpcode() == ISD::CTLZ &&
5117  N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
5118  APInt KnownZero, KnownOne;
5119  DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
5120 
5121  // If any of the input bits are KnownOne, then the input couldn't be all
5122  // zeros, thus the result of the srl will always be zero.
5123  if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
5124 
5125  // If all of the bits input to the ctlz node are known to be zero, then
5126  // the result of the ctlz is "32" and the result of the shift is one.
5127  APInt UnknownBits = ~KnownZero;
5128  if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
5129 
5130  // Otherwise, check to see if there is exactly one bit input to the ctlz.
5131  if ((UnknownBits & (UnknownBits - 1)) == 0) {
5132  // Okay, we know that only the single bit specified by UnknownBits
5133  // could be set on input to the CTLZ node. If this bit is set, the SRL
5134  // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
5135  // to an SRL/XOR pair, which is likely to simplify more.
5136  unsigned ShAmt = UnknownBits.countTrailingZeros();
5137  SDValue Op = N0.getOperand(0);
5138 
5139  if (ShAmt) {
5140  SDLoc DL(N0);
5141  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5142  DAG.getConstant(ShAmt, DL,
5143  getShiftAmountTy(Op.getValueType())));
5144  AddToWorklist(Op.getNode());
5145  }
5146 
5147  SDLoc DL(N);
5148  return DAG.getNode(ISD::XOR, DL, VT,
5149  Op, DAG.getConstant(1, DL, VT));
5150  }
5151  }
5152 
5153  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
5154  if (N1.getOpcode() == ISD::TRUNCATE &&
5155  N1.getOperand(0).getOpcode() == ISD::AND) {
5156  if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
5157  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
5158  }
5159 
5160  // fold operands of srl based on knowledge that the low bits are not
5161  // demanded.
5162  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
5163  return SDValue(N, 0);
5164 
5165  if (N1C && !N1C->isOpaque())
5166  if (SDValue NewSRL = visitShiftByConstant(N, N1C))
5167  return NewSRL;
5168 
5169  // Attempt to convert a srl of a load into a narrower zero-extending load.
5170  if (SDValue NarrowLoad = ReduceLoadWidth(N))
5171  return NarrowLoad;
5172 
5173  // Here is a common situation. We want to optimize:
5174  //
5175  // %a = ...
5176  // %b = and i32 %a, 2
5177  // %c = srl i32 %b, 1
5178  // brcond i32 %c ...
5179  //
5180  // into
5181  //
5182  // %a = ...
5183  // %b = and %a, 2
5184  // %c = setcc eq %b, 0
5185  // brcond %c ...
5186  //
5187  // However, after the source operand of the SRL is optimized into an AND, the SRL
5188  // itself may not be optimized further. Look for it and add the BRCOND into
5189  // the worklist.
5190  if (N->hasOneUse()) {
5191  SDNode *Use = *N->use_begin();
5192  if (Use->getOpcode() == ISD::BRCOND)
5193  AddToWorklist(Use);
5194  else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
5195  // Also look past the truncate.
5196  Use = *Use->use_begin();
5197  if (Use->getOpcode() == ISD::BRCOND)
5198  AddToWorklist(Use);
5199  }
5200  }
5201 
5202  return SDValue();
5203 }
5204 
5205 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
5206  SDValue N0 = N->getOperand(0);
5207  EVT VT = N->getValueType(0);
5208 
5209  // fold (bswap c1) -> c2
5210  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5211  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
5212  // fold (bswap (bswap x)) -> x
5213  if (N0.getOpcode() == ISD::BSWAP)
5214  return N0->getOperand(0);
5215  return SDValue();
5216 }
5217 
5218 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
5219  SDValue N0 = N->getOperand(0);
5220 
5221  // fold (bitreverse (bitreverse x)) -> x
5222  if (N0.getOpcode() == ISD::BITREVERSE)
5223  return N0.getOperand(0);
5224  return SDValue();
5225 }
5226 
5227 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
5228  SDValue N0 = N->getOperand(0);
5229  EVT VT = N->getValueType(0);
5230 
5231  // fold (ctlz c1) -> c2
5232  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5233  return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
5234  return SDValue();
5235 }
5236 
5237 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
5238  SDValue N0 = N->getOperand(0);
5239  EVT VT = N->getValueType(0);
5240 
5241  // fold (ctlz_zero_undef c1) -> c2
5242  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5243  return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5244  return SDValue();
5245 }
5246 
5247 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
5248  SDValue N0 = N->getOperand(0);
5249  EVT VT = N->getValueType(0);
5250 
5251  // fold (cttz c1) -> c2
5252  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5253  return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
5254  return SDValue();
5255 }
5256 
5257 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
5258  SDValue N0 = N->getOperand(0);
5259  EVT VT = N->getValueType(0);
5260 
5261  // fold (cttz_zero_undef c1) -> c2
5262  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5263  return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
5264  return SDValue();
5265 }
5266 
5267 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
5268  SDValue N0 = N->getOperand(0);
5269  EVT VT = N->getValueType(0);
5270 
5271  // fold (ctpop c1) -> c2
5272  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
5273  return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
5274  return SDValue();
5275 }
5276 
5277 
5278 /// \brief Generate Min/Max node
5279 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
5280  SDValue RHS, SDValue True, SDValue False,
5281  ISD::CondCode CC, const TargetLowering &TLI,
5282  SelectionDAG &DAG) {
5283  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
5284  return SDValue();
5285 
5286  switch (CC) {
5287  case ISD::SETOLT:
5288  case ISD::SETOLE:
5289  case ISD::SETLT:
5290  case ISD::SETLE:
5291  case ISD::SETULT:
5292  case ISD::SETULE: {
5293  unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
5294  if (TLI.isOperationLegal(Opcode, VT))
5295  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5296  return SDValue();
5297  }
5298  case ISD::SETOGT:
5299  case ISD::SETOGE:
5300  case ISD::SETGT:
5301  case ISD::SETGE:
5302  case ISD::SETUGT:
5303  case ISD::SETUGE: {
5304  unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
5305  if (TLI.isOperationLegal(Opcode, VT))
5306  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
5307  return SDValue();
5308  }
5309  default:
5310  return SDValue();
5311  }
5312 }
5313 
5314 // TODO: We should handle other cases of selecting between {-1,0,1} here.
5315 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
5316  SDValue Cond = N->getOperand(0);
5317  SDValue N1 = N->getOperand(1);
5318  SDValue N2 = N->getOperand(2);
5319  EVT VT = N->getValueType(0);
5320  EVT CondVT = Cond.getValueType();
5321  SDLoc DL(N);
5322 
5323  // fold (select Cond, 0, 1) -> (xor Cond, 1)
5324  // We can't do this reliably if integer-based booleans have different contents
5325  // from floating-point-based booleans. This is because we can't tell whether we
5326  // have an integer-based boolean or a floating-point-based boolean unless we
5327  // can find the SETCC that produced it and inspect its operands. This is
5328  // fairly easy if C is the SETCC node, but it can potentially be
5329  // undiscoverable (or not reasonably discoverable). For example, it could be
5330  // in another basic block or it could require searching a complicated
5331  // expression.
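  // For instance, on a target whose FP booleans are 0/-1 but whose integer
  // booleans are 0/1, a true condition of -1 would make (xor Cond, 1)
  // yield -2 rather than the required 0.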
5332  if (VT.isInteger() &&
5333  (CondVT == MVT::i1 || (CondVT.isInteger() &&
5334  TLI.getBooleanContents(false, true) ==
5335  TargetLowering::ZeroOrOneBooleanContent &&
5336  TLI.getBooleanContents(false, false) ==
5337  TargetLowering::ZeroOrOneBooleanContent)) &&
5338  isNullConstant(N1) && isOneConstant(N2)) {
5339  SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
5340  DAG.getConstant(1, DL, CondVT));
5341  if (VT.bitsEq(CondVT))
5342  return NotCond;
5343  return DAG.getZExtOrTrunc(NotCond, DL, VT);
5344  }
5345 
5346  return SDValue();
5347 }
5348 
5349 SDValue DAGCombiner::visitSELECT(SDNode *N) {
5350  SDValue N0 = N->getOperand(0);
5351  SDValue N1 = N->getOperand(1);
5352  SDValue N2 = N->getOperand(2);
5353  EVT VT = N->getValueType(0);
5354  EVT VT0 = N0.getValueType();
5355 
5356  // fold (select C, X, X) -> X
5357  if (N1 == N2)
5358  return N1;
5359  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
5360  // fold (select true, X, Y) -> X
5361  // fold (select false, X, Y) -> Y
5362  return !N0C->isNullValue() ? N1 : N2;
5363  }
5364  // fold (select X, X, Y) -> (or X, Y)
5365  // fold (select X, 1, Y) -> (or C, Y)
5366  if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
5367  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
5368 
5369  if (SDValue V = foldSelectOfConstants(N))
5370  return V;
5371 
5372  // fold (select C, 0, X) -> (and (not C), X)
5373  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
5374  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5375  AddToWorklist(NOTNode.getNode());
5376  return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
5377  }
5378  // fold (select C, X, 1) -> (or (not C), X)
5379  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
5380  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
5381  AddToWorklist(NOTNode.getNode());
5382  return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
5383  }
5384  // fold (select X, Y, X) -> (and X, Y)
5385  // fold (select X, Y, 0) -> (and X, Y)
5386  if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
5387  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
5388 
5389  // If we can fold this based on the true/false value, do so.
5390  if (SimplifySelectOps(N, N1, N2))
5391  return SDValue(N, 0); // Don't revisit N.
5392 
5393  if (VT0 == MVT::i1) {
5394  // The code in this block deals with the following 2 equivalences:
5395  // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
5396  // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
5397  // The target can specify its preferred form with the
5398  // shouldNormalizeToSelectSequence() callback. However, we always transform
5399  // to the right-hand form if the inner select already exists in the DAG,
5400  // and we always transform to the left-hand form if we know that we can
5401  // further optimize the combination of the conditions.
5402  bool normalizeToSequence
5403  = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
5404  // select (and Cond0, Cond1), X, Y
5405  // -> select Cond0, (select Cond1, X, Y), Y
5406  if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
5407  SDValue Cond0 = N0->getOperand(0);
5408  SDValue Cond1 = N0->getOperand(1);
5409  SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5410  N1.getValueType(), Cond1, N1, N2);
5411  if (normalizeToSequence || !InnerSelect.use_empty())
5412  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
5413  InnerSelect, N2);
5414  }
5415  // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
5416  if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
5417  SDValue Cond0 = N0->getOperand(0);
5418  SDValue Cond1 = N0->getOperand(1);
5419  SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5420  N1.getValueType(), Cond1, N1, N2);
5421  if (normalizeToSequence || !InnerSelect.use_empty())
5422  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
5423  InnerSelect);
5424  }
5425 
5426  // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
5427  if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
5428  SDValue N1_0 = N1->getOperand(0);
5429  SDValue N1_1 = N1->getOperand(1);
5430  SDValue N1_2 = N1->getOperand(2);
5431  if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
5432  // Create the actual and node if we can generate good code for it.
5433  if (!normalizeToSequence) {
5434  SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
5435  N0, N1_0);
5436  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
5437  N1_1, N2);
5438  }
5439  // Otherwise see if we can optimize the "and" to a better pattern.
5440  if (SDValue Combined = visitANDLike(N0, N1_0, N))
5441  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5442  N1_1, N2);
5443  }
5444  }
5445  // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
5446  if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
5447  SDValue N2_0 = N2->getOperand(0);
5448  SDValue N2_1 = N2->getOperand(1);
5449  SDValue N2_2 = N2->getOperand(2);
5450  if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
5451  // Create the actual or node if we can generate good code for it.
5452  if (!normalizeToSequence) {
5453  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
5454  N0, N2_0);
5455  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
5456  N1, N2_2);
5457  }
5458  // Otherwise see if we can optimize to a better pattern.
5459  if (SDValue Combined = visitORLike(N0, N2_0, N))
5460  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5461  N1, N2_2);
5462  }
5463  }
5464  }
5465 
5466  // select (xor Cond, 1), X, Y -> select Cond, Y, X
5467  if (VT0 == MVT::i1) {
5468  if (N0->getOpcode() == ISD::XOR) {
5469  if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
5470  SDValue Cond0 = N0->getOperand(0);
5471  if (C->isOne())
5472  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
5473  Cond0, N2, N1);
5474  }
5475  }
5476  }
5477 
5478  // fold selects based on a setcc into other things, such as min/max/abs
5479  if (N0.getOpcode() == ISD::SETCC) {
5480  // select x, y (fcmp lt x, y) -> fminnum x, y
5481  // select x, y (fcmp gt x, y) -> fmaxnum x, y
5482  //
5483  // This is OK if we don't care about what happens if either operand is a
5484  // NaN.
5485  //
5486 
5487  // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
5488  // no signed zeros as well as no nans.
5489  const TargetOptions &Options = DAG.getTarget().Options;
5490  if (Options.UnsafeFPMath &&
5491  VT.isFloatingPoint() && N0.hasOneUse() &&
5492  DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
5493  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5494 
5495  if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
5496  N0.getOperand(1), N1, N2, CC,
5497  TLI, DAG))
5498  return FMinMax;
5499  }
5500 
5501  if ((!LegalOperations &&
5502  TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
5503  TLI.isOperationLegal(ISD::SELECT_CC, VT))
5504  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
5505  N0.getOperand(0), N0.getOperand(1),
5506  N1, N2, N0.getOperand(2));
5507  return SimplifySelect(SDLoc(N), N0, N1, N2);
5508  }
5509 
5510  return SDValue();
5511 }
5512 
5513 static
5514 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
5515  SDLoc DL(N);
5516  EVT LoVT, HiVT;
5517  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
5518 
5519  // Split the inputs.
5520  SDValue Lo, Hi, LL, LH, RL, RH;
5521  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
5522  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
5523 
5524  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
5525  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
5526 
5527  return std::make_pair(Lo, Hi);
5528 }
5529 
5530 // This function assumes all the vselect's arguments are CONCAT_VECTOR
5531 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
5532 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
5533  SDLoc DL(N);
5534  SDValue Cond = N->getOperand(0);
5535  SDValue LHS = N->getOperand(1);
5536  SDValue RHS = N->getOperand(2);
5537  EVT VT = N->getValueType(0);
5538  int NumElems = VT.getVectorNumElements();
5539  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
5540  RHS.getOpcode() == ISD::CONCAT_VECTORS &&
5541  Cond.getOpcode() == ISD::BUILD_VECTOR);
5542 
5543  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
5544  // binary ones here.
5545  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
5546  return SDValue();
5547 
5548  // We're sure we have an even number of elements due to the
5549  // concat_vectors we have as arguments to vselect.
5550  // Skip BV elements until we find one that's not an UNDEF.
5551  // After we find a non-UNDEF element, keep looping until we get to half the
5552  // length of the BV and check that all the non-undef nodes are the same.
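  // e.g. (illustrative) for Cond = <-1, undef, 0, 0>: BottomHalf becomes -1
  // and TopHalf becomes 0, so the result is concat(LHS.op0, RHS.op1).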
5553  ConstantSDNode *BottomHalf = nullptr;
5554  for (int i = 0; i < NumElems / 2; ++i) {
5555  if (Cond->getOperand(i)->isUndef())
5556  continue;
5557 
5558  if (BottomHalf == nullptr)
5559  BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5560  else if (Cond->getOperand(i).getNode() != BottomHalf)
5561  return SDValue();
5562  }
5563 
5564  // Do the same for the second half of the BuildVector
5565  ConstantSDNode *TopHalf = nullptr;
5566  for (int i = NumElems / 2; i < NumElems; ++i) {
5567  if (Cond->getOperand(i)->isUndef())
5568  continue;
5569 
5570  if (TopHalf == nullptr)
5571  TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5572  else if (Cond->getOperand(i).getNode() != TopHalf)
5573  return SDValue();
5574  }
5575 
5576  assert(TopHalf && BottomHalf &&
5577  "One half of the selector was all UNDEFs and the other was all the "
5578  "same value. This should have been addressed before this function.");
5579  return DAG.getNode(
5580  ISD::CONCAT_VECTORS, DL, VT,
5581  BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
5582  TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
5583 }
5584 
5585 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
5586 
5587  if (Level >= AfterLegalizeTypes)
5588  return SDValue();
5589 
5590  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
5591  SDValue Mask = MSC->getMask();
5592  SDValue Data = MSC->getValue();
5593  SDLoc DL(N);
5594 
5595  // If the MSCATTER data type requires splitting and the mask is provided by a
5596  // SETCC, then split both nodes and their operands before legalization. This
5597  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5598  // and enables future optimizations (e.g. min/max pattern matching on X86).
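  // For example (illustrative): a v16i32 scatter whose mask is a v16i1 SETCC,
  // on a target where v16i32 must be split, becomes two v8i32 scatters, each
  // consuming one half of the split SETCC as its mask.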
5599  if (Mask.getOpcode() != ISD::SETCC)
5600  return SDValue();
5601 
5602  // Check if any splitting is required.
5603  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5604  TargetLowering::TypeSplitVector)
5605  return SDValue();
5606  SDValue MaskLo, MaskHi, Lo, Hi;
5607  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5608 
5609  EVT LoVT, HiVT;
5610  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
5611 
5612  SDValue Chain = MSC->getChain();
5613 
5614  EVT MemoryVT = MSC->getMemoryVT();
5615  unsigned Alignment = MSC->getOriginalAlignment();
5616 
5617  EVT LoMemVT, HiMemVT;
5618  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5619 
5620  SDValue DataLo, DataHi;
5621  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5622 
5623  SDValue BasePtr = MSC->getBasePtr();
5624  SDValue IndexLo, IndexHi;
5625  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
5626 
5627  MachineMemOperand *MMO = DAG.getMachineFunction().
5628  getMachineMemOperand(MSC->getPointerInfo(),
5629  MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
5630  Alignment, MSC->getAAInfo(), MSC->getRanges());
5631 
5632  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
5633  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
5634  DL, OpsLo, MMO);
5635 
5636  SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
5637  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
5638  DL, OpsHi, MMO);
5639 
5640  AddToWorklist(Lo.getNode());
5641  AddToWorklist(Hi.getNode());
5642 
5643  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5644 }
5645 
5646 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
5647 
5648  if (Level >= AfterLegalizeTypes)
5649  return SDValue();
5650 
5651  MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
5652  SDValue Mask = MST->getMask();
5653  SDValue Data = MST->getValue();
5654  EVT VT = Data.getValueType();
5655  SDLoc DL(N);
5656 
5657  // If the MSTORE data type requires splitting and the mask is provided by a
5658  // SETCC, then split both nodes and their operands before legalization. This
5659  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5660  // and enables future optimizations (e.g. min/max pattern matching on X86).
5661  if (Mask.getOpcode() == ISD::SETCC) {
5662 
5663  // Check if any splitting is required.
5664  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5665  TargetLowering::TypeSplitVector)
5666  return SDValue();
5667 
5668  SDValue MaskLo, MaskHi, Lo, Hi;
5669  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5670 
5671  SDValue Chain = MST->getChain();
5672  SDValue Ptr = MST->getBasePtr();
5673 
5674  EVT MemoryVT = MST->getMemoryVT();
5675  unsigned Alignment = MST->getOriginalAlignment();
5676 
5677  // if Alignment is equal to the vector size,
5678  // take half of it for the second part
5679  unsigned SecondHalfAlignment =
5680  (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
5681 
5682  EVT LoMemVT, HiMemVT;
5683  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5684 
5685  SDValue DataLo, DataHi;
5686  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5687 
5688  MachineMemOperand *MMO = DAG.getMachineFunction().
5689  getMachineMemOperand(MST->getPointerInfo(),
5690  MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
5691  Alignment, MST->getAAInfo(), MST->getRanges());
5692 
5693  Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
5694  MST->isTruncatingStore(),
5695  MST->isCompressingStore());
5696 
5697  Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
5698  MST->isCompressingStore());
5699 
5700  MMO = DAG.getMachineFunction().
5701  getMachineMemOperand(MST->getPointerInfo(),
5702  MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
5703  SecondHalfAlignment, MST->getAAInfo(),
5704  MST->getRanges());
5705 
5706  Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
5707  MST->isTruncatingStore(),
5708  MST->isCompressingStore());
5709 
5710  AddToWorklist(Lo.getNode());
5711  AddToWorklist(Hi.getNode());
5712 
5713  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5714  }
5715  return SDValue();
5716 }
5717 
5718 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
5719 
5720  if (Level >= AfterLegalizeTypes)
5721  return SDValue();
5722 
5723  MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
5724  SDValue Mask = MGT->getMask();
5725  SDLoc DL(N);
5726 
5727  // If the MGATHER result requires splitting and the mask is provided by a
5728  // SETCC, then split both nodes and their operands before legalization. This
5729  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5730  // and enables future optimizations (e.g. min/max pattern matching on X86).
5731 
5732  if (Mask.getOpcode() != ISD::SETCC)
5733  return SDValue();
5734 
5735  EVT VT = N->getValueType(0);
5736 
5737  // Check if any splitting is required.
5738  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5739  TargetLowering::TypeSplitVector)
5740  return SDValue();
5741 
5742  SDValue MaskLo, MaskHi, Lo, Hi;
5743  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5744 
5745  SDValue Src0 = MGT->getValue();
5746  SDValue Src0Lo, Src0Hi;
5747  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5748 
5749  EVT LoVT, HiVT;
5750  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5751 
5752  SDValue Chain = MGT->getChain();
5753  EVT MemoryVT = MGT->getMemoryVT();
5754  unsigned Alignment = MGT->getOriginalAlignment();
5755 
5756  EVT LoMemVT, HiMemVT;
5757  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5758 
5759  SDValue BasePtr = MGT->getBasePtr();
5760  SDValue Index = MGT->getIndex();
5761  SDValue IndexLo, IndexHi;
5762  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
5763 
5764  MachineMemOperand *MMO = DAG.getMachineFunction().
5765  getMachineMemOperand(MGT->getPointerInfo(),
5766  MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
5767  Alignment, MGT->getAAInfo(), MGT->getRanges());
5768 
5769  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
5770  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
5771  MMO);
5772 
5773  SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
5774  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
5775  MMO);
5776 
5777  AddToWorklist(Lo.getNode());
5778  AddToWorklist(Hi.getNode());
5779 
5780  // Build a factor node to remember that this load is independent of the
5781  // other one.
5782  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5783  Hi.getValue(1));
5784 
5785  // Legalized the chain result - switch anything that used the old chain to
5786  // use the new one.
5787  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
5788 
5789  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5790 
5791  SDValue RetOps[] = { GatherRes, Chain };
5792  return DAG.getMergeValues(RetOps, DL);
5793 }
5794 
5795 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
5796 
5797  if (Level >= AfterLegalizeTypes)
5798  return SDValue();
5799 
5800  MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
5801  SDValue Mask = MLD->getMask();
5802  SDLoc DL(N);
5803 
5804  // If the MLOAD result requires splitting and the mask is provided by a
5805  // SETCC, then split both nodes and their operands before legalization. This
5806  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5807  // and enables future optimizations (e.g. min/max pattern matching on X86).
5808 
5809  if (Mask.getOpcode() == ISD::SETCC) {
5810  EVT VT = N->getValueType(0);
5811 
5812  // Check if any splitting is required.
5813  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5814  TargetLowering::TypeSplitVector)
5815  return SDValue();
5816 
5817  SDValue MaskLo, MaskHi, Lo, Hi;
5818  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5819 
5820  SDValue Src0 = MLD->getSrc0();
5821  SDValue Src0Lo, Src0Hi;
5822  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5823 
5824  EVT LoVT, HiVT;
5825  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
5826 
5827  SDValue Chain = MLD->getChain();
5828  SDValue Ptr = MLD->getBasePtr();
5829  EVT MemoryVT = MLD->getMemoryVT();
5830  unsigned Alignment = MLD->getOriginalAlignment();
5831 
5832  // if Alignment is equal to the vector size,
5833  // take half of it for the second part
5834  unsigned SecondHalfAlignment =
5835  (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
5836  Alignment/2 : Alignment;
5837 
5838  EVT LoMemVT, HiMemVT;
5839  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5840 
5841  MachineMemOperand *MMO = DAG.getMachineFunction().
5842  getMachineMemOperand(MLD->getPointerInfo(),
5843  MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
5844  Alignment, MLD->getAAInfo(), MLD->getRanges());
5845 
5846  Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
5847  ISD::NON_EXTLOAD, MLD->isExpandingLoad());
5848 
5849  Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
5850  MLD->isExpandingLoad());
5851 
5852  MMO = DAG.getMachineFunction().
5853  getMachineMemOperand(MLD->getPointerInfo(),
5854  MachineMemOperand::MOLoad, HiMemVT.getStoreSize(),
5855  SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
5856 
5857  Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
5858  ISD::NON_EXTLOAD, MLD->isExpandingLoad());
5859 
5860  AddToWorklist(Lo.getNode());
5861  AddToWorklist(Hi.getNode());
5862 
5863  // Build a factor node to remember that this load is independent of the
5864  // other one.
5865  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5866  Hi.getValue(1));
5867 
5868  // Legalized the chain result - switch anything that used the old chain to
5869  // use the new one.
5870  DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
5871 
5872  SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5873 
5874  SDValue RetOps[] = { LoadRes, Chain };
5875  return DAG.getMergeValues(RetOps, DL);
5876  }
5877  return SDValue();
5878 }
5879 
5880 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
5881  SDValue N0 = N->getOperand(0);
5882  SDValue N1 = N->getOperand(1);
5883  SDValue N2 = N->getOperand(2);
5884  SDLoc DL(N);
5885 
5886  // fold (vselect C, X, X) -> X
5887  if (N1 == N2)
5888  return N1;
5889 
5890  // Canonicalize integer abs.
5891  // vselect (setg[te] X, 0), X, -X ->
5892  // vselect (setgt X, -1), X, -X ->
5893  // vselect (setl[te] X, 0), -X, X ->
5894  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
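  // A worked i8 example (illustrative), X = -5:
  //   Y = sra(X, 7) = -1; add(X, Y) = -6; xor(-6, -1) = 5 = |X|.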
5895  if (N0.getOpcode() == ISD::SETCC) {
5896  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5897  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5898  bool isAbs = false;
5899  bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
5900 
5901  if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
5902  (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
5903  N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
5904  isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
5905  else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
5906  N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
5907  isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5908 
5909  if (isAbs) {
5910  EVT VT = LHS.getValueType();
5911  SDValue Shift = DAG.getNode(
5912  ISD::SRA, DL, VT, LHS,
5913  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
5914  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
5915  AddToWorklist(Shift.getNode());
5916  AddToWorklist(Add.getNode());
5917  return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
5918  }
5919  }
5920 
5921  if (SimplifySelectOps(N, N1, N2))
5922  return SDValue(N, 0); // Don't revisit N.
5923 
5924  // If the VSELECT result requires splitting and the mask is provided by a
5925  // SETCC, then split both nodes and their operands before legalization. This
5926  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5927  // and enables future optimizations (e.g. min/max pattern matching on X86).
5928  if (N0.getOpcode() == ISD::SETCC) {
5929  EVT VT = N->getValueType(0);
5930 
5931  // Check if any splitting is required.
5932  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5933  TargetLowering::TypeSplitVector)
5934  return SDValue();
5935 
5936  SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
5937  std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
5938  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
5939  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
5940 
5941  Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
5942  Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
5943 
5944  // Add the new VSELECT nodes to the work list in case they need to be split
5945  // again.
5946  AddToWorklist(Lo.getNode());
5947  AddToWorklist(Hi.getNode());
5948 
5949  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5950  }
5951 
5952  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
5953  if (ISD::isBuildVectorAllOnes(N0.getNode()))
5954  return N1;
5955  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
5956  if (ISD::isBuildVectorAllZeros(N0.getNode()))
5957  return N2;
5958 
5959  // The ConvertSelectToConcatVector function is assuming both the above
5960  // checks for (vselect (build_vector all{ones,zeros}) ...) have been made
5961  // and addressed.
5962  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
5963  N2.getOpcode() == ISD::CONCAT_VECTORS &&
5964  ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
5965  if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
5966  return CV;
5967  }
5968 
5969  return SDValue();
5970 }
5971 
5972 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
5973  SDValue N0 = N->getOperand(0);
5974  SDValue N1 = N->getOperand(1);
5975  SDValue N2 = N->getOperand(2);
5976  SDValue N3 = N->getOperand(3);
5977  SDValue N4 = N->getOperand(4);
5978  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
5979 
5980  // fold select_cc lhs, rhs, x, x, cc -> x
5981  if (N2 == N3)
5982  return N2;
5983 
5984  // Determine if the condition we're dealing with is constant
5985  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
5986  CC, SDLoc(N), false)) {
5987  AddToWorklist(SCC.getNode());
5988 
5989  if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
5990  if (!SCCC->isNullValue())
5991  return N2; // cond always true -> true val
5992  else
5993  return N3; // cond always false -> false val
5994  } else if (SCC->isUndef()) {
5995  // When the condition is UNDEF, just return the first operand. This is
5996  // coherent with DAG creation; no setcc node is created in this case.
5997  return N2;
5998  } else if (SCC.getOpcode() == ISD::SETCC) {
5999  // Fold to a simpler select_cc
6000  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
6001  SCC.getOperand(0), SCC.getOperand(1), N2, N3,
6002  SCC.getOperand(2));
6003  }
6004  }
6005 
6006  // If we can fold this based on the true/false value, do so.
6007  if (SimplifySelectOps(N, N2, N3))
6008  return SDValue(N, 0); // Don't revisit N.
6009 
6010  // fold select_cc into other things, such as min/max/abs
6011  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
6012 }
6013 
6014 SDValue DAGCombiner::visitSETCC(SDNode *N) {
6015  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
6016  cast<CondCodeSDNode>(N->getOperand(2))->get(),
6017  SDLoc(N));
6018 }
6019 
6020 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
6021  SDValue LHS = N->getOperand(0);
6022  SDValue RHS = N->getOperand(1);
6023  SDValue Carry = N->getOperand(2);
6024  SDValue Cond = N->getOperand(3);
6025 
6026  // If Carry is false, fold to a regular SETCC.
6027  if (Carry.getOpcode() == ISD::CARRY_FALSE)
6028  return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
6029 
6030  return SDValue();
6031 }
6032 
6033 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
6034 /// a build_vector of constants.
6035 /// This function is called by the DAGCombiner when visiting sext/zext/aext
6036 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
6037 /// Vector extends are not folded if operations are legal; this is to
6038 /// avoid introducing illegal build_vector dag nodes.
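/// For example (illustrative):
///   (zext:v4i32 (build_vector:v4i16 1, 2, 3, 4))
///     -> (build_vector:v4i32 1, 2, 3, 4)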
6039 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
6040  SelectionDAG &DAG, bool LegalTypes,
6041  bool LegalOperations) {
6042  unsigned Opcode = N->getOpcode();
6043  SDValue N0 = N->getOperand(0);
6044  EVT VT = N->getValueType(0);
6045 
6046  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
6047  Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6049  && "Expected EXTEND dag node in input!");
6050 
6051  // fold (sext c1) -> c1
6052  // fold (zext c1) -> c1
6053  // fold (aext c1) -> c1
6054  if (isa<ConstantSDNode>(N0))
6055  return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
6056 
6057  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
6058  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
6059  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
6060  EVT SVT = VT.getScalarType();
6061  if (!(VT.isVector() &&
6062  (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
6063  ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
6064  return nullptr;
6065 
6066  // We can fold this node into a build_vector.
6067  unsigned VTBits = SVT.getSizeInBits();
6068  unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
6069  SmallVector<SDValue, 8> Elts;
6070  unsigned NumElts = VT.getVectorNumElements();
6071  SDLoc DL(N);
6072 
6073  for (unsigned i=0; i != NumElts; ++i) {
6074  SDValue Op = N0->getOperand(i);
6075  if (Op->isUndef()) {
6076  Elts.push_back(DAG.getUNDEF(SVT));
6077  continue;
6078  }
6079 
6080  SDLoc DL(Op);
6081  // Get the constant value and if needed trunc it to the size of the type.
6082  // Nodes like build_vector might have constants wider than the scalar type.
6083  APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
6084  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
6085  Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
6086  else
6087  Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
6088  }
6089 
6090  return DAG.getBuildVector(VT, DL, Elts).getNode();
6091 }
6092 
6093 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
6094 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
6095 // transformation. Returns true if the extensions are possible and the
6096 // above-mentioned transformation is profitable.
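// For example, if (load x) is also used by (setcc (load x), 0, ne), that
// setcc can be rewritten against the extended load (see ExtendSetCCUses), so
// extending the load does not strand a truncate on the other use.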
6097 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
6098  unsigned ExtOpc,
6099  SmallVectorImpl<SDNode *> &ExtendNodes,
6100  const TargetLowering &TLI) {
6101  bool HasCopyToRegUses = false;
6102  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
6103  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
6104  UE = N0.getNode()->use_end();
6105  UI != UE; ++UI) {
6106  SDNode *User = *UI;
6107  if (User == N)
6108  continue;
6109  if (UI.getUse().getResNo() != N0.getResNo())
6110  continue;
6111  // FIXME: Only extend SETCC N, N and SETCC N, c for now.
6112  if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
6113  ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
6114  if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
6115  // Sign bits will be lost after a zext.
6116  return false;
6117  bool Add = false;
6118  for (unsigned i = 0; i != 2; ++i) {
6119  SDValue UseOp = User->getOperand(i);
6120  if (UseOp == N0)
6121  continue;
6122  if (!isa<ConstantSDNode>(UseOp))
6123  return false;
6124  Add = true;
6125  }
6126  if (Add)
6127  ExtendNodes.push_back(User);
6128  continue;
6129  }
6130  // If truncates aren't free and there are users we can't
6131  // extend, it isn't worthwhile.
6132  if (!isTruncFree)
6133  return false;
6134  // Remember if this value is live-out.
6135  if (User->getOpcode() == ISD::CopyToReg)
6136  HasCopyToRegUses = true;
6137  }
6138 
6139  if (HasCopyToRegUses) {
6140  bool BothLiveOut = false;
6141  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6142  UI != UE; ++UI) {
6143  SDUse &Use = UI.getUse();
6144  if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
6145  BothLiveOut = true;
6146  break;
6147  }
6148  }
6149  if (BothLiveOut)
6150  // Both unextended and extended values are live out. There had better be
6151  // a good reason for the transformation.
6152  return ExtendNodes.size();
6153  }
6154  return true;
6155 }
6156 
6157 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
6158  SDValue Trunc, SDValue ExtLoad,
6159  const SDLoc &DL, ISD::NodeType ExtType) {
6160  // Extend SetCC uses if necessary.
6161  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
6162  SDNode *SetCC = SetCCs[i];
6163  SmallVector<SDValue, 4> Ops;
6164 
6165  for (unsigned j = 0; j != 2; ++j) {
6166  SDValue SOp = SetCC->getOperand(j);
6167  if (SOp == Trunc)
6168  Ops.push_back(ExtLoad);
6169  else
6170  Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
6171  }
6172 
6173  Ops.push_back(SetCC->getOperand(2));
6174  CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
6175  }
6176 }
6177 
6178 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
6179 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
6180  SDValue N0 = N->getOperand(0);
6181  EVT DstVT = N->getValueType(0);
6182  EVT SrcVT = N0.getValueType();
6183 
6184  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
6185  N->getOpcode() == ISD::ZERO_EXTEND) &&
6186  "Unexpected node type (not an extend)!");
6187 
6188  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
6189  // For example, on a target with legal v4i32, but illegal v8i32, turn:
6190  // (v8i32 (sext (v8i16 (load x))))
6191  // into:
6192  // (v8i32 (concat_vectors (v4i32 (sextload x)),
6193  // (v4i32 (sextload (x + 16)))))
6194  // Where uses of the original load, i.e.:
6195  // (v8i16 (load x))
6196  // are replaced with:
6197  // (v8i16 (truncate
6198  // (v8i32 (concat_vectors (v4i32 (sextload x)),
6199  // (v4i32 (sextload (x + 16)))))))
6200  //
6201  // This combine is only applicable to illegal, but splittable, vectors.
6202  // All legal types, and illegal non-vector types, are handled elsewhere.
6203  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
6204  //
6205  if (N0->getOpcode() != ISD::LOAD)
6206  return SDValue();
6207 
6208  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6209 
6210  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
6211  !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
6212  !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
6213  return SDValue();
6214 
6215  SmallVector<SDNode *, 4> SetCCs;
6216  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
6217  return SDValue();
6218 
6219  ISD::LoadExtType ExtType =
6220  N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
6221 
6222  // Try to split the vector types to get down to legal types.
6223  EVT SplitSrcVT = SrcVT;
6224  EVT SplitDstVT = DstVT;
6225  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
6226  SplitSrcVT.getVectorNumElements() > 1) {
6227  SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
6228  SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
6229  }
6230 
6231  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
6232  return SDValue();
6233 
6234  SDLoc DL(N);
6235  const unsigned NumSplits =
6236  DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
6237  const unsigned Stride = SplitSrcVT.getStoreSize();
6238  SmallVector<SDValue, 4> Loads;
6239  SmallVector<SDValue, 4> Chains;
6240 
6241  SDValue BasePtr = LN0->getBasePtr();
6242  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
6243  const unsigned Offset = Idx * Stride;
6244  const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
6245 
6246  SDValue SplitLoad = DAG.getExtLoad(
6247  ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
6248  LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
6249  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
6250 
6251  BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
6252  DAG.getConstant(Stride, DL, BasePtr.getValueType()));
6253 
6254  Loads.push_back(SplitLoad.getValue(0));
6255  Chains.push_back(SplitLoad.getValue(1));
6256  }
6257 
6258  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
6259  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
6260 
6261  CombineTo(N, NewValue);
6262 
6263  // Replace uses of the original load (before extension)
6264  // with a truncate of the concatenated sextloaded vectors.
6265  SDValue Trunc =
6266  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
6267  CombineTo(N0.getNode(), Trunc, NewChain);
6268  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
6269  (ISD::NodeType)N->getOpcode());
6270  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6271 }
6272 
6273 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
6274  SDValue N0 = N->getOperand(0);
6275  EVT VT = N->getValueType(0);
6276 
6277  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6278  LegalOperations))
6279  return SDValue(Res, 0);
6280 
6281  // fold (sext (sext x)) -> (sext x)
6282  // fold (sext (aext x)) -> (sext x)
6283  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6284  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
6285  N0.getOperand(0));
6286 
6287  if (N0.getOpcode() == ISD::TRUNCATE) {
6288  // fold (sext (truncate (load x))) -> (sext (smaller load x))
6289  // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
6290  if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6291  SDNode *oye = N0.getOperand(0).getNode();
6292  if (NarrowLoad.getNode() != N0.getNode()) {
6293  CombineTo(N0.getNode(), NarrowLoad);
6294  // CombineTo deleted the truncate, if needed, but not what's under it.
6295  AddToWorklist(oye);
6296  }
6297  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6298  }
6299 
6300  // See if the value being truncated is already sign extended. If so, just
6301  // eliminate the trunc/sext pair.
6302  SDValue Op = N0.getOperand(0);
6303  unsigned OpBits = Op.getScalarValueSizeInBits();
6304  unsigned MidBits = N0.getScalarValueSizeInBits();
6305  unsigned DestBits = VT.getScalarSizeInBits();
6306  unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
6307 
6308  if (OpBits == DestBits) {
6309  // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
6310  // bits, it is already sign extended; return it directly.
6311  if (NumSignBits > DestBits-MidBits)
6312  return Op;
6313  } else if (OpBits < DestBits) {
6314  // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
6315  // bits, just sext from i32.
6316  if (NumSignBits > OpBits-MidBits)
6317  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
6318  } else {
6319  // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
6320  // bits, just truncate to i32.
6321  if (NumSignBits > OpBits-MidBits)
6322  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6323  }
6324 
6325  // fold (sext (truncate x)) -> (sextinreg x).
6326  if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
6327  N0.getValueType())) {
6328  if (OpBits < DestBits)
6329  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
6330  else if (OpBits > DestBits)
6331  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
6332  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
6333  DAG.getValueType(N0.getValueType()));
6334  }
6335  }
6336 
6337  // fold (sext (load x)) -> (sext (truncate (sextload x)))
6338  // Only generate vector extloads when 1) they're legal, and 2) they are
6339  // deemed desirable by the target.
6340  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6341  ((!LegalOperations && !VT.isVector() &&
6342  !cast<LoadSDNode>(N0)->isVolatile()) ||
6343  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
6344  bool DoXform = true;
6345  SmallVector<SDNode*, 4> SetCCs;
6346  if (!N0.hasOneUse())
6347  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
6348  if (VT.isVector())
6349  DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6350  if (DoXform) {
6351  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6352  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6353  LN0->getChain(),
6354  LN0->getBasePtr(), N0.getValueType(),
6355  LN0->getMemOperand());
6356  CombineTo(N, ExtLoad);
6357  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6358  N0.getValueType(), ExtLoad);
6359  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6360  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6361  ISD::SIGN_EXTEND);
6362  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6363  }
6364  }
6365 
6366  // fold (sext (load x)) to multiple smaller sextloads.
6367  // Only on illegal but splittable vectors.
6368  if (SDValue ExtLoad = CombineExtLoad(N))
6369  return ExtLoad;
6370 
6371  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
6372  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
6373  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6374  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6375  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6376  EVT MemVT = LN0->getMemoryVT();
6377  if ((!LegalOperations && !LN0->isVolatile()) ||
6378  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
6379  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6380  LN0->getChain(),
6381  LN0->getBasePtr(), MemVT,
6382  LN0->getMemOperand());
6383  CombineTo(N, ExtLoad);
6384  CombineTo(N0.getNode(),
6385  DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6386  N0.getValueType(), ExtLoad),
6387  ExtLoad.getValue(1));
6388  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6389  }
6390  }
6391 
6392  // fold (sext (and/or/xor (load x), cst)) ->
6393  // (and/or/xor (sextload x), (sext cst))
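  // For example, (sext (and (i16 load x), 0x00f0) to i32) becomes
  // (and (i32 sextload x), 0x000000f0); the constant is sign-extended to the
  // wider type alongside the load.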
6394  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6395  N0.getOpcode() == ISD::XOR) &&
6396  isa<LoadSDNode>(N0.getOperand(0)) &&
6397  N0.getOperand(1).getOpcode() == ISD::Constant &&
6398  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
6399  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6400  LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6401  if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
6402  bool DoXform = true;
6403  SmallVector<SDNode*, 4> SetCCs;
6404  if (!N0.hasOneUse())
6405  DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
6406  SetCCs, TLI);
6407  if (DoXform) {
6408  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
6409  LN0->getChain(), LN0->getBasePtr(),
6410  LN0->getMemoryVT(),
6411  LN0->getMemOperand());
6412  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6413  Mask = Mask.sext(VT.getSizeInBits());
6414  SDLoc DL(N);
6415  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6416  ExtLoad, DAG.getConstant(Mask, DL, VT));
6417  SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6418  SDLoc(N0.getOperand(0)),
6419  N0.getOperand(0).getValueType(), ExtLoad);
6420  CombineTo(N, And);
6421  CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6422  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6423  ISD::SIGN_EXTEND);
6424  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6425  }
6426  }
6427  }
6428 
6429  if (N0.getOpcode() == ISD::SETCC) {
6430  EVT N0VT = N0.getOperand(0).getValueType();
6431  // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
6432  // Only do this before legalize for now.
6433  if (VT.isVector() && !LegalOperations &&
6434  TLI.getBooleanContents(N0VT) ==
6435  TargetLowering::ZeroOrNegativeOneBooleanContent) {
6436  // On some architectures (such as SSE/NEON/etc) the SETCC result type is
6437  // of the same size as the compared operands. Only optimize sext(setcc())
6438  // if this is the case.
6439  EVT SVT = getSetCCResultType(N0VT);
6440 
6441  // We know that the # elements of the results is the same as the
6442  // # elements of the compare (and the # elements of the compare result
6443  // for that matter). Check to see that they are the same size. If so,
6444  // we know that the element size of the sext'd result matches the
6445  // element size of the compare operands.
6446  if (VT.getSizeInBits() == SVT.getSizeInBits())
6447  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6448  N0.getOperand(1),
6449  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6450 
6451  // If the desired elements are smaller or larger than the source
6452  // elements we can use a matching integer vector type and then
6453  // truncate/sign extend
6454  EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6455  if (SVT == MatchingVectorType) {
6456  SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
6457  N0.getOperand(0), N0.getOperand(1),
6458  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6459  return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
6460  }
6461  }
6462 
6463  // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
6464  // Here, T can be 1 or -1, depending on the type of the setcc and
6465  // getBooleanContents().
6466  unsigned SetCCWidth = N0.getScalarValueSizeInBits();
6467 
6468  SDLoc DL(N);
6469  // To determine the "true" side of the select, we need to know the high bit
6470  // of the value returned by the setcc if it evaluates to true.
6471  // If the type of the setcc is i1, then the true case of the select is just
6472  // sext(i1 1), that is, -1.
6473  // If the type of the setcc is larger (say, i8) then the value of the high
6474  // bit depends on getBooleanContents(). So, ask TLI for a real "true" value
6475  // of the appropriate width.
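  // For example, under ZeroOrNegativeOneBooleanContent an i8 setcc yields
  // all ones (-1) for true, while ZeroOrOneBooleanContent yields +1; the
  // select's true operand must match that convention.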
6476  SDValue ExtTrueVal =
6477  (SetCCWidth == 1)
6478  ? DAG.getConstant(APInt::getAllOnesValue(VT.getScalarSizeInBits()),
6479  DL, VT)
6480  : TLI.getConstTrueVal(DAG, VT, DL);
6481 
6482  if (SDValue SCC = SimplifySelectCC(
6483  DL, N0.getOperand(0), N0.getOperand(1), ExtTrueVal,
6484  DAG.getConstant(0, DL, VT),
6485  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6486  return SCC;
6487 
6488  if (!VT.isVector()) {
6489  EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
6490  if (!LegalOperations ||
6491  TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
6492  SDLoc DL(N);
6493  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6494  SDValue SetCC =
6495  DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC);
6496  return DAG.getSelect(DL, VT, SetCC, ExtTrueVal,
6497  DAG.getConstant(0, DL, VT));
6498  }
6499  }
6500  }
6501 
6502  // fold (sext x) -> (zext x) if the sign bit is known zero.
6503  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
6504  DAG.SignBitIsZero(N0))
6505  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
6506 
6507  return SDValue();
6508 }
6509 
6510 // isTruncateOf - If N is a truncate of some other value, return true and
6511 // record the value being truncated in Op and which of Op's bits are zero in
6512 // KnownZero. This function computes KnownZero to avoid a duplicated call to
6513 // computeKnownBits in the caller.
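// For example, (setcc x, 0, setne) where x is known to be either 0 or 1
// carries the same value as (truncate x to i1), which is why the SETCC case
// below is treated as a truncation.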
6514 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
6515  APInt &KnownZero) {
6516  APInt KnownOne;
6517  if (N->getOpcode() == ISD::TRUNCATE) {
6518  Op = N->getOperand(0);
6519  DAG.computeKnownBits(Op, KnownZero, KnownOne);
6520  return true;
6521  }
6522 
6523  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
6524  cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
6525  return false;
6526 
6527  SDValue Op0 = N->getOperand(0);
6528  SDValue Op1 = N->getOperand(1);
6529  assert(Op0.getValueType() == Op1.getValueType());
6530 
6531  if (isNullConstant(Op0))
6532  Op = Op1;
6533  else if (isNullConstant(Op1))
6534  Op = Op0;
6535  else
6536  return false;
6537 
6538  DAG.computeKnownBits(Op, KnownZero, KnownOne);
6539 
6540  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
6541  return false;
6542 
6543  return true;
6544 }
6545 
6546 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
6547  SDValue N0 = N->getOperand(0);
6548  EVT VT = N->getValueType(0);
6549 
6550  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6551  LegalOperations))
6552  return SDValue(Res, 0);
6553 
6554  // fold (zext (zext x)) -> (zext x)
6555  // fold (zext (aext x)) -> (zext x)
6556  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6557  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
6558  N0.getOperand(0));
6559 
6560  // fold (zext (truncate x)) -> (zext x) or
6561  // (zext (truncate x)) -> (truncate x)
6562  // This is valid when the truncated bits of x are already zero.
6563  // FIXME: We should extend this to work for vectors too.
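  // For example, if x is i32 with its top 16 bits known zero, then
  // (zext (trunc x to i16) to i32) is x itself and the pair can be removed.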
6564  SDValue Op;
6565  APInt KnownZero;
6566  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
6567  APInt TruncatedBits =
6568  (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
6569  APInt(Op.getValueSizeInBits(), 0) :
6570  APInt::getBitsSet(Op.getValueSizeInBits(),
6571  N0.getValueSizeInBits(),
6572  std::min(Op.getValueSizeInBits(),
6573  VT.getSizeInBits()));
6574  if (TruncatedBits == (KnownZero & TruncatedBits)) {
6575  if (VT.bitsGT(Op.getValueType()))
6576  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
6577  if (VT.bitsLT(Op.getValueType()))
6578  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6579 
6580  return Op;
6581  }
6582  }
6583 
6584  // fold (zext (truncate (load x))) -> (zext (smaller load x))
6585  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
6586  if (N0.getOpcode() == ISD::TRUNCATE) {
6587  if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6588  SDNode *oye = N0.getOperand(0).getNode();
6589  if (NarrowLoad.getNode() != N0.getNode()) {
6590  CombineTo(N0.getNode(), NarrowLoad);
6591  // CombineTo deleted the truncate, if needed, but not what's under it.
6592  AddToWorklist(oye);
6593  }
6594  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6595  }
6596  }
6597 
6598  // fold (zext (truncate x)) -> (and x, mask)
6599  if (N0.getOpcode() == ISD::TRUNCATE) {
6600  // fold (zext (truncate (load x))) -> (zext (smaller load x))
6601  // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
6602  if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6603  SDNode *oye = N0.getOperand(0).getNode();
6604  if (NarrowLoad.getNode() != N0.getNode()) {
6605  CombineTo(N0.getNode(), NarrowLoad);
6606  // CombineTo deleted the truncate, if needed, but not what's under it.
6607  AddToWorklist(oye);
6608  }
6609  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6610  }
6611 
6612  EVT SrcVT = N0.getOperand(0).getValueType();
6613  EVT MinVT = N0.getValueType();
6614 
6615  // Try to mask before the extension to avoid having to generate a larger mask,
6616  // possibly over several sub-vectors.
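  // For example, for (zext (trunc (v8i16 x) to v8i8) to v8i32) it is cheaper
  // to AND x with 0xff while it is still v8i16 and then extend, rather than
  // extend first and mask with a v8i32 constant.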
6617  if (SrcVT.bitsLT(VT)) {
6618  if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
6619  TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
6620  SDValue Op = N0.getOperand(0);
6621  Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6622  AddToWorklist(Op.getNode());
6623  return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
6624  }
6625  }
6626 
6627  if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
6628  SDValue Op = N0.getOperand(0);
6629  if (SrcVT.bitsLT(VT)) {
6630  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
6631  AddToWorklist(Op.getNode());
6632  } else if (SrcVT.bitsGT(VT)) {
6633  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6634  AddToWorklist(Op.getNode());
6635  }
6636  return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
6637  }
6638  }
6639 
6640  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
6641  // if either of the casts is not free.
6642  if (N0.getOpcode() == ISD::AND &&
6643  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6644  N0.getOperand(1).getOpcode() == ISD::Constant &&
6645  (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6646  N0.getValueType()) ||
6647  !TLI.isZExtFree(N0.getValueType(), VT))) {
6648  SDValue X = N0.getOperand(0).getOperand(0);
6649  if (X.getValueType().bitsLT(VT)) {
6650  X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
6651  } else if (X.getValueType().bitsGT(VT)) {
6652  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6653  }
6654  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6655  Mask = Mask.zext(VT.getSizeInBits());
6656  SDLoc DL(N);
6657  return DAG.getNode(ISD::AND, DL, VT,
6658  X, DAG.getConstant(Mask, DL, VT));
6659  }
6660 
6661  // fold (zext (load x)) -> (zext (truncate (zextload x)))
6662  // Only generate vector extloads when 1) they're legal, and 2) they are
6663  // deemed desirable by the target.
6664  if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6665  ((!LegalOperations && !VT.isVector() &&
6666  !cast<LoadSDNode>(N0)->isVolatile()) ||
6667  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
6668  bool DoXform = true;
6669  SmallVector<SDNode*, 4> SetCCs;
6670  if (!N0.hasOneUse())
6671  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
6672  if (VT.isVector())
6673  DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6674  if (DoXform) {
6675  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6676  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6677  LN0->getChain(),
6678  LN0->getBasePtr(), N0.getValueType(),
6679  LN0->getMemOperand());
6680  CombineTo(N, ExtLoad);
6681  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6682  N0.getValueType(), ExtLoad);
6683  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6684 
6685  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6686  ISD::ZERO_EXTEND);
6687  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6688  }
6689  }
6690 
6691  // fold (zext (load x)) to multiple smaller zextloads.
6692  // Only on illegal but splittable vectors.
6693  if (SDValue ExtLoad = CombineExtLoad(N))
6694  return ExtLoad;
6695 
6696  // fold (zext (and/or/xor (load x), cst)) ->
6697  // (and/or/xor (zextload x), (zext cst))
6698  // Unless (and (load x) cst) will match as a zextload already and has
6699  // additional users.
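  // For example, (zext (or (i16 load x), 0x8000) to i32) becomes
  // (or (i32 zextload x), 0x00008000), with the constant zero-extended.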
6700  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6701  N0.getOpcode() == ISD::XOR) &&
6702  isa<LoadSDNode>(N0.getOperand(0)) &&
6703  N0.getOperand(1).getOpcode() == ISD::Constant &&
6704  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
6705  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6706  LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6707  if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
6708  bool DoXform = true;
6709  SmallVector<SDNode*, 4> SetCCs;
6710  if (!N0.hasOneUse()) {
6711  if (N0.getOpcode() == ISD::AND) {
6712  auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
6713  auto NarrowLoad = false;
6714  EVT LoadResultTy = AndC->getValueType(0);
6715  EVT ExtVT, LoadedVT;
6716  if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
6717  NarrowLoad))
6718  DoXform = false;
6719  }
6720  if (DoXform)
6721  DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
6722  ISD::ZERO_EXTEND, SetCCs, TLI);
6723  }
6724  if (DoXform) {
6725  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
6726  LN0->getChain(), LN0->getBasePtr(),
6727  LN0->getMemoryVT(),
6728  LN0->getMemOperand());
6729  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6730  Mask = Mask.zext(VT.getSizeInBits());
6731  SDLoc DL(N);
6732  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6733  ExtLoad, DAG.getConstant(Mask, DL, VT));
6734  SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6735  SDLoc(N0.getOperand(0)),
6736  N0.getOperand(0).getValueType(), ExtLoad);
6737  CombineTo(N, And);
6738  CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6739  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6740  ISD::ZERO_EXTEND);
6741  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6742  }
6743  }
6744  }
6745 
6746  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
6747  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
6748  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6749  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6750  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6751  EVT MemVT = LN0->getMemoryVT();
6752  if ((!LegalOperations && !LN0->isVolatile()) ||
6753  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
6754  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6755  LN0->getChain(),
6756  LN0->getBasePtr(), MemVT,
6757  LN0->getMemOperand());
6758  CombineTo(N, ExtLoad);
6759  CombineTo(N0.getNode(),
6760  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
6761  ExtLoad),
6762  ExtLoad.getValue(1));
6763  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6764  }
6765  }
6766 
6767  if (N0.getOpcode() == ISD::SETCC) {
6768  // Only do this before legalize for now.
6769  if (!LegalOperations && VT.isVector() &&
6770  N0.getValueType().getVectorElementType() == MVT::i1) {
6771  EVT N00VT = N0.getOperand(0).getValueType();
6772  if (getSetCCResultType(N00VT) == N0.getValueType())
6773  return SDValue();
6774 
6775  // We know that the # elements of the results is the same as the #
6776  // elements of the compare (and the # elements of the compare result for
6777  // that matter). Check to see that they are the same size. If so, we know
6778  // that the element size of the sext'd result matches the element size of
6779  // the compare operands.
6780  SDLoc DL(N);
6781  SDValue VecOnes = DAG.getConstant(1, DL, VT);
6782  if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
6783  // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
6784  SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
6785  N0.getOperand(1), N0.getOperand(2));
6786  return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
6787  }
6788 
6789  // If the desired elements are smaller or larger than the source
6790  // elements we can use a matching integer vector type and then
6791  // truncate/sign extend.
6792  EVT MatchingElementType = EVT::getIntegerVT(
6793  *DAG.getContext(), N00VT.getScalarSizeInBits());
6794  EVT MatchingVectorType = EVT::getVectorVT(
6795  *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
6796  SDValue VsetCC =
6797  DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
6798  N0.getOperand(1), N0.getOperand(2));
6799  return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
6800  VecOnes);
6801  }
6802 
6803  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6804  SDLoc DL(N);
6805  if (SDValue SCC = SimplifySelectCC(
6806  DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6807  DAG.getConstant(0, DL, VT),
6808  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6809  return SCC;
6810  }
6811 
6812  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
6813  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
6814  isa<ConstantSDNode>(N0.getOperand(1)) &&
6815  N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
6816  N0.hasOneUse()) {
6817  SDValue ShAmt = N0.getOperand(1);
6818  unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
6819  if (N0.getOpcode() == ISD::SHL) {
6820  SDValue InnerZExt = N0.getOperand(0);
6821  // If the original shl may be shifting out bits, do not perform this
6822  // transformation.
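      // For example, for (zext (shl (zext i8 x to i16), C) to i32) there are
      // 16 - 8 == 8 known-zero high bits in the inner value, so the fold is
      // only safe for shift amounts up to 8.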
6823  unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
6824  InnerZExt.getOperand(0).getValueSizeInBits();
6825  if (ShAmtVal > KnownZeroBits)
6826  return SDValue();
6827  }
6828 
6829  SDLoc DL(N);
6830 
6831  // Ensure that the shift amount is wide enough for the shifted value.
6832  if (VT.getSizeInBits() >= 256)
6833  ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
6834 
6835  return DAG.getNode(N0.getOpcode(), DL, VT,
6836  DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
6837  ShAmt);
6838  }
6839 
6840  return SDValue();
6841 }
6842 
6843 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
6844  SDValue N0 = N->getOperand(0);
6845  EVT VT = N->getValueType(0);
6846 
6847  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6848  LegalOperations))
6849  return SDValue(Res, 0);
6850 
6851  // fold (aext (aext x)) -> (aext x)
6852  // fold (aext (zext x)) -> (zext x)
6853  // fold (aext (sext x)) -> (sext x)
6854  if (N0.getOpcode() == ISD::ANY_EXTEND ||
6855  N0.getOpcode() == ISD::ZERO_EXTEND ||
6856  N0.getOpcode() == ISD::SIGN_EXTEND)
6857  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
6858 
6859  // fold (aext (truncate (load x))) -> (aext (smaller load x))
6860  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
6861  if (N0.getOpcode() == ISD::TRUNCATE) {
6862  if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
6863  SDNode *oye = N0.getOperand(0).getNode();
6864  if (NarrowLoad.getNode() != N0.getNode()) {
6865  CombineTo(N0.getNode(), NarrowLoad);
6866  // CombineTo deleted the truncate, if needed, but not what's under it.
6867  AddToWorklist(oye);
6868  }
6869  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6870  }
6871  }
6872 
6873  // fold (aext (truncate x))
6874  if (N0.getOpcode() == ISD::TRUNCATE) {
6875  SDValue TruncOp = N0.getOperand(0);
6876  if (TruncOp.getValueType() == VT)
6877  return TruncOp; // x iff x size == zext size.
6878  if (TruncOp.getValueType().bitsGT(VT))
6879  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
6880  return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
6881  }
6882 
6883  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
6884  // if the trunc is not free.
6885  if (N0.getOpcode() == ISD::AND &&
6886  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6887  N0.getOperand(1).getOpcode() == ISD::Constant &&
6888  !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6889  N0.getValueType())) {
6890  SDLoc DL(N);
6891  SDValue X = N0.getOperand(0).getOperand(0);
6892  if (X.getValueType().bitsLT(VT)) {
6893  X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
6894  } else if (X.getValueType().bitsGT(VT)) {
6895  X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
6896  }
6897  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6898  Mask = Mask.zext(VT.getSizeInBits());
6899  return DAG.getNode(ISD::AND, DL, VT,
6900  X, DAG.getConstant(Mask, DL, VT));
6901  }
6902 
6903  // fold (aext (load x)) -> (aext (truncate (extload x)))
6904  // None of the supported targets knows how to perform load and any_ext
6905  // on vectors in one instruction. We only perform this transformation on
6906  // scalars.
6907  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
6908  ISD::isUNINDEXEDLoad(N0.getNode()) &&
6909  TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
6910  bool DoXform = true;
6911  SmallVector<SDNode*, 4> SetCCs;
6912  if (!N0.hasOneUse())
6913  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
6914  if (DoXform) {
6915  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6916  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
6917  LN0->getChain(),
6918  LN0->getBasePtr(), N0.getValueType(),
6919  LN0->getMemOperand());
6920  CombineTo(N, ExtLoad);
6921  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6922  N0.getValueType(), ExtLoad);
6923  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6924  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6925  ISD::ANY_EXTEND);
6926  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6927  }
6928  }
6929 
6930  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
6931  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
6932  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
6933  if (N0.getOpcode() == ISD::LOAD &&
6934  !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6935  N0.hasOneUse()) {
6936  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6937  ISD::LoadExtType ExtType = LN0->getExtensionType();
6938  EVT MemVT = LN0->getMemoryVT();
6939  if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
6940  SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
6941  VT, LN0->getChain(), LN0->getBasePtr(),
6942  MemVT, LN0->getMemOperand());
6943  CombineTo(N, ExtLoad);
6944  CombineTo(N0.getNode(),
6945  DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6946  N0.getValueType(), ExtLoad),
6947  ExtLoad.getValue(1));
6948  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6949  }
6950  }
6951 
6952  if (N0.getOpcode() == ISD::SETCC) {
6953  // For vectors:
6954  // aext(setcc) -> vsetcc
6955  // aext(setcc) -> truncate(vsetcc)
6956  // aext(setcc) -> aext(vsetcc)
6957  // Only do this before legalize for now.
6958  if (VT.isVector() && !LegalOperations) {
6959  EVT N0VT = N0.getOperand(0).getValueType();
6960  // We know that the # elements of the results is the same as the
6961  // # elements of the compare (and the # elements of the compare result
6962  // for that matter). Check to see that they are the same size. If so,
6963  // we know that the element size of the sext'd result matches the
6964  // element size of the compare operands.
6965  if (VT.getSizeInBits() == N0VT.getSizeInBits())
6966  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6967  N0.getOperand(1),
6968  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6969  // If the desired elements are smaller or larger than the source
6970  // elements we can use a matching integer vector type and then
6971  // truncate/any extend
6972  else {
6973  EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6974  SDValue VsetCC =
6975  DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
6976  N0.getOperand(1),
6977  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6978  return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
6979  }
6980  }
6981 
6982  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6983  SDLoc DL(N);
6984  if (SDValue SCC = SimplifySelectCC(
6985  DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
6986  DAG.getConstant(0, DL, VT),
6987  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
6988  return SCC;
6989  }
6990 
6991  return SDValue();
6992 }
6993 
6994 /// See if the specified operand can be simplified with the knowledge that only
6995 /// the bits specified by Mask are used. If so, return the simpler operand,
6996 /// otherwise return a null SDValue.
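// For example, with Mask == 0xFF, (or x, (shl y, 8)) simplifies to x: the
// OR's second operand contributes nothing to the demanded low byte.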
6997 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
6998  switch (V.getOpcode()) {
6999  default: break;
7000  case ISD::Constant: {
7001  const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
7002  assert(CV && "Const value should be ConstSDNode.");
7003  const APInt &CVal = CV->getAPIntValue();
7004  APInt NewVal = CVal & Mask;
7005  if (NewVal != CVal)
7006  return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
7007  break;
7008  }
7009  case ISD::OR:
7010  case ISD::XOR:
7011  // If the LHS or RHS don't contribute bits to the or, drop them.
7012  if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
7013  return V.getOperand(1);
7014  if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
7015  return V.getOperand(0);
7016  break;
7017  case ISD::SRL:
7018  // Only look at single-use SRLs.
7019  if (!V.getNode()->hasOneUse())
7020  break;
7021  if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
7022  // See if we can recursively simplify the LHS.
7023  unsigned Amt = RHSC->getZExtValue();
7024 
7025  // Watch out for shift count overflow though.
7026  if (Amt >= Mask.getBitWidth()) break;
7027  APInt NewMask = Mask << Amt;
7028  if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
7029  return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
7030  SimplifyLHS, V.getOperand(1));
7031  }
7032  }
7033  return SDValue();
7034 }
7035 
7036 /// If the result of a wider load is shifted right by N bits and then
7037 /// truncated to a narrower type, where N is a multiple of the number of bits
7038 /// of the narrower type, transform it to a narrower load whose address is
7039 /// offset by N bits (N / 8 bytes). If the result is to be extended, also fold
7040 /// the extension to form an extending load.
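/// For example, on a little-endian target,
///   (i16 (truncate (srl (i32 (load x)), 16)))
/// becomes (i16 (load x+2)): the shift amount 16 is a multiple of the 16-bit
/// result width, so the upper half can be loaded directly.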
7041 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
7042  unsigned Opc = N->getOpcode();
7043 
7044  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
7045  SDValue N0 = N->getOperand(0);
7046  EVT VT = N->getValueType(0);
7047  EVT ExtVT = VT;
7048 
7049  // This transformation isn't valid for vector loads.
7050  if (VT.isVector())
7051  return SDValue();
7052 
7053  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
7054  // extended to VT.
7055  if (Opc == ISD::SIGN_EXTEND_INREG) {
7056  ExtType = ISD::SEXTLOAD;
7057  ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7058  } else if (Opc == ISD::SRL) {
7059  // Another special-case: SRL is basically zero-extending a narrower value.
7060  ExtType = ISD::ZEXTLOAD;
7061  N0 = SDValue(N, 0);
7062  ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N->getOperand(1));
7063  if (!N01) return SDValue();
7064  ExtVT = EVT::getIntegerVT(*DAG.getContext(),
7065  VT.getSizeInBits() - N01->getZExtValue());
7066  }
7067  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
7068  return SDValue();
7069 
7070  unsigned EVTBits = ExtVT.getSizeInBits();
7071 
7072  // Do not generate loads of non-round integer types since these can
7073  // be expensive (and would be wrong if the type is not byte sized).
7074  if (!ExtVT.isRound())
7075  return SDValue();
7076 
7077  unsigned ShAmt = 0;
7078  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
7079  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7080  ShAmt = N01->getZExtValue();
7081  // Is the shift amount a multiple of size of VT?
7082  if ((ShAmt & (EVTBits-1)) == 0) {
7083  N0 = N0.getOperand(0);
7084  // Is the load width a multiple of size of VT?
7085  if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
7086  return SDValue();
7087  }
7088 
7089  // At this point, we must have a load or else we can't do the transform.
7090  if (!isa<LoadSDNode>(N0)) return SDValue();
7091 
7092  // Because a SRL must be assumed to *need* to zero-extend the high bits
7093  // (as opposed to anyext the high bits), we can't combine the zextload
7094  // lowering of SRL and an sextload.
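      // (An sextload would replicate the sign bit into exactly the high bits
      // that the SRL is obliged to clear.)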
7095  if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
7096  return SDValue();
7097 
7098  // If the shift amount is larger than the input type then we're not
7099  // accessing any of the loaded bytes. If the load was a zextload/extload
7100  // then the result of the shift+trunc is zero/undef (handled elsewhere).
7101  if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
7102  return SDValue();
7103  }
7104  }
7105 
7106  // If the load is shifted left (and the result isn't shifted back right),
7107  // we can fold the truncate through the shift.
7108  unsigned ShLeftAmt = 0;
7109  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7110  ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
7111  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
7112  ShLeftAmt = N01->getZExtValue();
7113  N0 = N0.getOperand(0);
7114  }
7115  }
7116 
7117  // If we haven't found a load, we can't narrow it. Don't transform one with
7118  // multiple uses, this would require adding a new load.
7119  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
7120  return SDValue();
7121 
7122  // Don't change the width of a volatile load.
7123  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7124  if (LN0->isVolatile())
7125  return SDValue();
7126 
7127  // Verify that we are actually reducing a load width here.
7128  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
7129  return SDValue();
7130 
7131  // For the transform to be legal, the load must produce only two values
7132  // (the value loaded and the chain). Don't transform a pre-increment
7133  // load, for example, which produces an extra value. Otherwise the
7134  // transformation is not equivalent, and the downstream logic to replace
7135  // uses gets things wrong.
7136  if (LN0->getNumValues() > 2)
7137  return SDValue();
7138 
7139  // If the load that we're shrinking is an extload and we're not just
7140  // discarding the extension we can't simply shrink the load. Bail.
7141  // TODO: It would be possible to merge the extensions in some cases.
7142  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
7143  LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
7144  return SDValue();
7145 
7146  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
7147  return SDValue();
7148 
7149  EVT PtrType = N0.getOperand(1).getValueType();
7150 
7151  if (PtrType == MVT::Untyped || PtrType.isExtended())
7152  // It's not possible to generate a constant of extended or untyped type.
7153  return SDValue();
7154 
7155  // For big endian targets, we need to adjust the offset to the pointer to
7156  // load the correct bytes.
7157  if (DAG.getDataLayout().isBigEndian()) {
7158  unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
7159  unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
7160  ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
7161  }
7162 
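  // For example, narrowing a 32-bit load to 8 bits with ShAmt == 8: on a
  // little-endian target PtrOff is 1, while on big endian ShAmt was adjusted
  // above to 32 - 8 - 8 == 16, giving PtrOff == 2; both select the same byte
  // of the in-memory value.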
7163  uint64_t PtrOff = ShAmt / 8;
7164  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
7165  SDLoc DL(LN0);
7166  // The original load itself didn't wrap, so an offset within it doesn't.
7167  SDNodeFlags Flags;
7168  Flags.setNoUnsignedWrap(true);
7169  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
7170  PtrType, LN0->getBasePtr(),
7171  DAG.getConstant(PtrOff, DL, PtrType),
7172  &Flags);
7173  AddToWorklist(NewPtr.getNode());
7174 
7175  SDValue Load;
7176  if (ExtType == ISD::NON_EXTLOAD)
7177  Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
7178  LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
7179  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7180  else
7181  Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
7182  LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
7183  NewAlign, LN0->getMemOperand()->getFlags(),
7184  LN0->getAAInfo());
7185 
7186  // Replace the old load's chain with the new load's chain.
7187  WorklistRemover DeadNodes(*this);
7188  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7189 
7190  // Shift the result left, if we've swallowed a left shift.
7191  SDValue Result = Load;
7192  if (ShLeftAmt != 0) {
7193  EVT ShImmTy = getShiftAmountTy(Result.getValueType());
7194  if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
7195  ShImmTy = VT;
7196  // If the shift amount is as large as the result size (but, presumably,
7197  // no larger than the source) then the useful bits of the result are
7198  // zero; we can't simply return the shortened shift, because the result
7199  // of that operation is undefined.
7200  SDLoc DL(N0);
7201  if (ShLeftAmt >= VT.getSizeInBits())
7202  Result = DAG.getConstant(0, DL, VT);
7203  else
7204  Result = DAG.getNode(ISD::SHL, DL, VT,
7205  Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
7206  }
7207 
7208  // Return the new loaded value.
7209  return Result;
7210 }
7211 
7212 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
7213  SDValue N0 = N->getOperand(0);
7214  SDValue N1 = N->getOperand(1);
7215  EVT VT = N->getValueType(0);
7216  EVT EVT = cast<VTSDNode>(N1)->getVT();
7217  unsigned VTBits = VT.getScalarSizeInBits();
7218  unsigned EVTBits = EVT.getScalarSizeInBits();
7219 
7220  if (N0.isUndef())
7221  return DAG.getUNDEF(VT);
7222 
7223  // fold (sext_in_reg c1) -> c1
7224  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7225  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
7226 
7227  // If the input is already sign extended, just drop the extension.
7228  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
7229  return N0;
7230 
7231  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
7232  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
7233  EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
7234  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7235  N0.getOperand(0), N1);
7236 
7237  // fold (sext_in_reg (sext x)) -> (sext x)
7238  // fold (sext_in_reg (aext x)) -> (sext x)
7239  // if x is small enough.
7240  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
7241  SDValue N00 = N0.getOperand(0);
7242  if (N00.getScalarValueSizeInBits() <= EVTBits &&
7243  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7244  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7245  }
7246 
7247  // fold (sext_in_reg (zext x)) -> (sext x)
7248  // iff we are extending the source sign bit.
7249  if (N0.getOpcode() == ISD::ZERO_EXTEND) {
7250  SDValue N00 = N0.getOperand(0);
7251  if (N00.getScalarValueSizeInBits() == EVTBits &&
7252  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
7253  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
7254  }
7255 
7256  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
7257  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
7258  return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
7259 
7260  // fold operands of sext_in_reg based on knowledge that the top bits are not
7261  // demanded.
7262  if (SimplifyDemandedBits(SDValue(N, 0)))
7263  return SDValue(N, 0);
7264 
7265  // fold (sext_in_reg (load x)) -> (smaller sextload x)
7266  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
7267  if (SDValue NarrowLoad = ReduceLoadWidth(N))
7268  return NarrowLoad;
7269 
7270  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
7271  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
7272  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
7273  if (N0.getOpcode() == ISD::SRL) {
7274  if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
7275  if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
7276  // We can turn this into an SRA iff the input to the SRL is already sign
7277  // extended enough.
7278  unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
7279  if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
7280  return DAG.getNode(ISD::SRA, SDLoc(N), VT,
7281  N0.getOperand(0), N0.getOperand(1));
7282  }
7283  }
7284 
7285  // fold (sext_inreg (extload x)) -> (sextload x)
7286  if (ISD::isEXTLoad(N0.getNode()) &&
7287  ISD::isUNINDEXEDLoad(N0.getNode()) &&
7288  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7289  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7290  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7291  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7292  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7293  LN0->getChain(),
7294  LN0->getBasePtr(), EVT,
7295  LN0->getMemOperand());
7296  CombineTo(N, ExtLoad);
7297  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7298  AddToWorklist(ExtLoad.getNode());
7299  return SDValue(N, 0); // Return N so it doesn't get rechecked!
7300  }
7301  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
7302  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
7303  N0.hasOneUse() &&
7304  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
7305  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
7306  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
7307  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7308  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
7309  LN0->getChain(),
7310  LN0->getBasePtr(), EVT,
7311  LN0->getMemOperand());
7312  CombineTo(N, ExtLoad);
7313  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7314  return SDValue(N, 0); // Return N so it doesn't get rechecked!
7315  }
7316 
7317  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
7318  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
7319  if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7320  N0.getOperand(1), false))
7321  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7322  BSwap, N1);
7323  }
7324 
7325  return SDValue();
7326 }
7327 
7328 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
7329  SDValue N0 = N->getOperand(0);
7330  EVT VT = N->getValueType(0);
7331 
7332  if (N0.isUndef())
7333  return DAG.getUNDEF(VT);
7334 
7335  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7336  LegalOperations))
7337  return SDValue(Res, 0);
7338 
7339  return SDValue();
7340 }
7341 
7342 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
7343  SDValue N0 = N->getOperand(0);
7344  EVT VT = N->getValueType(0);
7345 
7346  if (N0.isUndef())
7347  return DAG.getUNDEF(VT);
7348 
7349  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
7350  LegalOperations))
7351  return SDValue(Res, 0);
7352 
7353  return SDValue();
7354 }
7355 
7356 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
7357  SDValue N0 = N->getOperand(0);
7358  EVT VT = N->getValueType(0);
7359  bool isLE = DAG.getDataLayout().isLittleEndian();
7360 
7361  // noop truncate
7362  if (N0.getValueType() == N->getValueType(0))
7363  return N0;
7364  // fold (truncate c1) -> c1
7365  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7366  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
7367  // fold (truncate (truncate x)) -> (truncate x)
7368  if (N0.getOpcode() == ISD::TRUNCATE)
7369  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7370  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
7371  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
7372  N0.getOpcode() == ISD::SIGN_EXTEND ||
7373  N0.getOpcode() == ISD::ANY_EXTEND) {
7374  // if the source is smaller than the dest, we still need an extend.
7375  if (N0.getOperand(0).getValueType().bitsLT(VT))
7376  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
7377  // if the source is larger than the dest, then we just need the truncate.
7378  if (N0.getOperand(0).getValueType().bitsGT(VT))
7379  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
7380  // if the source and dest are the same type, we can drop both the extend
7381  // and the truncate.
7382  return N0.getOperand(0);
7383  }
7384 
7385  // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
7386  if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
7387  return SDValue();
7388 
7389  // Fold extract-and-trunc into a narrow extract. For example:
7390  // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
7391  // i32 y = TRUNCATE(i64 x)
7392  // -- becomes --
7393  // v16i8 b = BITCAST (v2i64 val)
7394  // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
7395  //
7396  // Note: We only run this optimization after type legalization (which often
7397  // creates this pattern) and before operation legalization after which
7398  // we need to be more careful about the vector instructions that we generate.
7399  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7400  LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
7401 
7402  EVT VecTy = N0.getOperand(0).getValueType();
7403  EVT ExTy = N0.getValueType();
7404  EVT TrTy = N->getValueType(0);
7405 
7406  unsigned NumElem = VecTy.getVectorNumElements();
7407  unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
7408 
7409  EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
7410  assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
7411 
7412  SDValue EltNo = N0->getOperand(1);
7413  if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
7414  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
7415  EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
7416  int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
7417 
7418  SDLoc DL(N);
7419  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
7420  DAG.getBitcast(NVT, N0.getOperand(0)),
7421  DAG.getConstant(Index, DL, IndexTy));
7422  }
7423  }
7424 
7425  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
7426  if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
7427  EVT SrcVT = N0.getValueType();
7428  if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
7429  TLI.isTruncateFree(SrcVT, VT)) {
7430  SDLoc SL(N0);
7431  SDValue Cond = N0.getOperand(0);
7432  SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
7433  SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
7434  return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
7435  }
7436  }
7437 
7438  // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
7439  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
7440  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
7441  TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
7442  if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
7443  uint64_t Amt = CAmt->getZExtValue();
7444  unsigned Size = VT.getScalarSizeInBits();
7445 
7446  if (Amt < Size) {
7447  SDLoc SL(N);
7448  EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
7449 
7450  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
7451  return DAG.getNode(ISD::SHL, SL, VT, Trunc,
7452  DAG.getConstant(Amt, SL, AmtVT));
7453  }
7454  }
7455  }
7456 
7457  // Fold a series of buildvector, bitcast, and truncate if possible.
7458  // For example fold
7459  // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
7460  // (2xi32 (buildvector x, y)).
7461  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
7462  N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
7463  N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
7464  N0.getOperand(0).hasOneUse()) {
7465 
7466  SDValue BuildVect = N0.getOperand(0);
7467  EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
7468  EVT TruncVecEltTy = VT.getVectorElementType();
7469 
7470  // Check that the element types match.
7471  if (BuildVectEltTy == TruncVecEltTy) {
7472  // Now we only need to compute the offset of the truncated elements.
7473  unsigned BuildVecNumElts = BuildVect.getNumOperands();
7474  unsigned TruncVecNumElts = VT.getVectorNumElements();
7475  unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
7476 
7477  assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
7478  "Invalid number of elements");
7479 
 7480  SmallVector<SDValue, 8> Opnds;
7481  for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
7482  Opnds.push_back(BuildVect.getOperand(i));
7483 
7484  return DAG.getBuildVector(VT, SDLoc(N), Opnds);
7485  }
7486  }
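// Worked example (expository sketch, not in the original source): for the
// fold shown above, BuildVecNumElts = 4 and TruncVecNumElts = 2 give
// TruncEltOffset = 2, so the loop keeps operands 0 and 2 of the
// buildvector -- x and y -- which are exactly the lanes that survive
// truncating each i64 back to i32.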
7487 
7488  // See if we can simplify the input to this truncate through knowledge that
7489  // only the low bits are being used.
 7490  // For example "trunc (or (shl x, 8), y)" -> trunc y
7491  // Currently we only perform this optimization on scalars because vectors
7492  // may have different active low bits.
7493  if (!VT.isVector()) {
7494  if (SDValue Shorter =
7495  GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
7496  VT.getSizeInBits())))
7497  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
7498  }
7499  // fold (truncate (load x)) -> (smaller load x)
7500  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
7501  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
7502  if (SDValue Reduced = ReduceLoadWidth(N))
7503  return Reduced;
7504 
7505  // Handle the case where the load remains an extending load even
7506  // after truncation.
7507  if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
7508  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7509  if (!LN0->isVolatile() &&
7510  LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
7511  SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
7512  VT, LN0->getChain(), LN0->getBasePtr(),
7513  LN0->getMemoryVT(),
7514  LN0->getMemOperand());
7515  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
7516  return NewLoad;
7517  }
7518  }
7519  }
 7520  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
7521  // where ... are all 'undef'.
7522  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
7523  SmallVector<EVT, 8> VTs;
7524  SDValue V;
7525  unsigned Idx = 0;
7526  unsigned NumDefs = 0;
7527 
7528  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
7529  SDValue X = N0.getOperand(i);
7530  if (!X.isUndef()) {
7531  V = X;
7532  Idx = i;
7533  NumDefs++;
7534  }
 7535  // Stop if more than one member is non-undef.
7536  if (NumDefs > 1)
7537  break;
 7538  VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
 7539  VT.getVectorElementType(),
 7540  X.getValueType().getVectorNumElements()));
7541  }
7542 
7543  if (NumDefs == 0)
7544  return DAG.getUNDEF(VT);
7545 
7546  if (NumDefs == 1) {
7547  assert(V.getNode() && "The single defined operand is empty!");
 7548  SmallVector<SDValue, 8> Opnds;
7549  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
7550  if (i != Idx) {
7551  Opnds.push_back(DAG.getUNDEF(VTs[i]));
7552  continue;
7553  }
7554  SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
7555  AddToWorklist(NV.getNode());
7556  Opnds.push_back(NV);
7557  }
7558  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
7559  }
7560  }
7561 
7562  // Fold truncate of a bitcast of a vector to an extract of the low vector
7563  // element.
7564  //
7565  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
7566  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
7567  SDValue VecSrc = N0.getOperand(0);
7568  EVT SrcVT = VecSrc.getValueType();
7569  if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
7570  (!LegalOperations ||
7571  TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
7572  SDLoc SL(N);
7573 
7574  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
7575  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
7576  VecSrc, DAG.getConstant(0, SL, IdxVT));
7577  }
7578  }
7579 
7580  // Simplify the operands using demanded-bits information.
7581  if (!VT.isVector() &&
 7582  SimplifyDemandedBits(SDValue(N, 0)))
7583  return SDValue(N, 0);
7584 
7585  return SDValue();
7586 }
7587 
7588 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
7589  SDValue Elt = N->getOperand(i);
7590  if (Elt.getOpcode() != ISD::MERGE_VALUES)
7591  return Elt.getNode();
7592  return Elt.getOperand(Elt.getResNo()).getNode();
7593 }
7594 
7595 /// build_pair (load, load) -> load
7596 /// if load locations are consecutive.
7597 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
7598  assert(N->getOpcode() == ISD::BUILD_PAIR);
7599 
 7600  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
 7601  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
7602  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
7603  LD1->getAddressSpace() != LD2->getAddressSpace())
7604  return SDValue();
7605  EVT LD1VT = LD1->getValueType(0);
7606  unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
7607  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
7608  DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
7609  unsigned Align = LD1->getAlignment();
7610  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
7611  VT.getTypeForEVT(*DAG.getContext()));
7612 
7613  if (NewAlign <= Align &&
7614  (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
7615  return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
7616  LD1->getPointerInfo(), Align);
7617  }
7618 
7619  return SDValue();
7620 }
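// Illustrative sketch (added for exposition, not in the original source):
// if LD1 is an i32 load from address p and LD2 an i32 load from p+4, then
// areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes = 4, 1) succeeds and the
// BUILD_PAIR becomes a single i64 load from p, provided the ABI alignment
// of i64 does not exceed LD1's alignment and i64 loads are legal.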
7621 
7622 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
7623  // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
7624  // and Lo parts; on big-endian machines it doesn't.
7625  return DAG.getDataLayout().isBigEndian() ? 1 : 0;
7626 }
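// Usage sketch (expository, not in the original source): callers below
// select the Hi i64 half of a bitcast ppc_fp128 with
//   DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), DL)
// which yields element 1 on big-endian targets and element 0 on
// little-endian targets, matching the part ordering described above.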
7627 
 7628 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
7629  const TargetLowering &TLI) {
7630  // If this is not a bitcast to an FP type or if the target doesn't have
7631  // IEEE754-compliant FP logic, we're done.
7632  EVT VT = N->getValueType(0);
7633  if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
7634  return SDValue();
7635 
7636  // TODO: Use splat values for the constant-checking below and remove this
7637  // restriction.
7638  SDValue N0 = N->getOperand(0);
7639  EVT SourceVT = N0.getValueType();
7640  if (SourceVT.isVector())
7641  return SDValue();
7642 
7643  unsigned FPOpcode;
7644  APInt SignMask;
7645  switch (N0.getOpcode()) {
7646  case ISD::AND:
7647  FPOpcode = ISD::FABS;
7648  SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
7649  break;
7650  case ISD::XOR:
7651  FPOpcode = ISD::FNEG;
7652  SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
7653  break;
7654  // TODO: ISD::OR --> ISD::FNABS?
7655  default:
7656  return SDValue();
7657  }
7658 
7659  // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
7660  // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
7661  SDValue LogicOp0 = N0.getOperand(0);
7662  ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7663  if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
7664  LogicOp0.getOpcode() == ISD::BITCAST &&
7665  LogicOp0->getOperand(0).getValueType() == VT)
7666  return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
7667 
7668  return SDValue();
7669 }
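// Worked example (expository sketch, not in the original source): for f32,
// APInt::getSignBit(32) is 0x80000000, so
//   (bitcast (xor (bitcast f32 X to i32), 0x80000000) to f32)
// matches the ISD::XOR arm and folds to (fneg X), while the ISD::AND arm
// with the complemented mask 0x7fffffff folds to (fabs X).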
7670 
7671 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
7672  SDValue N0 = N->getOperand(0);
7673  EVT VT = N->getValueType(0);
7674 
7675  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
7676  // Only do this before legalize, since afterward the target may be depending
7677  // on the bitconvert.
7678  // First check to see if this is all constant.
7679  if (!LegalTypes &&
7680  N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
7681  VT.isVector()) {
7682  bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
7683 
7684  EVT DestEltVT = N->getValueType(0).getVectorElementType();
7685  assert(!DestEltVT.isVector() &&
7686  "Element type of vector ValueType must not be vector!");
7687  if (isSimple)
7688  return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
7689  }
7690 
7691  // If the input is a constant, let getNode fold it.
7692  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
7693  // If we can't allow illegal operations, we need to check that this is just
 7694  // an fp -> int or int -> fp conversion and that the resulting operation will
7695  // be legal.
7696  if (!LegalOperations ||
7697  (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
7698  TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
7699  (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
7700  TLI.isOperationLegal(ISD::Constant, VT)))
7701  return DAG.getBitcast(VT, N0);
7702  }
7703 
7704  // (conv (conv x, t1), t2) -> (conv x, t2)
7705  if (N0.getOpcode() == ISD::BITCAST)
7706  return DAG.getBitcast(VT, N0.getOperand(0));
7707 
7708  // fold (conv (load x)) -> (load (conv*)x)
7709  // If the resultant load doesn't need a higher alignment than the original!
7710  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
7711  // Do not change the width of a volatile load.
7712  !cast<LoadSDNode>(N0)->isVolatile() &&
7713  // Do not remove the cast if the types differ in endian layout.
7714  TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
7715  TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
7716  (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
7717  TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
7718  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7719  unsigned OrigAlign = LN0->getAlignment();
7720 
7721  bool Fast = false;
7722  if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
7723  LN0->getAddressSpace(), OrigAlign, &Fast) &&
7724  Fast) {
7725  SDValue Load =
7726  DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
7727  LN0->getPointerInfo(), OrigAlign,
7728  LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
7729  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7730  return Load;
7731  }
7732  }
7733 
7734  if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
7735  return V;
7736 
7737  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7738  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7739  //
7740  // For ppc_fp128:
7741  // fold (bitcast (fneg x)) ->
7742  // flipbit = signbit
7743  // (xor (bitcast x) (build_pair flipbit, flipbit))
7744  //
7745  // fold (bitcast (fabs x)) ->
7746  // flipbit = (and (extract_element (bitcast x), 0), signbit)
7747  // (xor (bitcast x) (build_pair flipbit, flipbit))
7748  // This often reduces constant pool loads.
7749  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
7750  (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
7751  N0.getNode()->hasOneUse() && VT.isInteger() &&
7752  !VT.isVector() && !N0.getValueType().isVector()) {
7753  SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
7754  AddToWorklist(NewConv.getNode());
7755 
7756  SDLoc DL(N);
7757  if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
7758  assert(VT.getSizeInBits() == 128);
7759  SDValue SignBit = DAG.getConstant(
7760  APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
7761  SDValue FlipBit;
7762  if (N0.getOpcode() == ISD::FNEG) {
7763  FlipBit = SignBit;
7764  AddToWorklist(FlipBit.getNode());
7765  } else {
7766  assert(N0.getOpcode() == ISD::FABS);
7767  SDValue Hi =
7768  DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
 7769  DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
7770  SDLoc(NewConv)));
7771  AddToWorklist(Hi.getNode());
7772  FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
7773  AddToWorklist(FlipBit.getNode());
7774  }
7775  SDValue FlipBits =
7776  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
7777  AddToWorklist(FlipBits.getNode());
7778  return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
7779  }
7780  APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7781  if (N0.getOpcode() == ISD::FNEG)
7782  return DAG.getNode(ISD::XOR, DL, VT,
7783  NewConv, DAG.getConstant(SignBit, DL, VT));
7784  assert(N0.getOpcode() == ISD::FABS);
7785  return DAG.getNode(ISD::AND, DL, VT,
7786  NewConv, DAG.getConstant(~SignBit, DL, VT));
7787  }
7788 
7789  // fold (bitconvert (fcopysign cst, x)) ->
7790  // (or (and (bitconvert x), sign), (and cst, (not sign)))
7791  // Note that we don't handle (copysign x, cst) because this can always be
7792  // folded to an fneg or fabs.
7793  //
7794  // For ppc_fp128:
7795  // fold (bitcast (fcopysign cst, x)) ->
7796  // flipbit = (and (extract_element
7797  // (xor (bitcast cst), (bitcast x)), 0),
7798  // signbit)
7799  // (xor (bitcast cst) (build_pair flipbit, flipbit))
7800  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
7801  isa<ConstantFPSDNode>(N0.getOperand(0)) &&
7802  VT.isInteger() && !VT.isVector()) {
7803  unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
7804  EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
7805  if (isTypeLegal(IntXVT)) {
7806  SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
7807  AddToWorklist(X.getNode());
7808 
7809  // If X has a different width than the result/lhs, sext it or truncate it.
7810  unsigned VTWidth = VT.getSizeInBits();
7811  if (OrigXWidth < VTWidth) {
7812  X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
7813  AddToWorklist(X.getNode());
7814  } else if (OrigXWidth > VTWidth) {
7815  // To get the sign bit in the right place, we have to shift it right
7816  // before truncating.
7817  SDLoc DL(X);
7818  X = DAG.getNode(ISD::SRL, DL,
7819  X.getValueType(), X,
7820  DAG.getConstant(OrigXWidth-VTWidth, DL,
7821  X.getValueType()));
7822  AddToWorklist(X.getNode());
7823  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7824  AddToWorklist(X.getNode());
7825  }
7826 
7827  if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
7828  APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
7829  SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
7830  AddToWorklist(Cst.getNode());
7831  SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
7832  AddToWorklist(X.getNode());
7833  SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
7834  AddToWorklist(XorResult.getNode());
7835  SDValue XorResult64 = DAG.getNode(
7836  ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
 7837  DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
7838  SDLoc(XorResult)));
7839  AddToWorklist(XorResult64.getNode());
7840  SDValue FlipBit =
7841  DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
7842  DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
7843  AddToWorklist(FlipBit.getNode());
7844  SDValue FlipBits =
7845  DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
7846  AddToWorklist(FlipBits.getNode());
7847  return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
7848  }
7849  APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7850  X = DAG.getNode(ISD::AND, SDLoc(X), VT,
7851  X, DAG.getConstant(SignBit, SDLoc(X), VT));
7852  AddToWorklist(X.getNode());
7853 
7854  SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
7855  Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
7856  Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
7857  AddToWorklist(Cst.getNode());
7858 
7859  return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
7860  }
7861  }
7862 
7863  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
7864  if (N0.getOpcode() == ISD::BUILD_PAIR)
7865  if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
7866  return CombineLD;
7867 
7868  // Remove double bitcasts from shuffles - this is often a legacy of
7869  // XformToShuffleWithZero being used to combine bitmaskings (of
7870  // float vectors bitcast to integer vectors) into shuffles.
7871  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
7872  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
7873  N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
 7874  VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
 7875  !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
7876  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
7877 
7878  // If operands are a bitcast, peek through if it casts the original VT.
7879  // If operands are a constant, just bitcast back to original VT.
7880  auto PeekThroughBitcast = [&](SDValue Op) {
7881  if (Op.getOpcode() == ISD::BITCAST &&
7882  Op.getOperand(0).getValueType() == VT)
7883  return SDValue(Op.getOperand(0));
 7884  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
 7885  ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
7886  return DAG.getBitcast(VT, Op);
7887  return SDValue();
7888  };
7889 
7890  SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
7891  SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
7892  if (!(SV0 && SV1))
7893  return SDValue();
7894 
7895  int MaskScale =
 7896  VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
7897  SmallVector<int, 8> NewMask;
7898  for (int M : SVN->getMask())
7899  for (int i = 0; i != MaskScale; ++i)
7900  NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
7901 
7902  bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7903  if (!LegalMask) {
7904  std::swap(SV0, SV1);
 7905  ShuffleVectorSDNode::commuteMask(NewMask);
7906  LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7907  }
7908 
7909  if (LegalMask)
7910  return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
7911  }
7912 
7913  return SDValue();
7914 }
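// Mask-scaling example (expository sketch, not in the original source):
// bitcasting a v2i64 shuffle with mask <1, 0> to v4i32 gives
// MaskScale = 4 / 2 = 2, and the loop above widens the mask to
// <2, 3, 0, 1>; undef entries (M < 0) are kept as -1 at every scale.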
7915 
7916 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
7917  EVT VT = N->getValueType(0);
7918  return CombineConsecutiveLoads(N, VT);
7919 }
7920 
7921 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
7922 /// operands. DstEltVT indicates the destination element value type.
7923 SDValue DAGCombiner::
7924 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
7925  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
7926 
7927  // If this is already the right type, we're done.
7928  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
7929 
7930  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
7931  unsigned DstBitSize = DstEltVT.getSizeInBits();
7932 
7933  // If this is a conversion of N elements of one type to N elements of another
7934  // type, convert each element. This handles FP<->INT cases.
7935  if (SrcBitSize == DstBitSize) {
7936  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
 7937  BV->getValueType(0).getVectorNumElements());
7938 
7939  // Due to the FP element handling below calling this routine recursively,
7940  // we can end up with a scalar-to-vector node here.
7941  if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
7942  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
7943  DAG.getBitcast(DstEltVT, BV->getOperand(0)));
7944 
 7945  SmallVector<SDValue, 8> Ops;
7946  for (SDValue Op : BV->op_values()) {
7947  // If the vector element type is not legal, the BUILD_VECTOR operands
7948  // are promoted and implicitly truncated. Make that explicit here.
7949  if (Op.getValueType() != SrcEltVT)
7950  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
7951  Ops.push_back(DAG.getBitcast(DstEltVT, Op));
7952  AddToWorklist(Ops.back().getNode());
7953  }
7954  return DAG.getBuildVector(VT, SDLoc(BV), Ops);
7955  }
7956 
7957  // Otherwise, we're growing or shrinking the elements. To avoid having to
7958  // handle annoying details of growing/shrinking FP values, we convert them to
7959  // int first.
7960  if (SrcEltVT.isFloatingPoint()) {
 7961  // Convert the input float vector to an int vector where the elements are the
 7962  // same size.
7963  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
7964  BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
7965  SrcEltVT = IntVT;
7966  }
7967 
7968  // Now we know the input is an integer vector. If the output is a FP type,
7969  // convert to integer first, then to FP of the right size.
7970  if (DstEltVT.isFloatingPoint()) {
7971  EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
7972  SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
7973 
7974  // Next, convert to FP elements of the same size.
7975  return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
7976  }
7977 
7978  SDLoc DL(BV);
7979 
 7980  // Okay, we know the src/dst types are both integers of differing sizes.
 7981  // Handle growing first.
7982  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
7983  if (SrcBitSize < DstBitSize) {
7984  unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
7985 
 7986  SmallVector<SDValue, 8> Ops;
7987  for (unsigned i = 0, e = BV->getNumOperands(); i != e;
7988  i += NumInputsPerOutput) {
7989  bool isLE = DAG.getDataLayout().isLittleEndian();
7990  APInt NewBits = APInt(DstBitSize, 0);
7991  bool EltIsUndef = true;
7992  for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
7993  // Shift the previously computed bits over.
7994  NewBits <<= SrcBitSize;
7995  SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
7996  if (Op.isUndef()) continue;
7997  EltIsUndef = false;
7998 
7999  NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
8000  zextOrTrunc(SrcBitSize).zext(DstBitSize);
8001  }
8002 
8003  if (EltIsUndef)
8004  Ops.push_back(DAG.getUNDEF(DstEltVT));
8005  else
8006  Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
8007  }
8008 
8009  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
8010  return DAG.getBuildVector(VT, DL, Ops);
8011  }
8012 
8013  // Finally, this must be the case where we are shrinking elements: each input
8014  // turns into multiple outputs.
8015  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
8016  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
8017  NumOutputsPerInput*BV->getNumOperands());
 8018  SmallVector<SDValue, 8> Ops;
8019 
8020  for (const SDValue &Op : BV->op_values()) {
8021  if (Op.isUndef()) {
8022  Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
8023  continue;
8024  }
8025 
8026  APInt OpVal = cast<ConstantSDNode>(Op)->
8027  getAPIntValue().zextOrTrunc(SrcBitSize);
8028 
8029  for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
8030  APInt ThisVal = OpVal.trunc(DstBitSize);
8031  Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
8032  OpVal = OpVal.lshr(DstBitSize);
8033  }
8034 
8035  // For big endian targets, swap the order of the pieces of each element.
8036  if (DAG.getDataLayout().isBigEndian())
8037  std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
8038  }
8039 
8040  return DAG.getBuildVector(VT, DL, Ops);
8041 }
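// Shrinking example (expository sketch, not in the original source):
// splitting the i32 constant 0x11223344 into four i8 elements, the loop
// above emits 0x44, 0x33, 0x22, 0x11 (low piece first); on big-endian
// targets std::reverse restores 0x11, 0x22, 0x33, 0x44 so the byte order
// in memory matches the original wide element.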
8042 
8043 /// Try to perform FMA combining on a given FADD node.
8044 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
8045  SDValue N0 = N->getOperand(0);
8046  SDValue N1 = N->getOperand(1);
8047  EVT VT = N->getValueType(0);
8048  SDLoc SL(N);
8049 
8050  const TargetOptions &Options = DAG.getTarget().Options;
8051  bool AllowFusion =
8052  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8053 
8054  // Floating-point multiply-add with intermediate rounding.
8055  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8056 
8057  // Floating-point multiply-add without intermediate rounding.
8058  bool HasFMA =
8059  AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8060  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8061 
8062  // No valid opcode, do not combine.
8063  if (!HasFMAD && !HasFMA)
8064  return SDValue();
8065 
 8066  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8068  if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
8069  return SDValue();
8070 
8071  // Always prefer FMAD to FMA for precision.
8072  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8073  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8074  bool LookThroughFPExt = TLI.isFPExtFree(VT);
8075 
8076  // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
8077  // prefer to fold the multiply with fewer uses.
8078  if (Aggressive && N0.getOpcode() == ISD::FMUL &&
8079  N1.getOpcode() == ISD::FMUL) {
8080  if (N0.getNode()->use_size() > N1.getNode()->use_size())
8081  std::swap(N0, N1);
8082  }
8083 
8084  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
8085  if (N0.getOpcode() == ISD::FMUL &&
8086  (Aggressive || N0->hasOneUse())) {
8087  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8088  N0.getOperand(0), N0.getOperand(1), N1);
8089  }
8090 
8091  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
8092  // Note: Commutes FADD operands.
8093  if (N1.getOpcode() == ISD::FMUL &&
8094  (Aggressive || N1->hasOneUse())) {
8095  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8096  N1.getOperand(0), N1.getOperand(1), N0);
8097  }
8098 
8099  // Look through FP_EXTEND nodes to do more combining.
8100  if (AllowFusion && LookThroughFPExt) {
8101  // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
8102  if (N0.getOpcode() == ISD::FP_EXTEND) {
8103  SDValue N00 = N0.getOperand(0);
8104  if (N00.getOpcode() == ISD::FMUL)
8105  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8106  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8107  N00.getOperand(0)),
8108  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8109  N00.getOperand(1)), N1);
8110  }
8111 
8112  // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
8113  // Note: Commutes FADD operands.
8114  if (N1.getOpcode() == ISD::FP_EXTEND) {
8115  SDValue N10 = N1.getOperand(0);
8116  if (N10.getOpcode() == ISD::FMUL)
8117  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8118  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8119  N10.getOperand(0)),
8120  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8121  N10.getOperand(1)), N0);
8122  }
8123  }
8124 
8125  // More folding opportunities when target permits.
8126  if (Aggressive) {
 8127  // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
8128  // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8129  // are currently only supported on binary nodes.
8130  if (Options.UnsafeFPMath &&
8131  N0.getOpcode() == PreferredFusedOpcode &&
8132  N0.getOperand(2).getOpcode() == ISD::FMUL &&
8133  N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
8134  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8135  N0.getOperand(0), N0.getOperand(1),
8136  DAG.getNode(PreferredFusedOpcode, SL, VT,
8137  N0.getOperand(2).getOperand(0),
8138  N0.getOperand(2).getOperand(1),
8139  N1));
8140  }
8141 
 8142  // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
8143  // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8144  // are currently only supported on binary nodes.
8145  if (Options.UnsafeFPMath &&
8146  N1->getOpcode() == PreferredFusedOpcode &&
8147  N1.getOperand(2).getOpcode() == ISD::FMUL &&
8148  N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
8149  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8150  N1.getOperand(0), N1.getOperand(1),
8151  DAG.getNode(PreferredFusedOpcode, SL, VT,
8152  N1.getOperand(2).getOperand(0),
8153  N1.getOperand(2).getOperand(1),
8154  N0));
8155  }
8156 
8157  if (AllowFusion && LookThroughFPExt) {
8158  // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
8159  // -> (fma x, y, (fma (fpext u), (fpext v), z))
8160  auto FoldFAddFMAFPExtFMul = [&] (
8161  SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8162  return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
8163  DAG.getNode(PreferredFusedOpcode, SL, VT,
8164  DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8165  DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8166  Z));
8167  };
8168  if (N0.getOpcode() == PreferredFusedOpcode) {
8169  SDValue N02 = N0.getOperand(2);
8170  if (N02.getOpcode() == ISD::FP_EXTEND) {
8171  SDValue N020 = N02.getOperand(0);
8172  if (N020.getOpcode() == ISD::FMUL)
8173  return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
8174  N020.getOperand(0), N020.getOperand(1),
8175  N1);
8176  }
8177  }
8178 
8179  // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
8180  // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
8181  // FIXME: This turns two single-precision and one double-precision
8182  // operation into two double-precision operations, which might not be
8183  // interesting for all targets, especially GPUs.
8184  auto FoldFAddFPExtFMAFMul = [&] (
8185  SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
8186  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8187  DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
8188  DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
8189  DAG.getNode(PreferredFusedOpcode, SL, VT,
8190  DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
8191  DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
8192  Z));
8193  };
8194  if (N0.getOpcode() == ISD::FP_EXTEND) {
8195  SDValue N00 = N0.getOperand(0);
8196  if (N00.getOpcode() == PreferredFusedOpcode) {
8197  SDValue N002 = N00.getOperand(2);
8198  if (N002.getOpcode() == ISD::FMUL)
8199  return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
8200  N002.getOperand(0), N002.getOperand(1),
8201  N1);
8202  }
8203  }
8204 
 8205  // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
8206  // -> (fma y, z, (fma (fpext u), (fpext v), x))
8207  if (N1.getOpcode() == PreferredFusedOpcode) {
8208  SDValue N12 = N1.getOperand(2);
8209  if (N12.getOpcode() == ISD::FP_EXTEND) {
8210  SDValue N120 = N12.getOperand(0);
8211  if (N120.getOpcode() == ISD::FMUL)
8212  return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
8213  N120.getOperand(0), N120.getOperand(1),
8214  N0);
8215  }
8216  }
8217 
 8218  // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
8219  // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
8220  // FIXME: This turns two single-precision and one double-precision
8221  // operation into two double-precision operations, which might not be
8222  // interesting for all targets, especially GPUs.
8223  if (N1.getOpcode() == ISD::FP_EXTEND) {
8224  SDValue N10 = N1.getOperand(0);
8225  if (N10.getOpcode() == PreferredFusedOpcode) {
8226  SDValue N102 = N10.getOperand(2);
8227  if (N102.getOpcode() == ISD::FMUL)
8228  return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
8229  N102.getOperand(0), N102.getOperand(1),
8230  N0);
8231  }
8232  }
8233  }
8234  }
8235 
8236  return SDValue();
8237 }
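// Summary example (expository sketch, not in the original source): with
// fusion allowed and FMA legal for f64, (fadd (fmul a, b), c) becomes
// (fma a, b, c); under UnsafeFPMath and aggressive fusion the nested form
// (fadd (fma a, b, (fmul u, v)), c) becomes (fma a, b, (fma u, v, c)).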
8238 
8239 /// Try to perform FMA combining on a given FSUB node.
8240 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
8241  SDValue N0 = N->getOperand(0);
8242  SDValue N1 = N->getOperand(1);
8243  EVT VT = N->getValueType(0);
8244  SDLoc SL(N);
8245 
8246  const TargetOptions &Options = DAG.getTarget().Options;
8247  bool AllowFusion =
8248  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
8249 
8250  // Floating-point multiply-add with intermediate rounding.
8251  bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8252 
8253  // Floating-point multiply-add without intermediate rounding.
8254  bool HasFMA =
8255  AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8256  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8257 
8258  // No valid opcode, do not combine.
8259  if (!HasFMAD && !HasFMA)
8260  return SDValue();
8261 
 8262  const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
8263  if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
8264  return SDValue();
8265 
8266  // Always prefer FMAD to FMA for precision.
8267  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8268  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8269  bool LookThroughFPExt = TLI.isFPExtFree(VT);
8270 
8271  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
8272  if (N0.getOpcode() == ISD::FMUL &&
8273  (Aggressive || N0->hasOneUse())) {
8274  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8275  N0.getOperand(0), N0.getOperand(1),
8276  DAG.getNode(ISD::FNEG, SL, VT, N1));
8277  }
8278 
8279  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
8280  // Note: Commutes FSUB operands.
8281  if (N1.getOpcode() == ISD::FMUL &&
8282  (Aggressive || N1->hasOneUse()))
8283  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8284  DAG.getNode(ISD::FNEG, SL, VT,
8285  N1.getOperand(0)),
8286  N1.getOperand(1), N0);
8287 
 8288  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
8289  if (N0.getOpcode() == ISD::FNEG &&
8290  N0.getOperand(0).getOpcode() == ISD::FMUL &&
8291  (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
8292  SDValue N00 = N0.getOperand(0).getOperand(0);
8293  SDValue N01 = N0.getOperand(0).getOperand(1);
8294  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8295  DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
8296  DAG.getNode(ISD::FNEG, SL, VT, N1));
8297  }
8298 
8299  // Look through FP_EXTEND nodes to do more combining.
8300  if (AllowFusion && LookThroughFPExt) {
8301  // fold (fsub (fpext (fmul x, y)), z)
8302  // -> (fma (fpext x), (fpext y), (fneg z))
8303  if (N0.getOpcode() == ISD::FP_EXTEND) {
8304  SDValue N00 = N0.getOperand(0);
8305  if (N00.getOpcode() == ISD::FMUL)
8306  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8307  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8308  N00.getOperand(0)),
8309  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8310  N00.getOperand(1)),
8311  DAG.getNode(ISD::FNEG, SL, VT, N1));
8312  }
8313 
8314  // fold (fsub x, (fpext (fmul y, z)))
8315  // -> (fma (fneg (fpext y)), (fpext z), x)
8316  // Note: Commutes FSUB operands.
8317  if (N1.getOpcode() == ISD::FP_EXTEND) {
8318  SDValue N10 = N1.getOperand(0);
8319  if (N10.getOpcode() == ISD::FMUL)
8320  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8321  DAG.getNode(ISD::FNEG, SL, VT,
8322  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8323  N10.getOperand(0))),
8324  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8325  N10.getOperand(1)),
8326  N0);
8327  }
8328 
 8329  // fold (fsub (fpext (fneg (fmul x, y))), z)
 8330  // -> (fneg (fma (fpext x), (fpext y), z))
 8331  // Note: This could be removed with appropriate canonicalization of the
 8332  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
 8333  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
 8334  // from implementing the canonicalization in visitFSUB.
8335  if (N0.getOpcode() == ISD::FP_EXTEND) {
8336  SDValue N00 = N0.getOperand(0);
8337  if (N00.getOpcode() == ISD::FNEG) {
8338  SDValue N000 = N00.getOperand(0);
8339  if (N000.getOpcode() == ISD::FMUL) {
8340  return DAG.getNode(ISD::FNEG, SL, VT,
8341  DAG.getNode(PreferredFusedOpcode, SL, VT,
8342  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8343  N000.getOperand(0)),
8344  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8345  N000.getOperand(1)),
8346  N1));
8347  }
8348  }
8349  }
8350 
 8351  // fold (fsub (fneg (fpext (fmul x, y))), z)
 8352  // -> (fneg (fma (fpext x), (fpext y), z))
 8353  // Note: This could be removed with appropriate canonicalization of the
 8354  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
 8355  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
 8356  // from implementing the canonicalization in visitFSUB.
8357  if (N0.getOpcode() == ISD::FNEG) {
8358  SDValue N00 = N0.getOperand(0);
8359  if (N00.getOpcode() == ISD::FP_EXTEND) {
8360  SDValue N000 = N00.getOperand(0);
8361  if (N000.getOpcode() == ISD::FMUL) {
8362  return DAG.getNode(ISD::FNEG, SL, VT,
8363  DAG.getNode(PreferredFusedOpcode, SL, VT,
8364  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8365  N000.getOperand(0)),
8366  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8367  N000.getOperand(1)),
8368  N1));
8369  }
8370  }
8371  }
8372 
8373  }
8374 
8375  // More folding opportunities when target permits.
8376  if (Aggressive) {
8377  // fold (fsub (fma x, y, (fmul u, v)), z)
 8378  // -> (fma x, y, (fma u, v, (fneg z)))
8379  // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8380  // are currently only supported on binary nodes.
8381  if (Options.UnsafeFPMath &&
8382  N0.getOpcode() == PreferredFusedOpcode &&
8383  N0.getOperand(2).getOpcode() == ISD::FMUL &&
8384  N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
8385  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8386  N0.getOperand(0), N0.getOperand(1),
8387  DAG.getNode(PreferredFusedOpcode, SL, VT,
8388  N0.getOperand(2).getOperand(0),
8389  N0.getOperand(2).getOperand(1),
8390  DAG.getNode(ISD::FNEG, SL, VT,
8391  N1)));
8392  }
8393 
8394  // fold (fsub x, (fma y, z, (fmul u, v)))
8395  // -> (fma (fneg y), z, (fma (fneg u), v, x))
8396  // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF
8397  // are currently only supported on binary nodes.
8398  if (Options.UnsafeFPMath &&
8399  N1.getOpcode() == PreferredFusedOpcode &&
8400  N1.getOperand(2).getOpcode() == ISD::FMUL) {
8401  SDValue N20 = N1.getOperand(2).getOperand(0);
8402  SDValue N21 = N1.getOperand(2).getOperand(1);
8403  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8404  DAG.getNode(ISD::FNEG, SL, VT,
8405  N1.getOperand(0)),
8406  N1.getOperand(1),
8407  DAG.getNode(PreferredFusedOpcode, SL, VT,
8408  DAG.getNode(ISD::FNEG, SL, VT, N20),
 8410  N21, N0));
8411  }
8412 
8413  if (AllowFusion && LookThroughFPExt) {
8414  // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
 8415  // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
8416  if (N0.getOpcode() == PreferredFusedOpcode) {
8417  SDValue N02 = N0.getOperand(2);
8418  if (N02.getOpcode() == ISD::FP_EXTEND) {
8419  SDValue N020 = N02.getOperand(0);
8420  if (N020.getOpcode() == ISD::FMUL)
8421  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8422  N0.getOperand(0), N0.getOperand(1),
8423  DAG.getNode(PreferredFusedOpcode, SL, VT,
8424  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8425  N020.getOperand(0)),
8426  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8427  N020.getOperand(1)),
8428  DAG.getNode(ISD::FNEG, SL, VT,
8429  N1)));
8430  }
8431  }
8432 
8433  // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
8434  // -> (fma (fpext x), (fpext y),
8435  // (fma (fpext u), (fpext v), (fneg z)))
8436  // FIXME: This turns two single-precision and one double-precision
8437  // operation into two double-precision operations, which might not be
8438  // interesting for all targets, especially GPUs.
8439  if (N0.getOpcode() == ISD::FP_EXTEND) {
8440  SDValue N00 = N0.getOperand(0);
8441  if (N00.getOpcode() == PreferredFusedOpcode) {
8442  SDValue N002 = N00.getOperand(2);
8443  if (N002.getOpcode() == ISD::FMUL)
8444  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8445  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8446  N00.getOperand(0)),
8447  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8448  N00.getOperand(1)),
8449  DAG.getNode(PreferredFusedOpcode, SL, VT,
8450  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8451  N002.getOperand(0)),
8452  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8453  N002.getOperand(1)),
8454  DAG.getNode(ISD::FNEG, SL, VT,
8455  N1)));
8456  }
8457  }
8458 
8459  // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
8460  // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
8461  if (N1.getOpcode() == PreferredFusedOpcode &&
8462  N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
8463  SDValue N120 = N1.getOperand(2).getOperand(0);
8464  if (N120.getOpcode() == ISD::FMUL) {
8465  SDValue N1200 = N120.getOperand(0);
8466  SDValue N1201 = N120.getOperand(1);
8467  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8468  DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
8469  N1.getOperand(1),
8470  DAG.getNode(PreferredFusedOpcode, SL, VT,
8471  DAG.getNode(ISD::FNEG, SL, VT,
8472  DAG.getNode(ISD::FP_EXTEND, SL,
8473  VT, N1200)),
8474  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8475  N1201),
8476  N0));
8477  }
8478  }
8479 
8480  // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
8481  // -> (fma (fneg (fpext y)), (fpext z),
8482  // (fma (fneg (fpext u)), (fpext v), x))
8483  // FIXME: This turns two single-precision and one double-precision
8484  // operation into two double-precision operations, which might not be
8485  // interesting for all targets, especially GPUs.
8486  if (N1.getOpcode() == ISD::FP_EXTEND &&
8487  N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
8488  SDValue N100 = N1.getOperand(0).getOperand(0);
8489  SDValue N101 = N1.getOperand(0).getOperand(1);
8490  SDValue N102 = N1.getOperand(0).getOperand(2);
8491  if (N102.getOpcode() == ISD::FMUL) {
8492  SDValue N1020 = N102.getOperand(0);
8493  SDValue N1021 = N102.getOperand(1);
8494  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8495  DAG.getNode(ISD::FNEG, SL, VT,
8496  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8497  N100)),
8498  DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
8499  DAG.getNode(PreferredFusedOpcode, SL, VT,
8500  DAG.getNode(ISD::FNEG, SL, VT,
8501  DAG.getNode(ISD::FP_EXTEND, SL,
8502  VT, N1020)),
8503  DAG.getNode(ISD::FP_EXTEND, SL, VT,
8504  N1021),
8505  N0));
8506  }
8507  }
8508  }
8509  }
8510 
8511  return SDValue();
8512 }
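// Summary example (expository sketch, not in the original source): the
// FSUB folds mirror the FADD ones with an FNEG on the appropriate operand,
// e.g. (fsub (fmul a, b), c) -> (fma a, b, (fneg c)) and
// (fsub c, (fmul a, b)) -> (fma (fneg a), b, c).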
8513 
8514 /// Try to perform FMA combining on a given FMUL node based on the distributive
8515 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
8516 /// subtraction instead of addition).
8517 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
8518  SDValue N0 = N->getOperand(0);
8519  SDValue N1 = N->getOperand(1);
8520  EVT VT = N->getValueType(0);
8521  SDLoc SL(N);
8522 
8523  assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
8524 
8525  const TargetOptions &Options = DAG.getTarget().Options;
8526 
8527  // The transforms below are incorrect when x == 0 and y == inf, because the
8528  // intermediate multiplication produces a nan.
8529  if (!Options.NoInfsFPMath)
8530  return SDValue();
8531 
8532  // Floating-point multiply-add without intermediate rounding.
8533  bool HasFMA =
8534  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
8535  TLI.isFMAFasterThanFMulAndFAdd(VT) &&
8536  (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
8537 
8538  // Floating-point multiply-add with intermediate rounding. This can result
8539  // in a less precise result due to the changed rounding order.
8540  bool HasFMAD = Options.UnsafeFPMath &&
8541  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
8542 
8543  // No valid opcode, do not combine.
8544  if (!HasFMAD && !HasFMA)
8545  return SDValue();
8546 
8547  // Always prefer FMAD to FMA for precision.
8548  unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
8549  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
8550 
8551  // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
8552  // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
8553  auto FuseFADD = [&](SDValue X, SDValue Y) {
8554  if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
8555  auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8556  if (XC1 && XC1->isExactlyValue(+1.0))
8557  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8558  if (XC1 && XC1->isExactlyValue(-1.0))
8559  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8560  DAG.getNode(ISD::FNEG, SL, VT, Y));
8561  }
8562  return SDValue();
8563  };
8564 
8565  if (SDValue FMA = FuseFADD(N0, N1))
8566  return FMA;
8567  if (SDValue FMA = FuseFADD(N1, N0))
8568  return FMA;
8569 
8570  // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
8571  // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
8572  // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
8573  // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
8574  auto FuseFSUB = [&](SDValue X, SDValue Y) {
8575  if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
8576  auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
8577  if (XC0 && XC0->isExactlyValue(+1.0))
8578  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8579  DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8580  Y);
8581  if (XC0 && XC0->isExactlyValue(-1.0))
8582  return DAG.getNode(PreferredFusedOpcode, SL, VT,
8583  DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
8584  DAG.getNode(ISD::FNEG, SL, VT, Y));
8585 
8586  auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
8587  if (XC1 && XC1->isExactlyValue(+1.0))
8588  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
8589  DAG.getNode(ISD::FNEG, SL, VT, Y));
8590  if (XC1 && XC1->isExactlyValue(-1.0))
8591  return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
8592  }
8593  return SDValue();
8594  };
8595 
8596  if (SDValue FMA = FuseFSUB(N0, N1))
8597  return FMA;
8598  if (SDValue FMA = FuseFSUB(N1, N0))
8599  return FMA;
8600 
8601  return SDValue();
8602 }
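// Distributive-law example (expository sketch, not in the original source):
// (fmul (fadd x, 1.0), y) distributes to x*y + y and is emitted as
// (fma x, y, y); the FSUB variants negate accordingly, e.g.
// (fmul (fsub 1.0, x), y) -> (fma (fneg x), y, y).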
8603 
8604 SDValue DAGCombiner::visitFADD(SDNode *N) {
8605  SDValue N0 = N->getOperand(0);
8606  SDValue N1 = N->getOperand(1);
8607  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
8608  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
8609  EVT VT = N->getValueType(0);
8610  SDLoc DL(N);
8611  const TargetOptions &Options = DAG.getTarget().Options;
8612  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8613 
8614  // fold vector ops
8615  if (VT.isVector())
8616  if (SDValue FoldedVOp = SimplifyVBinOp(N))
8617  return FoldedVOp;
8618 
8619  // fold (fadd c1, c2) -> c1 + c2
8620  if (N0CFP && N1CFP)
8621  return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
8622 
8623  // canonicalize constant to RHS
8624  if (N0CFP && !N1CFP)
8625  return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
8626 
8627  // fold (fadd A, (fneg B)) -> (fsub A, B)
8628  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
8629  isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
8630  return DAG.getNode(ISD::FSUB, DL, VT, N0,
8631  GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8632 
8633  // fold (fadd (fneg A), B) -> (fsub B, A)
8634  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
8635  isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
8636  return DAG.getNode(ISD::FSUB, DL, VT, N1,
8637  GetNegatedExpression(N0, DAG, LegalOperations), Flags);
8638 
8639  // FIXME: Auto-upgrade the target/function-level option.
8640  if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
8641  // fold (fadd A, 0) -> A
8642  if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
8643  if (N1C->isZero())
8644  return N0;
8645  }
8646 
8647  // If 'unsafe math' is enabled, fold lots of things.
8648  if (Options.UnsafeFPMath) {
 8649  // No FP constant should be created after legalization, as the instruction
 8650  // selection pass has a hard time dealing with FP constants.
8651  bool AllowNewConst = (Level < AfterLegalizeDAG);
8652 
8653  // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
8654  if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
 8655  isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
8656  return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
8657  DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
8658  Flags),
8659  Flags);
8660 
8661  // If allowed, fold (fadd (fneg x), x) -> 0.0
8662  if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
8663  return DAG.getConstantFP(0.0, DL, VT);
8664 
8665  // If allowed, fold (fadd x, (fneg x)) -> 0.0
8666  if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
8667  return DAG.getConstantFP(0.0, DL, VT);
8668 
8669  // We can fold chains of FADD's of the same value into multiplications.
8670  // This transform is not safe in general because we are reducing the number
8671  // of rounding steps.
8672  if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
8673  if (N0.getOpcode() == ISD::FMUL) {
8674  bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
8675  bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
8676 
8677  // (fadd (fmul x, c), x) -> (fmul x, c+1)
8678  if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
8679  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
8680  DAG.getConstantFP(1.0, DL, VT), Flags);
8681  return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
8682  }
8683 
8684  // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
8685  if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
8686  N1.getOperand(0) == N1.getOperand(1) &&
8687  N0.getOperand(0) == N1.getOperand(0)) {
8688  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
8689  DAG.getConstantFP(2.0, DL, VT), Flags);
8690  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
8691  }
8692  }
8693 
8694  if (N1.getOpcode() == ISD::FMUL) {
8695  bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
8696  bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
8697 
8698  // (fadd x, (fmul x, c)) -> (fmul x, c+1)
8699  if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
8700  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
8701  DAG.getConstantFP(1.0, DL, VT), Flags);
8702  return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
8703  }
8704 
8705  // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
8706  if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
8707  N0.getOperand(0) == N0.getOperand(1) &&
8708  N1.getOperand(0) == N0.getOperand(0)) {
8709  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
8710  DAG.getConstantFP(2.0, DL, VT), Flags);
8711  return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
8712  }
8713  }
8714 
8715  if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
8716  bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
8717  // (fadd (fadd x, x), x) -> (fmul x, 3.0)
8718  if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
8719  (N0.getOperand(0) == N1)) {
8720  return DAG.getNode(ISD::FMUL, DL, VT,
8721  N1, DAG.getConstantFP(3.0, DL, VT), Flags);
8722  }
8723  }
8724 
8725  if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
8726  bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
8727  // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
8728  if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
8729  N1.getOperand(0) == N0) {
8730  return DAG.getNode(ISD::FMUL, DL, VT,
8731  N0, DAG.getConstantFP(3.0, DL, VT), Flags);
8732  }
8733  }
8734 
8735  // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
8736  if (AllowNewConst &&
8737  N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
8738  N0.getOperand(0) == N0.getOperand(1) &&
8739  N1.getOperand(0) == N1.getOperand(1) &&
8740  N0.getOperand(0) == N1.getOperand(0)) {
8741  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
8742  DAG.getConstantFP(4.0, DL, VT), Flags);
8743  }
8744  }
8745  } // enable-unsafe-fp-math
8746 
8747  // FADD -> FMA combines:
8748  if (SDValue Fused = visitFADDForFMACombine(N)) {
8749  AddToWorklist(Fused.getNode());
8750  return Fused;
8751  }
8752  return SDValue();
8753 }
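// Unsafe-math example (expository sketch, not in the original source):
// (fadd (fadd x, x), x) matches the "x + x + x" pattern above and is
// rewritten to (fmul x, 3.0), but only while Level < AfterLegalizeDAG,
// since instruction selection copes poorly with freshly created FP
// constants.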
8754 
8755 SDValue DAGCombiner::visitFSUB(SDNode *N) {
8756  SDValue N0 = N->getOperand(0);
8757  SDValue N1 = N->getOperand(1);
 8758  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
 8759  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8760  EVT VT = N->getValueType(0);
8761  SDLoc DL(N);
8762  const TargetOptions &Options = DAG.getTarget().Options;
8763  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8764 
8765  // fold vector ops
8766  if (VT.isVector())
8767  if (SDValue FoldedVOp = SimplifyVBinOp(N))
8768  return FoldedVOp;
8769 
8770  // fold (fsub c1, c2) -> c1-c2
8771  if (N0CFP && N1CFP)
8772  return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
8773 
8774  // fold (fsub A, (fneg B)) -> (fadd A, B)
8775  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8776  return DAG.getNode(ISD::FADD, DL, VT, N0,
8777  GetNegatedExpression(N1, DAG, LegalOperations), Flags);
8778 
8779  // FIXME: Auto-upgrade the target/function-level option.
8780  if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
8781  // (fsub 0, B) -> -B
8782  if (N0CFP && N0CFP->isZero()) {
8783  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8784  return GetNegatedExpression(N1, DAG, LegalOperations);
8785  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8786  return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
8787  }
8788  }
8789 
8790  // If 'unsafe math' is enabled, fold lots of things.
8791  if (Options.UnsafeFPMath) {
8792  // (fsub A, 0) -> A
8793  if (N1CFP && N1CFP->isZero())
8794  return N0;
8795 
8796  // (fsub x, x) -> 0.0
8797  if (N0 == N1)
8798  return DAG.getConstantFP(0.0f, DL, VT);
8799 
8800  // (fsub x, (fadd x, y)) -> (fneg y)
8801  // (fsub x, (fadd y, x)) -> (fneg y)
8802  if (N1.getOpcode() == ISD::FADD) {
8803  SDValue N10 = N1->getOperand(0);
8804  SDValue N11 = N1->getOperand(1);
8805 
8806  if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
8807  return GetNegatedExpression(N11, DAG, LegalOperations);
8808 
8809  if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
8810  return GetNegatedExpression(N10, DAG, LegalOperations);
8811  }
8812  }
8813 
8814  // FSUB -> FMA combines:
8815  if (SDValue Fused = visitFSUBForFMACombine(N)) {
8816  AddToWorklist(Fused.getNode());
8817  return Fused;
8818  }
8819 
8820  return SDValue();
8821 }
8822 
8823 SDValue DAGCombiner::visitFMUL(SDNode *N) {
8824  SDValue N0 = N->getOperand(0);
8825  SDValue N1 = N->getOperand(1);
 8826  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
 8827  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
8828  EVT VT = N->getValueType(0);
8829  SDLoc DL(N);
8830  const TargetOptions &Options = DAG.getTarget().Options;
8831  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
8832 
8833  // fold vector ops
8834  if (VT.isVector()) {
8835  // This just handles C1 * C2 for vectors. Other vector folds are below.
8836  if (SDValue FoldedVOp = SimplifyVBinOp(N))
8837  return FoldedVOp;
8838  }
8839 
8840  // fold (fmul c1, c2) -> c1*c2
8841  if (N0CFP && N1CFP)
8842  return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
8843 
8844  // canonicalize constant to RHS
 8845  if (isConstantFPBuildVectorOrConstantFP(N0) &&
 8846  !isConstantFPBuildVectorOrConstantFP(N1))
8847  return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
8848 
8849  // fold (fmul A, 1.0) -> A
8850  if (N1CFP && N1CFP->isExactlyValue(1.0))
8851  return N0;
8852 
8853  if (Options.UnsafeFPMath) {
8854  // fold (fmul A, 0) -> 0
8855  if (N1CFP && N1CFP->isZero())
8856  return N1;
8857 
8858  // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
8859  if (N0.getOpcode() == ISD::FMUL) {
8860  // Fold scalars or any vector constants (not just splats).
8861  // This fold is done in general by InstCombine, but extra fmul insts
8862  // may have been generated during lowering.
8863  SDValue N00 = N0.getOperand(0);
8864  SDValue N01 = N0.getOperand(1);
8865  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
8866  auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
8867  auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
8868 
8869  // Check 1: Make sure that the first operand of the inner multiply is NOT
8870  // a constant. Otherwise, we may induce infinite looping.
8871  if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
8872  // Check 2: Make sure that the second operand of the inner multiply and
8873  // the second operand of the outer multiply are constants.
8874  if ((N1CFP && isConstOrConstSplatFP(N01)) ||
8875  (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
8876  SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
8877  return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
8878  }
8879  }
8880  }
8881 
8882  // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
 8883  // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
 8884  // during an early run of DAGCombiner it can prevent folding with fmuls
8885  // inserted during lowering.
8886  if (N0.getOpcode() == ISD::FADD &&
8887  (N0.getOperand(0) == N0.getOperand(1)) &&
8888  N0.hasOneUse()) {
8889  const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
8890  SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
8891  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
8892  }
8893  }
8894 
8895  // fold (fmul X, 2.0) -> (fadd X, X)
8896  if (N1CFP && N1CFP->isExactlyValue(+2.0))
8897  return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
8898 
8899  // fold (fmul X, -1.0) -> (fneg X)
8900  if (N1CFP && N1CFP->isExactlyValue(-1.0))
8901  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8902  return DAG.getNode(ISD::FNEG, DL, VT, N0);
8903 
8904  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
8905  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
8906  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
8907  // Both can be negated for free, check to see if at least one is cheaper
8908  // negated.
8909  if (LHSNeg == 2 || RHSNeg == 2)
8910  return DAG.getNode(ISD::FMUL, DL, VT,
8911  GetNegatedExpression(N0, DAG, LegalOperations),
8912  GetNegatedExpression(N1, DAG, LegalOperations),
8913  Flags);
8914  }
8915  }
8916 
8917  // FMUL -> FMA combines:
8918  if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
8919  AddToWorklist(Fused.getNode());
8920  return Fused;
8921  }
8922 
8923  return SDValue();
8924 }
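// Example (expository sketch, not in the original source):
// (fmul X, 2.0) is unconditionally rewritten to (fadd X, X), and
// (fmul X, -1.0) becomes (fneg X) whenever FNEG is legal or we have not
// yet legalized operations.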
8925 
8926 SDValue DAGCombiner::visitFMA(SDNode *N) {
8927  SDValue N0 = N->getOperand(0);
8928  SDValue N1 = N->getOperand(1);
8929  SDValue N2 = N->getOperand(2);
 8930  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
 8931  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8932  EVT VT = N->getValueType(0);
8933  SDLoc DL(N);
8934  const TargetOptions &Options = DAG.getTarget().Options;
8935 
8936  // Constant fold FMA.
8937  if (isa<ConstantFPSDNode>(N0) &&
8938  isa<ConstantFPSDNode>(N1) &&
8939  isa<ConstantFPSDNode>(N2)) {
8940  return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
8941  }
8942 
8943  if (Options.UnsafeFPMath) {
8944  if (N0CFP && N0CFP->isZero())
8945  return N2;
8946  if (N1CFP && N1CFP->isZero())
8947  return N2;
8948  }
8949  // TODO: The FMA node should have flags that propagate to these nodes.
8950  if (N0CFP && N0CFP->isExactlyValue(1.0))
8951  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
8952  if (N1CFP && N1CFP->isExactlyValue(1.0))
8953  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
8954 
8955  // Canonicalize (fma c, x, y) -> (fma x, c, y)
8956  if (isConstantFPBuildVectorOrConstantFP(N0) &&
8957  !isConstantFPBuildVectorOrConstantFP(N1))
8958  return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
8959 
8960  // TODO: FMA nodes should have flags that propagate to the created nodes.
8961  // For now, create a Flags object for use with all unsafe math transforms.
8962  SDNodeFlags Flags;
8963  Flags.setUnsafeAlgebra(true);
8964 
8965  if (Options.UnsafeFPMath) {
8966  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
8967  if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
8968  isConstantFPBuildVectorOrConstantFP(N1) &&
8969  isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
8970  return DAG.getNode(ISD::FMUL, DL, VT, N0,
8971  DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
8972  &Flags), &Flags);
8973  }
8974 
8975  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
8976  if (N0.getOpcode() == ISD::FMUL &&
8977  isConstantFPBuildVectorOrConstantFP(N1) &&
8978  isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
8979  return DAG.getNode(ISD::FMA, DL, VT,
8980  N0.getOperand(0),
8981  DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
8982  &Flags),
8983  N2);
8984  }
8985  }
8986 
8987  // (fma x, 1, y) -> (fadd x, y)
8988  // (fma x, -1, y) -> (fadd (fneg x), y)
8989  if (N1CFP) {
8990  if (N1CFP->isExactlyValue(1.0))
8991  // TODO: The FMA node should have flags that propagate to this node.
8992  return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
8993 
8994  if (N1CFP->isExactlyValue(-1.0) &&
8995  (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
8996  SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
8997  AddToWorklist(RHSNeg.getNode());
8998  // TODO: The FMA node should have flags that propagate to this node.
8999  return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
9000  }
9001  }
9002 
9003  if (Options.UnsafeFPMath) {
9004  // (fma x, c, x) -> (fmul x, (c+1))
9005  if (N1CFP && N0 == N2) {
9006  return DAG.getNode(ISD::FMUL, DL, VT, N0,
9007  DAG.getNode(ISD::FADD, DL, VT, N1,
9008  DAG.getConstantFP(1.0, DL, VT), &Flags),
9009  &Flags);
9010  }
9011 
9012  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
9013  if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
9014  return DAG.getNode(ISD::FMUL, DL, VT, N0,
9015  DAG.getNode(ISD::FADD, DL, VT, N1,
9016  DAG.getConstantFP(-1.0, DL, VT), &Flags),
9017  &Flags);
9018  }
9019  }
9020 
9021  return SDValue();
9022 }
9023 
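// The real-number identity behind the (fma x, c, x) -> (fmul x, (c+1)) fold
// above is x*c + x == x*(c + 1). With rounding, the two sides can disagree
// (FMA rounds once; the fmul form rounds after folding c+1), which is why the
// fold sits under UnsafeFPMath. An illustrative sketch (values are arbitrary):
//
//   #include <cmath>
//   #include <cstdio>
//   int main() {
//     double x = 1.0 / 3.0, c = 0.7;
//     std::printf("%.17g\n", std::fma(x, c, x)); // x*c + x, single rounding
//     std::printf("%.17g\n", x * (c + 1.0));     // folded form, may differ
//   }
//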
9024 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
9025 // reciprocal.
9026 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
9027 // Notice that this is not always beneficial. One reason is that different
9028 // targets may have different costs for FDIV and FMUL, so sometimes the cost
9029 // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
9030 // reason is that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
9031 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
9032  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
9033  const SDNodeFlags *Flags = N->getFlags();
9034  if (!UnsafeMath && !Flags->hasAllowReciprocal())
9035  return SDValue();
9036 
9037  // Skip if current node is a reciprocal.
9038  SDValue N0 = N->getOperand(0);
9039  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9040  if (N0CFP && N0CFP->isExactlyValue(1.0))
9041  return SDValue();
9042 
9043  // Exit early if the target does not want this transform or if there can't
9044  // possibly be enough uses of the divisor to make the transform worthwhile.
9045  SDValue N1 = N->getOperand(1);
9046  unsigned MinUses = TLI.combineRepeatedFPDivisors();
9047  if (!MinUses || N1->use_size() < MinUses)
9048  return SDValue();
9049 
9050  // Find all FDIV users of the same divisor.
9051  // Use a set because duplicates may be present in the user list.
9052  SetVector<SDNode *> Users;
9053  for (auto *U : N1->uses()) {
9054  if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
9055  // This division is eligible for optimization only if global unsafe math
9056  // is enabled or if this division allows reciprocal formation.
9057  if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
9058  Users.insert(U);
9059  }
9060  }
9061 
9062  // Now that we have the actual number of divisor uses, make sure it meets
9063  // the minimum threshold specified by the target.
9064  if (Users.size() < MinUses)
9065  return SDValue();
9066 
9067  EVT VT = N->getValueType(0);
9068  SDLoc DL(N);
9069  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
9070  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
9071 
9072  // Dividend / Divisor -> Dividend * Reciprocal
9073  for (auto *U : Users) {
9074  SDValue Dividend = U->getOperand(0);
9075  if (Dividend != FPOne) {
9076  SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
9077  Reciprocal, Flags);
9078  CombineTo(U, NewNode);
9079  } else if (U != Reciprocal.getNode()) {
9080  // In the absence of fast-math-flags, this user node is always the
9081  // same node as Reciprocal, but with FMF they may be different nodes.
9082  CombineTo(U, Reciprocal);
9083  }
9084  }
9085  return SDValue(N, 0); // N was replaced.
9086 }
9087 
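// Source-level picture of the transform above: two divides by the same D
// become one divide (the reciprocal) plus two multiplies. A sketch with
// hypothetical helpers; the products may differ from true division in the
// last ulp, hence the reciprocal/unsafe-math gating:
//
//   void before(double a, double b, double D, double *x, double *y) {
//     *x = a / D;
//     *y = b / D;
//   }
//   void after(double a, double b, double D, double *x, double *y) {
//     double recip = 1.0 / D; // computed once, reused by every user
//     *x = a * recip;
//     *y = b * recip;
//   }
//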
9088 SDValue DAGCombiner::visitFDIV(SDNode *N) {
9089  SDValue N0 = N->getOperand(0);
9090  SDValue N1 = N->getOperand(1);
9091  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9092  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9093  EVT VT = N->getValueType(0);
9094  SDLoc DL(N);
9095  const TargetOptions &Options = DAG.getTarget().Options;
9096  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
9097 
9098  // fold vector ops
9099  if (VT.isVector())
9100  if (SDValue FoldedVOp = SimplifyVBinOp(N))
9101  return FoldedVOp;
9102 
9103  // fold (fdiv c1, c2) -> c1/c2
9104  if (N0CFP && N1CFP)
9105  return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
9106 
9107  if (Options.UnsafeFPMath) {
9108  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
9109  if (N1CFP) {
9110  // Compute the reciprocal 1.0 / c2.
9111  const APFloat &N1APF = N1CFP->getValueAPF();
9112  APFloat Recip(N1APF.getSemantics(), 1); // 1.0
9113  APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
9114  // Only do the transform if the reciprocal is a legal fp immediate that
9115  // isn't too nasty (eg NaN, denormal, ...).
9116  if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
9117  (!LegalOperations ||
9118  // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
9119  // backend)... we should handle this gracefully after Legalize.
9120  // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
9121  TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
9122  TLI.isFPImmLegal(Recip, VT)))
9123  return DAG.getNode(ISD::FMUL, DL, VT, N0,
9124  DAG.getConstantFP(Recip, DL, VT), Flags);
9125  }
9126 
9127  // If this FDIV is part of a reciprocal square root, it may be folded
9128  // into a target-specific square root estimate instruction.
9129  if (N1.getOpcode() == ISD::FSQRT) {
9130  if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
9131  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9132  }
9133  } else if (N1.getOpcode() == ISD::FP_EXTEND &&
9134  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9135  if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
9136  Flags)) {
9137  RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
9138  AddToWorklist(RV.getNode());
9139  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9140  }
9141  } else if (N1.getOpcode() == ISD::FP_ROUND &&
9142  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9143  if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
9144  Flags)) {
9145  RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
9146  AddToWorklist(RV.getNode());
9147  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9148  }
9149  } else if (N1.getOpcode() == ISD::FMUL) {
9150  // Look through an FMUL. Even though this won't remove the FDIV directly,
9151  // it's still worthwhile to get rid of the FSQRT if possible.
9152  SDValue SqrtOp;
9153  SDValue OtherOp;
9154  if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
9155  SqrtOp = N1.getOperand(0);
9156  OtherOp = N1.getOperand(1);
9157  } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
9158  SqrtOp = N1.getOperand(1);
9159  OtherOp = N1.getOperand(0);
9160  }
9161  if (SqrtOp.getNode()) {
9162  // We found a FSQRT, so try to make this fold:
9163  // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
9164  if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
9165  RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
9166  AddToWorklist(RV.getNode());
9167  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9168  }
9169  }
9170  }
9171 
9172  // Fold into a reciprocal estimate and multiply instead of a real divide.
9173  if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
9174  AddToWorklist(RV.getNode());
9175  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
9176  }
9177  }
9178 
9179  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
9180  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
9181  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
9182  // Both can be negated for free, check to see if at least one is cheaper
9183  // negated.
9184  if (LHSNeg == 2 || RHSNeg == 2)
9185  return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
9186  GetNegatedExpression(N0, DAG, LegalOperations),
9187  GetNegatedExpression(N1, DAG, LegalOperations),
9188  Flags);
9189  }
9190  }
9191 
9192  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
9193  return CombineRepeatedDivisors;
9194 
9195  return SDValue();
9196 }
9197 
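// The rsqrt rewrite above relies on the real-number identity
//   x / (y * sqrt(z)) == x * ((1 / sqrt(z)) / y),
// with a target-specific estimate standing in for 1/sqrt(z). A sketch of the
// identity (std::sqrt stands in for the estimate; not part of this pass):
//
//   #include <cmath>
//   double before(double x, double y, double z) {
//     return x / (y * std::sqrt(z));
//   }
//   double after(double x, double y, double z) {
//     double rsqrt_z = 1.0 / std::sqrt(z); // target rsqrt estimate goes here
//     return x * (rsqrt_z / y);
//   }
//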
9198 SDValue DAGCombiner::visitFREM(SDNode *N) {
9199  SDValue N0 = N->getOperand(0);
9200  SDValue N1 = N->getOperand(1);
9201  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9202  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9203  EVT VT = N->getValueType(0);
9204 
9205  // fold (frem c1, c2) -> fmod(c1,c2)
9206  if (N0CFP && N1CFP)
9207  return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
9208  &cast<BinaryWithFlagsSDNode>(N)->Flags);
9209 
9210  return SDValue();
9211 }
9212 
9213 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
9214  if (!DAG.getTarget().Options.UnsafeFPMath)
9215  return SDValue();
9216 
9217  SDValue N0 = N->getOperand(0);
9218  if (TLI.isFsqrtCheap(N0, DAG))
9219  return SDValue();
9220 
9221  // TODO: FSQRT nodes should have flags that propagate to the created nodes.
9222  // For now, create a Flags object for use with all unsafe math transforms.
9223  SDNodeFlags Flags;
9224  Flags.setUnsafeAlgebra(true);
9225  return buildSqrtEstimate(N0, &Flags);
9226 }
9227 
9228 /// copysign(x, fp_extend(y)) -> copysign(x, y)
9229 /// copysign(x, fp_round(y)) -> copysign(x, y)
9230 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
9231  SDValue N1 = N->getOperand(1);
9232  if ((N1.getOpcode() == ISD::FP_EXTEND ||
9233  N1.getOpcode() == ISD::FP_ROUND)) {
9234  // Do not optimize out type conversion of f128 type yet.
9235  // For some targets like x86_64, configuration is changed to keep one f128
9236  // value in one SSE register, but instruction selection cannot handle
9237  // FCOPYSIGN on SSE registers yet.
9238  EVT N1VT = N1->getValueType(0);
9239  EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
9240  return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
9241  }
9242  return false;
9243 }
9244 
9245 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
9246  SDValue N0 = N->getOperand(0);
9247  SDValue N1 = N->getOperand(1);
9248  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9249  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
9250  EVT VT = N->getValueType(0);
9251 
9252  if (N0CFP && N1CFP) // Constant fold
9253  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
9254 
9255  if (N1CFP) {
9256  const APFloat &V = N1CFP->getValueAPF();
9257  // copysign(x, c1) -> fabs(x) iff ispos(c1)
9258  // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
9259  if (!V.isNegative()) {
9260  if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
9261  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9262  } else {
9263  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
9264  return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9265  DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
9266  }
9267  }
9268 
9269  // copysign(fabs(x), y) -> copysign(x, y)
9270  // copysign(fneg(x), y) -> copysign(x, y)
9271  // copysign(copysign(x,z), y) -> copysign(x, y)
9272  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
9273  N0.getOpcode() == ISD::FCOPYSIGN)
9274  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
9275 
9276  // copysign(x, abs(y)) -> abs(x)
9277  if (N1.getOpcode() == ISD::FABS)
9278  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9279 
9280  // copysign(x, copysign(y,z)) -> copysign(x, z)
9281  if (N1.getOpcode() == ISD::FCOPYSIGN)
9282  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
9283 
9284  // copysign(x, fp_extend(y)) -> copysign(x, y)
9285  // copysign(x, fp_round(y)) -> copysign(x, y)
9286  if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
9287  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
9288 
9289  return SDValue();
9290 }
9291 
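// The scalar identities used above, spelled out with std::copysign: only the
// sign of the second operand matters, so fabs/fneg/copysign in that operand
// can be looked through. A small self-check sketch (values are arbitrary):
//
//   #include <cassert>
//   #include <cmath>
//   int main() {
//     double x = -2.5, y = -7.0, z = 3.0;
//     assert(std::copysign(x, 4.0) == std::fabs(x));           // ispos(c1)
//     assert(std::copysign(x, -4.0) == -std::fabs(x));         // isneg(c1)
//     assert(std::copysign(x, std::fabs(y)) == std::fabs(x));  // abs(y)
//     assert(std::copysign(x, std::copysign(y, z)) == std::copysign(x, z));
//   }
//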
9292 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
9293  SDValue N0 = N->getOperand(0);
9294  EVT VT = N->getValueType(0);
9295  EVT OpVT = N0.getValueType();
9296 
9297  // fold (sint_to_fp c1) -> c1fp
9298  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9299  // ...but only if the target supports immediate floating-point values
9300  (!LegalOperations ||
9301  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9302  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9303 
9304  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
9305  // but UINT_TO_FP is legal on this target, try to convert.
9306  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
9307  TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
9308  // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
9309  if (DAG.SignBitIsZero(N0))
9310  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9311  }
9312 
9313  // The next optimizations are desirable only if SELECT_CC can be lowered.
9314  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9315  // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
9316  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
9317  !VT.isVector() &&
9318  (!LegalOperations ||
9319  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9320  SDLoc DL(N);
9321  SDValue Ops[] =
9322  { N0.getOperand(0), N0.getOperand(1),
9323  DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9324  N0.getOperand(2) };
9325  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9326  }
9327 
9328  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
9329  // (select_cc x, y, 1.0, 0.0, cc)
9330  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
9331  N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
9332  (!LegalOperations ||
9333  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9334  SDLoc DL(N);
9335  SDValue Ops[] =
9336  { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
9337  DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9338  N0.getOperand(0).getOperand(2) };
9339  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9340  }
9341  }
9342 
9343  return SDValue();
9344 }
9345 
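// Why SignBitIsZero justifies swapping SINT_TO_FP for UINT_TO_FP above: when
// the top bit is clear, the signed and unsigned interpretations of the bits
// denote the same integer, so both conversions yield the same FP value. A
// one-line sanity check (hypothetical value):
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     uint32_t bits = 0x7FFFFFF0u;                // sign bit clear
//     assert((float)(int32_t)bits == (float)bits);
//   }
//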
9346 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
9347  SDValue N0 = N->getOperand(0);
9348  EVT VT = N->getValueType(0);
9349  EVT OpVT = N0.getValueType();
9350 
9351  // fold (uint_to_fp c1) -> c1fp
9352  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9353  // ...but only if the target supports immediate floating-point values
9354  (!LegalOperations ||
9355  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
9356  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
9357 
9358  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
9359  // but SINT_TO_FP is legal on this target, try to convert.
9360  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
9361  TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
9362  // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
9363  if (DAG.SignBitIsZero(N0))
9364  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
9365  }
9366 
9367  // The next optimizations are desirable only if SELECT_CC can be lowered.
9368  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
9369  // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
9370 
9371  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
9372  (!LegalOperations ||
9373  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
9374  SDLoc DL(N);
9375  SDValue Ops[] =
9376  { N0.getOperand(0), N0.getOperand(1),
9377  DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
9378  N0.getOperand(2) };
9379  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
9380  }
9381  }
9382 
9383  return SDValue();
9384 }
9385 
9386 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
9387 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
9388  SDValue N0 = N->getOperand(0);
9389  EVT VT = N->getValueType(0);
9390 
9391  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
9392  return SDValue();
9393 
9394  SDValue Src = N0.getOperand(0);
9395  EVT SrcVT = Src.getValueType();
9396  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
9397  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
9398 
9399  // We can safely assume the conversion won't overflow the output range,
9400  // because (for example) (uint8_t)18293.f is undefined behavior.
9401 
9402  // Since we can assume the conversion won't overflow, our decision as to
9403  // whether the input will fit in the float should depend on the minimum
9404  // of the input range and output range.
9405 
9406  // This means this is also safe for a signed input and unsigned output, since
9407  // a negative input would lead to undefined behavior.
9408  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
9409  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
9410  unsigned ActualSize = std::min(InputSize, OutputSize);
9411  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
9412 
9413  // We can only fold away the float conversion if the input range can be
9414  // represented exactly in the float range.
9415  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
9416  if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
9417  unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
9418  : ISD::ZERO_EXTEND;
9419  return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
9420  }
9421  if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
9422  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
9423  return DAG.getBitcast(VT, Src);
9424  }
9425  return SDValue();
9426 }
9427 
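// The precision check above in miniature: float carries a 24-bit significand,
// so every i16 value survives a round trip through float, and the fp_to_sint
// (sint_to_fp x) pair can fold to a plain extension. An exhaustive sketch:
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     for (int32_t x = INT16_MIN; x <= INT16_MAX; ++x)
//       assert((int32_t)(float)(int16_t)x == x);
//   }
//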
9428 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
9429  SDValue N0 = N->getOperand(0);
9430  EVT VT = N->getValueType(0);
9431 
9432  // fold (fp_to_sint c1fp) -> c1
9433  if (isConstantFPBuildVectorOrConstantFP(N0))
9434  return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
9435 
9436  return FoldIntToFPToInt(N, DAG);
9437 }
9438 
9439 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
9440  SDValue N0 = N->getOperand(0);
9441  EVT VT = N->getValueType(0);
9442 
9443  // fold (fp_to_uint c1fp) -> c1
9444  if (isConstantFPBuildVectorOrConstantFP(N0))
9445  return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
9446 
9447  return FoldIntToFPToInt(N, DAG);
9448 }
9449 
9450 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
9451  SDValue N0 = N->getOperand(0);
9452  SDValue N1 = N->getOperand(1);
9453  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
9454  EVT VT = N->getValueType(0);
9455 
9456  // fold (fp_round c1fp) -> c1fp
9457  if (N0CFP)
9458  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
9459 
9460  // fold (fp_round (fp_extend x)) -> x
9461  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
9462  return N0.getOperand(0);
9463 
9464  // fold (fp_round (fp_round x)) -> (fp_round x)
9465  if (N0.getOpcode() == ISD::FP_ROUND) {
9466  const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
9467  const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
9468 
9469  // Skip this folding if it results in an fp_round from f80 to f16.
9470  //
9471  // f80 to f16 always generates an expensive (and as yet, unimplemented)
9472  // libcall to __truncxfhf2 instead of selecting native f16 conversion
9473  // instructions from f32 or f64. Moreover, the first (value-preserving)
9474  // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
9475  // x86.
9476  if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
9477  return SDValue();
9478 
9479  // If the first fp_round isn't a value preserving truncation, it might
9480  // introduce a tie in the second fp_round, that wouldn't occur in the
9481  // single-step fp_round we want to fold to.
9482  // In other words, double rounding isn't the same as rounding.
9483  // Also, this is a value preserving truncation iff both fp_round's are.
9484  if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
9485  SDLoc DL(N);
9486  return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
9487  DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
9488  }
9489  }
9490 
9491  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
9492  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
9493  SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
9494  N0.getOperand(0), N1);
9495  AddToWorklist(Tmp.getNode());
9496  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
9497  Tmp, N0.getOperand(1));
9498  }
9499 
9500  return SDValue();
9501 }
9502 
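// Worked instance of the double-rounding hazard guarded against above, using
// round-to-nearest-even and tiny significands (4 -> 2 -> 1 fraction bits):
// take v = 0.1011b = 0.6875.
//   One step:  round(v, 1 bit) = 0.1b = 0.5        (0.6875 is nearer to 0.5)
//   Two steps: round(v, 2 bits) = 0.11b = 0.75, then
//              round(0.75, 1 bit) ties between 0.5 and 1.0 -> even -> 1.0b
// The two-step result (1.0) differs from the single rounding (0.5), so the
// fold is only done when the first fp_round is known value-preserving or
// under UnsafeFPMath.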
9503 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
9504  SDValue N0 = N->getOperand(0);
9505  EVT VT = N->getValueType(0);
9506  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9507  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9508 
9509  // fold (fp_round_inreg c1fp) -> c1fp
9510  if (N0CFP && isTypeLegal(EVT)) {
9511  SDLoc DL(N);
9512  SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
9513  return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
9514  }
9515 
9516  return SDValue();
9517 }
9518 
9519 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
9520  SDValue N0 = N->getOperand(0);
9521  EVT VT = N->getValueType(0);
9522 
9523  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
9524  if (N->hasOneUse() &&
9525  N->use_begin()->getOpcode() == ISD::FP_ROUND)
9526  return SDValue();
9527 
9528  // fold (fp_extend c1fp) -> c1fp
9529  if (isConstantFPBuildVectorOrConstantFP(N0))
9530  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
9531 
9532  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
9533  if (N0.getOpcode() == ISD::FP16_TO_FP &&
9534  TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
9535  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
9536 
9537  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
9538  // value of X.
9539  if (N0.getOpcode() == ISD::FP_ROUND
9540  && N0.getConstantOperandVal(1) == 1) {
9541  SDValue In = N0.getOperand(0);
9542  if (In.getValueType() == VT) return In;
9543  if (VT.bitsLT(In.getValueType()))
9544  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
9545  In, N0.getOperand(1));
9546  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
9547  }
9548 
9549  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
9550  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
9551  TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9552  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9553  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9554  LN0->getChain(),
9555  LN0->getBasePtr(), N0.getValueType(),
9556  LN0->getMemOperand());
9557  CombineTo(N, ExtLoad);
9558  CombineTo(N0.getNode(),
9559  DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
9560  N0.getValueType(), ExtLoad,
9561  DAG.getIntPtrConstant(1, SDLoc(N0))),
9562  ExtLoad.getValue(1));
9563  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9564  }
9565 
9566  return SDValue();
9567 }
9568 
9569 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
9570  SDValue N0 = N->getOperand(0);
9571  EVT VT = N->getValueType(0);
9572 
9573  // fold (fceil c1) -> fceil(c1)
9574  if (isConstantFPBuildVectorOrConstantFP(N0))
9575  return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
9576 
9577  return SDValue();
9578 }
9579 
9580 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
9581  SDValue N0 = N->getOperand(0);
9582  EVT VT = N->getValueType(0);
9583 
9584  // fold (ftrunc c1) -> ftrunc(c1)
9585  if (isConstantFPBuildVectorOrConstantFP(N0))
9586  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
9587 
9588  return SDValue();
9589 }
9590 
9591 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
9592  SDValue N0 = N->getOperand(0);
9593  EVT VT = N->getValueType(0);
9594 
9595  // fold (ffloor c1) -> ffloor(c1)
9596  if (isConstantFPBuildVectorOrConstantFP(N0))
9597  return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
9598 
9599  return SDValue();
9600 }
9601 
9602 // FIXME: FNEG and FABS have a lot in common; refactor.
9603 SDValue DAGCombiner::visitFNEG(SDNode *N) {
9604  SDValue N0 = N->getOperand(0);
9605  EVT VT = N->getValueType(0);
9606 
9607  // Constant fold FNEG.
9608  if (isConstantFPBuildVectorOrConstantFP(N0))
9609  return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
9610 
9611  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
9612  &DAG.getTarget().Options))
9613  return GetNegatedExpression(N0, DAG, LegalOperations);
9614 
9615  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
9616  // constant pool values.
9617  if (!TLI.isFNegFree(VT) &&
9618  N0.getOpcode() == ISD::BITCAST &&
9619  N0.getNode()->hasOneUse()) {
9620  SDValue Int = N0.getOperand(0);
9621  EVT IntVT = Int.getValueType();
9622  if (IntVT.isInteger() && !IntVT.isVector()) {
9623  APInt SignMask;
9624  if (N0.getValueType().isVector()) {
9625  // For a vector, get a mask such as 0x80... per scalar element
9626  // and splat it.
9627  SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
9628  SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
9629  } else {
9630  // For a scalar, just generate 0x80...
9631  SignMask = APInt::getSignBit(IntVT.getSizeInBits());
9632  }
9633  SDLoc DL0(N0);
9634  Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
9635  DAG.getConstant(SignMask, DL0, IntVT));
9636  AddToWorklist(Int.getNode());
9637  return DAG.getBitcast(VT, Int);
9638  }
9639  }
9640 
9641  // (fneg (fmul c, x)) -> (fmul -c, x)
9642  if (N0.getOpcode() == ISD::FMUL &&
9643  (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
9644  ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
9645  if (CFP1) {
9646  APFloat CVal = CFP1->getValueAPF();
9647  CVal.changeSign();
9648  if (Level >= AfterLegalizeDAG &&
9649  (TLI.isFPImmLegal(CVal, VT) ||
9650  TLI.isOperationLegal(ISD::ConstantFP, VT)))
9651  return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
9652  DAG.getNode(ISD::FNEG, SDLoc(N), VT,
9653  N0.getOperand(1)),
9654  &cast<BinaryWithFlagsSDNode>(N0)->Flags);
9655  }
9656  }
9657 
9658  return SDValue();
9659 }
9660 
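// The integer form of fneg produced above: IEEE-754 negation just flips the
// sign bit, so fneg(bitcast x) becomes bitcast(x ^ 0x80...0) and no FP
// constant needs to be loaded. A scalar sketch (hypothetical value):
//
//   #include <cassert>
//   #include <cstdint>
//   #include <cstring>
//   int main() {
//     float f = 2.5f;
//     uint32_t bits;
//     std::memcpy(&bits, &f, sizeof bits);
//     bits ^= 0x80000000u;                 // flip only the sign bit
//     std::memcpy(&f, &bits, sizeof f);
//     assert(f == -2.5f);
//   }
//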
9661 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
9662  SDValue N0 = N->getOperand(0);
9663  SDValue N1 = N->getOperand(1);
9664  EVT VT = N->getValueType(0);
9665  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9666  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9667 
9668  if (N0CFP && N1CFP) {
9669  const APFloat &C0 = N0CFP->getValueAPF();
9670  const APFloat &C1 = N1CFP->getValueAPF();
9671  return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
9672  }
9673 
9674  // Canonicalize to constant on RHS.
9675  if (isConstantFPBuildVectorOrConstantFP(N0) &&
9676  !isConstantFPBuildVectorOrConstantFP(N1))
9677  return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
9678 
9679  return SDValue();
9680 }
9681 
9682 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
9683  SDValue N0 = N->getOperand(0);
9684  SDValue N1 = N->getOperand(1);
9685  EVT VT = N->getValueType(0);
9686  const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
9687  const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
9688 
9689  if (N0CFP && N1CFP) {
9690  const APFloat &C0 = N0CFP->getValueAPF();
9691  const APFloat &C1 = N1CFP->getValueAPF();
9692  return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
9693  }
9694 
9695  // Canonicalize to constant on RHS.
9696  if (isConstantFPBuildVectorOrConstantFP(N0) &&
9697  !isConstantFPBuildVectorOrConstantFP(N1))
9698  return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
9699 
9700  return SDValue();
9701 }
9702 
9703 SDValue DAGCombiner::visitFABS(SDNode *N) {
9704  SDValue N0 = N->getOperand(0);
9705  EVT VT = N->getValueType(0);
9706 
9707  // fold (fabs c1) -> fabs(c1)
9708  if (isConstantFPBuildVectorOrConstantFP(N0))
9709  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
9710 
9711  // fold (fabs (fabs x)) -> (fabs x)
9712  if (N0.getOpcode() == ISD::FABS)
9713  return N->getOperand(0);
9714 
9715  // fold (fabs (fneg x)) -> (fabs x)
9716  // fold (fabs (fcopysign x, y)) -> (fabs x)
9717  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
9718  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
9719 
9720  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
9721  // constant pool values.
9722  if (!TLI.isFAbsFree(VT) &&
9723  N0.getOpcode() == ISD::BITCAST &&
9724  N0.getNode()->hasOneUse()) {
9725  SDValue Int = N0.getOperand(0);
9726  EVT IntVT = Int.getValueType();
9727  if (IntVT.isInteger() && !IntVT.isVector()) {
9728  APInt SignMask;
9729  if (N0.getValueType().isVector()) {
9730  // For a vector, get a mask such as 0x7f... per scalar element
9731  // and splat it.
9732  SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
9733  SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
9734  } else {
9735  // For a scalar, just generate 0x7f...
9736  SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
9737  }
9738  SDLoc DL(N0);
9739  Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
9740  DAG.getConstant(SignMask, DL, IntVT));
9741  AddToWorklist(Int.getNode());
9742  return DAG.getBitcast(N->getValueType(0), Int);
9743  }
9744  }
9745 
9746  return SDValue();
9747 }
9748 
9749 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
9750  SDValue Chain = N->getOperand(0);
9751  SDValue N1 = N->getOperand(1);
9752  SDValue N2 = N->getOperand(2);
9753 
9754  // If N is a constant we could fold this into a fallthrough or unconditional
9755  // branch. However that doesn't happen very often in normal code, because
9756  // Instcombine/SimplifyCFG should have handled the available opportunities.
9757  // If we did this folding here, it would be necessary to update the
9758  // MachineBasicBlock CFG, which is awkward.
9759 
9760  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
9761  // on the target.
9762  if (N1.getOpcode() == ISD::SETCC &&
9763  TLI.isOperationLegalOrCustom(ISD::BR_CC,
9764  N1.getOperand(0).getValueType())) {
9765  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9766  Chain, N1.getOperand(2),
9767  N1.getOperand(0), N1.getOperand(1), N2);
9768  }
9769 
9770  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
9771  ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
9772  (N1.getOperand(0).hasOneUse() &&
9773  N1.getOperand(0).getOpcode() == ISD::SRL))) {
9774  SDNode *Trunc = nullptr;
9775  if (N1.getOpcode() == ISD::TRUNCATE) {
9776  // Look past the truncate.
9777  Trunc = N1.getNode();
9778  N1 = N1.getOperand(0);
9779  }
9780 
9781  // Match this pattern so that we can generate simpler code:
9782  //
9783  // %a = ...
9784  // %b = and i32 %a, 2
9785  // %c = srl i32 %b, 1
9786  // brcond i32 %c ...
9787  //
9788  // into
9789  //
9790  // %a = ...
9791  // %b = and i32 %a, 2
9792  // %c = setcc eq %b, 0
9793  // brcond %c ...
9794  //
9795  // This applies only when the AND constant value has one bit set and the
9796  // SRL constant is equal to the log2 of the AND constant. The back-end is
9797  // smart enough to convert the result into a TEST/JMP sequence.
9798  SDValue Op0 = N1.getOperand(0);
9799  SDValue Op1 = N1.getOperand(1);
9800 
9801  if (Op0.getOpcode() == ISD::AND &&
9802  Op1.getOpcode() == ISD::Constant) {
9803  SDValue AndOp1 = Op0.getOperand(1);
9804 
9805  if (AndOp1.getOpcode() == ISD::Constant) {
9806  const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
9807 
9808  if (AndConst.isPowerOf2() &&
9809  cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
9810  SDLoc DL(N);
9811  SDValue SetCC =
9812  DAG.getSetCC(DL,
9813  getSetCCResultType(Op0.getValueType()),
9814  Op0, DAG.getConstant(0, DL, Op0.getValueType()),
9815  ISD::SETNE);
9816 
9817  SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
9818  MVT::Other, Chain, SetCC, N2);
9819  // Don't add the new BRCond into the worklist or else SimplifySelectCC
9820  // will convert it back to (X & C1) >> C2.
9821  CombineTo(N, NewBRCond, false);
9822  // Truncate is dead.
9823  if (Trunc)
9824  deleteAndRecombine(Trunc);
9825  // Replace the uses of SRL with SETCC
9826  WorklistRemover DeadNodes(*this);
9827  DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9828  deleteAndRecombine(N1.getNode());
9829  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9830  }
9831  }
9832  }
9833 
9834  if (Trunc)
9835  // Restore N1 if the above transformation doesn't match.
9836  N1 = N->getOperand(1);
9837  }
9838 
9839  // Transform br(xor(x, y)) -> br(x != y)
9840  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
9841  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
9842  SDNode *TheXor = N1.getNode();
9843  SDValue Op0 = TheXor->getOperand(0);
9844  SDValue Op1 = TheXor->getOperand(1);
9845  if (Op0.getOpcode() == Op1.getOpcode()) {
9846  // Avoid missing important xor optimizations.
9847  if (SDValue Tmp = visitXOR(TheXor)) {
9848  if (Tmp.getNode() != TheXor) {
9849  DEBUG(dbgs() << "\nReplacing.8 ";
9850  TheXor->dump(&DAG);
9851  dbgs() << "\nWith: ";
9852  Tmp.getNode()->dump(&DAG);
9853  dbgs() << '\n');
9854  WorklistRemover DeadNodes(*this);
9855  DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
9856  deleteAndRecombine(TheXor);
9857  return DAG.getNode(ISD::BRCOND, SDLoc(N),
9858  MVT::Other, Chain, Tmp, N2);
9859  }
9860 
9861  // visitXOR has changed XOR's operands or replaced the XOR completely,
9862  // bail out.
9863  return SDValue(N, 0);
9864  }
9865  }
9866 
9867  if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
9868  bool Equal = false;
9869  if (isOneConstant(Op1) && Op0.hasOneUse() &&
9870  Op0.getOpcode() == ISD::XOR) {
9871  TheXor = Op0.getNode();
9872  Equal = true;
9873  Op0 = TheXor->getOperand(0); Op1 = TheXor->getOperand(1);
9874  }
9875  EVT SetCCVT = N1.getValueType();
9876  if (LegalTypes)
9877  SetCCVT = getSetCCResultType(SetCCVT);
9878  SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
9879  SetCCVT,
9880  Op0, Op1,
9881  Equal ? ISD::SETEQ : ISD::SETNE);
9882  // Replace the uses of XOR with SETCC
9883  WorklistRemover DeadNodes(*this);
9884  DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9885  deleteAndRecombine(N1.getNode());
9886  return DAG.getNode(ISD::BRCOND, SDLoc(N),
9887  MVT::Other, Chain, SetCC, N2);
9888  }
9889  }
9890 
9891  return SDValue();
9892 }
9893 
9894 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
9895 //
9896 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
9897  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
9898  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
9899 
9900  // If N is a constant we could fold this into a fallthrough or unconditional
9901  // branch. However that doesn't happen very often in normal code, because
9902  // Instcombine/SimplifyCFG should have handled the available opportunities.
9903  // If we did this folding here, it would be necessary to update the
9904  // MachineBasicBlock CFG, which is awkward.
9905 
9906  // Use SimplifySetCC to simplify SETCC's.
9907  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
9908  CondLHS, CondRHS, CC->get(), SDLoc(N),
9909  false);
9910  if (Simp.getNode()) AddToWorklist(Simp.getNode());
9911 
9912  // fold to a simpler setcc
9913  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
9914  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9915  N->getOperand(0), Simp.getOperand(2),
9916  Simp.getOperand(0), Simp.getOperand(1),
9917  N->getOperand(4));
9918 
9919  return SDValue();
9920 }
9921 
9922 /// Return true if 'Use' is a load or a store that uses N as its base pointer
9923 /// and that N may be folded in the load / store addressing mode.
9924 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
9925  SelectionDAG &DAG,
9926  const TargetLowering &TLI) {
9927  EVT VT;
9928  unsigned AS;
9929 
9930  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
9931  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
9932  return false;
9933  VT = LD->getMemoryVT();
9934  AS = LD->getAddressSpace();
9935  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
9936  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
9937  return false;
9938  VT = ST->getMemoryVT();
9939  AS = ST->getAddressSpace();
9940  } else
9941  return false;
9942 
9943  TargetLowering::AddrMode AM;
9944  if (N->getOpcode() == ISD::ADD) {
9945  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9946  if (Offset)
9947  // [reg +/- imm]
9948  AM.BaseOffs = Offset->getSExtValue();
9949  else
9950  // [reg +/- reg]
9951  AM.Scale = 1;
9952  } else if (N->getOpcode() == ISD::SUB) {
9953  ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
9954  if (Offset)
9955  // [reg +/- imm]
9956  AM.BaseOffs = -Offset->getSExtValue();
9957  else
9958  // [reg +/- reg]
9959  AM.Scale = 1;
9960  } else
9961  return false;
9962 
9963  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
9964  VT.getTypeForEVT(*DAG.getContext()), AS);
9965 }
9966 
9967 /// Try turning a load/store into a pre-indexed load/store when the base
9968 /// pointer is an add or subtract and it has other uses besides the load/store.
9969 /// After the transformation, the new indexed load/store has effectively folded
9970 /// the add/subtract in and all of its other uses are redirected to the
9971 /// new load/store.
9972 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
9973  if (Level < AfterLegalizeDAG)
9974  return false;
9975 
9976  bool isLoad = true;
9977  SDValue Ptr;
9978  EVT VT;
9979  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9980  if (LD->isIndexed())
9981  return false;
9982  VT = LD->getMemoryVT();
9983  if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
9984  !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
9985  return false;
9986  Ptr = LD->getBasePtr();
9987  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
9988  if (ST->isIndexed())
9989  return false;
9990  VT = ST->getMemoryVT();
9991  if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
9992  !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
9993  return false;
9994  Ptr = ST->getBasePtr();
9995  isLoad = false;
9996  } else {
9997  return false;
9998  }
9999 
10000  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
10001  // out. There is no reason to make this a preinc/predec.
10002  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
10003  Ptr.getNode()->hasOneUse())
10004  return false;
10005 
10006  // Ask the target to do addressing mode selection.
10007  SDValue BasePtr;
10008  SDValue Offset;
10009  ISD::MemIndexedMode AM = ISD::UNINDEXED;
10010  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
10011  return false;
10012 
10013  // Backends without true r+i pre-indexed forms may need to pass a
10014  // constant base with a variable offset so that constant coercion
10015  // will work with the patterns in canonical form.
10016  bool Swapped = false;
10017  if (isa<ConstantSDNode>(BasePtr)) {
10018  std::swap(BasePtr, Offset);
10019  Swapped = true;
10020  }
10021 
10022  // Don't create an indexed load / store with zero offset.
10023  if (isNullConstant(Offset))
10024  return false;
10025 
10026  // Try turning it into a pre-indexed load / store except when:
10027  // 1) The new base ptr is a frame index.
10028  // 2) If N is a store and the new base ptr is either the same as or is a
10029  // predecessor of the value being stored.
10030  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
10031  // that would create a cycle.
10032  // 4) All uses are load / store ops that use it as old base ptr.
10033 
10034  // Check #1. Preinc'ing a frame index would require copying the stack pointer
10035  // (plus the implicit offset) to a register to preinc anyway.
10036  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10037  return false;
10038 
10039  // Check #2.
10040  if (!isLoad) {
10041  SDValue Val = cast<StoreSDNode>(N)->getValue();
10042  if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
10043  return false;
10044  }
10045 
10046  // Caches for hasPredecessorHelper.
10047  SmallPtrSet<const SDNode *, 32> Visited;
10048  SmallVector<const SDNode *, 16> Worklist;
10049  Worklist.push_back(N);
10050 
10051  // If the offset is a constant, there may be other adds of constants that
10052  // can be folded with this one. We should do this to avoid having to keep
10053  // a copy of the original base pointer.
10054  SmallVector<SDNode *, 16> OtherUses;
10055  if (isa<ConstantSDNode>(Offset))
10056  for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
10057  UE = BasePtr.getNode()->use_end();
10058  UI != UE; ++UI) {
10059  SDUse &Use = UI.getUse();
10060  // Skip the use that is Ptr and uses of other results from BasePtr's
10061  // node (important for nodes that return multiple results).
10062  if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
10063  continue;
10064 
10065  if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
10066  continue;
10067 
10068  if (Use.getUser()->getOpcode() != ISD::ADD &&
10069  Use.getUser()->getOpcode() != ISD::SUB) {
10070  OtherUses.clear();
10071  break;
10072  }
10073 
10074  SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
10075  if (!isa<ConstantSDNode>(Op1)) {
10076  OtherUses.clear();
10077  break;
10078  }
10079 
10080  // FIXME: In some cases, we can be smarter about this.
10081  if (Op1.getValueType() != Offset.getValueType()) {
10082  OtherUses.clear();
10083  break;
10084  }
10085 
10086  OtherUses.push_back(Use.getUser());
10087  }
10088 
10089  if (Swapped)
10090  std::swap(BasePtr, Offset);
10091 
10092  // Now check for #3 and #4.
10093  bool RealUse = false;
10094 
10095  for (SDNode *Use : Ptr.getNode()->uses()) {
10096  if (Use == N)
10097  continue;
10098  if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
10099  return false;
10100 
10101  // If Ptr may be folded in addressing mode of other use, then it's
10102  // not profitable to do this transformation.
10103  if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
10104  RealUse = true;
10105  }
10106 
10107  if (!RealUse)
10108  return false;
10109 
10110  SDValue Result;
10111  if (isLoad)
10112  Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
10113  BasePtr, Offset, AM);
10114  else
10115  Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
10116  BasePtr, Offset, AM);
10117  ++PreIndexedNodes;
10118  ++NodesCombined;
10119  DEBUG(dbgs() << "\nReplacing.4 ";
10120  N->dump(&DAG);
10121  dbgs() << "\nWith: ";
10122  Result.getNode()->dump(&DAG);
10123  dbgs() << '\n');
10124  WorklistRemover DeadNodes(*this);
10125  if (isLoad) {
10126  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
10127  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
10128  } else {
10129  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
10130  }
10131 
10132  // Finally, since the node is now dead, remove it from the graph.
10133  deleteAndRecombine(N);
10134 
10135  if (Swapped)
10136  std::swap(BasePtr, Offset);
10137 
10138  // Replace other uses of BasePtr that can be updated to use Ptr
10139  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
10140  unsigned OffsetIdx = 1;
10141  if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
10142  OffsetIdx = 0;
10143  assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
10144  BasePtr.getNode() && "Expected BasePtr operand");
10145 
10146  // We need to replace ptr0 in the following expression:
10147  // x0 * offset0 + y0 * ptr0 = t0
10148  // knowing that
10149  // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
10150  //
10151  // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
10152  // indexed load/store and the expression that needs to be rewritten.
10153  //
10154  // Therefore, we have:
10155  // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
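  // Concrete instance, assuming hypothetical offsets: the load was
  // pre-incremented by 8 (t1 = ptr0 + 8, so x1 = y1 = 1, offset1 = 8) and
  // another use computes t0 = ptr0 + 20 (x0 = y0 = 1, offset0 = 20). Then
  //   t0 = (20 - 8) + t1 = t1 + 12,
  // i.e. the other use is rewritten in terms of the updated base t1.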
10156 
10157  ConstantSDNode *CN =
10158  cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
10159  int X0, X1, Y0, Y1;
10160  const APInt &Offset0 = CN->getAPIntValue();
10161  APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
10162 
10163  X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
10164  Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
10165  X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
10166  Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
10167 
10168  unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
10169 
10170  APInt CNV = Offset0;
10171  if (X0 < 0) CNV = -CNV;
10172  if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
10173  else CNV = CNV - Offset1;
10174 
10175  SDLoc DL(OtherUses[i]);
10176 
10177  // We can now generate the new expression.
10178  SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
10179  SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
10180 
10181  SDValue NewUse = DAG.getNode(Opcode,
10182  DL,
10183  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
10184  DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
10185  deleteAndRecombine(OtherUses[i]);
10186  }
10187 
10188  // Replace the uses of Ptr with uses of the updated base value.
10189  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
10190  deleteAndRecombine(Ptr.getNode());
10191 
10192  return true;
10193 }
10194 
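// What the transform buys, sketched in ARM-like assembly (hypothetical
// registers): a separate pointer update plus a load
//
//   add r1, r1, #4
//   ldr r0, [r1]
//
// becomes a single pre-indexed load that writes the updated base back:
//
//   ldr r0, [r1, #4]!    ; r0 = *(r1 + 4), then r1 += 4
//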
10195 /// Try to combine a load/store with an add/sub of the base pointer node into
10196 /// a post-indexed load/store. The transformation effectively folds the
10197 /// add/subtract into the new indexed load/store, and all uses of the add/sub
10198 /// are redirected to the new load/store.
10199 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
10200  if (Level < AfterLegalizeDAG)
10201  return false;
10202 
10203  bool isLoad = true;
10204  SDValue Ptr;
10205  EVT VT;
10206  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
10207  if (LD->isIndexed())
10208  return false;
10209  VT = LD->getMemoryVT();
10210  if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
10211  !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
10212  return false;
10213  Ptr = LD->getBasePtr();
10214  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
10215  if (ST->isIndexed())
10216  return false;
10217  VT = ST->getMemoryVT();
10218  if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
10219  !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
10220  return false;
10221  Ptr = ST->getBasePtr();
10222  isLoad = false;
10223  } else {
10224  return false;
10225  }
10226 
10227  if (Ptr.getNode()->hasOneUse())
10228  return false;
10229 
10230  for (SDNode *Op : Ptr.getNode()->uses()) {
10231  if (Op == N ||
10232  (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
10233  continue;
10234 
10235  SDValue BasePtr;
10236  SDValue Offset;
10237  ISD::MemIndexedMode AM = ISD::UNINDEXED;
10238  if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
10239  // Don't create an indexed load / store with zero offset.
10240  if (isNullConstant(Offset))
10241  continue;
10242 
10243  // Try turning it into a post-indexed load / store except when
10244  // 1) All uses are load / store ops that use it as base ptr (and
10245  // it may be folded as addressing mode).
10246  // 2) Op must be independent of N, i.e. Op is neither a predecessor
10247  // nor a successor of N. Otherwise, if Op is folded that would
10248  // create a cycle.
10249 
10250  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
10251  continue;
10252 
10253  // Check for #1.
10254  bool TryNext = false;
10255  for (SDNode *Use : BasePtr.getNode()->uses()) {
10256  if (Use == Ptr.getNode())
10257  continue;
10258 
10259  // If all the uses are load / store addresses, then don't do the
10260  // transformation.
10261  if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
10262  bool RealUse = false;
10263  for (SDNode *UseUse : Use->uses()) {
10264  if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
10265  RealUse = true;
10266  }
10267 
10268  if (!RealUse) {
10269  TryNext = true;
10270  break;
10271  }
10272  }
10273  }
10274 
10275  if (TryNext)
10276  continue;
10277 
10278  // Check for #2
10279  if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
10280  SDValue Result = isLoad
10281  ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
10282  BasePtr, Offset, AM)
10283  : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
10284  BasePtr, Offset, AM);
10285  ++PostIndexedNodes;
10286  ++NodesCombined;
10287  DEBUG(dbgs() << "\nReplacing.5 ";
10288  N->dump(&DAG);
10289  dbgs() << "\nWith: ";
10290  Result.getNode()->dump(&DAG);
10291  dbgs() << '\n');
10292  WorklistRemover DeadNodes(*this);
10293  if (isLoad) {
10294  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
10295  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
10296  } else {
10297  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
10298  }
10299 
10300  // Finally, since the node is now dead, remove it from the graph.
10301  deleteAndRecombine(N);
10302 
10303  // Replace the uses of Use with uses of the updated base value.
10304  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
10305  Result.getValue(isLoad ? 1 : 0));
10306  deleteAndRecombine(Op);
10307  return true;
10308  }
10309  }
10310  }
10311 
10312  return false;
10313 }
10314 
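// The post-indexed analogue, again in ARM-like assembly (hypothetical
// registers): an access followed by a pointer update
//
//   ldr r0, [r1]
//   add r1, r1, #4
//
// folds into one instruction that loads first and then updates the base:
//
//   ldr r0, [r1], #4     ; r0 = *r1, then r1 += 4
//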
10315 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
10316 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
10317  ISD::MemIndexedMode AM = LD->getAddressingMode();
10318  assert(AM != ISD::UNINDEXED);
10319  SDValue BP = LD->getOperand(1);
10320  SDValue Inc = LD->getOperand(2);
10321 
10322  // Some backends use TargetConstants for load offsets, but don't expect
10323  // TargetConstants in general ADD nodes. We can convert these constants into
10324  // regular Constants (if the constant is not opaque).
10325  assert((Inc.getOpcode() != ISD::TargetConstant ||
10326  !cast<ConstantSDNode>(Inc)->isOpaque()) &&
10327  "Cannot split out indexing using opaque target constants");
10328  if (Inc.getOpcode() == ISD::TargetConstant) {
10329  ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
10330  Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
10331  ConstInc->getValueType(0));
10332  }
10333 
10334  unsigned Opc =
10335  (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
10336  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
10337 }
10338 
10339 SDValue DAGCombiner::visitLOAD(SDNode *N) {
10340  LoadSDNode *LD = cast<LoadSDNode>(N);
10341  SDValue Chain = LD->getChain();
10342  SDValue Ptr = LD->getBasePtr();
10343 
10344  // If load is not volatile and there are no uses of the loaded value (and
10345  // the updated indexed value in case of indexed loads), change uses of the
10346  // chain value into uses of the chain input (i.e. delete the dead load).
10347  if (!LD->isVolatile()) {
10348  if (N->getValueType(1) == MVT::Other) {
10349  // Unindexed loads.
10350  if (!N->hasAnyUseOfValue(0)) {
10351  // It's not safe to use the two value CombineTo variant here. e.g.
10352  // v1, chain2 = load chain1, loc
10353  // v2, chain3 = load chain2, loc
10354  // v3 = add v2, c
10355  // Now we replace use of chain2 with chain1. This makes the second load
10356  // isomorphic to the one we are deleting, and thus makes this load live.
10357  DEBUG(dbgs() << "\nReplacing.6 ";
10358  N->dump(&DAG);
10359  dbgs() << "\nWith chain: ";
10360  Chain.getNode()->dump(&DAG);
10361  dbgs() << "\n");
10362  WorklistRemover DeadNodes(*this);
10363  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10364 
10365  if (N->use_empty())
10366  deleteAndRecombine(N);
10367 
10368  return SDValue(N, 0); // Return N so it doesn't get rechecked!
10369  }
10370  } else {
10371  // Indexed loads.
10372  assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
10373 
10374  // If this load has an opaque TargetConstant offset, then we cannot split
10375  // the indexing into an add/sub directly (that TargetConstant may not be
10376  // valid for a different type of node, and we cannot convert an opaque
10377  // target constant into a regular constant).
10378  bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
10379  cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
10380 
10381  if (!N->hasAnyUseOfValue(0) &&
10382  ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
10383  SDValue Undef = DAG.getUNDEF(N->getValueType(0));
10384  SDValue Index;
10385  if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
10386  Index = SplitIndexingFromLoad(LD);
10387  // Try to fold the base pointer arithmetic into subsequent loads and
10388  // stores.
10389  AddUsersToWorklist(N);
10390  } else
10391  Index = DAG.getUNDEF(N->getValueType(1));
10392  DEBUG(dbgs() << "\nReplacing.7 ";
10393  N->dump(&DAG);
10394  dbgs() << "\nWith: ";
10395  Undef.getNode()->dump(&DAG);
10396  dbgs() << " and 2 other values\n");
10397  WorklistRemover DeadNodes(*this);
10398  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
10399  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
10400  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
10401  deleteAndRecombine(N);
10402  return SDValue(N, 0); // Return N so it doesn't get rechecked!
10403  }
10404  }
10405  }
10406 
10407  // If this load is directly stored, replace the load value with the stored
10408  // value.
10409  // TODO: Handle store large -> read small portion.
10410  // TODO: Handle TRUNCSTORE/LOADEXT
10411  if (OptLevel != CodeGenOpt::None &&
10412  ISD::isNormalLoad(N) && !LD->isVolatile()) {
10413  if (ISD::isNON_TRUNCStore(Chain.getNode())) {
10414  StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
10415  if (PrevST->getBasePtr() == Ptr &&
10416  PrevST->getValue().getValueType() == N->getValueType(0))
10417  return CombineTo(N, Chain.getOperand(1), Chain);
10418  }
10419  }
10420 
10421  // Try to infer better alignment information than the load already has.
10422  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
10423  if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
10424  if (Align > LD->getMemOperand()->getBaseAlignment()) {
10425  SDValue NewLoad = DAG.getExtLoad(
10426  LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
10427  LD->getPointerInfo(), LD->getMemoryVT(), Align,
10428  LD->getMemOperand()->getFlags(), LD->getAAInfo());
10429  if (NewLoad.getNode() != N)
10430  return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
10431  }
10432  }
10433  }
10434 
10435  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
10436  : DAG.getSubtarget().useAA();
10437 #ifndef NDEBUG
10438  if (CombinerAAOnlyFunc.getNumOccurrences() &&
10439  CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
10440  UseAA = false;
10441 #endif
10442  if (UseAA && LD->isUnindexed()) {
10443  // Walk up chain skipping non-aliasing memory nodes.
10444  SDValue BetterChain = FindBetterChain(N, Chain);
10445 
10446  // If there is a better chain.
10447  if (Chain != BetterChain) {
10448  SDValue ReplLoad;
10449 
10450  // Replace the chain to avoid the dependency.
10451  if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
10452  ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
10453  BetterChain, Ptr, LD->getMemOperand());
10454  } else {
10455  ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
10456  LD->getValueType(0),
10457  BetterChain, Ptr, LD->getMemoryVT(),
10458  LD->getMemOperand());
10459  }
10460 
10461  // Create token factor to keep old chain connected.
10462  SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
10463  MVT::Other, Chain, ReplLoad.getValue(1));
10464 
10465  // Make sure the new and old chains are cleaned up.
10466  AddToWorklist(Token.getNode());
10467 
10468  // Replace uses with load result and token factor. Don't add users
10469  // to work list.
10470  return CombineTo(N, ReplLoad.getValue(0), Token, false);
10471  }
10472  }
10473 
10474  // Try transforming N to an indexed load.
10475  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
10476  return SDValue(N, 0);
10477 
10478  // Try to slice up N to more direct loads if the slices are mapped to
10479  // different register banks or pairing can take place.
10480  if (SliceUpLoad(N))
10481  return SDValue(N, 0);
10482 
10483  return SDValue();
10484 }
10485 
10486 namespace {
10487 /// \brief Helper structure used to slice a load in smaller loads.
10488 /// Basically a slice is obtained from the following sequence:
10489 /// Origin = load Ty1, Base
10490 /// Shift = srl Ty1 Origin, CstTy Amount
10491 /// Inst = trunc Shift to Ty2
10492 ///
10493 /// Then, it will be rewritten into:
10494 /// Slice = load SliceTy, Base + SliceOffset
10495 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
10496 ///
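/// For example, on a little-endian target, extracting the high half of an
/// i64 load:
/// Origin = load i64, Base
/// Shift = srl i64 Origin, 32
/// Inst = trunc i64 Shift to i32
/// can become a single narrower load at offset 4:
/// Slice = load i32, Base + 4
///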
10497 /// SliceTy is deduced from the number of bits that are actually used to
10498 /// build Inst.
10499 struct LoadedSlice {
10500  /// \brief Helper structure used to compute the cost of a slice.
10501  struct Cost {
10502  /// Are we optimizing for code size?
10503  bool ForCodeSize;
10504  /// Various costs.
10505  unsigned Loads;
10506  unsigned Truncates;
10507  unsigned CrossRegisterBanksCopies;
10508  unsigned ZExts;
10509  unsigned Shift;
10510 
10511  Cost(bool ForCodeSize = false)
10512  : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
10513  CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
10514 
10515  /// \brief Get the cost of one isolated slice.
10516  Cost(const LoadedSlice &LS, bool ForCodeSize = false)
10517  : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
10518  CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
10519  EVT TruncType = LS.Inst->getValueType(0);
10520  EVT LoadedType = LS.getLoadedType();
10521  if (TruncType != LoadedType &&
10522  !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
10523  ZExts = 1;
10524  }
10525 
10526  /// \brief Account for slicing gain in the current cost.
10527  /// Slicing provides a few gains, like removing a shift or a
10528  /// truncate. This method grows the cost of the original
10529  /// load by the gain from this slice.
10530  void addSliceGain(const LoadedSlice &LS) {
10531  // Each slice saves a truncate.
10532  const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
10533  if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
10534  LS.Inst->getValueType(0)))
10535  ++Truncates;
10536  // If there is a shift amount, this slice gets rid of it.
10537  if (LS.Shift)
10538  ++Shift;
10539  // If this slice can merge a cross register bank copy, account for it.
10540  if (LS.canMergeExpensiveCrossRegisterBankCopy())
10541  ++CrossRegisterBanksCopies;
10542  }
10543 
10544  Cost &operator+=(const Cost &RHS) {
10545  Loads += RHS.Loads;
10546  Truncates += RHS.Truncates;
10547  CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
10548  ZExts += RHS.ZExts;
10549  Shift += RHS.Shift;
10550  return *this;
10551  }
10552 
10553  bool operator==(const Cost &RHS) const {
10554  return Loads == RHS.Loads && Truncates == RHS.Truncates &&
10555  CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
10556  ZExts == RHS.ZExts && Shift == RHS.Shift;
10557  }
10558 
10559  bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
10560 
10561  bool operator<(const Cost &RHS) const {
10562  // Assume cross register banks copies are as expensive as loads.
10563  // FIXME: Do we want some more target hooks?
10564  unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
10565  unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
10566  // Unless we are optimizing for code size, consider the
10567  // expensive operation first.
10568  if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
10569  return ExpensiveOpsLHS < ExpensiveOpsRHS;
10570  return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
10571  (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
10572  }
10573 
10574  bool operator>(const Cost &RHS) const { return RHS < *this; }
10575 
10576  bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
10577 
10578  bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
10579  };
10580  // The last instruction that represents the slice. This should be a
10581  // truncate instruction.
10582  SDNode *Inst;
10583  // The original load instruction.
10584  LoadSDNode *Origin;
10585  // The right shift amount in bits from the original load.
10586  unsigned Shift;
10587  // The DAG from which Origin comes.
10588  // This is used to get some contextual information about legal types, etc.
10589  SelectionDAG *DAG;
10590 
10591  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
10592  unsigned Shift = 0, SelectionDAG *DAG = nullptr)
10593  : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
10594 
10595  /// \brief Get the bits used in the chunk of bits loaded by Origin.
10596  /// \return Result is as wide as the original load, with used bits set to 1
10597  /// and unused bits set to 0.
10598  APInt getUsedBits() const {
10599  // Reproduce the trunc(lshr) sequence:
10600  // - Start from the truncated value.
10601  // - Zero extend to the desired bit width.
10602  // - Shift left.
10603  assert(Origin && "No original load to compare against.");
10604  unsigned BitWidth = Origin->getValueSizeInBits(0);
10605  assert(Inst && "This slice is not bound to an instruction");
10606  assert(Inst->getValueSizeInBits(0) <= BitWidth &&
10607  "Extracted slice is bigger than the whole type!");
10608  APInt UsedBits(Inst->getValueSizeInBits(0), 0);
10609  UsedBits.setAllBits();
10610  UsedBits = UsedBits.zext(BitWidth);
10611  UsedBits <<= Shift;
10612  return UsedBits;
10613  }
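  // Worked example: for an i8 slice of an i32 load with Shift == 16, UsedBits
  // starts as the 8-bit all-ones value 0xFF, is zero-extended to i32
  // (0x000000FF), and is then shifted left by 16 to yield 0x00FF0000.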
10614 
10615  /// \brief Get the size of the slice to be loaded in bytes.
10616  unsigned getLoadedSize() const {
10617  unsigned SliceSize = getUsedBits().countPopulation();
10618  assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
10619  return SliceSize / 8;
10620  }
10621 
10622  /// \brief Get the type that will be loaded for this slice.
10623  /// Note: This may not be the final type for the slice.
10624  EVT getLoadedType() const {
10625  assert(DAG && "Missing context");
10626  LLVMContext &Ctxt = *DAG->getContext();
10627  return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
10628  }
10629 
10630  /// \brief Get the alignment of the load used for this slice.
10631  unsigned getAlignment() const {
10632  unsigned Alignment = Origin->getAlignment();
10633  unsigned Offset = getOffsetFromBase();
10634  if (Offset != 0)
10635  Alignment = MinAlign(Alignment, Alignment + Offset);
10636  return Alignment;
10637  }
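  // E.g., with an original alignment of 4 and a slice at byte offset 2, this
  // yields MinAlign(4, 4 + 2) == 2, so the sliced load uses alignment 2.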
10638 
10639  /// \brief Check if this slice can be rewritten with legal operations.
10640  bool isLegal() const {
10641  // An invalid slice is not legal.
10642  if (!Origin || !Inst || !DAG)
10643  return false;
10644 
10645  // Offsets are for indexed loads only; we do not handle that.
10646  if (!Origin->getOffset().isUndef())
10647  return false;
10648 
10649  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10650 
10651  // Check that the type is legal.
10652  EVT SliceType = getLoadedType();
10653  if (!TLI.isTypeLegal(SliceType))
10654  return false;
10655 
10656  // Check that the load is legal for this type.
10657  if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
10658  return false;
10659 
10660  // Check that the offset can be computed.
10661  // 1. Check its type.
10662  EVT PtrType = Origin->getBasePtr().getValueType();
10663  if (PtrType == MVT::Untyped || PtrType.isExtended())
10664  return false;
10665 
10666  // 2. Check that it fits in the immediate.
10667  if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
10668  return false;
10669 
10670  // 3. Check that the computation is legal.
10671  if (!TLI.isOperationLegal(ISD::ADD, PtrType))
10672  return false;
10673 
10674  // Check that the zext is legal if it needs one.
10675  EVT TruncateType = Inst->getValueType(0);
10676  if (TruncateType != SliceType &&
10677  !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
10678  return false;
10679 
10680  return true;
10681  }
10682 
10683  /// \brief Get the offset in bytes of this slice in the original chunk of
10684  /// bits.
10685  /// \pre DAG != nullptr.
10686  uint64_t getOffsetFromBase() const {
10687  assert(DAG && "Missing context.");
10688  bool IsBigEndian = DAG->getDataLayout().isBigEndian();
10689  assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
10690  uint64_t Offset = Shift / 8;
10691  unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
10692  assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
10693  "The size of the original loaded type is not a multiple of a"
10694  " byte.");
10695  // If Offset is bigger than TySizeInBytes, it means we are loading all
10696  // zeros. This should have been optimized away earlier in the process.
10697  assert(TySizeInBytes > Offset &&
10698  "Invalid shift amount for given loaded size");
10699  if (IsBigEndian)
10700  Offset = TySizeInBytes - Offset - getLoadedSize();
10701  return Offset;
10702  }
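  // E.g., for a 1-byte slice of an i32 load with Shift == 16, the offset is
  // 16 / 8 == 2 on a little-endian target, and 4 - 2 - 1 == 1 on a
  // big-endian target.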
10703 
10704  /// \brief Generate the sequence of instructions to load the slice
10705  /// represented by this object and redirect the uses of this slice to
10706  /// this new sequence of instructions.
10707  /// \pre this->Inst && this->Origin are valid Instructions and this
10708  /// object passed the legal check: LoadedSlice::isLegal returned true.
10709  /// \return The last instruction of the sequence used to load the slice.
10710  SDValue loadSlice() const {
10711  assert(Inst && Origin && "Unable to replace a non-existing slice.");
10712  const SDValue &OldBaseAddr = Origin->getBasePtr();
10713  SDValue BaseAddr = OldBaseAddr;
10714  // Get the offset in that chunk of bytes w.r.t. the endianness.
10715  int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
10716  assert(Offset >= 0 && "Offset too big to fit in int64_t!");
10717  if (Offset) {
10718  // BaseAddr = BaseAddr + Offset.
10719  EVT ArithType = BaseAddr.getValueType();
10720  SDLoc DL(Origin);
10721  BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
10722  DAG->getConstant(Offset, DL, ArithType));
10723  }
10724 
10725  // Create the type of the loaded slice according to its size.
10726  EVT SliceType = getLoadedType();
10727 
10728  // Create the load for the slice.
10729  SDValue LastInst =
10730  DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
10731  Origin->getPointerInfo().getWithOffset(Offset),
10732  getAlignment(), Origin->getMemOperand()->getFlags());
10733  // If the final type is not the same as the loaded type, this means that
10734  // we have to pad with zero. Create a zero extend for that.
10735  EVT FinalType = Inst->getValueType(0);
10736  if (SliceType != FinalType)
10737  LastInst =
10738  DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
10739  return LastInst;
10740  }
10741 
10742  /// \brief Check if this slice can be merged with an expensive cross register
10743  /// bank copy. E.g.,
10744  /// i = load i32
10745  /// f = bitcast i32 i to float
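  /// If the source and destination register banks share no common subclass,
  /// loading the slice directly in the destination bank (when that is legal
  /// and sufficiently aligned) makes the cross-bank copy unnecessary.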
10746  bool canMergeExpensiveCrossRegisterBankCopy() const {
10747  if (!Inst || !Inst->hasOneUse())
10748  return false;
10749  SDNode *Use = *Inst->use_begin();
10750  if (Use->getOpcode() != ISD::BITCAST)
10751  return false;
10752  assert(DAG && "Missing context");
10753  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
10754  EVT ResVT = Use->getValueType(0);
10755  const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
10756  const TargetRegisterClass *ArgRC =
10757  TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
10758  if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
10759  return false;
10760 
10761  // At this point, we know that we perform a cross-register-bank copy.
10762  // Check if it is expensive.
10763  const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
10764  // Assume bitcasts are cheap, unless the register classes do not
10765  // explicitly share a common subclass.
10766  if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
10767  return false;
10768 
10769  // Check if it will be merged with the load.
10770  // 1. Check the alignment constraint.
10771  unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
10772  ResVT.getTypeForEVT(*DAG->getContext()));
10773 
10774  if (RequiredAlignment > getAlignment())
10775  return false;
10776 
10777  // 2. Check that the load is a legal operation for that type.
10778  if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
10779  return false;
10780 
10781  // 3. Check that we do not have a zext in the way.
10782  if (Inst->getValueType(0) != getLoadedType())
10783  return false;
10784 
10785  return true;
10786  }
10787 };
10788 }
10789 
10790 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
10791 /// \p UsedBits looks like 0..0 1..1 0..0.
10792 static bool areUsedBitsDense(const APInt &UsedBits) {
10793  // If all the bits are one, this is dense!
10794  if (UsedBits.isAllOnesValue())
10795  return true;
10796 
10797  // Get rid of the unused bits on the right.
10798  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
10799  // Get rid of the unused bits on the left.
10800  if (NarrowedUsedBits.countLeadingZeros())
10801  NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
10802  // Check that the chunk of bits is completely used.
10803  return NarrowedUsedBits.isAllOnesValue();
10804 }
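// E.g., 0x00FF0000 is dense (a single contiguous run of ones), whereas
// 0x00FF00FF is not: trimming the zero bits on both ends leaves 0xFF00FF,
// which is not all ones.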
10805 
10806 /// \brief Check whether or not \p First and \p Second are next to each other
10807 /// in memory. This means that there is no hole between the bits loaded
10808 /// by \p First and the bits loaded by \p Second.
10809 static bool areSlicesNextToEachOther(const LoadedSlice &First,
10810  const LoadedSlice &Second) {
10811  assert(First.Origin == Second.Origin && First.Origin &&
10812  "Unable to match different memory origins.");
10813  APInt UsedBits = First.getUsedBits();
10814  assert((UsedBits & Second.getUsedBits()) == 0 &&
10815  "Slices are not supposed to overlap.");
10816  UsedBits |= Second.getUsedBits();
10817  return areUsedBitsDense(UsedBits);
10818 }
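// E.g., two slices of the same i32 load using bits 0x0000FFFF and 0xFFFF0000
// are next to each other (their union is dense); slices using 0x000000FF and
// 0xFFFF0000 are not, since bits 8-15 form a hole.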
10819 
10820 /// \brief Adjust the \p GlobalLSCost according to the target
10821 /// pairing capabilities and the layout of the slices.
10822 /// \pre \p GlobalLSCost should account for at least as many loads as
10823 /// there are slices in \p LoadedSlices.
10824 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10825  LoadedSlice::Cost &GlobalLSCost) {
10826  unsigned NumberOfSlices = LoadedSlices.size();
10827  // If there are fewer than 2 elements, no pairing is possible.
10828  if (NumberOfSlices < 2)
10829  return;
10830 
10831  // Sort the slices so that elements that are likely to be next to each
10832  // other in memory are next to each other in the list.
10833  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
10834  [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
10835  assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
10836  return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
10837  });
10838  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
10839  // First (resp. Second) is the first (resp. second) potential candidate
10840  // to be placed in a paired load.
10841  const LoadedSlice *First = nullptr;
10842  const LoadedSlice *Second = nullptr;
10843  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
10844  // Set the beginning of the pair.
10845  First = Second) {
10846 
10847  Second = &LoadedSlices[CurrSlice];
10848 
10849  // If First is NULL, it means we start a new pair.
10850  // Get to the next slice.
10851  if (!First)
10852  continue;
10853 
10854  EVT LoadedType = First->getLoadedType();
10855 
10856  // If the types of the slices are different, we cannot pair them.
10857  if (LoadedType != Second->getLoadedType())
10858  continue;
10859 
10860  // Check if the target supplies paired loads for this type.
10861  unsigned RequiredAlignment = 0;
10862  if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
10863  // Move to the next pair; this type is hopeless.
10864  Second = nullptr;
10865  continue;
10866  }
10867  // Check if we meet the alignment requirement.
10868  if (RequiredAlignment > First->getAlignment())
10869  continue;
10870 
10871  // Check that both loads are next to each other in memory.
10872  if (!areSlicesNextToEachOther(*First, *Second))
10873  continue;
10874 
10875  assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
10876  --GlobalLSCost.Loads;
10877  // Move to the next pair.
10878  Second = nullptr;
10879  }
10880 }
10881 
10882 /// \brief Check the profitability of all involved LoadedSlice.
10883 /// Currently, it is considered profitable if there are exactly two
10884 /// involved slices (1) which are (2) next to each other in memory, and
10885 /// whose cost (\see LoadedSlice::Cost) is smaller than that of the original load (3).
10886 ///
10887 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
10888 /// the elements themselves.
10889 ///
10890 /// FIXME: When the cost model will be mature enough, we can relax
10891 /// constraints (1) and (2).
10892 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
10893  const APInt &UsedBits, bool ForCodeSize) {
10894  unsigned NumberOfSlices = LoadedSlices.size();
10895  if (StressLoadSlicing)
10896  return NumberOfSlices > 1;
10897 
10898  // Check (1).
10899  if (NumberOfSlices != 2)
10900  return false;
10901 
10902  // Check (2).
10903  if (!areUsedBitsDense(UsedBits))
10904  return false;
10905 
10906  // Check (3).
10907  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
10908  // The original code has one big load.
10909  OrigCost.Loads = 1;
10910  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
10911  const LoadedSlice &LS = LoadedSlices[CurrSlice];
10912  // Accumulate the cost of all the slices.
10913  LoadedSlice::Cost SliceCost(LS, ForCodeSize);
10914  GlobalSlicingCost += SliceCost;
10915 
10916  // Account as cost in the original configuration the gain obtained
10917  // with the current slices.
10918  OrigCost.addSliceGain(LS);
10919  }
10920 
10921  // If the target supports paired load, adjust the cost accordingly.
10922  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
10923  return OrigCost > GlobalSlicingCost;
10924 }
10925 
10926 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
10927 /// operations, split it in the various pieces being extracted.
10928 ///
10929 /// This sort of thing is introduced by SROA.
10930 /// This slicing takes care not to insert overlapping loads.
10931 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
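/// E.g., on a little-endian target where i16 loads are legal, an i32 load
/// whose only uses are (i16 (trunc x)) and (i16 (trunc (srl x, 16))) can be
/// rewritten as two independent i16 loads, from Base and from Base + 2.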
10932 bool DAGCombiner::SliceUpLoad(SDNode *N) {
10933  if (Level < AfterLegalizeDAG)
10934  return false;
10935 
10936  LoadSDNode *LD = cast<LoadSDNode>(N);
10937  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
10938  !LD->getValueType(0).isInteger())
10939  return false;
10940 
10941  // Keep track of already used bits to detect overlapping values.
10942  // In that case, we will just abort the transformation.
10943  APInt UsedBits(LD->getValueSizeInBits(0), 0);
10944 
10945  SmallVector<LoadedSlice, 4> LoadedSlices;
10946 
10947  // Check if this load is used as several smaller chunks of bits.
10948  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
10949  // of computation for each trunc.
10950  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
10951  UI != UIEnd; ++UI) {
10952  // Skip the uses of the chain.
10953  if (UI.getUse().getResNo() != 0)
10954  continue;
10955 
10956  SDNode *User = *UI;
10957  unsigned Shift = 0;
10958 
10959  // Check if this is a trunc(lshr).
10960  if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
10961  isa<ConstantSDNode>(User->getOperand(1))) {
10962  Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
10963  User = *User->use_begin();
10964  }
10965 
10966  // At this point, User is a TRUNCATE iff we encountered trunc or
10967  // trunc(lshr).
10968  if (User->getOpcode() != ISD::TRUNCATE)
10969  return false;
10970 
10971  // The width of the type must be a power of 2 and at least 8 bits.
10972  // Otherwise the load cannot be represented in LLVM IR.
10973  // Moreover, if we shifted by an amount that is not a multiple of 8, the
10974  // slice would straddle byte boundaries. We do not support that.
10975  unsigned Width = User->getValueSizeInBits(0);
10976  if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
10977  return false;
10978 
10979  // Build the slice for this chain of computations.
10980  LoadedSlice LS(User, LD, Shift, &DAG);
10981  APInt CurrentUsedBits = LS.getUsedBits();
10982 
10983  // Check if this slice overlaps with another.
10984  if ((CurrentUsedBits & UsedBits) != 0)
10985  return false;
10986  // Update the bits used globally.
10987  UsedBits |= CurrentUsedBits;
10988 
10989  // Check if the new slice would be legal.
10990  if (!LS.isLegal())
10991  return false;
10992 
10993  // Record the slice.
10994  LoadedSlices.push_back(LS);
10995  }
10996 
10997  // Abort slicing if it does not seem to be profitable.
10998  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
10999  return false;
11000 
11001  ++SlicedLoads;
11002 
11003  // Rewrite each chain to use an independent load.
11004  // By construction, each chain can be represented by a unique load.
11005 
11006  // Prepare the argument for the new token factor for all the slices.
11007  SmallVector<SDValue, 8> ArgChains;
11008  for (SmallVectorImpl<LoadedSlice>::const_iterator
11009  LSIt = LoadedSlices.begin(),
11010  LSItEnd = LoadedSlices.end();
11011  LSIt != LSItEnd; ++LSIt) {
11012  SDValue SliceInst = LSIt->loadSlice();
11013  CombineTo(LSIt->Inst, SliceInst, true);
11014  if (SliceInst.getOpcode() != ISD::LOAD)
11015  SliceInst = SliceInst.getOperand(0);
11016  assert(SliceInst->getOpcode() == ISD::LOAD &&
11017  "It takes more than a zext to get to the loaded slice!!");
11018  ArgChains.push_back(SliceInst.getValue(1));
11019  }
11020 
11021  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
11022  ArgChains);
11023  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
11024  return true;
11025 }
11026 
11027 /// Check to see if V is (and (load ptr), imm), where the load has
11028 /// specific bytes cleared out. If so, return the byte size being masked out
11029 /// and the shift amount.
11030 static std::pair<unsigned, unsigned>
11031 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
11032  std::pair<unsigned, unsigned> Result(0, 0);
11033 
11034  // Check for the structure we're looking for.
11035  if (V->getOpcode() != ISD::AND ||
11036  !isa<ConstantSDNode>(V->getOperand(1)) ||
11037  !ISD::isNormalLoad(V->getOperand(0).getNode()))
11038  return Result;
11039 
11040  // Check the chain and pointer.
11041  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
11042  if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
11043 
11044  // The store should be chained directly to the load or be an operand of a
11045  // TokenFactor.
11046  if (LD == Chain.getNode())
11047  ; // ok.
11048  else if (Chain->getOpcode() != ISD::TokenFactor)
11049  return Result; // Fail.
11050  else {
11051  bool isOk = false;
11052  for (const SDValue &ChainOp : Chain->op_values())
11053  if (ChainOp.getNode() == LD) {
11054  isOk = true;
11055  break;
11056  }
11057  if (!isOk) return Result;
11058  }
11059 
11060  // This only handles simple types.
11061  if (V.getValueType() != MVT::i16 &&
11062  V.getValueType() != MVT::i32 &&
11063  V.getValueType() != MVT::i64)
11064  return Result;
11065 
11066  // Check the constant mask. Invert it so that the bits being masked out are
11067  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
11068  // follow the sign bit for uniformity.
11069  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
11070  unsigned NotMaskLZ = countLeadingZeros(NotMask);
11071  if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
11072  unsigned NotMaskTZ = countTrailingZeros(NotMask);
11073  if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
11074  if (NotMaskLZ == 64) return Result; // All zero mask.
11075 
11076  // See if we have a contiguous run of bits. If so, the mask is 0*1+0*.
11077  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
11078  return Result;
11079 
11080  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
11081  if (V.getValueType() != MVT::i64 && NotMaskLZ)
11082  NotMaskLZ -= 64-V.getValueSizeInBits();
11083 
11084  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
11085  switch (MaskedBytes) {
11086  case 1:
11087  case 2:
11088  case 4: break;
11089  default: return Result; // Unsupported masked width (e.g. 3- or 5-byte mask).
11090  }
11091 
11092  // Verify that the masked region starts at a multiple of its own width, so
11093  // that the access is aligned the same as the access width.
11094  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
11095 
11096  Result.first = MaskedBytes;
11097  Result.second = NotMaskTZ/8;
11098  return Result;
11099 }
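// Worked example: for an i32 value V = (and (load Ptr), 0xFFFF0000) chained
// directly to the load, the inverted mask is 0x0000FFFF, so this returns
// {2, 0}: the low 2 bytes are masked out, starting at byte offset 0.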
11100 
11101 
11102 /// Check to see if IVal is something that provides a value as specified by
11103 /// MaskInfo. If so, replace the specified store with a narrower store of
11104 /// truncated IVal.
11105 static SDNode *
11106 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
11107  SDValue IVal, StoreSDNode *St,
11108  DAGCombiner *DC) {
11109  unsigned NumBytes = MaskInfo.first;
11110  unsigned ByteShift = MaskInfo.second;
11111  SelectionDAG &DAG = DC->getDAG();
11112 
11113  // Check to see if IVal is all zeros in the part being masked in by the 'or'
11114  // that uses this. If not, this is not a replacement.
11115  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
11116  ByteShift*8, (ByteShift+NumBytes)*8);
11117  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
11118 
11119  // Check that it is legal on the target to do this. It is legal if the new
11120  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
11121  // legalization.
11122  MVT VT = MVT::getIntegerVT(NumBytes*8);
11123  if (!DC->isTypeLegal(VT))
11124  return nullptr;
11125 
11126  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
11127  // shifted by ByteShift and truncated down to NumBytes.
11128  if (ByteShift) {
11129  SDLoc DL(IVal);
11130  IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
11131  DAG.getConstant(ByteShift*8, DL,
11132  DC->getShiftAmountTy(IVal.getValueType())));
11133  }
11134 
11135  // Figure out the offset for the store and the alignment of the access.
11136  unsigned StOffset;
11137  unsigned NewAlign = St->getAlignment();
11138 
11139  if (DAG.getDataLayout().isLittleEndian())
11140  StOffset = ByteShift;
11141  else
11142  StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
11143 
11144  SDValue Ptr = St->getBasePtr();
11145  if (StOffset) {
11146  SDLoc DL(IVal);
11147  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
11148  Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
11149  NewAlign = MinAlign(NewAlign, StOffset);
11150  }
11151 
11152  // Truncate down to the new size.
11153  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
11154 
11155  ++OpsNarrowed;
11156  return DAG
11157  .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
11158  St->getPointerInfo().getWithOffset(StOffset), NewAlign)
11159  .getNode();
11160 }
11161 
11162 
11163 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
11164 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
11165 /// narrowing the load and store if it would end up being a win for performance
11166 /// or code size.
11167 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
11168  StoreSDNode *ST = cast<StoreSDNode>(N);
11169  if (ST->isVolatile())
11170  return SDValue();
11171 
11172  SDValue Chain = ST->getChain();
11173  SDValue Value = ST->getValue();
11174  SDValue Ptr = ST->getBasePtr();
11175  EVT VT = Value.getValueType();
11176 
11177  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
11178  return SDValue();
11179 
11180  unsigned Opc = Value.getOpcode();
11181 
11182  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
11183  // is a byte mask indicating a consecutive number of bytes, check to see if
11184  // Y is known to provide just those bytes. If so, we try to replace the
11185  // load + modify + store sequence with a single (narrower) store, which makes
11186  // the load dead.
11187  if (Opc == ISD::OR) {
11188  std::pair<unsigned, unsigned> MaskedLoad;
11189  MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
11190  if (MaskedLoad.first)
11191  if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11192  Value.getOperand(1), ST,this))
11193  return SDValue(NewST, 0);
11194 
11195  // Or is commutative, so try swapping X and Y.
11196  MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
11197  if (MaskedLoad.first)
11198  if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
11199  Value.getOperand(0), ST,this))
11200  return SDValue(NewST, 0);
11201  }
11202 
11203  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
11204  Value.getOperand(1).getOpcode() != ISD::Constant)
11205  return SDValue();
11206 
11207  SDValue N0 = Value.getOperand(0);
11208  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11209  Chain == SDValue(N0.getNode(), 1)) {
11210  LoadSDNode *LD = cast<LoadSDNode>(N0);
11211  if (LD->getBasePtr() != Ptr ||
11212  LD->getPointerInfo().getAddrSpace() !=
11213  ST->getPointerInfo().getAddrSpace())
11214  return SDValue();
11215 
11216  // Find the type to which to narrow the load / op / store.
11217  SDValue N1 = Value.getOperand(1);
11218  unsigned BitWidth = N1.getValueSizeInBits();
11219  APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
11220  if (Opc == ISD::AND)
11221  Imm ^= APInt::getAllOnesValue(BitWidth);
11222  if (Imm == 0 || Imm.isAllOnesValue())
11223  return SDValue();
11224  unsigned ShAmt = Imm.countTrailingZeros();
11225  unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
11226  unsigned NewBW = NextPowerOf2(MSB - ShAmt);
11227  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11228  // The narrowing should be profitable, the load/store operation should be
11229  // legal (or custom) and the store size should be equal to the NewVT width.
11230  while (NewBW < BitWidth &&
11231  (NewVT.getStoreSizeInBits() != NewBW ||
11232  !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
11233  !TLI.isNarrowingProfitable(VT, NewVT))) {
11234  NewBW = NextPowerOf2(NewBW);
11235  NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
11236  }
11237  if (NewBW >= BitWidth)
11238  return SDValue();
11239 
11240  // If the lowest changed bit does not start at a NewBW-bit boundary,
11241  // start at the previous one.
11242  if (ShAmt % NewBW)
11243  ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
11244  APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
11245  std::min(BitWidth, ShAmt + NewBW));
11246  if ((Imm & Mask) == Imm) {
11247  APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
11248  if (Opc == ISD::AND)
11249  NewImm ^= APInt::getAllOnesValue(NewBW);
11250  uint64_t PtrOff = ShAmt / 8;
11251  // For big endian targets, we need to adjust the offset to the pointer to
11252  // load the correct bytes.
11253  if (DAG.getDataLayout().isBigEndian())
11254  PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
11255 
11256  unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
11257  Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
11258  if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
11259  return SDValue();
11260 
11261  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
11262  Ptr.getValueType(), Ptr,
11263  DAG.getConstant(PtrOff, SDLoc(LD),
11264  Ptr.getValueType()));
11265  SDValue NewLD =
11266  DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
11267  LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
11268  LD->getMemOperand()->getFlags(), LD->getAAInfo());
11269  SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
11270  DAG.getConstant(NewImm, SDLoc(Value),
11271  NewVT));
11272  SDValue NewST =
11273  DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
11274  ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
11275 
11276  AddToWorklist(NewPtr.getNode());
11277  AddToWorklist(NewLD.getNode());
11278  AddToWorklist(NewVal.getNode());
11279  WorklistRemover DeadNodes(*this);
11280  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
11281  ++OpsNarrowed;
11282  return NewST;
11283  }
11284  }
11285 
11286  return SDValue();
11287 }
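// Worked example (assuming i8 operations are legal and narrowing is
// profitable on the target): for "store (or (load p), 0x00FF0000), p" on a
// little-endian target with the store chained to the load, ShAmt == 16 and
// NewBW == 8, so the sequence is narrowed to an i8 load from p+2, an 'or'
// with 0xFF, and an i8 store back to p+2.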
11288 
11289 /// For a given floating point load / store pair, if the load value isn't used
11290 /// by any other operations, then consider transforming the pair to integer
11291 /// load / store operations if the target deems the transformation profitable.
11292 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
11293  StoreSDNode *ST = cast<StoreSDNode>(N);
11294  SDValue Chain = ST->getChain();
11295  SDValue Value = ST->getValue();
11296  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
11297  Value.hasOneUse() &&
11298  Chain == SDValue(Value.getNode(), 1)) {
11299  LoadSDNode *LD = cast<LoadSDNode>(Value);
11300  EVT VT = LD->getMemoryVT();
11301  if (!VT.isFloatingPoint() ||
11302  VT != ST->getMemoryVT() ||
11303  LD->isNonTemporal() ||
11304  ST->isNonTemporal() ||
11305  LD->getPointerInfo().getAddrSpace() != 0 ||
11306  ST->getPointerInfo().getAddrSpace() != 0)
11307  return SDValue();
11308 
11309  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
11310  if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
11311  !TLI.isOperationLegal(ISD::STORE, IntVT) ||
11312  !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
11313  !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
11314  return SDValue();
11315 
11316  unsigned LDAlign = LD->getAlignment();
11317  unsigned STAlign = ST->getAlignment();
11318  Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
11319  unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
11320  if (LDAlign < ABIAlign || STAlign < ABIAlign)
11321  return SDValue();
11322 
11323  SDValue NewLD =
11324  DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
11325  LD->getPointerInfo(), LDAlign);
11326 
11327  SDValue NewST =
11328  DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
11329  ST->getPointerInfo(), STAlign);
11330 
11331  AddToWorklist(NewLD.getNode());
11332  AddToWorklist(NewST.getNode());
11333  WorklistRemover DeadNodes(*this);
11334  DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
11335  ++LdStFP2Int;
11336  return NewST;
11337  }
11338 
11339  return SDValue();
11340 }
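// E.g., "store f64 (load f64 p), q" can become "store i64 (load i64 p), q"
// when the target reports the integer load/store as legal and desirable and
// both accesses meet the i64 ABI alignment.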
11341 
11342 // This is a helper function for visitMUL to check the profitability
11343 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
11344 // MulNode is the original multiply, AddNode is (add x, c1),
11345 // and ConstNode is c2.
11346 //
11347 // If the (add x, c1) has multiple uses, we could increase
11348 // the number of adds if we make this transformation.
11349 // It would only be worth doing this if we can remove a
11350 // multiply in the process. Check for that here.
11351 // To illustrate:
11352 // (A + c1) * c3
11353 // (A + c2) * c3
11354 // We're checking for cases where we have common "c3 * A" expressions.
11355 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
11356  SDValue &AddNode,
11357  SDValue &ConstNode) {
11358  APInt Val;
11359 
11360  // If the add only has one use, this would be OK to do.
11361  if (AddNode.getNode()->hasOneUse())
11362  return true;
11363 
11364  // Walk all the users of the constant with which we're multiplying.
11365  for (SDNode *Use : ConstNode->uses()) {
11366 
11367  if (Use == MulNode) // This use is the one we're on right now. Skip it.
11368  continue;
11369 
11370  if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
11371  SDNode *OtherOp;
11372  SDNode *MulVar = AddNode.getOperand(0).getNode();
11373 
11374  // OtherOp is what we're multiplying against the constant.
11375  if (Use->getOperand(0) == ConstNode)
11376  OtherOp = Use->getOperand(1).getNode();
11377  else
11378  OtherOp = Use->getOperand(0).getNode();
11379 
11380  // Check to see if multiply is with the same operand of our "add".
11381  //
11382  // ConstNode = CONST
11383  // Use = ConstNode * A <-- visiting Use. OtherOp is A.
11384  // ...
11385  // AddNode = (A + c1) <-- MulVar is A.
11386  // = AddNode * ConstNode <-- current visiting instruction.
11387  //
11388  // If we make this transformation, we will have a common
11389  // multiply (ConstNode * A) that we can save.
11390  if (OtherOp == MulVar)
11391  return true;
11392 
11393  // Now check to see if a future expansion will give us a common
11394  // multiply.
11395  //
11396  // ConstNode = CONST
11397  // AddNode = (A + c1)
11398  // ... = AddNode * ConstNode <-- current visiting instruction.
11399  // ...
11400  // OtherOp = (A + c2)
11401  // Use = OtherOp * ConstNode <-- visiting Use.
11402  //
11403  // If we make this transformation, we will have a common
11404  // multiply (CONST * A) after we also do the same transformation
11405  // to the "t2" instruction.
11406  if (OtherOp->getOpcode() == ISD::ADD &&
11407  DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
11408  OtherOp->getOperand(0).getNode() == MulVar)
11409  return true;
11410  }
11411  }
11412 
11413  // Didn't find a case where this would be profitable.
11414  return false;
11415 }
11416 
11417 SDValue DAGCombiner::getMergedConstantVectorStore(
11418  SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
11419  SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
11420  SmallVector<SDValue, 8> BuildVector;
11421 
11422  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
11423  StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
11424  Chains.push_back(St->getChain());
11425  BuildVector.push_back(St->getValue());
11426  }
11427 
11428  return DAG.getBuildVector(Ty, SL, BuildVector);
11429 }
11430 
11431 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
11432  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
11433  unsigned NumStores, bool IsConstantSrc, bool UseVector) {
11434  // Make sure we have something to merge.
11435  if (NumStores < 2)
11436  return false;
11437 
11438  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11439  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11440  unsigned LatestNodeUsed = 0;
11441 
11442  for (unsigned i=0; i < NumStores; ++i) {
11443  // Find a chain for the new wide-store operand. Notice that some
11444  // of the store nodes that we found may not be selected for inclusion
11445  // in the wide store. The chain we use needs to be the chain of the
11446  // latest store node which is *used* and replaced by the wide store.
11447  if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11448  LatestNodeUsed = i;
11449  }
11450 
11451  SmallVector<SDValue, 8> Chains;
11452 
11453  // The latest Node in the DAG.
11454  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
11455  SDLoc DL(StoreNodes[0].MemNode);
11456 
11457  SDValue StoredVal;
11458  if (UseVector) {
11459  bool IsVec = MemVT.isVector();
11460  unsigned Elts = NumStores;
11461  if (IsVec) {
11462  // When merging vector stores, get the total number of elements.
11463  Elts *= MemVT.getVectorNumElements();
11464  }
11465  // Get the type for the merged vector store.
11466  EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
11467  assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
11468 
11469  if (IsConstantSrc) {
11470  StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
11471  } else {
11472  SmallVector<SDValue, 8> Ops;
11473  for (unsigned i = 0; i < NumStores; ++i) {
11474  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11475  SDValue Val = St->getValue();
11476  // All operands of BUILD_VECTOR / CONCAT_VECTORS must have the same type.
11477  if (Val.getValueType() != MemVT)
11478  return false;
11479  Ops.push_back(Val);
11480  Chains.push_back(St->getChain());
11481  }
11482 
11483  // Build the extracted vector elements back into a vector.
11484  StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
11485  DL, Ty, Ops);
  }
11486  } else {
11487  // We should always use a vector store when merging extracted vector
11488  // elements, so this path implies a store of constants.
11489  assert(IsConstantSrc && "Merged vector elements should use vector store");
11490 
11491  unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
11492  APInt StoreInt(SizeInBits, 0);
11493 
11494  // Construct a single integer constant which is made of the smaller
11495  // constant inputs.
11496  bool IsLE = DAG.getDataLayout().isLittleEndian();
11497  for (unsigned i = 0; i < NumStores; ++i) {
11498  unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
11499  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
11500  Chains.push_back(St->getChain());
11501 
11502  SDValue Val = St->getValue();
11503  StoreInt <<= ElementSizeBytes * 8;
11504  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
11505  StoreInt |= C->getAPIntValue().zext(SizeInBits);
11506  } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
11507  StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
11508  } else {
11509  llvm_unreachable("Invalid constant element type");
11510  }
11511  }
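    // E.g., merging two consecutive i16 stores of 0x1111 and 0x2222 on a
    // little-endian target packs the last element first, producing the single
    // i32 constant 0x22221111, which stores the same bytes to memory.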
11512 
11513  // Create the new Load and Store operations.
11514  EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
11515  StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
11516  }
11517 
11518  assert(!Chains.empty());
11519 
11520  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
11521  SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
11522  FirstInChain->getBasePtr(),
11523  FirstInChain->getPointerInfo(),
11524  FirstInChain->getAlignment());
11525 
11526  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11527  : DAG.getSubtarget().useAA();
11528  if (UseAA) {
11529  // Replace all merged stores with the new store.
11530  for (unsigned i = 0; i < NumStores; ++i)
11531  CombineTo(StoreNodes[i].MemNode, NewStore);
11532  } else {
11533  // Replace the last store with the new store.
11534  CombineTo(LatestOp, NewStore);
11535  // Erase all other stores.
11536  for (unsigned i = 0; i < NumStores; ++i) {
11537  if (StoreNodes[i].MemNode == LatestOp)
11538  continue;
11539  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11540  // ReplaceAllUsesWith will replace all uses that existed when it was
11541  // called, but graph optimizations may cause new ones to appear. For
11542  // example, the case in pr14333 looks like
11543  //
11544  // St's chain -> St -> another store -> X
11545  //
11546  // And the only difference from St to the other store is the chain.
11547  // When we change its chain to be St's chain they become identical,
11548  // get CSEed and the net result is that X is now a use of St.
11549  // Since we know that St is redundant, just iterate.
11550  while (!St->use_empty())
11551  DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
11552  deleteAndRecombine(St);
11553  }
11554  }
11555 
11556  StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
11557  return true;
11558 }
11559 
11560 void DAGCombiner::getStoreMergeAndAliasCandidates(
11561  StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
11562  SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
11563  // This holds the base pointer, index, and the offset in bytes from the base
11564  // pointer.
11565  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
11566 
11567  // We must have a base and an offset.
11568  if (!BasePtr.Base.getNode())
11569  return;
11570 
11571  // Do not handle stores to undef base pointers.
11572  if (BasePtr.Base.isUndef())
11573  return;
11574 
11575  // Walk up the chain and look for nodes with offsets from the same
11576  // base pointer. Stop when reaching an instruction with a different kind
11577  // or instruction which has a different base pointer.
11578  EVT MemVT = St->getMemoryVT();
11579  unsigned Seq = 0;
11580  StoreSDNode *Index = St;
11581 
11582 
11583  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11584  : DAG.getSubtarget().useAA();
11585 
11586  if (UseAA) {
11587  // Look at other users of the same chain. Stores on the same chain do not
11588  // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
11589  // to be on the same chain, so don't bother looking at adjacent chains.
11590 
11591  SDValue Chain = St->getChain();
11592  for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
11593  if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
11594  if (I.getOperandNo() != 0)
11595  continue;
11596 
11597  if (OtherST->isVolatile() || OtherST->isIndexed())
11598  continue;
11599 
11600  if (OtherST->getMemoryVT() != MemVT)
11601  continue;
11602 
11603  BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);
11604 
11605  if (Ptr.equalBaseIndex(BasePtr))
11606  StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
11607  }
11608  }
11609 
11610  return;
11611  }
11612 
11613  while (Index) {
11614  // If the chain has more than one use, then we can't reorder the mem ops.
11615  if (Index != St && !SDValue(Index, 0)->hasOneUse())
11616  break;
11617 
11618  // Find the base pointer and offset for this memory node.
11619  BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
11620 
11621  // Check that the base pointer is the same as the original one.
11622  if (!Ptr.equalBaseIndex(BasePtr))
11623  break;
11624 
11625  // The memory operands must not be volatile.
11626  if (Index->isVolatile() || Index->isIndexed())
11627  break;
11628 
11629  // No truncation.
11630  if (Index->isTruncatingStore())
11631  break;
11632 
11633  // The stored memory type must be the same.
11634  if (Index->getMemoryVT() != MemVT)
11635  break;
11636 
11637  // We do not allow under-aligned stores in order to prevent
11638  // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
11639  // be irrelevant here; what MATTERS is that we not move memory
11640  // operations that potentially overlap past each other.
11641  if (Index->getAlignment() < MemVT.getStoreSize())
11642  break;
11643 
11644  // We found a potential memory operand to merge.
11645  StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
11646 
11647  // Find the next memory operand in the chain. If the next operand in the
11648  // chain is a store then move up and continue the scan with the next
11649  // memory operand. If the next operand is a load save it and use alias
11650  // information to check if it interferes with anything.
11651  SDNode *NextInChain = Index->getChain().getNode();
11652  while (1) {
11653  if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
11654  // We found a store node. Use it for the next iteration.
11655  Index = STn;
11656  break;
11657  } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
11658  if (Ldn->isVolatile()) {
11659  Index = nullptr;
11660  break;
11661  }
11662 
11663  // Save the load node for later. Continue the scan.
11664  AliasLoadNodes.push_back(Ldn);
11665  NextInChain = Ldn->getChain().getNode();
11666  continue;
11667  } else {
11668  Index = nullptr;
11669  break;
11670  }
11671  }
11672  }
11673 }
11674 
11675 // We need to check that merging these stores does not cause a loop
11676 // in the DAG. Any store candidate may depend on another candidate
11677 // indirectly through its operand (we already consider dependencies
11678 // through the chain). Check in parallel by searching up from
11679 // non-chain operands of candidates.
11680 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
11681  SmallVectorImpl<MemOpLink> &StoreNodes) {
11682  SmallPtrSet<const SDNode *, 16> Visited;
11683  SmallVector<const SDNode *, 8> Worklist;
11684  // Search ops of store candidates.
11685  for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11686  SDNode *n = StoreNodes[i].MemNode;
11687  // Potential loops may happen only through non-chain operands
11688  for (unsigned j = 1; j < n->getNumOperands(); ++j)
11689  Worklist.push_back(n->getOperand(j).getNode());
11690  }
11691  // Search through the DAG. We can stop early if we find a store node.
11692  for (unsigned i = 0; i < StoreNodes.size(); ++i) {
11693  if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
11694  return false;
11695  }
11696  return true;
11697 }
11698 
11699 bool DAGCombiner::MergeConsecutiveStores(
11700  StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
11701  if (OptLevel == CodeGenOpt::None)
11702  return false;
11703 
11704  EVT MemVT = St->getMemoryVT();
11705  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
11706  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
11707  Attribute::NoImplicitFloat);
11708 
11709  // This function cannot currently deal with non-byte-sized memory sizes.
11710  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
11711  return false;
11712 
11713  if (!MemVT.isSimple())
11714  return false;
11715 
11716  // Perform an early exit check. Do not bother looking at stored values that
11717  // are not constants, loads, or extracted vector elements.
11718  SDValue StoredVal = St->getValue();
11719  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
11720  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
11721  isa<ConstantFPSDNode>(StoredVal);
11722  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
11723  StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
11724 
11725  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
11726  return false;
11727 
11728  // Don't merge vectors into wider vectors if the source data comes from loads.
11729  // TODO: This restriction can be lifted by using logic similar to the
11730  // ExtractVecSrc case.
11731  if (MemVT.isVector() && IsLoadSrc)
11732  return false;
11733 
11734  // Only look at ends of store sequences.
11735  SDValue Chain = SDValue(St, 0);
11736  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
11737  return false;
11738 
11739  // Save the LoadSDNodes that we find in the chain.
11740  // We need to make sure that these nodes do not interfere with
11741  // any of the store nodes.
11742  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
11743 
11744  getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
11745 
11746  // Check if there is anything to merge.
11747  if (StoreNodes.size() < 2)
11748  return false;
11749 
11750  // Only do the dependence check in the AA case.
11751  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11752  : DAG.getSubtarget().useAA();
11753  if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
11754  return false;
11755 
11756  // Sort the memory operands according to their distance from the
11757  // base pointer. As a secondary criterion: make sure stores coming
11758  // later in the code come first in the list. This is important for
11759  // the non-UseAA case, because we're merging stores into the FINAL
11760  // store along a chain which potentially contains aliasing stores.
11761  // Thus, if there are multiple stores to the same address, the last
11762  // one can be considered for merging but not the others.
11763  std::sort(StoreNodes.begin(), StoreNodes.end(),
11764  [](MemOpLink LHS, MemOpLink RHS) {
11765  return LHS.OffsetFromBase < RHS.OffsetFromBase ||
11766  (LHS.OffsetFromBase == RHS.OffsetFromBase &&
11767  LHS.SequenceNum < RHS.SequenceNum);
11768  });
11769 
11770  // Scan the memory operations on the chain and find the first non-consecutive
11771  // store memory address.
11772  unsigned LastConsecutiveStore = 0;
11773  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
11774  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
11775 
11776  // Check that the addresses are consecutive starting from the second
11777  // element in the list of stores.
11778  if (i > 0) {
11779  int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
11780  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11781  break;
11782  }
11783 
11784  // Check if this store interferes with any of the loads that we found.
11785  // If we find a load that aliases with this store, stop the sequence.
11786  if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
11787  return isAlias(Ldn, StoreNodes[i].MemNode);
11788  }))
11789  break;
11790 
11791  // Mark this node as useful.
11792  LastConsecutiveStore = i;
11793  }
11794 
11795  // The node with the lowest store address.
11796  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
11797  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
11798  unsigned FirstStoreAlign = FirstInChain->getAlignment();
11799  LLVMContext &Context = *DAG.getContext();
11800  const DataLayout &DL = DAG.getDataLayout();
11801 
11802  // Store the constants into memory as one consecutive store.
11803  if (IsConstantSrc) {
11804  unsigned LastLegalType = 0;
11805  unsigned LastLegalVectorType = 0;
11806  bool NonZero = false;
11807  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11808  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11809  SDValue StoredVal = St->getValue();
11810 
11811  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
11812  NonZero |= !C->isNullValue();
11813  } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
11814  NonZero |= !C->getConstantFPValue()->isNullValue();
11815  } else {
11816  // Non-constant.
11817  break;
11818  }
11819 
11820  // Find a legal type for the constant store.
11821  unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11822  EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11823  bool IsFast;
11824  if (TLI.isTypeLegal(StoreTy) &&
11825  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11826  FirstStoreAlign, &IsFast) && IsFast) {
11827  LastLegalType = i+1;
11828  // Or check whether a truncstore is legal.
11829  } else if (TLI.getTypeAction(Context, StoreTy) ==
11830  TargetLowering::TypePromoteInteger) {
11831  EVT LegalizedStoredValueTy =
11832  TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
11833  if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11834  TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
11835  FirstStoreAS, FirstStoreAlign, &IsFast) &&
11836  IsFast) {
11837  LastLegalType = i + 1;
11838  }
11839  }
11840 
11841  // We only use vectors if the constant is known to be zero or the target
11842  // allows it and the function is not marked with the noimplicitfloat
11843  // attribute.
11844  if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
11845  FirstStoreAS)) &&
11846  !NoVectors) {
11847  // Find a legal type for the vector store.
11848  EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
11849  if (TLI.isTypeLegal(Ty) &&
11850  TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11851  FirstStoreAlign, &IsFast) && IsFast)
11852  LastLegalVectorType = i + 1;
11853  }
11854  }
11855 
11856  // Check if we found a legal integer type to store.
11857  if (LastLegalType == 0 && LastLegalVectorType == 0)
11858  return false;
11859 
11860  bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
11861  unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
11862 
11863  return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
11864  true, UseVector);
11865  }
11866 
11867  // When extracting multiple vector elements, try to store them
11868  // in one vector store rather than a sequence of scalar stores.
11869  if (IsExtractVecSrc) {
11870  unsigned NumStoresToMerge = 0;
11871  bool IsVec = MemVT.isVector();
11872  for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
11873  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11874  unsigned StoreValOpcode = St->getValue().getOpcode();
11875  // This restriction could be loosened.
11876  // Bail out if any stored values are not elements extracted from a vector.
11877  // It should be possible to handle mixed sources, but load sources need
11878  // more careful handling (see the block of code below that handles
11879  // consecutive loads).
11880  if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
11881  StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
11882  return false;
11883 
11884  // Find a legal type for the vector store.
11885  unsigned Elts = i + 1;
11886  if (IsVec) {
11887  // When merging vector stores, get the total number of elements.
11888  Elts *= MemVT.getVectorNumElements();
11889  }
11890  EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
11891  bool IsFast;
11892  if (TLI.isTypeLegal(Ty) &&
11893  TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
11894  FirstStoreAlign, &IsFast) && IsFast)
11895  NumStoresToMerge = i + 1;
11896  }
11897 
11898  return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
11899  false, true);
11900  }
11901 
11902  // Below we handle the case of multiple consecutive stores that
11903  // come from multiple consecutive loads. We merge them into a single
11904  // wide load and a single wide store.
11905 
11906  // Look for load nodes which are used by the stored values.
11907  SmallVector<MemOpLink, 8> LoadNodes;
11908 
11909  // Find acceptable loads. Loads need to have the same chain (token factor),
11910  // must not be zext, volatile, or indexed, and they must be consecutive.
11911  BaseIndexOffset LdBasePtr;
11912  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11913  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11914  LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
11915  if (!Ld) break;
11916 
11917  // Loads must only have one use.
11918  if (!Ld->hasNUsesOfValue(1, 0))
11919  break;
11920 
11921  // The memory operands must not be volatile.
11922  if (Ld->isVolatile() || Ld->isIndexed())
11923  break;
11924 
11925  // We do not accept ext loads.
11926  if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
11927  break;
11928 
11929  // The stored memory type must be the same.
11930  if (Ld->getMemoryVT() != MemVT)
11931  break;
11932 
11933  BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
11934  // If this is not the first ptr that we check.
11935  if (LdBasePtr.Base.getNode()) {
11936  // The base ptr must be the same.
11937  if (!LdPtr.equalBaseIndex(LdBasePtr))
11938  break;
11939  } else {
11940  // Check that all other base pointers are the same as this one.
11941  LdBasePtr = LdPtr;
11942  }
11943 
11944  // We found a potential memory operand to merge.
11945  LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
11946  }
11947 
11948  if (LoadNodes.size() < 2)
11949  return false;
11950 
11951  // If we have load/store pair instructions and we only have two values,
11952  // don't bother.
11953  unsigned RequiredAlignment;
11954  if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
11955  St->getAlignment() >= RequiredAlignment)
11956  return false;
11957 
11958  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
11959  unsigned FirstLoadAS = FirstLoad->getAddressSpace();
11960  unsigned FirstLoadAlign = FirstLoad->getAlignment();
11961 
11962  // Scan the memory operations on the chain and find the first non-consecutive
11963  // load memory address. These variables hold the index in the store node
11964  // array.
11965  unsigned LastConsecutiveLoad = 0;
11966  // These variables refer to sizes, not indices, in the array.
11967  unsigned LastLegalVectorType = 0;
11968  unsigned LastLegalIntegerType = 0;
11969  StartAddress = LoadNodes[0].OffsetFromBase;
11970  SDValue FirstChain = FirstLoad->getChain();
11971  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
11972  // All loads must share the same chain.
11973  if (LoadNodes[i].MemNode->getChain() != FirstChain)
11974  break;
11975 
11976  int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
11977  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11978  break;
11979  LastConsecutiveLoad = i;
11980  // Find a legal type for the vector store.
11981  EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
11982  bool IsFastSt, IsFastLd;
11983  if (TLI.isTypeLegal(StoreTy) &&
11984  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11985  FirstStoreAlign, &IsFastSt) && IsFastSt &&
11986  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11987  FirstLoadAlign, &IsFastLd) && IsFastLd) {
11988  LastLegalVectorType = i + 1;
11989  }
11990 
11991  // Find a legal type for the integer store.
11992  unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11993  StoreTy = EVT::getIntegerVT(Context, SizeInBits);
11994  if (TLI.isTypeLegal(StoreTy) &&
11995  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
11996  FirstStoreAlign, &IsFastSt) && IsFastSt &&
11997  TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
11998  FirstLoadAlign, &IsFastLd) && IsFastLd)
11999  LastLegalIntegerType = i + 1;
12000  // Or check whether a truncstore and extload is legal.
12001  else if (TLI.getTypeAction(Context, StoreTy) ==
12002  TargetLowering::TypePromoteInteger) {
12003  EVT LegalizedStoredValueTy =
12004  TLI.getTypeToTransformTo(Context, StoreTy);
12005  if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
12006  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12007  TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12008  TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
12009  TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12010  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
12011  IsFastSt &&
12012  TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
12013  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
12014  IsFastLd)
12015  LastLegalIntegerType = i+1;
12016  }
12017  }
12018 
12019  // Only use vector types if the vector type is larger than the integer type.
12020  // If they are the same, use integers.
12021  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
12022  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
12023 
12024  // We add +1 here because the LastXXX variables refer to an array index
12025  // while NumElem refers to an element count.
12026  unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
12027  NumElem = std::min(LastLegalType, NumElem);
12028 
12029  if (NumElem < 2)
12030  return false;
12031 
12032  // Collect the chains from all merged stores.
12033  SmallVector<SDValue, 8> MergeStoreChains;
12034  MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
12035 
12036  // The latest Node in the DAG.
12037  unsigned LatestNodeUsed = 0;
12038  for (unsigned i=1; i<NumElem; ++i) {
12039  // Find a chain for the new wide-store operand. Notice that some
12040  // of the store nodes that we found may not be selected for inclusion
12041  // in the wide store. The chain we use needs to be the chain of the
12042  // latest store node which is *used* and replaced by the wide store.
12043  if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
12044  LatestNodeUsed = i;
12045 
12046  MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
12047  }
12048 
12049  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
12050 
12051  // Find if it is better to use vectors or integers to load and store
12052  // to memory.
12053  EVT JointMemOpVT;
12054  if (UseVectorTy) {
12055  JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
12056  } else {
12057  unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
12058  JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
12059  }
12060 
12061  SDLoc LoadDL(LoadNodes[0].MemNode);
12062  SDLoc StoreDL(StoreNodes[0].MemNode);
12063 
12064  // The merged loads are required to have the same incoming chain, so
12065  // using the first's chain is acceptable.
12066  SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
12067  FirstLoad->getBasePtr(),
12068  FirstLoad->getPointerInfo(), FirstLoadAlign);
12069 
12070  SDValue NewStoreChain =
12071  DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
12072 
12073  SDValue NewStore =
12074  DAG.getStore(NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
12075  FirstInChain->getPointerInfo(), FirstStoreAlign);
12076 
12077  // Transfer chain users from old loads to the new load.
12078  for (unsigned i = 0; i < NumElem; ++i) {
12079  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
12080  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
12081  SDValue(NewLoad.getNode(), 1));
12082  }
12083 
12084  if (UseAA) {
12085  // Replace all the stores with the new store.
12086  for (unsigned i = 0; i < NumElem; ++i)
12087  CombineTo(StoreNodes[i].MemNode, NewStore);
12088  } else {
12089  // Replace the last store with the new store.
12090  CombineTo(LatestOp, NewStore);
12091  // Erase all other stores.
12092  for (unsigned i = 0; i < NumElem; ++i) {
12093  // Remove all Store nodes.
12094  if (StoreNodes[i].MemNode == LatestOp)
12095  continue;
12096  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
12097  DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
12098  deleteAndRecombine(St);
12099  }
12100  }
12101 
12102  StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
12103  return true;
12104 }
12105 
12106 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
12107  SDLoc SL(ST);
12108  SDValue ReplStore;
12109 
12110  // Replace the chain to avoid dependency.
12111  if (ST->isTruncatingStore()) {
12112  ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
12113  ST->getBasePtr(), ST->getMemoryVT(),
12114  ST->getMemOperand());
12115  } else {
12116  ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
12117  ST->getMemOperand());
12118  }
12119 
12120  // Create token to keep both nodes around.
12121  SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
12122  MVT::Other, ST->getChain(), ReplStore);
12123 
12124  // Make sure the new and old chains are cleaned up.
12125  AddToWorklist(Token.getNode());
12126 
12127  // Don't add users to work list.
12128  return CombineTo(ST, Token, false);
12129 }
12130 
12131 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
12132  SDValue Value = ST->getValue();
12133  if (Value.getOpcode() == ISD::TargetConstantFP)
12134  return SDValue();
12135 
12136  SDLoc DL(ST);
12137 
12138  SDValue Chain = ST->getChain();
12139  SDValue Ptr = ST->getBasePtr();
12140 
12141  const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
12142 
12143  // NOTE: If the original store is volatile, this transform must not increase
12144  // the number of stores. For example, on x86-32 an f64 can be stored in one
12145  // processor operation but an i64 (which is not legal) requires two. So the
12146  // transform should not be done in this case.
12147 
12148  SDValue Tmp;
12149  switch (CFP->getSimpleValueType(0).SimpleTy) {
12150  default:
12151  llvm_unreachable("Unknown FP type");
12152  case MVT::f16: // We don't do this for these yet.
12153  case MVT::f80:
12154  case MVT::f128:
12155  case MVT::ppcf128:
12156  return SDValue();
12157  case MVT::f32:
12158  if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
12159  TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12161  Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
12162  bitcastToAPInt().getZExtValue(), SDLoc(CFP),
12163  MVT::i32);
12164  return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
12165  }
12166 
12167  return SDValue();
12168  case MVT::f64:
12169  if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
12170  !ST->isVolatile()) ||
12171  TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
12173  Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
12174  getZExtValue(), SDLoc(CFP), MVT::i64);
12175  return DAG.getStore(Chain, DL, Tmp,
12176  Ptr, ST->getMemOperand());
12177  }
12178 
12179  if (!ST->isVolatile() &&
12180  TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
12181  // Many FP stores are not made apparent until after legalize, e.g. for
12182  // argument passing. Since this is so common, custom legalize the
12183  // 64-bit integer store into two 32-bit stores.
12184  uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
12185  SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
12186  SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
12187  if (DAG.getDataLayout().isBigEndian())
12188  std::swap(Lo, Hi);
12189 
12190  unsigned Alignment = ST->getAlignment();
12191  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12192  AAMDNodes AAInfo = ST->getAAInfo();
12193 
12194  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12195  ST->getAlignment(), MMOFlags, AAInfo);
12196  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12197  DAG.getConstant(4, DL, Ptr.getValueType()));
12198  Alignment = MinAlign(Alignment, 4U);
12199  SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
12200  ST->getPointerInfo().getWithOffset(4),
12201  Alignment, MMOFlags, AAInfo);
12202  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
12203  St0, St1);
12204  }
12205 
12206  return SDValue();
12207  }
12208 }
12209 
12210 SDValue DAGCombiner::visitSTORE(SDNode *N) {
12211  StoreSDNode *ST = cast<StoreSDNode>(N);
12212  SDValue Chain = ST->getChain();
12213  SDValue Value = ST->getValue();
12214  SDValue Ptr = ST->getBasePtr();
12215 
12216  // If this is a store of a bit convert, store the input value if the
12217  // resultant store does not need a higher alignment than the original.
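// For example (an illustrative sketch): (store (i64 bitcast v2f32:x), p)
// may become (store v2f32:x, p) when the target reports the bitcast-free
// store as beneficial and the original alignment still suffices.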
12218  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
12219  ST->isUnindexed()) {
12220  EVT SVT = Value.getOperand(0).getValueType();
12221  if (((!LegalOperations && !ST->isVolatile()) ||
12222  TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
12223  TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
12224  unsigned OrigAlign = ST->getAlignment();
12225  bool Fast = false;
12226  if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
12227  ST->getAddressSpace(), OrigAlign, &Fast) &&
12228  Fast) {
12229  return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
12230  ST->getPointerInfo(), OrigAlign,
12231  ST->getMemOperand()->getFlags(), ST->getAAInfo());
12232  }
12233  }
12234  }
12235 
12236  // Turn 'store undef, Ptr' -> nothing.
12237  if (Value.isUndef() && ST->isUnindexed())
12238  return Chain;
12239 
12240  // Try to infer better alignment information than the store already has.
12241  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
12242  if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
12243  if (Align > ST->getAlignment()) {
12244  SDValue NewStore =
12245  DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
12246  ST->getMemoryVT(), Align,
12247  ST->getMemOperand()->getFlags(), ST->getAAInfo());
12248  if (NewStore.getNode() != N)
12249  return CombineTo(ST, NewStore, true);
12250  }
12251  }
12252  }
12253 
12254  // Try transforming a pair of floating-point load / store ops to integer
12255  // load / store ops.
12256  if (SDValue NewST = TransformFPLoadStorePair(N))
12257  return NewST;
12258 
12259  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
12260  : DAG.getSubtarget().useAA();
12261 #ifndef NDEBUG
12262  if (CombinerAAOnlyFunc.getNumOccurrences() &&
12263  CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
12264  UseAA = false;
12265 #endif
12266  if (UseAA && ST->isUnindexed()) {
12267  // FIXME: We should do this even without AA enabled. AA will just allow
12268  // FindBetterChain to work in more situations. The problem with this is that
12269  // any combine that expects memory operations to be on consecutive chains
12270  // first needs to be updated to look for users of the same chain.
12271 
12272  // Walk up chain skipping non-aliasing memory nodes, on this store and any
12273  // adjacent stores.
12274  if (findBetterNeighborChains(ST)) {
12275  // replaceStoreChain uses CombineTo, which handles all of the worklist
12276  // manipulation. Return the original node to not do anything else.
12277  return SDValue(ST, 0);
12278  }
12279  Chain = ST->getChain();
12280  }
12281 
12282  // Try transforming N to an indexed store.
12283  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12284  return SDValue(N, 0);
12285 
12286  // FIXME: is there such a thing as a truncating indexed store?
12287  if (ST->isTruncatingStore() && ST->isUnindexed() &&
12288  Value.getValueType().isInteger()) {
12289  // See if we can simplify the input to this truncstore with knowledge that
12290  // only the low bits are being used. For example:
12291  // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
12292  SDValue Shorter = GetDemandedBits(
12293  Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12294  ST->getMemoryVT().getScalarSizeInBits()));
12295  AddToWorklist(Value.getNode());
12296  if (Shorter.getNode())
12297  return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
12298  Ptr, ST->getMemoryVT(), ST->getMemOperand());
12299 
12300  // Otherwise, see if we can simplify the operation with
12301  // SimplifyDemandedBits, which only works if the value has a single use.
12302  if (SimplifyDemandedBits(
12303  Value,
12304  APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
12305  ST->getMemoryVT().getScalarSizeInBits())))
12306  return SDValue(N, 0);
12307  }
12308 
12309  // If this is a load followed by a store to the same location, then the store
12310  // is dead/noop.
12311  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
12312  if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
12313  ST->isUnindexed() && !ST->isVolatile() &&
12314  // There can't be any side effects between the load and store, such as
12315  // a call or store.
12316  Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
12317  // The store is dead, remove it.
12318  return Chain;
12319  }
12320  }
12321 
12322  // If this is a store followed by a store with the same value to the same
12323  // location, then the store is dead/noop.
12324  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
12325  if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
12326  ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
12327  ST1->isUnindexed() && !ST1->isVolatile()) {
12328  // The store is dead, remove it.
12329  return Chain;
12330  }
12331  }
12332 
12333  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
12334  // truncating store. We can do this even if this is already a truncstore.
12335  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
12336  && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
12337  TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
12338  ST->getMemoryVT())) {
12339  return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
12340  Ptr, ST->getMemoryVT(), ST->getMemOperand());
12341  }
12342 
12343  // Only perform this optimization before the types are legal, because we
12344  // don't want to perform this optimization on every DAGCombine invocation.
12345  if (!LegalTypes) {
12346  for (;;) {
12347  // There can be multiple store sequences on the same chain.
12348  // Keep trying to merge store sequences until we are unable to do so
12349  // or until we merge the last store on the chain.
12350  SmallVector<MemOpLink, 8> StoreNodes;
12351  bool Changed = MergeConsecutiveStores(ST, StoreNodes);
12352  if (!Changed) break;
12353 
12354  if (any_of(StoreNodes,
12355  [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
12356  // ST has been merged and no longer exists.
12357  return SDValue(N, 0);
12358  }
12359  }
12360  }
12361 
12362  // Turn 'store float 1.0, Ptr' -> 'store int 0x3f800000, Ptr'
12363  //
12364  // Make sure to do this only after attempting to merge stores in order to
12365  // avoid changing the types of some subset of stores due to visit order,
12366  // preventing their merging.
12367  if (isa<ConstantFPSDNode>(Value)) {
12368  if (SDValue NewSt = replaceStoreOfFPConstant(ST))
12369  return NewSt;
12370  }
12371 
12372  if (SDValue NewSt = splitMergedValStore(ST))
12373  return NewSt;
12374 
12375  return ReduceLoadOpStoreWidth(N);
12376 }
12377 
12378 /// For the instruction sequence of store below, F and I values
12379 /// are bundled together as an i64 value before being stored into memory.
12380 /// Sometimes it is more efficient to generate separate stores for F and I,
12381 /// which can remove the bitwise instructions or sink them to colder places.
12382 ///
12383 /// (store (or (zext (bitcast F to i32) to i64),
12384 /// (shl (zext I to i64), 32)), addr) -->
12385 /// (store F, addr) and (store I, addr+4)
12386 ///
12387 /// Similarly, splitting other merged stores can also be beneficial, like:
12388 /// For pair of {i32, i32}, i64 store --> two i32 stores.
12389 /// For pair of {i32, i16}, i64 store --> two i32 stores.
12390 /// For pair of {i16, i16}, i32 store --> two i16 stores.
12391 /// For pair of {i16, i8}, i32 store --> two i16 stores.
12392 /// For pair of {i8, i8}, i16 store --> two i8 stores.
12393 ///
12394 /// We allow each target to determine specifically which kind of splitting is
12395 /// supported.
12396 ///
12397 /// The store patterns are commonly seen from the simple code snippet below
12398 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
12399 /// void goo(const std::pair<int, float> &);
12400 /// hoo() {
12401 /// ...
12402 /// goo(std::make_pair(tmp, ftmp));
12403 /// ...
12404 /// }
12405 ///
12406 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
12407  if (OptLevel == CodeGenOpt::None)
12408  return SDValue();
12409 
12410  SDValue Val = ST->getValue();
12411  SDLoc DL(ST);
12412 
12413  // Match OR operand.
12414  if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
12415  return SDValue();
12416 
12417  // Match SHL operand and get Lower and Higher parts of Val.
12418  SDValue Op1 = Val.getOperand(0);
12419  SDValue Op2 = Val.getOperand(1);
12420  SDValue Lo, Hi;
12421  if (Op1.getOpcode() != ISD::SHL) {
12422  std::swap(Op1, Op2);
12423  if (Op1.getOpcode() != ISD::SHL)
12424  return SDValue();
12425  }
12426  Lo = Op2;
12427  Hi = Op1.getOperand(0);
12428  if (!Op1.hasOneUse())
12429  return SDValue();
12430 
12431  // Match shift amount to HalfValBitSize.
12432  unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
12433  ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
12434  if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
12435  return SDValue();
12436 
12437  // Lo and Hi are zero-extended from scalar integer types no wider than
12438  // HalfValBitSize (e.g. from i32 or narrower when Val is i64).
12439  if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
12440  !Lo.getOperand(0).getValueType().isScalarInteger() ||
12441  Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
12442  Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
12443  !Hi.getOperand(0).getValueType().isScalarInteger() ||
12444  Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
12445  return SDValue();
12446 
12447  // Use the EVT of low and high parts before bitcast as the input
12448  // to the target query.
12449  EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
12450  ? Lo.getOperand(0).getValueType()
12451  : Lo.getValueType();
12452  EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
12453  ? Hi.getOperand(0).getValueType()
12454  : Hi.getValueType();
12455  if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
12456  return SDValue();
12457 
12458  // Start to split store.
12459  unsigned Alignment = ST->getAlignment();
12460  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
12461  AAMDNodes AAInfo = ST->getAAInfo();
12462 
12463  // Change the sizes of Lo and Hi's value types to HalfValBitSize.
12464  EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
12465  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
12466  Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
12467 
12468  SDValue Chain = ST->getChain();
12469  SDValue Ptr = ST->getBasePtr();
12470  // Lower value store.
12471  SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
12472  ST->getAlignment(), MMOFlags, AAInfo);
12473  Ptr =
12474  DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
12475  DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
12476  // Higher value store.
12477  SDValue St1 =
12478  DAG.getStore(St0, DL, Hi, Ptr,
12479  ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
12480  Alignment / 2, MMOFlags, AAInfo);
12481  return St1;
12482 }
12483 
12484 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
12485  SDValue InVec = N->getOperand(0);
12486  SDValue InVal = N->getOperand(1);
12487  SDValue EltNo = N->getOperand(2);
12488  SDLoc DL(N);
12489 
12490  // If the inserted element is an UNDEF, just use the input vector.
12491  if (InVal.isUndef())
12492  return InVec;
12493 
12494  EVT VT = InVec.getValueType();
12495 
12496  // If we can't generate a legal BUILD_VECTOR, exit
12497  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
12498  return SDValue();
12499 
12500  // Check that we know which element is being inserted
12501  if (!isa<ConstantSDNode>(EltNo))
12502  return SDValue();
12503  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12504 
12505  // Canonicalize insert_vector_elt dag nodes.
12506  // Example:
12507  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
12508  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
12509  //
12510  // Do this only if the child insert_vector node has one use; also
12511  // do this only if indices are both constants and Idx1 < Idx0.
12512  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
12513  && isa<ConstantSDNode>(InVec.getOperand(2))) {
12514  unsigned OtherElt =
12515  cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
12516  if (Elt < OtherElt) {
12517  // Swap nodes.
12518  SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
12519  InVec.getOperand(0), InVal, EltNo);
12520  AddToWorklist(NewOp.getNode());
12521  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
12522  VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
12523  }
12524  }
12525 
12526  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
12527  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
12528  // vector elements.
12529  SmallVector<SDValue, 8> Ops;
12530  // Do not combine these two vectors if the output vector will not replace
12531  // the input vector.
12532  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
12533  Ops.append(InVec.getNode()->op_begin(),
12534  InVec.getNode()->op_end());
12535  } else if (InVec.isUndef()) {
12536  unsigned NElts = VT.getVectorNumElements();
12537  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
12538  } else {
12539  return SDValue();
12540  }
12541 
12542  // Insert the element
12543  if (Elt < Ops.size()) {
12544  // All the operands of BUILD_VECTOR must have the same type;
12545  // we enforce that here.
12546  EVT OpVT = Ops[0].getValueType();
12547  if (InVal.getValueType() != OpVT)
12548  InVal = OpVT.bitsGT(InVal.getValueType()) ?
12549  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
12550  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
12551  Ops[Elt] = InVal;
12552  }
12553 
12554  // Return the new vector
12555  return DAG.getBuildVector(VT, DL, Ops);
12556 }
12557 
12558 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
12559  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
12560  assert(!OriginalLoad->isVolatile());
12561 
12562  EVT ResultVT = EVE->getValueType(0);
12563  EVT VecEltVT = InVecVT.getVectorElementType();
12564  unsigned Align = OriginalLoad->getAlignment();
12565  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
12566  VecEltVT.getTypeForEVT(*DAG.getContext()));
12567 
12568  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
12569  return SDValue();
12570 
12571  Align = NewAlign;
12572 
12573  SDValue NewPtr = OriginalLoad->getBasePtr();
12574  SDValue Offset;
12575  EVT PtrType = NewPtr.getValueType();
12576  MachinePointerInfo MPI;
12577  SDLoc DL(EVE);
12578  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
12579  int Elt = ConstEltNo->getZExtValue();
12580  unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
12581  Offset = DAG.getConstant(PtrOff, DL, PtrType);
12582  MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
12583  } else {
12584  Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
12585  Offset = DAG.getNode(
12586  ISD::MUL, DL, PtrType, Offset,
12587  DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
12588  MPI = OriginalLoad->getPointerInfo();
12589  }
12590  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
12591 
12592  // The replacement we need to do here is a little tricky: we need to
12593  // replace an extractelement of a load with a load.
12594  // Use ReplaceAllUsesOfValuesWith to do the replacement.
12595  // Note that this replacement assumes that the extractelement is the only
12596  // use of the load; that's okay because we don't want to perform this
12597  // transformation in other cases anyway.
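// For example (an illustrative sketch): replacing
//   (f32 extract_vector_elt (v4f32 load $addr), 2)
// with (f32 load $addr+8) also rewires users of the original load's chain
// to the chain of the narrowed load.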
12598  SDValue Load;
12599  SDValue Chain;
12600  if (ResultVT.bitsGT(VecEltVT)) {
12601  // If the result type of vextract is wider than the load, then issue an
12602  // extending load instead.
12603  ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
12604  VecEltVT)
12605  ? ISD::ZEXTLOAD
12606  : ISD::EXTLOAD;
12607  Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
12608  OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
12609  Align, OriginalLoad->getMemOperand()->getFlags(),
12610  OriginalLoad->getAAInfo());
12611  Chain = Load.getValue(1);
12612  } else {
12613  Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
12614  MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
12615  OriginalLoad->getAAInfo());
12616  Chain = Load.getValue(1);
12617  if (ResultVT.bitsLT(VecEltVT))
12618  Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
12619  else
12620  Load = DAG.getBitcast(ResultVT, Load);
12621  }
12622  WorklistRemover DeadNodes(*this);
12623  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
12624  SDValue To[] = { Load, Chain };
12625  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
12626  // Since we're explicitly calling ReplaceAllUses, add the new node to the
12627  // worklist explicitly as well.
12628  AddToWorklist(Load.getNode());
12629  AddUsersToWorklist(Load.getNode()); // Add users too
12630  // Make sure to revisit this node to clean it up; it will usually be dead.
12631  AddToWorklist(EVE);
12632  ++OpsNarrowed;
12633  return SDValue(EVE, 0);
12634 }
12635 
12636 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
12637  // (vextract (scalar_to_vector val), 0) -> val
12638  SDValue InVec = N->getOperand(0);
12639  EVT VT = InVec.getValueType();
12640  EVT NVT = N->getValueType(0);
12641 
12642  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
12643  // Check if the result type doesn't match the inserted element type. A
12644  // SCALAR_TO_VECTOR may truncate the inserted element and the
12645  // EXTRACT_VECTOR_ELT may widen the extracted vector.
12646  SDValue InOp = InVec.getOperand(0);
12647  if (InOp.getValueType() != NVT) {
12648  assert(InOp.getValueType().isInteger() && NVT.isInteger());
12649  return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
12650  }
12651  return InOp;
12652  }
12653 
12654  SDValue EltNo = N->getOperand(1);
12655  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
12656 
12657  // extract_vector_elt (build_vector x, y), 1 -> y
12658  if (ConstEltNo &&
12659  InVec.getOpcode() == ISD::BUILD_VECTOR &&
12660  TLI.isTypeLegal(VT) &&
12661  (InVec.hasOneUse() ||
12662  TLI.aggressivelyPreferBuildVectorSources(VT))) {
12663  SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
12664  EVT InEltVT = Elt.getValueType();
12665 
12666  // Sometimes build_vector's scalar input types do not match result type.
12667  if (NVT == InEltVT)
12668  return Elt;
12669 
12670  // TODO: It may be useful to truncate if free if the build_vector implicitly
12671  // converts.
12672  }
12673 
12674  // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
12675  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
12676  ConstEltNo->isNullValue() && VT.isInteger()) {
12677  SDValue BCSrc = InVec.getOperand(0);
12678  if (BCSrc.getValueType().isScalarInteger())
12679  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
12680  }
12681 
12682  // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
12683  //
12684  // This only really matters if the index is non-constant since other combines
12685  // on the constant elements already work.
12686  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
12687  EltNo == InVec.getOperand(2)) {
12688  SDValue Elt = InVec.getOperand(1);
12689  return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
12690  }
12691 
12692  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
12693  // We only perform this optimization before the op legalization phase because
12694  // we may introduce new vector instructions which are not backed by TD
12695  // patterns (e.g. on AVX, extracting an element from a wide vector without
12696  // using extract_subvector). However, if we can find an underlying
12697  // scalar value, then we can always use that.
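// For example (an illustrative sketch):
//   extract_vector_elt (vector_shuffle<2,u,u,u> v4i32:a, v4i32:b), 0
// -> extract_vector_elt a, 2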
12698  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
12699  int NumElem = VT.getVectorNumElements();
12700  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
12701  // Find the new index to extract from.
12702  int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
12703 
12704  // Extracting an undef index is undef.
12705  if (OrigElt == -1)
12706  return DAG.getUNDEF(NVT);
12707 
12708  // Select the right vector half to extract from.
12709  SDValue SVInVec;
12710  if (OrigElt < NumElem) {
12711  SVInVec = InVec->getOperand(0);
12712  } else {
12713  SVInVec = InVec->getOperand(1);
12714  OrigElt -= NumElem;
12715  }
12716 
12717  if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
12718  SDValue InOp = SVInVec.getOperand(OrigElt);
12719  if (InOp.getValueType() != NVT) {
12720  assert(InOp.getValueType().isInteger() && NVT.isInteger());
12721  InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
12722  }
12723 
12724  return InOp;
12725  }
12726 
12727  // FIXME: We should handle recursing on other vector shuffles and
12728  // scalar_to_vector here as well.
12729 
12730  if (!LegalOperations) {
12731  EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
12732  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
12733  DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
12734  }
12735  }
12736 
12737  bool BCNumEltsChanged = false;
12738  EVT ExtVT = VT.getVectorElementType();
12739  EVT LVT = ExtVT;
12740 
12741  // If the result of the load has to be truncated, then it's not necessarily
12742  // profitable.
12743  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
12744  return SDValue();
12745 
12746  if (InVec.getOpcode() == ISD::BITCAST) {
12747  // Don't duplicate a load with other uses.
12748  if (!InVec.hasOneUse())
12749  return SDValue();
12750 
12751  EVT BCVT = InVec.getOperand(0).getValueType();
12752  if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
12753  return SDValue();
12754  if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
12755  BCNumEltsChanged = true;
12756  InVec = InVec.getOperand(0);
12757  ExtVT = BCVT.getVectorElementType();
12758  }
12759 
12760  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
12761  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
12762  ISD::isNormalLoad(InVec.getNode()) &&
12763  !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
12764  SDValue Index = N->getOperand(1);
12765  if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
12766  if (!OrigLoad->isVolatile()) {
12767  return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
12768  OrigLoad);
12769  }
12770  }
12771  }
12772 
12773  // Perform only after legalization to ensure build_vector / vector_shuffle
12774  // optimizations have already been done.
12775  if (!LegalOperations) return SDValue();
12776 
12777  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
12778  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
12779  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
12780 
12781  if (ConstEltNo) {
12782  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
12783 
12784  LoadSDNode *LN0 = nullptr;
12785  const ShuffleVectorSDNode *SVN = nullptr;
12786  if (ISD::isNormalLoad(InVec.getNode())) {
12787  LN0 = cast<LoadSDNode>(InVec);
12788  } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
12789  InVec.getOperand(0).getValueType() == ExtVT &&
12790  ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
12791  // Don't duplicate a load with other uses.
12792  if (!InVec.hasOneUse())
12793  return SDValue();
12794 
12795  LN0 = cast<LoadSDNode>(InVec.getOperand(0));
12796  } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
12797  // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
12798  // =>
12799  // (load $addr+1*size)
12800 
12801  // Don't duplicate a load with other uses.
12802  if (!InVec.hasOneUse())
12803  return SDValue();
12804 
12805  // If the bit convert changed the number of elements, it is unsafe
12806  // to examine the mask.
12807  if (BCNumEltsChanged)
12808  return SDValue();
12809 
12810  // Select the input vector, guarding against an out-of-range extraction.
12811  unsigned NumElems = VT.getVectorNumElements();
12812  int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
12813  InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
12814 
12815  if (InVec.getOpcode() == ISD::BITCAST) {
12816  // Don't duplicate a load with other uses.
12817  if (!InVec.hasOneUse())
12818  return SDValue();
12819 
12820  InVec = InVec.getOperand(0);
12821  }
12822  if (ISD::isNormalLoad(InVec.getNode())) {
12823  LN0 = cast<LoadSDNode>(InVec);
12824  Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
12825  EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
12826  }
12827  }
12828 
12829  // Make sure we found a non-volatile load and the extractelement is
12830  // the only use.
12831  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
12832  return SDValue();
12833 
12834  // If Idx was -1 above, Elt is going to be -1, so just return undef.
12835  if (Elt == -1)
12836  return DAG.getUNDEF(LVT);
12837 
12838  return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
12839  }
12840 
12841  return SDValue();
12842 }
12843 
12844 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
12845 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
12846  // We perform this optimization post type-legalization because
12847  // the type-legalizer often scalarizes integer-promoted vectors.
12848  // Performing this optimization earlier may create bit-casts which
12849  // will be type-legalized to complex code sequences.
12850  // We perform this optimization only before the operation legalizer because we
12851  // may introduce illegal operations.
12852  if (!LegalTypes || LegalOperations)
12853  return SDValue();
12854 
12855  unsigned NumInScalars = N->getNumOperands();
12856  SDLoc DL(N);
12857  EVT VT = N->getValueType(0);
12858 
12859  // Check to see if this is a BUILD_VECTOR of a bunch of values
12860  // which come from any_extend or zero_extend nodes. If so, we can create
12861  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
12862  // optimizations. We do not handle sign-extend because we can't fill the sign
12863  // using shuffles.
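// For example (an illustrative sketch, little-endian, assuming v4i32 is
// legal):
//   v2i64 build_vector (zext i32:a to i64), (zext i32:b to i64)
// -> v2i64 bitcast (v4i32 build_vector a, 0, b, 0)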
12864  EVT SourceType = MVT::Other;
12865  bool AllAnyExt = true;
12866 
12867  for (unsigned i = 0; i != NumInScalars; ++i) {
12868  SDValue In = N->getOperand(i);
12869  // Ignore undef inputs.
12870  if (In.isUndef()) continue;
12871 
12872  bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
12873  bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
12874 
12875  // Abort if the element is not an extension.
12876  if (!ZeroExt && !AnyExt) {
12877  SourceType = MVT::Other;
12878  break;
12879  }
12880 
12881  // The input is a ZeroExt or AnyExt. Check the original type.
12882  EVT InTy = In.getOperand(0).getValueType();
12883 
12884  // Check that all of the widened source types are the same.
12885  if (SourceType == MVT::Other)
12886  // First time.
12887  SourceType = InTy;
12888  else if (InTy != SourceType) {
12889  // Multiple incoming types. Abort.
12890  SourceType = MVT::Other;
12891  break;
12892  }
12893 
12894  // Check if all of the extends are ANY_EXTENDs.
12895  AllAnyExt &= AnyExt;
12896  }
12897 
12898  // In order to have valid types, all of the inputs must be extended from the
12899  // same source type and all of the inputs must be any or zero extend.
12900  // Scalar sizes must be a power of two.
12901  EVT OutScalarTy = VT.getScalarType();
12902  bool ValidTypes = SourceType != MVT::Other &&
12903  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
12904  isPowerOf2_32(SourceType.getSizeInBits());
12905 
12906  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
12907  // turn into a single shuffle instruction.
12908  if (!ValidTypes)
12909  return SDValue();
12910 
12911  bool isLE = DAG.getDataLayout().isLittleEndian();
12912  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
12913  assert(ElemRatio > 1 && "Invalid element size ratio");
12914  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
12915  DAG.getConstant(0, DL, SourceType);
12916 
12917  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
12918  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
12919 
12920  // Populate the new build_vector
12921  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
12922  SDValue Cast = N->getOperand(i);
12923  assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
12924  Cast.getOpcode() == ISD::ZERO_EXTEND ||
12925  Cast.isUndef()) && "Invalid cast opcode");
12926  SDValue In;
12927  if (Cast.isUndef())
12928  In = DAG.getUNDEF(SourceType);
12929  else
12930  In = Cast->getOperand(0);
12931  unsigned Index = isLE ? (i * ElemRatio) :
12932  (i * ElemRatio + (ElemRatio - 1));
12933 
12934  assert(Index < Ops.size() && "Invalid index");
12935  Ops[Index] = In;
12936  }
12937 
12938  // The type of the new BUILD_VECTOR node.
12939  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
12940  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
12941  "Invalid vector size");
12942  // Check if the new vector type is legal.
12943  if (!isTypeLegal(VecVT)) return SDValue();
12944 
12945  // Make the new BUILD_VECTOR.
12946  SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
12947 
12948  // The new BUILD_VECTOR node has the potential to be further optimized.
12949  AddToWorklist(BV.getNode());
12950  // Bitcast to the desired type.
12951  return DAG.getBitcast(VT, BV);
12952 }
12953 
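// Simplify (build_vec ([su]int_to_fp x), ([su]int_to_fp y), ...) to
// ([su]int_to_fp (build_vec x, y, ...)), provided the vectorized conversion
// and the integer vector type are legal for the target.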
12954 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
12955  EVT VT = N->getValueType(0);
12956 
12957  unsigned NumInScalars = N->getNumOperands();
12958  SDLoc DL(N);
12959 
12960  EVT SrcVT = MVT::Other;
12961  unsigned Opcode = ISD::DELETED_NODE;
12962  unsigned NumDefs = 0;
12963 
12964  for (unsigned i = 0; i != NumInScalars; ++i) {
12965  SDValue In = N->getOperand(i);
12966  unsigned Opc = In.getOpcode();
12967 
12968  if (Opc == ISD::UNDEF)
12969  continue;
12970 
12971  // If all scalar values are floats and converted from integers.
12972  if (Opcode == ISD::DELETED_NODE &&
12973  (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
12974  Opcode = Opc;
12975  }
12976 
12977  if (Opc != Opcode)
12978  return SDValue();
12979 
12980  EVT InVT = In.getOperand(0).getValueType();
12981 
12982  // If the scalar values are typed differently, bail out. Requiring a single
12983  // source type keeps the handling of integer BUILD_VECTORs simple.
12984  if (SrcVT == MVT::Other)
12985  SrcVT = InVT;
12986  if (SrcVT != InVT)
12987  return SDValue();
12988  NumDefs++;
12989  }
12990 
12991  // If the vector has just one element defined, it's not worth folding it into
12992  // a vectorized one.
12993  if (NumDefs < 2)
12994  return SDValue();
12995 
12996  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
12997  && "Should only handle conversion from integer to float.");
12998  assert(SrcVT != MVT::Other && "Cannot determine source type!");
12999 
13000  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
13001 
13002  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
13003  return SDValue();
13004 
13005  // Just because the floating-point vector type is legal does not necessarily
13006  // mean that the corresponding integer vector type is.
13007  if (!isTypeLegal(NVT))
13008  return SDValue();
13009 
13011  for (unsigned i = 0; i != NumInScalars; ++i) {
13012  SDValue In = N->getOperand(i);
13013 
13014  if (In.isUndef())
13015  Opnds.push_back(DAG.getUNDEF(SrcVT));
13016  else
13017  Opnds.push_back(In.getOperand(0));
13018  }
13019  SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
13020  AddToWorklist(BV.getNode());
13021 
13022  return DAG.getNode(Opcode, DL, VT, BV);
13023 }
13024 
13025 SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
13026  ArrayRef<int> VectorMask,
13027  SDValue VecIn1, SDValue VecIn2,
13028  unsigned LeftIdx) {
13029  MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
13030  SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
13031 
13032  EVT VT = N->getValueType(0);
13033  EVT InVT1 = VecIn1.getValueType();
13034  EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
13035 
13036  unsigned Vec2Offset = InVT1.getVectorNumElements();
13037  unsigned NumElems = VT.getVectorNumElements();
13038  unsigned ShuffleNumElems = NumElems;
13039 
13040  // We can't generate a shuffle node with mismatched input and output types.
13041  // Try to make the types match the type of the output.
13042  if (InVT1 != VT || InVT2 != VT) {
13043  if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
13044  // If the output vector length is a multiple of both input lengths,
13045  // we can concatenate them and pad the rest with undefs.
13046  unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
13047  assert(NumConcats >= 2 && "Concat needs at least two inputs!");
13048  SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
13049  ConcatOps[0] = VecIn1;
13050  ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
13051  VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
13052  VecIn2 = SDValue();
13053  } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
13054  if (!TLI.isExtractSubvectorCheap(VT, NumElems))
13055  return SDValue();
13056 
13057  if (!VecIn2.getNode()) {
13058  // If we only have one input vector, and it's twice the size of the
13059  // output, split it in two.
13060  VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
13061  DAG.getConstant(NumElems, DL, IdxTy));
13062  VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
13063  // Since we now have shorter input vectors, adjust the offset of the
13064  // second vector's start.
13065  Vec2Offset = NumElems;
13066  } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
13067  // VecIn1 is wider than the output, and we have another, possibly
13068  // smaller input. Pad the smaller input with undefs, shuffle at the
13069  // input vector width, and extract the output.
13070  // The shuffle type is different than VT, so check legality again.
13071  if (LegalOperations &&
13072  !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
13073  return SDValue();
13074 
13075  // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
13076  // lower it back into a BUILD_VECTOR. So if the inserted type is
13077  // illegal, don't even try.
13078  if (InVT1 != InVT2) {
13079  if (!TLI.isTypeLegal(InVT2))
13080  return SDValue();
13081  VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
13082  DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
13083  }
13084  ShuffleNumElems = NumElems * 2;
13085  } else {
13086  // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
13087  // than VecIn1. We can't handle this for now - this case will disappear
13088  // when we start sorting the vectors by type.
13089  return SDValue();
13090  }
13091  } else {
13092  // TODO: Support cases where the length mismatch isn't exactly by a
13093  // factor of 2.
13094  // TODO: Move this check upwards, so that if we have bad type
13095  // mismatches, we don't create any DAG nodes.
13096  return SDValue();
13097  }
13098  }
13099 
13100  // Initialize mask to undef.
13101  SmallVector<int, 8> Mask(ShuffleNumElems, -1);
13102 
13103  // Only need to run up to the number of elements actually used, not the
13104  // total number of elements in the shuffle - if we are shuffling a wider
13105  // vector, the high lanes should be set to undef.
13106  for (unsigned i = 0; i != NumElems; ++i) {
13107  if (VectorMask[i] <= 0)
13108  continue;
13109 
13110  unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
13111  if (VectorMask[i] == (int)LeftIdx) {
13112  Mask[i] = ExtIndex;
13113  } else if (VectorMask[i] == (int)LeftIdx + 1) {
13114  Mask[i] = Vec2Offset + ExtIndex;
13115  }
13116  }
13117 
13118  // The type of the input vectors may have changed above.
13119  InVT1 = VecIn1.getValueType();
13120 
13121  // If we already have a VecIn2, it should have the same type as VecIn1.
13122  // If we don't, get an undef/zero vector of the appropriate type.
13123  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
13124  assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
13125 
13126  SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
13127  if (ShuffleNumElems > NumElems)
13128  Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
13129 
13130  return Shuffle;
13131 }
13132 
13133 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
13134 // operations. If the types of the vectors we're extracting from allow it,
13135 // turn this into a vector_shuffle node.
13136 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
13137  SDLoc DL(N);
13138  EVT VT = N->getValueType(0);
13139 
13140  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
13141  if (!isTypeLegal(VT))
13142  return SDValue();
13143 
13144  // May only combine to shuffle after legalize if shuffle is legal.
13145  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
13146  return SDValue();
13147 
13148  bool UsesZeroVector = false;
13149  unsigned NumElems = N->getNumOperands();
13150 
13151  // Record, for each element of the newly built vector, which input vector
13152  // that element comes from. -1 stands for undef, 0 for the zero vector,
13153  // and positive values for the input vectors.
13154  // VectorMask maps each element to its vector number, and VecIn maps vector
13155  // numbers to their initial SDValues.
13156 
13157  SmallVector<int, 8> VectorMask(NumElems, -1);
13158  SmallVector<SDValue, 8> VecIn;
13159  VecIn.push_back(SDValue());
13160 
13161  for (unsigned i = 0; i != NumElems; ++i) {
13162  SDValue Op = N->getOperand(i);
13163 
13164  if (Op.isUndef())
13165  continue;
13166 
13167  // See if we can use a blend with a zero vector.
13168  // TODO: Should we generalize this to a blend with an arbitrary constant
13169  // vector?
13170  if (isNullConstant(Op) || isNullFPConstant(Op)) {
13171  UsesZeroVector = true;
13172  VectorMask[i] = 0;
13173  continue;
13174  }
13175 
13176  // Not an undef or zero. If the input is something other than an
13177  // EXTRACT_VECTOR_ELT with a constant index, bail out.
13178  if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13179  !isa<ConstantSDNode>(Op.getOperand(1)))
13180  return SDValue();
13181 
13182  SDValue ExtractedFromVec = Op.getOperand(0);
13183 
13184  // All inputs must have the same element type as the output.
13185  if (VT.getVectorElementType() !=
13186  ExtractedFromVec.getValueType().getVectorElementType())
13187  return SDValue();
13188 
13189  // Have we seen this input vector before?
13190  // The vectors are expected to be tiny (usually 1 or 2 elements), so using
13191  // a map back from SDValues to numbers isn't worth it.
13192  unsigned Idx = std::distance(
13193  VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
13194  if (Idx == VecIn.size())
13195  VecIn.push_back(ExtractedFromVec);
13196 
13197  VectorMask[i] = Idx;
13198  }
13199 
13200  // If we didn't find at least one input vector, bail out.
13201  if (VecIn.size() < 2)
13202  return SDValue();
13203 
13204  // TODO: We want to sort the vectors by descending length, so that adjacent
13205  // pairs have similar length, and the longer vector is always first in the
13206  // pair.
13207 
13208  // TODO: Should this fire if some of the input vectors has illegal type (like
13209  // it does now), or should we let legalization run its course first?
13210 
13211  // Shuffle phase:
13212  // Take pairs of vectors, and shuffle them so that the result has elements
13213  // from these vectors in the correct places.
13214  // For example, given:
13215  // t10: i32 = extract_vector_elt t1, Constant:i64<0>
13216  // t11: i32 = extract_vector_elt t2, Constant:i64<0>
13217  // t12: i32 = extract_vector_elt t3, Constant:i64<0>
13218  // t13: i32 = extract_vector_elt t1, Constant:i64<1>
13219  // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
13220  // We will generate:
13221  // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
13222  // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
13223  SmallVector<SDValue, 4> Shuffles;
13224  for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
13225  unsigned LeftIdx = 2 * In + 1;
13226  SDValue VecLeft = VecIn[LeftIdx];
13227  SDValue VecRight =
13228  (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
13229 
13230  if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
13231  VecRight, LeftIdx))
13232  Shuffles.push_back(Shuffle);
13233  else
13234  return SDValue();
13235  }
13236 
13237  // If we need the zero vector as an "ingredient" in the blend tree, add it
13238  // to the list of shuffles.
13239  if (UsesZeroVector)
13240  Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
13241  : DAG.getConstantFP(0.0, DL, VT));
13242 
13243  // If we only have one shuffle, we're done.
13244  if (Shuffles.size() == 1)
13245  return Shuffles[0];
13246 
13247  // Update the vector mask to point to the post-shuffle vectors.
13248  for (int &Vec : VectorMask)
13249  if (Vec == 0)
13250  Vec = Shuffles.size() - 1;
13251  else
13252  Vec = (Vec - 1) / 2;
13253 
13254  // More than one shuffle. Generate a binary tree of blends, e.g. if from
13255  // the previous step we got the set of shuffles t10, t11, t12, t13, we will
13256  // generate:
13257  // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
13258  // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
13259  // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
13260  // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
13261  // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
13262  // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
13263  // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
13264 
13265  // Make sure the initial size of the shuffle list is even.
13266  if (Shuffles.size() % 2)
13267  Shuffles.push_back(DAG.getUNDEF(VT));
13268 
13269  for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
13270  if (CurSize % 2) {
13271  Shuffles[CurSize] = DAG.getUNDEF(VT);
13272  CurSize++;
13273  }
13274  for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
13275  int Left = 2 * In;
13276  int Right = 2 * In + 1;
13277  SmallVector<int, 8> Mask(NumElems, -1);
13278  for (unsigned i = 0; i != NumElems; ++i) {
13279  if (VectorMask[i] == Left) {
13280  Mask[i] = i;
13281  VectorMask[i] = In;
13282  } else if (VectorMask[i] == Right) {
13283  Mask[i] = i + NumElems;
13284  VectorMask[i] = In;
13285  }
13286  }
13287 
13288  Shuffles[In] =
13289  DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
13290  }
13291  }
13292 
13293  return Shuffles[0];
13294 }
13295 
13296 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
13297  EVT VT = N->getValueType(0);
13298 
13299  // A vector built entirely of undefs is undef.
13300  if (ISD::allOperandsUndef(N))
13301  return DAG.getUNDEF(VT);
13302 
13303  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
13304  return V;
13305 
13306  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
13307  return V;
13308 
13309  if (SDValue V = reduceBuildVecToShuffle(N))
13310  return V;
13311 
13312  return SDValue();
13313 }
13314 
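// Attempt to fold a CONCAT_VECTORS whose operands are bitcasts from scalars
// (or UNDEF) into a bitcast of one wide BUILD_VECTOR of those scalars.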
13315 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
13316  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13317  EVT OpVT = N->getOperand(0).getValueType();
13318 
13319  // If the operands are legal vectors, leave them alone.
13320  if (TLI.isTypeLegal(OpVT))
13321  return SDValue();
13322 
13323  SDLoc DL(N);
13324  EVT VT = N->getValueType(0);
13325  SmallVector<SDValue, 8> Ops;
13326 
13327  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
13328  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13329 
13330  // Keep track of what we encounter.
13331  bool AnyInteger = false;
13332  bool AnyFP = false;
13333  for (const SDValue &Op : N->ops()) {
13334  if (ISD::BITCAST == Op.getOpcode() &&
13335  !Op.getOperand(0).getValueType().isVector())
13336  Ops.push_back(Op.getOperand(0));
13337  else if (ISD::UNDEF == Op.getOpcode())
13338  Ops.push_back(ScalarUndef);
13339  else
13340  return SDValue();
13341 
13342  // Note whether we encounter an integer or floating point scalar.
13343  // If it's neither, bail out, it could be something weird like x86mmx.
13344  EVT LastOpVT = Ops.back().getValueType();
13345  if (LastOpVT.isFloatingPoint())
13346  AnyFP = true;
13347  else if (LastOpVT.isInteger())
13348  AnyInteger = true;
13349  else
13350  return SDValue();
13351  }
13352 
13353  // If any of the operands is a floating point scalar bitcast to a vector,
13354  // use floating point types throughout, and bitcast everything.
13355  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
13356  if (AnyFP) {
13357  SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
13358  ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
13359  if (AnyInteger) {
13360  for (SDValue &Op : Ops) {
13361  if (Op.getValueType() == SVT)
13362  continue;
13363  if (Op.isUndef())
13364  Op = ScalarUndef;
13365  else
13366  Op = DAG.getBitcast(SVT, Op);
13367  }
13368  }
13369  }
13370 
13371  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
13372  VT.getSizeInBits() / SVT.getSizeInBits());
13373  return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
13374 }
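// For illustration, a hypothetical instance of the fold above (the value
// names t1..t4 are invented for the example): given
//   t2: v4i32 = concat_vectors (v2i32 (bitcast f64:t1)), (v2i32 undef)
// one operand is a floating point scalar, so FP types are used throughout:
//   t3: v2f64 = BUILD_VECTOR t1, (f64 undef)
//   t4: v4i32 = bitcast t3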
13375 
13376 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
13377 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
13378 // most two distinct vectors the same size as the result, attempt to turn this
13379 // into a legal shuffle.
13380 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
13381  EVT VT = N->getValueType(0);
13382  EVT OpVT = N->getOperand(0).getValueType();
13383  int NumElts = VT.getVectorNumElements();
13384  int NumOpElts = OpVT.getVectorNumElements();
13385 
13386  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
13387  SmallVector<int, 8> Mask;
13388 
13389  for (SDValue Op : N->ops()) {
13390  // Peek through any bitcast.
13391  while (Op.getOpcode() == ISD::BITCAST)
13392  Op = Op.getOperand(0);
13393 
13394  // UNDEF nodes convert to UNDEF shuffle mask values.
13395  if (Op.isUndef()) {
13396  Mask.append((unsigned)NumOpElts, -1);
13397  continue;
13398  }
13399 
13400  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13401  return SDValue();
13402 
13403  // What vector are we extracting the subvector from and at what index?
13404  SDValue ExtVec = Op.getOperand(0);
13405 
13406  // We want the EVT of the original extraction to correctly scale the
13407  // extraction index.
13408  EVT ExtVT = ExtVec.getValueType();
13409 
13410  // Peek through any bitcast.
13411  while (ExtVec.getOpcode() == ISD::BITCAST)
13412  ExtVec = ExtVec.getOperand(0);
13413 
13414  // UNDEF nodes convert to UNDEF shuffle mask values.
13415  if (ExtVec.isUndef()) {
13416  Mask.append((unsigned)NumOpElts, -1);
13417  continue;
13418  }
13419 
13420  if (!isa<ConstantSDNode>(Op.getOperand(1)))
13421  return SDValue();
13422  int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
13423 
13424  // Ensure that we are extracting a subvector from a vector the same
13425  // size as the result.
13426  if (ExtVT.getSizeInBits() != VT.getSizeInBits())
13427  return SDValue();
13428 
13429  // Scale the subvector index to account for any bitcast.
13430  int NumExtElts = ExtVT.getVectorNumElements();
13431  if (0 == (NumExtElts % NumElts))
13432  ExtIdx /= (NumExtElts / NumElts);
13433  else if (0 == (NumElts % NumExtElts))
13434  ExtIdx *= (NumElts / NumExtElts);
13435  else
13436  return SDValue();
13437 
13438  // At most we can reference 2 inputs in the final shuffle.
13439  if (SV0.isUndef() || SV0 == ExtVec) {
13440  SV0 = ExtVec;
13441  for (int i = 0; i != NumOpElts; ++i)
13442  Mask.push_back(i + ExtIdx);
13443  } else if (SV1.isUndef() || SV1 == ExtVec) {
13444  SV1 = ExtVec;
13445  for (int i = 0; i != NumOpElts; ++i)
13446  Mask.push_back(i + ExtIdx + NumElts);
13447  } else {
13448  return SDValue();
13449  }
13450  }
13451 
13452  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
13453  return SDValue();
13454 
13455  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
13456  DAG.getBitcast(VT, SV1), Mask);
13457 }
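// A hypothetical example of the fold above, with A and B both v4i32 (each
// source is the same size as the v4i32 result):
//   concat_vectors (v2i32 extract_subvector A, 2), (v2i32 extract_subvector B, 0)
// becomes, provided the mask is legal for the target:
//   vector_shuffle<2,3,4,5> A, B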
13458 
13459 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
13460  // If we only have one input vector, we don't need to do any concatenation.
13461  if (N->getNumOperands() == 1)
13462  return N->getOperand(0);
13463 
13464  // Check if all of the operands are undefs.
13465  EVT VT = N->getValueType(0);
13466  if (ISD::allOperandsUndef(N))
13467  return DAG.getUNDEF(VT);
13468 
13469  // Optimize concat_vectors where all but the first of the vectors are undef.
13470  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
13471  return Op.isUndef();
13472  })) {
13473  SDValue In = N->getOperand(0);
13474  assert(In.getValueType().isVector() && "Must concat vectors");
13475 
13476  // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(scalar).
13477  if (In->getOpcode() == ISD::BITCAST &&
13478  !In->getOperand(0)->getValueType(0).isVector()) {
13479  SDValue Scalar = In->getOperand(0);
13480 
13481  // If the bitcast type isn't legal, it might be a trunc of a legal type;
13482  // look through the trunc so we can still do the transform:
13483  // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
13484  if (Scalar->getOpcode() == ISD::TRUNCATE &&
13485  !TLI.isTypeLegal(Scalar.getValueType()) &&
13486  TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
13487  Scalar = Scalar->getOperand(0);
13488 
13489  EVT SclTy = Scalar->getValueType(0);
13490 
13491  if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
13492  return SDValue();
13493 
13494  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
13495  VT.getSizeInBits() / SclTy.getSizeInBits());
13496  if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
13497  return SDValue();
13498 
13499  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
13500  return DAG.getBitcast(VT, Res);
13501  }
13502  }
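  // For illustration (hypothetical values, assuming i64 and v2i64 are legal
  // for the target): concat_vectors (v2i32 (bitcast i64:x)), undef
  // becomes
  //   t0: v2i64 = scalar_to_vector x
  //   t1: v4i32 = bitcast t0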
13503 
13504  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
13505  // We have already tested above for an UNDEF only concatenation.
13506  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
13507  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
13508  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
13509  return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
13510  };
13511  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
13512  SmallVector<SDValue, 8> Opnds;
13513  EVT SVT = VT.getScalarType();
13514 
13515  EVT MinVT = SVT;
13516  if (!SVT.isFloatingPoint()) {
13517  // If the BUILD_VECTOR nodes are built from integers, they may have different
13518  // operand types. Get the smallest type and truncate all operands to it.
13519  bool FoundMinVT = false;
13520  for (const SDValue &Op : N->ops())
13521  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
13522  EVT OpSVT = Op.getOperand(0)->getValueType(0);
13523  MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
13524  FoundMinVT = true;
13525  }
13526  assert(FoundMinVT && "Concat vector type mismatch");
13527  }
13528 
13529  for (const SDValue &Op : N->ops()) {
13530  EVT OpVT = Op.getValueType();
13531  unsigned NumElts = OpVT.getVectorNumElements();
13532 
13533  if (ISD::UNDEF == Op.getOpcode())
13534  Opnds.append(NumElts, DAG.getUNDEF(MinVT));
13535 
13536  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
13537  if (SVT.isFloatingPoint()) {
13538  assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
13539  Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
13540  } else {
13541  for (unsigned i = 0; i != NumElts; ++i)
13542  Opnds.push_back(
13543  DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
13544  }
13545  }
13546  }
13547 
13548  assert(VT.getVectorNumElements() == Opnds.size() &&
13549  "Concat vector type mismatch");
13550  return DAG.getBuildVector(VT, SDLoc(N), Opnds);
13551  }
13552 
13553  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
13554  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
13555  return V;
13556 
13557  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
13558  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
13559  if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
13560  return V;
13561 
13562  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
13563  // nodes often generate nop CONCAT_VECTOR nodes.
13564  // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
13565  // place the incoming vectors at the exact same location.
13566  SDValue SingleSource = SDValue();
13567  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
13568 
13569  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
13570  SDValue Op = N->getOperand(i);
13571 
13572  if (Op.isUndef())
13573  continue;
13574 
13575  // Check if this is the identity extract:
13576  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13577  return SDValue();
13578 
13579  // Find the single incoming vector for the extract_subvector.
13580  if (SingleSource.getNode()) {
13581  if (Op.getOperand(0) != SingleSource)
13582  return SDValue();
13583  } else {
13584  SingleSource = Op.getOperand(0);
13585 
13586  // Check the source type is the same as the type of the result.
13587  // If not, this concat may extend the vector, so we cannot
13588  // optimize it away.
13589  if (SingleSource.getValueType() != N->getValueType(0))
13590  return SDValue();
13591  }
13592 
13593  unsigned IdentityIndex = i * PartNumElem;
13594  ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13595  // The extract index must be constant.
13596  if (!CS)
13597  return SDValue();
13598 
13599  // Check that we are reading from the identity index.
13600  if (CS->getZExtValue() != IdentityIndex)
13601  return SDValue();
13602  }
13603 
13604  if (SingleSource.getNode())
13605  return SingleSource;
13606 
13607  return SDValue();
13608 }
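// For illustration, the identity-concat scan above turns (with A: v8i32):
//   concat_vectors (v4i32 extract_subvector A, 0), (v4i32 extract_subvector A, 4)
// back into A itself, since every operand reads its part of the single
// source at the identity index.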
13609 
13610 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
13611  EVT NVT = N->getValueType(0);
13612  SDValue V = N->getOperand(0);
13613 
13614  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
13615  // Combine:
13616  // (extract_subvec (concat V1, V2, ...), i)
13617  // Into:
13618  // Vi if possible
13619  // Only operand 0 is checked as 'concat' assumes all inputs of the same
13620  // type.
13621  if (V->getOperand(0).getValueType() != NVT)
13622  return SDValue();
13623  unsigned Idx = N->getConstantOperandVal(1);
13624  unsigned NumElems = NVT.getVectorNumElements();
13625  assert((Idx % NumElems) == 0 &&
13626  "IDX in concat is not a multiple of the result vector length.");
13627  return V->getOperand(Idx / NumElems);
13628  }
13629 
13630  // Skip bitcasting
13631  if (V->getOpcode() == ISD::BITCAST)
13632  V = V.getOperand(0);
13633 
13634  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
13635  // Handle only simple case where vector being inserted and vector
13636  // being extracted are of same type, and are half size of larger vectors.
13637  EVT BigVT = V->getOperand(0).getValueType();
13638  EVT SmallVT = V->getOperand(1).getValueType();
13639  if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
13640  return SDValue();
13641 
13642  // Only handle cases where both indexes are constants with the same type.
13643  ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
13644  ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
13645 
13646  if (InsIdx && ExtIdx &&
13647  InsIdx->getValueType(0).getSizeInBits() <= 64 &&
13648  ExtIdx->getValueType(0).getSizeInBits() <= 64) {
13649  // Combine:
13650  // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
13651  // Into:
13652  // indices are equal or bit offsets are equal => V1
13653  // otherwise => (extract_subvec V1, ExtIdx)
13654  if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
13655  ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
13656  return DAG.getBitcast(NVT, V->getOperand(1));
13657  return DAG.getNode(
13658  ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
13659  DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
13660  N->getOperand(1));
13661  }
13662  }
13663 
13664  return SDValue();
13665 }
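// Two hypothetical instances of the folds above (V1: v4i32, V2: v2i32,
// with V1' another v4i32):
//   (v4i32 extract_subvector (concat_vectors V1, V1'), 4)     -> V1'
//   (v2i32 extract_subvector (insert_subvector V1, V2, 2), 2) -> V2
// In the second case the insert and extract cover the same bit offset with
// the same type, so the inserted value is returned directly.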
13666 
13667 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
13668  SDValue V, SelectionDAG &DAG) {
13669  SDLoc DL(V);
13670  EVT VT = V.getValueType();
13671 
13672  switch (V.getOpcode()) {
13673  default:
13674  return V;
13675 
13676  case ISD::CONCAT_VECTORS: {
13677  EVT OpVT = V->getOperand(0).getValueType();
13678  int OpSize = OpVT.getVectorNumElements();
13679  SmallBitVector OpUsedElements(OpSize, false);
13680  bool FoundSimplification = false;
13681  SmallVector<SDValue, 4> NewOps;
13682  NewOps.reserve(V->getNumOperands());
13683  for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
13684  SDValue Op = V->getOperand(i);
13685  bool OpUsed = false;
13686  for (int j = 0; j < OpSize; ++j)
13687  if (UsedElements[i * OpSize + j]) {
13688  OpUsedElements[j] = true;
13689  OpUsed = true;
13690  }
13691  NewOps.push_back(
13692  OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
13693  : DAG.getUNDEF(OpVT));
13694  FoundSimplification |= Op != NewOps.back();
13695  OpUsedElements.reset();
13696  }
13697  if (FoundSimplification)
13698  V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
13699  return V;
13700  }
13701 
13702  case ISD::INSERT_SUBVECTOR: {
13703  SDValue BaseV = V->getOperand(0);
13704  SDValue SubV = V->getOperand(1);
13705  auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
13706  if (!IdxN)
13707  return V;
13708 
13709  int SubSize = SubV.getValueType().getVectorNumElements();
13710  int Idx = IdxN->getZExtValue();
13711  bool SubVectorUsed = false;
13712  SmallBitVector SubUsedElements(SubSize, false);
13713  for (int i = 0; i < SubSize; ++i)
13714  if (UsedElements[i + Idx]) {
13715  SubVectorUsed = true;
13716  SubUsedElements[i] = true;
13717  UsedElements[i + Idx] = false;
13718  }
13719 
13720  // Now recurse on both the base and sub vectors.
13721  SDValue SimplifiedSubV =
13722  SubVectorUsed
13723  ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
13724  : DAG.getUNDEF(SubV.getValueType());
13725  SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
13726  if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
13727  V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
13728  SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
13729  return V;
13730  }
13731  }
13732 }
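// For illustration: if a shuffle only reads lanes 0-3 of
//   t0: v8i32 = concat_vectors A:v4i32, B:v4i32
// the recursion above marks B's lanes unused and rebuilds the operand as
//   t1: v8i32 = concat_vectors A, undef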
13733 
13734 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
13735  SDValue N1, SelectionDAG &DAG) {
13736  EVT VT = SVN->getValueType(0);
13737  int NumElts = VT.getVectorNumElements();
13738  SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
13739  for (int M : SVN->getMask())
13740  if (M >= 0 && M < NumElts)
13741  N0UsedElements[M] = true;
13742  else if (M >= NumElts)
13743  N1UsedElements[M - NumElts] = true;
13744 
13745  SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
13746  SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
13747  if (S0 == N0 && S1 == N1)
13748  return SDValue();
13749 
13750  return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
13751 }
13752 
13753 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
13754 // or turn a shuffle of a single concat into a simpler shuffle followed by a concat.
13755 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
13756  EVT VT = N->getValueType(0);
13757  unsigned NumElts = VT.getVectorNumElements();
13758 
13759  SDValue N0 = N->getOperand(0);
13760  SDValue N1 = N->getOperand(1);
13761  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13762 
13764  EVT ConcatVT = N0.getOperand(0).getValueType();
13765  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
13766  unsigned NumConcats = NumElts / NumElemsPerConcat;
13767 
13768  // Special case: shuffle(concat(A,B)) can be more efficiently represented
13769  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
13770  // half vector elements.
13771  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
13772  std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
13773  SVN->getMask().end(), [](int i) { return i == -1; })) {
13774  N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
13775  makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
13776  N1 = DAG.getUNDEF(ConcatVT);
13777  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
13778  }
13779 
13780  // Look at every vector that's inserted. We're looking for exact
13781  // subvector-sized copies from a concatenated vector
13782  for (unsigned I = 0; I != NumConcats; ++I) {
13783  // Make sure we're dealing with a copy.
13784  unsigned Begin = I * NumElemsPerConcat;
13785  bool AllUndef = true, NoUndef = true;
13786  for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
13787  if (SVN->getMaskElt(J) >= 0)
13788  AllUndef = false;
13789  else
13790  NoUndef = false;
13791  }
13792 
13793  if (NoUndef) {
13794  if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
13795  return SDValue();
13796 
13797  for (unsigned J = 1; J != NumElemsPerConcat; ++J)
13798  if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
13799  return SDValue();
13800 
13801  unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
13802  if (FirstElt < N0.getNumOperands())
13803  Ops.push_back(N0.getOperand(FirstElt));
13804  else
13805  Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
13806 
13807  } else if (AllUndef) {
13808  Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
13809  } else { // Mixed with general masks and undefs, can't do optimization.
13810  return SDValue();
13811  }
13812  }
13813 
13814  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
13815 }
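// A hypothetical example of the partitioning above, with A, B, C, D all
// v2i32:
//   vector_shuffle<2,3,4,5> (concat_vectors A, B), (concat_vectors C, D)
// copies whole subvectors only, so it becomes:
//   concat_vectors B, C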
13816 
13817 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
13818 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
13819 //
13820 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
13821 // a simplification in some sense, but it isn't appropriate in general: some
13822 // BUILD_VECTORs are substantially cheaper than others. The general case
13823 // of a BUILD_VECTOR requires inserting each element individually (or
13824 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
13825 // all constants is a single constant pool load. A BUILD_VECTOR where each
13826 // element is identical is a splat. A BUILD_VECTOR where most of the operands
13827 // are undef lowers to a small number of element insertions.
13828 //
13829 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
13830 // We don't fold shuffles where one side is a non-zero constant, and we don't
13831 // fold shuffles if the resulting BUILD_VECTOR would have duplicate
13832 // non-constant operands. This seems to work out reasonably well in practice.
13833 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
13834  SelectionDAG &DAG,
13835  const TargetLowering &TLI) {
13836  EVT VT = SVN->getValueType(0);
13837  unsigned NumElts = VT.getVectorNumElements();
13838  SDValue N0 = SVN->getOperand(0);
13839  SDValue N1 = SVN->getOperand(1);
13840 
13841  if (!N0->hasOneUse() || !N1->hasOneUse())
13842  return SDValue();
13843  // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
13844  // discussed above.
13845  if (!N1.isUndef()) {
13846  bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
13847  bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
13848  if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
13849  return SDValue();
13850  if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
13851  return SDValue();
13852  }
13853 
13855  SmallSet<SDValue, 16> DuplicateOps;
13856  for (int M : SVN->getMask()) {
13857  SDValue Op = DAG.getUNDEF(VT.getScalarType());
13858  if (M >= 0) {
13859  int Idx = M < (int)NumElts ? M : M - NumElts;
13860  SDValue &S = (M < (int)NumElts ? N0 : N1);
13861  if (S.getOpcode() == ISD::BUILD_VECTOR) {
13862  Op = S.getOperand(Idx);
13863  } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
13864  if (Idx == 0)
13865  Op = S.getOperand(0);
13866  } else {
13867  // Operand can't be combined - bail out.
13868  return SDValue();
13869  }
13870  }
13871 
13872  // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
13873  // fine, but it's likely to generate low-quality code if the target can't
13874  // reconstruct an appropriate shuffle.
13875  if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
13876  if (!DuplicateOps.insert(Op).second)
13877  return SDValue();
13878 
13879  Ops.push_back(Op);
13880  }
13881  // BUILD_VECTOR requires all inputs to be of the same type, find the
13882  // maximum type and extend them all.
13883  EVT SVT = VT.getScalarType();
13884  if (SVT.isInteger())
13885  for (SDValue &Op : Ops)
13886  SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
13887  if (SVT != VT.getScalarType())
13888  for (SDValue &Op : Ops)
13889  Op = TLI.isZExtFree(Op.getValueType(), SVT)
13890  ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
13891  : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
13892  return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
13893 }
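// For illustration (a, b, c, d hypothetical scalars with no duplicated
// non-constant operands in the result):
//   vector_shuffle<0,3> (BUILD_VECTOR a, b), (BUILD_VECTOR c, d)
// becomes
//   BUILD_VECTOR a, d
// with operands extended to the widest scalar type if their types differ.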
13894 
13895 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
13896  EVT VT = N->getValueType(0);
13897  unsigned NumElts = VT.getVectorNumElements();
13898 
13899  SDValue N0 = N->getOperand(0);
13900  SDValue N1 = N->getOperand(1);
13901 
13902  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
13903 
13904  // Canonicalize shuffle undef, undef -> undef
13905  if (N0.isUndef() && N1.isUndef())
13906  return DAG.getUNDEF(VT);
13907 
13908  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
13909 
13910  // Canonicalize shuffle v, v -> v, undef
13911  if (N0 == N1) {
13912  SmallVector<int, 8> NewMask;
13913  for (unsigned i = 0; i != NumElts; ++i) {
13914  int Idx = SVN->getMaskElt(i);
13915  if (Idx >= (int)NumElts) Idx -= NumElts;
13916  NewMask.push_back(Idx);
13917  }
13918  return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
13919  }
13920 
13921  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
13922  if (N0.isUndef())
13923  return DAG.getCommutedVectorShuffle(*SVN);
13924 
13925  // Remove references to rhs if it is undef
13926  if (N1.isUndef()) {
13927  bool Changed = false;
13928  SmallVector<int, 8> NewMask;
13929  for (unsigned i = 0; i != NumElts; ++i) {
13930  int Idx = SVN->getMaskElt(i);
13931  if (Idx >= (int)NumElts) {
13932  Idx = -1;
13933  Changed = true;
13934  }
13935  NewMask.push_back(Idx);
13936  }
13937  if (Changed)
13938  return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
13939  }
13940 
13941  // If it is a splat, check if the argument vector is another splat or a
13942  // build_vector.
13943  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
13944  SDNode *V = N0.getNode();
13945 
13946  // If this is a bit convert that changes the element type of the vector but
13947  // not the number of vector elements, look through it. Be careful not to
13948  // look through conversions that change things like v4f32 to v2f64.
13949  if (V->getOpcode() == ISD::BITCAST) {
13950  SDValue ConvInput = V->getOperand(0);
13951  if (ConvInput.getValueType().isVector() &&
13952  ConvInput.getValueType().getVectorNumElements() == NumElts)
13953  V = ConvInput.getNode();
13954  }
13955 
13956  if (V->getOpcode() == ISD::BUILD_VECTOR) {
13957  assert(V->getNumOperands() == NumElts &&
13958  "BUILD_VECTOR has wrong number of operands");
13959  SDValue Base;
13960  bool AllSame = true;
13961  for (unsigned i = 0; i != NumElts; ++i) {
13962  if (!V->getOperand(i).isUndef()) {
13963  Base = V->getOperand(i);
13964  break;
13965  }
13966  }
13967  // Splat of <u, u, u, u>, return <u, u, u, u>
13968  if (!Base.getNode())
13969  return N0;
13970  for (unsigned i = 0; i != NumElts; ++i) {
13971  if (V->getOperand(i) != Base) {
13972  AllSame = false;
13973  break;
13974  }
13975  }
13976  // Splat of <x, x, x, x>, return <x, x, x, x>
13977  if (AllSame)
13978  return N0;
13979 
13980  // Canonicalize any other splat as a build_vector.
13981  const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
13982  SmallVector<SDValue, 8> Ops(NumElts, Splatted);
13983  SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
13984 
13985  // We may have jumped through bitcasts, so the type of the
13986  // BUILD_VECTOR may not match the type of the shuffle.
13987  if (V->getValueType(0) != VT)
13988  NewBV = DAG.getBitcast(VT, NewBV);
13989  return NewBV;
13990  }
13991  }
13992 
13993  // There are various patterns used to build up a vector from smaller vectors,
13994  // subvectors, or elements. Scan chains of these and replace unused insertions
13995  // or components with undef.
13996  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
13997  return S;
13998 
13999  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
14000  Level < AfterLegalizeVectorOps &&
14001  (N1.isUndef() ||
14002  (N1.getOpcode() == ISD::CONCAT_VECTORS &&
14003  N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
14004  if (SDValue V = partitionShuffleOfConcats(N, DAG))
14005  return V;
14006  }
14007 
14008  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
14009  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
14010  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
14011  if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
14012  return Res;
14013 
14014  // If this shuffle only has a single input that is a bitcasted shuffle,
14015  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
14016  // back to their original types.
14017  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14018  N1.isUndef() && Level < AfterLegalizeVectorOps &&
14019  TLI.isTypeLegal(VT)) {
14020 
14021  // Peek through the bitcast only if there is one user.
14022  SDValue BC0 = N0;
14023  while (BC0.getOpcode() == ISD::BITCAST) {
14024  if (!BC0.hasOneUse())
14025  break;
14026  BC0 = BC0.getOperand(0);
14027  }
14028 
14029  auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
14030  if (Scale == 1)
14031  return SmallVector<int, 8>(Mask.begin(), Mask.end());
14032 
14033  SmallVector<int, 8> NewMask;
14034  for (int M : Mask)
14035  for (int s = 0; s != Scale; ++s)
14036  NewMask.push_back(M < 0 ? -1 : Scale * M + s);
14037  return NewMask;
14038  };
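  // For example, scaling the mask <1,-1> by 2 produces <2,3,-1,-1>: lane M
  // of the wide type covers lanes {2*M, 2*M+1} of the narrower-element
  // type, and undef lanes (-1) stay undef.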
14039 
14040  if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
14041  EVT SVT = VT.getScalarType();
14042  EVT InnerVT = BC0->getValueType(0);
14043  EVT InnerSVT = InnerVT.getScalarType();
14044 
14045  // Determine which shuffle works with the smaller scalar type.
14046  EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
14047  EVT ScaleSVT = ScaleVT.getScalarType();
14048 
14049  if (TLI.isTypeLegal(ScaleVT) &&
14050  0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
14051  0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
14052 
14053  int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
14054  int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
14055 
14056  // Scale the shuffle masks to the smaller scalar type.
14057  ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
14058  SmallVector<int, 8> InnerMask =
14059  ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
14060  SmallVector<int, 8> OuterMask =
14061  ScaleShuffleMask(SVN->getMask(), OuterScale);
14062 
14063  // Merge the shuffle masks.
14064  SmallVector<int, 8> NewMask;
14065  for (int M : OuterMask)
14066  NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
14067 
14068  // Test for shuffle mask legality over both commutations.
14069  SDValue SV0 = BC0->getOperand(0);
14070  SDValue SV1 = BC0->getOperand(1);
14071  bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
14072  if (!LegalMask) {
14073  std::swap(SV0, SV1);
14074  ShuffleVectorSDNode::commuteMask(NewMask);
14075  LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
14076  }
14077 
14078  if (LegalMask) {
14079  SV0 = DAG.getBitcast(ScaleVT, SV0);
14080  SV1 = DAG.getBitcast(ScaleVT, SV1);
14081  return DAG.getBitcast(
14082  VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
14083  }
14084  }
14085  }
14086  }
14087 
14088  // Canonicalize shuffles according to rules:
14089  // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
14090  // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
14091  // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
14092  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
14093  N1.hasOneUse() && Level < AfterLegalizeDAG &&
14094  TLI.isTypeLegal(VT)) {
14095  // The incoming shuffle must be of the same type as the result of the
14096  // current shuffle.
14097  assert(N1->getOperand(0).getValueType() == VT &&
14098  "Shuffle types don't match");
14099 
14100  SDValue SV0 = N1->getOperand(0);
14101  SDValue SV1 = N1->getOperand(1);
14102  bool HasSameOp0 = N0 == SV0;
14103  bool IsSV1Undef = SV1.isUndef();
14104  if (HasSameOp0 || IsSV1Undef || N0 == SV1)
14105  // Commute the operands of this shuffle so that next rule
14106  // will trigger.
14107  return DAG.getCommutedVectorShuffle(*SVN);
14108  }
14109 
14110  // Try to fold according to rules:
14111  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
14112  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
14113  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
14114  // Don't try to fold shuffles with illegal type.
14115  // Only fold if this shuffle is the only user of the other shuffle.
14116  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
14117  Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
14118  ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
14119 
14120  // Don't try to fold splats; they're likely to simplify somehow, or they
14121  // might be free.
14122  if (OtherSV->isSplat())
14123  return SDValue();
14124 
14125  // The incoming shuffle must be of the same type as the result of the
14126  // current shuffle.
14127  assert(OtherSV->getOperand(0).getValueType() == VT &&
14128  "Shuffle types don't match");
14129 
14130  SDValue SV0, SV1;
14131  SmallVector<int, 4> Mask;
14132  // Compute the combined shuffle mask for a shuffle with SV0 as the first
14133  // operand, and SV1 as the second operand.
14134  for (unsigned i = 0; i != NumElts; ++i) {
14135  int Idx = SVN->getMaskElt(i);
14136  if (Idx < 0) {
14137  // Propagate Undef.
14138  Mask.push_back(Idx);
14139  continue;
14140  }
14141 
14142  SDValue CurrentVec;
14143  if (Idx < (int)NumElts) {
14144  // This shuffle index refers to the inner shuffle N0. Lookup the inner
14145  // shuffle mask to identify which vector is actually referenced.
14146  Idx = OtherSV->getMaskElt(Idx);
14147  if (Idx < 0) {
14148  // Propagate Undef.
14149  Mask.push_back(Idx);
14150  continue;
14151  }
14152 
14153  CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
14154  : OtherSV->getOperand(1);
14155  } else {
14156  // This shuffle index references an element within N1.
14157  CurrentVec = N1;
14158  }
14159 
14160  // Simple case where 'CurrentVec' is UNDEF.
14161  if (CurrentVec.isUndef()) {
14162  Mask.push_back(-1);
14163  continue;
14164  }
14165 
14166  // Canonicalize the shuffle index. We don't know yet if CurrentVec
14167  // will be the first or second operand of the combined shuffle.
14168  Idx = Idx % NumElts;
14169  if (!SV0.getNode() || SV0 == CurrentVec) {
14170  // Ok. CurrentVec is the left hand side.
14171  // Update the mask accordingly.
14172  SV0 = CurrentVec;
14173  Mask.push_back(Idx);
14174  continue;
14175  }
14176 
14177  // Bail out if we cannot convert the shuffle pair into a single shuffle.
14178  if (SV1.getNode() && SV1 != CurrentVec)
14179  return SDValue();
14180 
14181  // Ok. CurrentVec is the right hand side.
14182  // Update the mask accordingly.
14183  SV1 = CurrentVec;
14184  Mask.push_back(Idx + NumElts);
14185  }
14186 
14187  // Check if all indices in Mask are Undef. If so, propagate Undef.
14188  bool isUndefMask = true;
14189  for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
14190  isUndefMask &= Mask[i] < 0;
14191 
14192  if (isUndefMask)
14193  return DAG.getUNDEF(VT);
14194 
14195  if (!SV0.getNode())
14196  SV0 = DAG.getUNDEF(VT);
14197  if (!SV1.getNode())
14198  SV1 = DAG.getUNDEF(VT);
14199 
14200  // Avoid introducing shuffles with illegal mask.
14201  if (!TLI.isShuffleMaskLegal(Mask, VT)) {
14202  ShuffleVectorSDNode::commuteMask(Mask);
14203 
14204  if (!TLI.isShuffleMaskLegal(Mask, VT))
14205  return SDValue();
14206 
14207  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
14208  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
14209  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
14210  std::swap(SV0, SV1);
14211  }
14212 
14213  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
14214  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
14215  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
14216  return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
14217  }
14218 
14219  return SDValue();
14220 }
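// A worked (hypothetical) instance of the shuffle-merge fold above:
//   vector_shuffle<1,0,3,2> (vector_shuffle<0,5,2,7> A, B), C
// never reads C, and resolving each index through the inner mask gives:
//   vector_shuffle<1,4,3,6> B, A
// (B becomes SV0 because the first outer index lands in B).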
14221 
14222 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
14223  SDValue InVal = N->getOperand(0);
14224  EVT VT = N->getValueType(0);
14225 
14226  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
14227  // with a VECTOR_SHUFFLE.
14228  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14229  SDValue InVec = InVal->getOperand(0);
14230  SDValue EltNo = InVal->getOperand(1);
14231 
14232  // FIXME: We could support implicit truncation if the shuffle can be
14233  // scaled to a smaller vector scalar type.
14234  ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
14235  if (C0 && VT == InVec.getValueType() &&
14236  VT.getScalarType() == InVal.getValueType()) {
14237  SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
14238  int Elt = C0->getZExtValue();
14239  NewMask[0] = Elt;
14240 
14241  if (TLI.isShuffleMaskLegal(NewMask, VT))
14242  return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
14243  NewMask);
14244  }
14245  }
14246 
14247  return SDValue();
14248 }
14249 
14250 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
14251  EVT VT = N->getValueType(0);
14252  SDValue N0 = N->getOperand(0);
14253  SDValue N1 = N->getOperand(1);
14254  SDValue N2 = N->getOperand(2);
14255 
14256  // Combine INSERT_SUBVECTORs where we are inserting to the same index.
14257  // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
14258  // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
14259  if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
14260  N0.getOperand(1).getValueType() == N1.getValueType() &&
14261  N0.getOperand(2) == N2)
14262  return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
14263  N1, N2);
14264 
14265  if (N0.getValueType() != N1.getValueType())
14266  return SDValue();
14267 
14268  // If the input vector is a concatenation, and the insert replaces
14269  // one of the halves, we can optimize into a single concat_vectors.
14270  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
14271  N2.getOpcode() == ISD::Constant) {
14272  APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
14273 
14274  // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
14275  // (concat_vectors Z, Y)
14276  if (InsIdx == 0)
14277  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
14278  N0.getOperand(1));
14279 
14280  // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
14281  // (concat_vectors X, Z)
14282  if (InsIdx == VT.getVectorNumElements() / 2)
14283  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
14284  N1);
14285  }
14286 
14287  return SDValue();
14288 }
14289 
14290 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
14291  SDValue N0 = N->getOperand(0);
14292 
14293  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
14294  if (N0->getOpcode() == ISD::FP16_TO_FP)
14295  return N0->getOperand(0);
14296 
14297  return SDValue();
14298 }
14299 
14300 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
14301  SDValue N0 = N->getOperand(0);
14302 
14303  // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
14304  if (N0->getOpcode() == ISD::AND) {
14305  ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
14306  if (AndConst && AndConst->getAPIntValue() == 0xffff) {
14307  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
14308  N0.getOperand(0));
14309  }
14310  }
14311 
14312  return SDValue();
14313 }
14314 
14315 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
14316 /// with the destination vector and a zero vector.
14317 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
14318 /// vector_shuffle V, Zero, <0, 4, 2, 4>
14319 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
14320  EVT VT = N->getValueType(0);
14321  SDValue LHS = N->getOperand(0);
14322  SDValue RHS = N->getOperand(1);
14323  SDLoc DL(N);
14324 
14325  // Make sure we're not running after operation legalization where it
14326  // may have custom lowered the vector shuffles.
14327  if (LegalOperations)
14328  return SDValue();
14329 
14330  if (N->getOpcode() != ISD::AND)
14331  return SDValue();
14332 
14333  if (RHS.getOpcode() == ISD::BITCAST)
14334  RHS = RHS.getOperand(0);
14335 
14336  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
14337  return SDValue();
14338 
14339  EVT RVT = RHS.getValueType();
14340  unsigned NumElts = RHS.getNumOperands();
14341 
14342  // Attempt to create a valid clear mask, splitting the mask into
14343  // sub elements and checking to see if each is
14344  // all zeros or all ones - suitable for shuffle masking.
14345  auto BuildClearMask = [&](int Split) {
14346  int NumSubElts = NumElts * Split;
14347  int NumSubBits = RVT.getScalarSizeInBits() / Split;
14348 
14349  SmallVector<int, 8> Indices;
14350  for (int i = 0; i != NumSubElts; ++i) {
14351  int EltIdx = i / Split;
14352  int SubIdx = i % Split;
14353  SDValue Elt = RHS.getOperand(EltIdx);
14354  if (Elt.isUndef()) {
14355  Indices.push_back(-1);
14356  continue;
14357  }
14358 
14359  APInt Bits;
14360  if (isa<ConstantSDNode>(Elt))
14361  Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
14362  else if (isa<ConstantFPSDNode>(Elt))
14363  Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
14364  else
14365  return SDValue();
14366 
14367  // Extract the sub element from the constant bit mask.
14368  if (DAG.getDataLayout().isBigEndian()) {
14369  Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
14370  } else {
14371  Bits = Bits.lshr(SubIdx * NumSubBits);
14372  }
14373 
14374  if (Split > 1)
14375  Bits = Bits.trunc(NumSubBits);
14376 
14377  if (Bits.isAllOnesValue())
14378  Indices.push_back(i);
14379  else if (Bits == 0)
14380  Indices.push_back(i + NumSubElts);
14381  else
14382  return SDValue();
14383  }
14384 
14385  // Let's see if the target supports this vector_shuffle.
14386  EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
14387  EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
14388  if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
14389  return SDValue();
14390 
14391  SDValue Zero = DAG.getConstant(0, DL, ClearVT);
14392  return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
14393  DAG.getBitcast(ClearVT, LHS),
14394  Zero, Indices));
14395  };
14396 
14397  // Determine maximum split level (byte level masking).
14398  int MaxSplit = 1;
14399  if (RVT.getScalarSizeInBits() % 8 == 0)
14400  MaxSplit = RVT.getScalarSizeInBits() / 8;
14401 
14402  for (int Split = 1; Split <= MaxSplit; ++Split)
14403  if (RVT.getScalarSizeInBits() % Split == 0)
14404  if (SDValue S = BuildClearMask(Split))
14405  return S;
14406 
14407  return SDValue();
14408 }
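// For illustration of the sub-element splitting above (hypothetical
// values, little-endian assumed):
//   AND v2i16:X, <0x00ff, 0x00ff>
// has no all-ones or all-zeros i16 element, but at byte granularity
// (Split == 2) every i8 sub-element is all-ones or all-zeros, giving:
//   bitcast v2i16 (vector_shuffle<0,5,2,7> (bitcast v4i8 X), (v4i8 zero))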
14409 
14410 /// Visit a binary vector operation, like ADD.
14411 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
14412  assert(N->getValueType(0).isVector() &&
14413  "SimplifyVBinOp only works on vectors!");
14414 
14415  SDValue LHS = N->getOperand(0);
14416  SDValue RHS = N->getOperand(1);
14417  SDValue Ops[] = {LHS, RHS};
14418 
14419  // See if we can constant fold the vector operation.
14420  if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
14421  N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
14422  return Fold;
14423 
14424  // Try to convert a constant mask AND into a shuffle clear mask.
14425  if (SDValue Shuffle = XformToShuffleWithZero(N))
14426  return Shuffle;
14427 
14428  // Type legalization might introduce new shuffles in the DAG.
14429  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
14430  // -> (shuffle (VBinOp (A, B)), Undef, Mask).
14431  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
14432  isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
14433  LHS.getOperand(1).isUndef() &&
14434  RHS.getOperand(1).isUndef()) {
14435  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
14436  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
14437 
14438  if (SVN0->getMask().equals(SVN1->getMask())) {
14439  EVT VT = N->getValueType(0);
14440  SDValue UndefVector = LHS.getOperand(1);
14441  SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
14442  LHS.getOperand(0), RHS.getOperand(0),
14443  N->getFlags());
14444  AddUsersToWorklist(N);
14445  return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
14446  SVN0->getMask());
14447  }
14448  }
14449 
14450  return SDValue();
14451 }
14452 
14453 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
14454  SDValue N2) {
14455  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
14456 
14457  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
14458  cast<CondCodeSDNode>(N0.getOperand(2))->get());
14459 
14460  // If we got a simplified select_cc node back from SimplifySelectCC, then
14461  // break it down into a new SETCC node, and a new SELECT node, and then return
14462  // the SELECT node, since we were called with a SELECT node.
14463  if (SCC.getNode()) {
14464  // Check to see if we got a select_cc back (to turn into setcc/select).
14465  // Otherwise, just return whatever node we got back, like fabs.
14466  if (SCC.getOpcode() == ISD::SELECT_CC) {
14467  SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
14468  N0.getValueType(),
14469  SCC.getOperand(0), SCC.getOperand(1),
14470  SCC.getOperand(4));
14471  AddToWorklist(SETCC.getNode());
14472  return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
14473  SCC.getOperand(2), SCC.getOperand(3));
14474  }
14475 
14476  return SCC;
14477  }
14478  return SDValue();
14479 }
14480 
14481 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
14482 /// being selected between, see if we can simplify the select. Callers of this
14483 /// should assume that TheSelect is deleted if this returns true. As such, they
14484 /// should return the appropriate thing (e.g. the node) back to the top-level of
14485 /// the DAG combiner loop to avoid it being looked at.
14486 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
14487  SDValue RHS) {
14488 
14489  // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14490  // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
14491  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
14492  if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
14493  // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
14494  SDValue Sqrt = RHS;
14495  ISD::CondCode CC;
14496  SDValue CmpLHS;
14497  const ConstantFPSDNode *Zero = nullptr;
14498 
14499  if (TheSelect->getOpcode() == ISD::SELECT_CC) {
14500  CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
14501  CmpLHS = TheSelect->getOperand(0);
14502  Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
14503  } else {
14504  // SELECT or VSELECT
14505  SDValue Cmp = TheSelect->getOperand(0);
14506  if (Cmp.getOpcode() == ISD::SETCC) {
14507  CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
14508  CmpLHS = Cmp.getOperand(0);
14509  Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
14510  }
14511  }
14512  if (Zero && Zero->isZero() &&
14513  Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
14514  CC == ISD::SETULT || CC == ISD::SETLT)) {
14515  // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
14516  CombineTo(TheSelect, Sqrt);
14517  return true;
14518  }
14519  }
14520  }
14521  // Cannot simplify select with vector condition
14522  if (TheSelect->getOperand(0).getValueType().isVector()) return false;
14523 
14524  // If this is a select from two identical things, try to pull the operation
14525  // through the select.
14526  if (LHS.getOpcode() != RHS.getOpcode() ||
14527  !LHS.hasOneUse() || !RHS.hasOneUse())
14528  return false;
14529 
14530  // If this is a load and the token chain is identical, replace the select
14531  // of two loads with a load through a select of the address to load from.
14532  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
14533  // constants have been dropped into the constant pool.
14534  if (LHS.getOpcode() == ISD::LOAD) {
14535  LoadSDNode *LLD = cast<LoadSDNode>(LHS);
14536  LoadSDNode *RLD = cast<LoadSDNode>(RHS);
14537 
14538  // Token chains must be identical.
14539  if (LHS.getOperand(0) != RHS.getOperand(0) ||
14540  // Do not let this transformation reduce the number of volatile loads.
14541  LLD->isVolatile() || RLD->isVolatile() ||
14542  // FIXME: If either is a pre/post inc/dec load,
14543  // we'd need to split out the address adjustment.
14544  LLD->isIndexed() || RLD->isIndexed() ||
14545  // If this is an EXTLOAD, the VT's must match.
14546  LLD->getMemoryVT() != RLD->getMemoryVT() ||
14547  // If this is an EXTLOAD, the kind of extension must match.
14548  (LLD->getExtensionType() != RLD->getExtensionType() &&
14549  // The only exception is if one of the extensions is anyext.
14550  LLD->getExtensionType() != ISD::EXTLOAD &&
14551  RLD->getExtensionType() != ISD::EXTLOAD) ||
14552  // FIXME: this discards src value information. This is
14553  // over-conservative. It would be beneficial to be able to remember
14554  // both potential memory locations. Since we are discarding
14555  // src value info, don't do the transformation if the memory
14556  // locations are not in the default address space.
14557  LLD->getPointerInfo().getAddrSpace() != 0 ||
14558  RLD->getPointerInfo().getAddrSpace() != 0 ||
14559  !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
14560  LLD->getBasePtr().getValueType()))
14561  return false;
14562 
14563  // Check that the select condition doesn't reach either load. If so,
14564  // folding this will induce a cycle into the DAG. If not, this is safe to
14565  // xform, so create a select of the addresses.
14566  SDValue Addr;
14567  if (TheSelect->getOpcode() == ISD::SELECT) {
14568  SDNode *CondNode = TheSelect->getOperand(0).getNode();
14569  if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
14570  (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
14571  return false;
14572  // The loads must not depend on one another.
14573  if (LLD->isPredecessorOf(RLD) ||
14574  RLD->isPredecessorOf(LLD))
14575  return false;
14576  Addr = DAG.getSelect(SDLoc(TheSelect),
14577  LLD->getBasePtr().getValueType(),
14578  TheSelect->getOperand(0), LLD->getBasePtr(),
14579  RLD->getBasePtr());
14580  } else { // Otherwise SELECT_CC
14581  SDNode *CondLHS = TheSelect->getOperand(0).getNode();
14582  SDNode *CondRHS = TheSelect->getOperand(1).getNode();
14583 
14584  if ((LLD->hasAnyUseOfValue(1) &&
14585  (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
14586  (RLD->hasAnyUseOfValue(1) &&
14587  (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
14588  return false;
14589 
14590  Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
14591  LLD->getBasePtr().getValueType(),
14592  TheSelect->getOperand(0),
14593  TheSelect->getOperand(1),
14594  LLD->getBasePtr(), RLD->getBasePtr(),
14595  TheSelect->getOperand(4));
14596  }
14597 
14598  SDValue Load;
14599  // It is safe to replace the two loads if they have different alignments,
14600  // but the new load must be the minimum (most restrictive) alignment of the
14601  // inputs.
14602  unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
14603  MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
14604  if (!RLD->isInvariant())
14605  MMOFlags &= ~MachineMemOperand::MOInvariant;
14606  if (!RLD->isDereferenceable())
14607  MMOFlags &= ~MachineMemOperand::MODereferenceable;
14608  if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
14609  // FIXME: Discards pointer and AA info.
14610  Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
14611  LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
14612  MMOFlags);
14613  } else {
14614  // FIXME: Discards pointer and AA info.
14615  Load = DAG.getExtLoad(
14616  LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
14617  : LLD->getExtensionType(),
14618  SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
14619  MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
14620  }
14621 
14622  // Users of the select now use the result of the load.
14623  CombineTo(TheSelect, Load);
14624 
14625  // Users of the old loads now use the new load's chain. We know the
14626  // old-load value is dead now.
14627  CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
14628  CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
14629  return true;
14630  }
14631 
14632  return false;
14633 }
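// For illustration, the load-select fold above rewrites (hypothetical
// pointers p and q):
//   select C, (load p), (load q)
// as a single load through a select of the addresses:
//   load (select C, p, q)
// provided both loads share a chain and the condition cannot reach either
// load through the DAG.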
14634 
14635 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
14636 /// bitwise 'and'.
14637 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
14638  SDValue N1, SDValue N2, SDValue N3,
14639  ISD::CondCode CC) {
14640  // If this is a select where the false operand is zero and the compare is a
14641  // check of the sign bit, see if we can perform the "gzip trick":
14642  // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
14643  // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
14644  EVT XType = N0.getValueType();
14645  EVT AType = N2.getValueType();
14646  if (!isNullConstant(N3) || !XType.bitsGE(AType))
14647  return SDValue();
14648 
14649  // If the comparison is testing for a positive value, we have to invert
14650  // the sign bit mask, so only do that transform if the target has a bitwise
14651  // 'and not' instruction (the invert is free).
14652  if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
14653  // (X > -1) ? A : 0
14654  // (X > 0) ? X : 0 <-- This is canonical signed max.
14655  if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
14656  return SDValue();
14657  } else if (CC == ISD::SETLT) {
14658  // (X < 0) ? A : 0
14659  // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
14660  if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
14661  return SDValue();
14662  } else {
14663  return SDValue();
14664  }
14665 
14666  // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
14667  // constant.
14668  EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
14669  auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
14670  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
14671  unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
14672  SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
14673  SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
14674  AddToWorklist(Shift.getNode());
14675 
14676  if (XType.bitsGT(AType)) {
14677  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
14678  AddToWorklist(Shift.getNode());
14679  }
14680 
14681  if (CC == ISD::SETGT)
14682  Shift = DAG.getNOT(DL, Shift, AType);
14683 
14684  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
14685  }
14686 
14687  SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
14688  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
14689  AddToWorklist(Shift.getNode());
14690 
14691  if (XType.bitsGT(AType)) {
14692  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
14693  AddToWorklist(Shift.getNode());
14694  }
14695 
14696  if (CC == ISD::SETGT)
14697  Shift = DAG.getNOT(DL, Shift, AType);
14698 
14699  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
14700 }
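// Two worked (hypothetical, i32) instances of the folds above:
//   select_cc setlt X, 0, A, 0 -> and (sra X, 31), A
//   select_cc setlt X, 0, 8, 0 -> and (srl X, 28), 8
// In the second case A == 8 is a single bit, so ShCt == 32 - 3 - 1 == 28
// moves the sign bit directly into bit position 3.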
14701 
14702 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
14703 /// where 'cond' is the comparison specified by CC.
14704 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
14705  SDValue N2, SDValue N3, ISD::CondCode CC,
14706  bool NotExtCompare) {
14707  // (x ? y : y) -> y.
14708  if (N2 == N3) return N2;
14709 
14710  EVT VT = N2.getValueType();
14711  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
14712  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
14713 
14714  // Determine if the condition we're dealing with is constant
14715  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
14716  N0, N1, CC, DL, false);
14717  if (SCC.getNode()) AddToWorklist(SCC.getNode());
14718 
14719  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
14720  // fold select_cc true, x, y -> x
14721  // fold select_cc false, x, y -> y
14722  return !SCCC->isNullValue() ? N2 : N3;
14723  }
14724 
14725  // Check to see if we can simplify the select into an fabs node
14726  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
14727  // Allow either -0.0 or 0.0
14728  if (CFP->isZero()) {
14729  // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
14730  if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
14731  N0 == N2 && N3.getOpcode() == ISD::FNEG &&
14732  N2 == N3.getOperand(0))
14733  return DAG.getNode(ISD::FABS, DL, VT, N0);
14734 
14735  // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
14736  if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
14737  N0 == N3 && N2.getOpcode() == ISD::FNEG &&
14738  N2.getOperand(0) == N3)
14739  return DAG.getNode(ISD::FABS, DL, VT, N3);
14740  }
14741  }
14742 
14743  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
14744  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
14745  // in it. This is a win when the constant is not otherwise available because
14746  // it replaces two constant pool loads with one. We only do this if the FP
14747  // type is known to be legal, because if it isn't, then we are before legalize
14748  // types and we want the other legalization to happen first (e.g. to avoid
14749  // messing with soft float) and if the ConstantFP is not legal, because if
14750  // it is legal, we may not need to store the FP constant in a constant pool.
14751  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
14752  if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
14753  if (TLI.isTypeLegal(N2.getValueType()) &&
14754  (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
14755  TargetLowering::Legal &&
14756  !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
14757  !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
14758  // If both constants have multiple uses, then we won't need to do an
14759  // extra load, they are likely around in registers for other users.
14760  (TV->hasOneUse() || FV->hasOneUse())) {
14761  Constant *Elts[] = {
14762  const_cast<ConstantFP*>(FV->getConstantFPValue()),
14763  const_cast<ConstantFP*>(TV->getConstantFPValue())
14764  };
14765  Type *FPTy = Elts[0]->getType();
14766  const DataLayout &TD = DAG.getDataLayout();
14767 
14768  // Create a ConstantArray of the two constants.
14769  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
14770  SDValue CPIdx =
14771  DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
14772  TD.getPrefTypeAlignment(FPTy));
14773  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
14774 
14775  // Get the offsets to the 0 and 1 element of the array so that we can
14776  // select between them.
14777  SDValue Zero = DAG.getIntPtrConstant(0, DL);
14778  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
14779  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
14780 
14781  SDValue Cond = DAG.getSetCC(DL,
14782  getSetCCResultType(N0.getValueType()),
14783  N0, N1, CC);
14784  AddToWorklist(Cond.getNode());
14785  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
14786  Cond, One, Zero);
14787  AddToWorklist(CstOffset.getNode());
14788  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
14789  CstOffset);
14790  AddToWorklist(CPIdx.getNode());
14791  return DAG.getLoad(
14792  TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
14793  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
14794  Alignment);
14795  }
14796  }
14797 
14798  if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
14799  return V;
14800 
14801  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
14802  // where y has a single bit set.
14803  // In plain terms, we can turn the SELECT_CC into an AND
14804  // when the condition can be materialized as an all-ones register. Any
14805  // single bit-test can be materialized as an all-ones register with
14806  // shift-left and shift-right-arith.
14807  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
14808  N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
14809  SDValue AndLHS = N0->getOperand(0);
14810  ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14811  if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
14812  // Shift the tested bit over the sign bit.
14813  const APInt &AndMask = ConstAndRHS->getAPIntValue();
14814  SDValue ShlAmt =
14815  DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
14816  getShiftAmountTy(AndLHS.getValueType()));
14817  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
14818 
14819  // Now arithmetic right shift it all the way over, so the result is either
14820  // all-ones, or zero.
14821  SDValue ShrAmt =
14822  DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
14823  getShiftAmountTy(Shl.getValueType()));
14824  SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
14825 
14826  return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
14827  }
14828  }
14829 
14830  // fold select C, 16, 0 -> shl C, 4
14831  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
14832  TLI.getBooleanContents(N0.getValueType()) ==
14833  TargetLowering::ZeroOrOneBooleanContent) {
14834 
14835  // If the caller doesn't want us to simplify this into a zext of a compare,
14836  // don't do it.
14837  if (NotExtCompare && N2C->isOne())
14838  return SDValue();
14839 
14840  // Get a SetCC of the condition
14841  // NOTE: Don't create a SETCC if it's not legal on this target.
14842  if (!LegalOperations ||
14843  TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(N0.getValueType()))) {
14844  SDValue Temp, SCC;
14845  // cast from setcc result type to select result type
14846  if (LegalTypes) {
14847  SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
14848  N0, N1, CC);
14849  if (N2.getValueType().bitsLT(SCC.getValueType()))
14850  Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
14851  N2.getValueType());
14852  else
14853  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
14854  N2.getValueType(), SCC);
14855  } else {
14856  SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
14857  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
14858  N2.getValueType(), SCC);
14859  }
14860 
14861  AddToWorklist(SCC.getNode());
14862  AddToWorklist(Temp.getNode());
14863 
14864  if (N2C->isOne())
14865  return Temp;
14866 
14867  // shl setcc result by log2 n2c
14868  return DAG.getNode(
14869  ISD::SHL, DL, N2.getValueType(), Temp,
14870  DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
14871  getShiftAmountTy(Temp.getValueType())));
14872  }
14873  }
14874 
14875  // Check to see if this is an integer abs.
14876  // select_cc setg[te] X, 0, X, -X ->
14877  // select_cc setgt X, -1, X, -X ->
14878  // select_cc setl[te] X, 0, -X, X ->
14879  // select_cc setlt X, 1, -X, X ->
14880  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
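  // Worked example (editor's note) for i32 X = -5:
  //   Y = sra(-5, 31) = -1; add(X, Y) = -6; xor(-6, -1) = 5.
  // For X = 5: Y = 0, add = 5, xor = 5. Both paths yield |X|.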
14881  if (N1C) {
14882  ConstantSDNode *SubC = nullptr;
14883  if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
14884  (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
14885  N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
14886  SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
14887  else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
14888  (N1C->isOne() && CC == ISD::SETLT)) &&
14889  N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
14890  SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
14891 
14892  EVT XType = N0.getValueType();
14893  if (SubC && SubC->isNullValue() && XType.isInteger()) {
14894  SDLoc DL(N0);
14895  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
14896  N0,
14897  DAG.getConstant(XType.getSizeInBits() - 1, DL,
14898  getShiftAmountTy(N0.getValueType())));
14899  SDValue Add = DAG.getNode(ISD::ADD, DL,
14900  XType, N0, Shift);
14901  AddToWorklist(Shift.getNode());
14902  AddToWorklist(Add.getNode());
14903  return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
14904  }
14905  }
14906 
14907  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
14908  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
14909  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
14910  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
14911  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
14912  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
14913  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
14914  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
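  // E.g. (editor's illustration) for i32:
  //   (select_cc seteq X, 0, 32, (ctlz_zero_undef X)) -> (ctlz X)
  // because ISD::CTLZ is defined to return the bit width when X == 0, which
  // is exactly the value the select supplies on that path.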
14915  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
14916  SDValue ValueOnZero = N2;
14917  SDValue Count = N3;
14918  // If the condition is NE instead of EQ, swap the operands.
14919  if (CC == ISD::SETNE)
14920  std::swap(ValueOnZero, Count);
14921  // Check if the value on zero is a constant equal to the bits in the type.
14922  if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
14923  if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
14924  // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
14925  // legal, combine to just cttz.
14926  if ((Count.getOpcode() == ISD::CTTZ ||
14927  Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
14928  N0 == Count.getOperand(0) &&
14929  (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
14930  return DAG.getNode(ISD::CTTZ, DL, VT, N0);
14931  // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
14932  // legal, combine to just ctlz.
14933  if ((Count.getOpcode() == ISD::CTLZ ||
14934  Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
14935  N0 == Count.getOperand(0) &&
14936  (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
14937  return DAG.getNode(ISD::CTLZ, DL, VT, N0);
14938  }
14939  }
14940  }
14941 
14942  return SDValue();
14943 }
14944 
14945 /// This is a stub for TargetLowering::SimplifySetCC.
14946 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
14947  ISD::CondCode Cond, const SDLoc &DL,
14948  bool foldBooleans) {
14949  TargetLowering::DAGCombinerInfo
14950  DagCombineInfo(DAG, Level, false, this);
14951  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
14952 }
14953 
14954 /// Given an ISD::SDIV node expressing a divide by constant, return
14955 /// a DAG expression to select that will generate the same value by multiplying
14956 /// by a magic number.
14957 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
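/// For example (editor's sketch using the well-known 32-bit constants; not
/// from the original source), "x sdiv 7" becomes roughly:
///   q = mulhs(x, 0x92492493); q = add(q, x);
///   q = sra(q, 2);            q = sub(q, sra(x, 31));
/// The final subtract adds 1 for negative x so the result truncates toward
/// zero, matching sdiv semantics.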
14958 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
14959  // When optimizing for minimum size, we don't want to expand a div to a mul
14960  // and a shift.
14961  if (DAG.getMachineFunction().getFunction()->optForMinSize())
14962  return SDValue();
14963 
14964  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14965  if (!C)
14966  return SDValue();
14967 
14968  // Avoid division by zero.
14969  if (C->isNullValue())
14970  return SDValue();
14971 
14972  std::vector<SDNode*> Built;
14973  SDValue S =
14974  TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
14975 
14976  for (SDNode *N : Built)
14977  AddToWorklist(N);
14978  return S;
14979 }
14980 
14981 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
14982 /// DAG expression that will generate the same value by right shifting.
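/// E.g. (editor's sketch) for i32 "x sdiv 8":
///   sign = sra(x, 31)            ; 0 or -1
///   bias = srl(sign, 29)         ; 0 or 7
///   q    = sra(add(x, bias), 3)
/// The bias makes the arithmetic shift round toward zero for negative x.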
14983 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
14984  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
14985  if (!C)
14986  return SDValue();
14987 
14988  // Avoid division by zero.
14989  if (C->isNullValue())
14990  return SDValue();
14991 
14992  std::vector<SDNode *> Built;
14993  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
14994 
14995  for (SDNode *N : Built)
14996  AddToWorklist(N);
14997  return S;
14998 }
14999 
15000 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
15001 /// expression that will generate the same value by multiplying by a magic
15002 /// number.
15003 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
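/// E.g. (editor's sketch) for i32 "x udiv 7", whose magic constant needs the
/// add-back fixup:
///   t = mulhu(x, 0x24924925);
///   q = srl(add(srl(sub(x, t), 1), t), 2);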
15004 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
15005  // When optimizing for minimum size, we don't want to expand a div to a mul
15006  // and a shift.
15007  if (DAG.getMachineFunction().getFunction()->optForMinSize())
15008  return SDValue();
15009 
15010  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
15011  if (!C)
15012  return SDValue();
15013 
15014  // Avoid division by zero.
15015  if (C->isNullValue())
15016  return SDValue();
15017 
15018  std::vector<SDNode*> Built;
15019  SDValue S =
15020  TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
15021 
15022  for (SDNode *N : Built)
15023  AddToWorklist(N);
15024  return S;
15025 }
15026 
15027 /// Determines the LogBase2 value for a non-null input value using the
15028 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
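/// E.g. (editor's note) for an i32 value 16: ctlz(16) = 27, so
/// LogBase2 = (32 - 1) - 27 = 4. The result is only meaningful when the
/// input is known to be a non-zero power of two.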
15029 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
15030  EVT VT = V.getValueType();
15031  unsigned EltBits = VT.getScalarSizeInBits();
15032  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
15033  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
15034  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
15035  return LogBase2;
15036 }
15037 
15038 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15039 /// For the reciprocal, we need to find the zero of the function:
15040 /// F(X) = A X - 1 [which has a zero at X = 1/A]
15041 /// =>
15042 /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
15043 /// does not require additional intermediate precision]
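/// A minimal scalar sketch of the recurrence (editor's illustration, not
/// part of the combiner):
///   double recip(double A, double Est, int Iters) {
///     for (int i = 0; i < Iters; ++i)
///       Est = Est + Est * (1.0 - A * Est);  // X + X*(1 - A*X)
///     return Est;
///   }
/// Convergence is quadratic: each step roughly doubles the number of correct
/// bits, so a small fixed iteration count suffices.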
15044 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
15045  if (Level >= AfterLegalizeDAG)
15046  return SDValue();
15047 
15048  // TODO: Handle half and/or extended types?
15049  EVT VT = Op.getValueType();
15050  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
15051  return SDValue();
15052 
15053  // If estimates are explicitly disabled for this function, we're done.
15054  MachineFunction &MF = DAG.getMachineFunction();
15055  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
15056  if (Enabled == TLI.ReciprocalEstimate::Disabled)
15057  return SDValue();
15058 
15059  // Estimates may be explicitly enabled for this type with a custom number of
15060  // refinement steps.
15061  int Iterations = TLI.getDivRefinementSteps(VT, MF);
15062  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
15063  AddToWorklist(Est.getNode());
15064 
15065  if (Iterations) {
15066  EVT VT = Op.getValueType();
15067  SDLoc DL(Op);
15068  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
15069 
15070  // Newton iterations: Est = Est + Est (1 - Arg * Est)
15071  for (int i = 0; i < Iterations; ++i) {
15072  SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
15073  AddToWorklist(NewEst.getNode());
15074 
15075  NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
15076  AddToWorklist(NewEst.getNode());
15077 
15078  NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
15079  AddToWorklist(NewEst.getNode());
15080 
15081  Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
15082  AddToWorklist(Est.getNode());
15083  }
15084  }
15085  return Est;
15086  }
15087 
15088  return SDValue();
15089 }
15090 
15091 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15092 /// For the reciprocal sqrt, we need to find the zero of the function:
15093 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
15094 /// =>
15095 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
15096 /// As a result, we precompute A/2 prior to the iteration loop.
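/// Worked example (editor's note): approximating rsqrt(4) from X_0 = 0.4,
/// with A/2 = 2.0:
///   X_1 = 0.4 * (1.5 - 2.0 * 0.4 * 0.4)       = 0.472
///   X_2 = 0.472 * (1.5 - 2.0 * 0.472 * 0.472) ~= 0.4977
/// converging quadratically toward 1/sqrt(4) = 0.5.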
15097 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
15098  unsigned Iterations,
15099  SDNodeFlags *Flags, bool Reciprocal) {
15100  EVT VT = Arg.getValueType();
15101  SDLoc DL(Arg);
15102  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
15103 
15104  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
15105  // this entire sequence requires only one FP constant.
15106  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
15107  AddToWorklist(HalfArg.getNode());
15108 
15109  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
15110  AddToWorklist(HalfArg.getNode());
15111 
15112  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
15113  for (unsigned i = 0; i < Iterations; ++i) {
15114  SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
15115  AddToWorklist(NewEst.getNode());
15116 
15117  NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
15118  AddToWorklist(NewEst.getNode());
15119 
15120  NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
15121  AddToWorklist(NewEst.getNode());
15122 
15123  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
15124  AddToWorklist(Est.getNode());
15125  }
15126 
15127  // If non-reciprocal square root is requested, multiply the result by Arg.
15128  if (!Reciprocal) {
15129  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
15130  AddToWorklist(Est.getNode());
15131  }
15132 
15133  return Est;
15134 }
15135 
15136 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
15137 /// For the reciprocal sqrt, we need to find the zero of the function:
15138 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
15139 /// =>
15140 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
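/// Algebraically this is the same update as the one-constant form above:
///   (-0.5 * X) * (A * X * X - 3.0) = X * (1.5 - 0.5 * A * X * X)
/// but it uses two FP constants (-0.5 and -3.0) instead of deriving A/2 from
/// a single 1.5 constant (editor's note).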
15141 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
15142  unsigned Iterations,
15143  SDNodeFlags *Flags, bool Reciprocal) {
15144  EVT VT = Arg.getValueType();
15145  SDLoc DL(Arg);
15146  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
15147  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
15148 
15149  // This routine must enter the loop below to work correctly
15150  // when (Reciprocal == false).
15151  assert(Iterations > 0);
15152 
15153  // Newton iterations for reciprocal square root:
15154  // E = (E * -0.5) * ((A * E) * E + -3.0)
15155  for (unsigned i = 0; i < Iterations; ++i) {
15156  SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
15157  AddToWorklist(AE.getNode());
15158 
15159  SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
15160  AddToWorklist(AEE.getNode());
15161 
15162  SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
15163  AddToWorklist(RHS.getNode());
15164 
15165  // When calculating a square root at the last iteration build:
15166  // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
15167  // (notice a common subexpression)
15168  SDValue LHS;
15169  if (Reciprocal || (i + 1) < Iterations) {
15170  // RSQRT: LHS = (E * -0.5)
15171  LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
15172  } else {
15173  // SQRT: LHS = (A * E) * -0.5
15174  LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
15175  }
15176  AddToWorklist(LHS.getNode());
15177 
15178  Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
15179  AddToWorklist(Est.getNode());
15180  }
15181 
15182  return Est;
15183 }
15184 
15185 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
15186 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
15187 /// Op can be zero.
15188 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
15189  bool Reciprocal) {
15190  if (Level >= AfterLegalizeDAG)
15191  return SDValue();
15192 
15193  // TODO: Handle half and/or extended types?
15194  EVT VT = Op.getValueType();
15195  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
15196  return SDValue();
15197 
15198  // If estimates are explicitly disabled for this function, we're done.
15199  MachineFunction &MF = DAG.getMachineFunction();
15200  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
15201  if (Enabled == TLI.ReciprocalEstimate::Disabled)
15202  return SDValue();
15203 
15204  // Estimates may be explicitly enabled for this type with a custom number of
15205  // refinement steps.
15206  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
15207 
15208  bool UseOneConstNR = false;
15209  if (SDValue Est =
15210  TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
15211  Reciprocal)) {
15212  AddToWorklist(Est.getNode());
15213 
15214  if (Iterations) {
15215  Est = UseOneConstNR
15216  ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
15217  : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
15218 
15219  if (!Reciprocal) {
15220  // Unfortunately, Est is now NaN if the input was exactly 0.0.
15221  // Select out this case and force the answer to 0.0.
15222  EVT VT = Op.getValueType();
15223  SDLoc DL(Op);
15224 
15225  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
15226  EVT CCVT = getSetCCResultType(VT);
15227  SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
15228  AddToWorklist(ZeroCmp.getNode());
15229 
15230  Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
15231  ZeroCmp, FPZero, Est);
15232  AddToWorklist(Est.getNode());
15233  }
15234  }
15235  return Est;
15236  }
15237 
15238  return SDValue();
15239 }
15240 
15241 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15242  return buildSqrtEstimateImpl(Op, Flags, true);
15243 }
15244 
15245 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
15246  return buildSqrtEstimateImpl(Op, Flags, false);
15247 }
15248 
15249 /// Return true if base is a frame index, which is known not to alias with
15250 /// anything but itself. Provides base object and offset as results.
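/// E.g. (editor's illustration): for Ptr = (add FrameIndex<0>, 16) this sets
/// Base = FrameIndex<0> and Offset = 16 and returns true. For a
/// GlobalAddress base it fills in GV but returns false, since the same
/// global may be represented by several nodes with different offsets.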
15251 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
15252  const GlobalValue *&GV, const void *&CV) {
15253  // Assume it is a primitive operation.
15254  Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
15255 
15256  // If it's adding a simple constant then integrate the offset.
15257  if (Base.getOpcode() == ISD::ADD) {
15258  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
15259  Base = Base.getOperand(0);
15260  Offset += C->getZExtValue();
15261  }
15262  }
15263 
15264  // Return the underlying GlobalValue, and update the Offset. Return false
15265  // for GlobalAddressSDNode since the same GlobalAddress may be represented
15266  // by multiple nodes with different offsets.
15267  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
15268  GV = G->getGlobal();
15269  Offset += G->getOffset();
15270  return false;
15271  }
15272 
15273  // Return the underlying Constant value, and update the Offset. Return false
15274  // for ConstantSDNodes since the same constant pool entry may be represented
15275  // by multiple nodes with different offsets.
15276  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
15277  CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
15278  : (const void *)C->getConstVal();
15279  Offset += C->getOffset();
15280  return false;
15281  }
15282  // If it's any of the following then it can't alias with anything but itself.
15283  return isa<FrameIndexSDNode>(Base);
15284 }
15285 
15286 /// Return true if there is any possibility that the two addresses overlap.
15287 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
15288  // If they are the same then they must be aliases.
15289  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
15290 
15291  // If they are both volatile then they cannot be reordered.
15292  if (Op0->isVolatile() && Op1->isVolatile()) return true;
15293 
15294  // If one operation reads from invariant memory and the other may store, they
15295  // cannot alias. These should really be checking the equivalent of mayWrite,
15296  // but it only matters for memory nodes other than load/store.
15297  if (Op0->isInvariant() && Op1->writeMem())
15298  return false;
15299 
15300  if (Op1->isInvariant() && Op0->writeMem())
15301  return false;
15302 
15303  // Gather base node and offset information.
15304  SDValue Base1, Base2;
15305  int64_t Offset1, Offset2;
15306  const GlobalValue *GV1, *GV2;
15307  const void *CV1, *CV2;
15308  bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
15309  Base1, Offset1, GV1, CV1);
15310  bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
15311  Base2, Offset2, GV2, CV2);
15312 
15313  // If they have the same base address then check to see if they overlap.
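  // (Editor's note) Two byte ranges [Offset1, Offset1+Size1) and
  // [Offset2, Offset2+Size2) overlap unless one ends at or before the other
  // begins; the return below is the negation of that disjointness test.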
15314  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
15315  return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
15316  (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
15317 
15318  // It is possible for different frame indices to alias each other, mostly
15319  // when tail call optimization reuses return address slots for arguments.
15320  // To catch this case, look up the actual index of frame indices to compute
15321  // the real alias relationship.
15322  if (isFrameIndex1 && isFrameIndex2) {
15323  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
15324  Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
15325  Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
15326  return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
15327  (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
15328  }
15329 
15330  // Otherwise, if we know what the bases are, and they aren't identical, then
15331  // we know they cannot alias.
15332  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
15333  return false;
15334 
15335  // If SrcValue1 and SrcValue2 are known to have relatively large alignment
15336  // compared to the size and offset of the access, we may be able to prove they
15337  // do not alias. This check is conservative for now to catch cases created by
15338  // splitting vector types.
15339  if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
15340  (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
15341  (Op0->getMemoryVT().getSizeInBits() >> 3 ==
15342  Op1->getMemoryVT().getSizeInBits() >> 3) &&
15343  (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
15344  int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
15345  int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
15346 
15347  // There is no overlap between these relatively aligned accesses of similar
15348  // size, return no alias.
15349  if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
15350  (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
15351  return false;
15352  }
15353 
15354  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
15355  ? CombinerGlobalAA
15356  : DAG.getSubtarget().useAA();
15357 #ifndef NDEBUG
15358  if (CombinerAAOnlyFunc.getNumOccurrences() &&
15359  CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
15360  UseAA = false;
15361 #endif
15362  if (UseAA &&
15363  Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
15364  // Use alias analysis information.
15365  int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
15366  Op1->getSrcValueOffset());
15367  int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
15368  Op0->getSrcValueOffset() - MinOffset;
15369  int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
15370  Op1->getSrcValueOffset() - MinOffset;
15371  AliasResult AAResult =
15372  AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
15373  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
15374  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
15375  UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
15376  if (AAResult == NoAlias)
15377  return false;
15378  }
15379 
15380  // Otherwise we have to assume they alias.
15381  return true;
15382 }
15383 
15384 /// Walk up chain skipping non-aliasing memory nodes,
15385 /// looking for aliasing nodes and adding them to the Aliases vector.
15386 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
15387  SmallVectorImpl<SDValue> &Aliases) {
15388  SmallVector<SDValue, 8> Chains; // List of chains to visit.
15389  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
15390 
15391  // Get alias information for node.
15392  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
15393 
15394  // Starting off.
15395  Chains.push_back(OriginalChain);
15396  unsigned Depth = 0;
15397 
15398  // Look at each chain and determine if it is an alias. If so, add it to the
15399  // aliases list. If not, then continue up the chain looking for the next
15400  // candidate.
15401  while (!Chains.empty()) {
15402  SDValue Chain = Chains.pop_back_val();
15403 
15404  // For TokenFactor nodes, look at each operand and only continue up the
15405  // chain until we reach the depth limit.
15406  //
15407  // FIXME: The depth check could be made to return the last non-aliasing
15408  // chain we found before we hit a tokenfactor rather than the original
15409  // chain.
15410  if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
15411  Aliases.clear();
15412  Aliases.push_back(OriginalChain);
15413  return;
15414  }
15415 
15416  // Don't bother if we've been here before.
15417  if (!Visited.insert(Chain.getNode()).second)
15418  continue;
15419 
15420  switch (Chain.getOpcode()) {
15421  case ISD::EntryToken:
15422  // Entry token is ideal chain operand, but handled in FindBetterChain.
15423  break;
15424 
15425  case ISD::LOAD:
15426  case ISD::STORE: {
15427  // Get alias information for Chain.
15428  bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
15429  !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
15430 
15431  // If the chain is an alias then stop here.
15432  if (!(IsLoad && IsOpLoad) &&
15433  isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
15434  Aliases.push_back(Chain);
15435  } else {
15436  // Look further up the chain.
15437  Chains.push_back(Chain.getOperand(0));
15438  ++Depth;
15439  }
15440  break;
15441  }
15442 
15443  case ISD::TokenFactor:
15444  // We have to check each of the operands of the token factor for "small"
15445  // token factors, so we queue them up. Adding the operands to the queue
15446  // (stack) in reverse order maintains the original order and increases the
15447  // likelihood that getNode will find a matching token factor (CSE).
15448  if (Chain.getNumOperands() > 16) {
15449  Aliases.push_back(Chain);
15450  break;
15451  }
15452  for (unsigned n = Chain.getNumOperands(); n;)
15453  Chains.push_back(Chain.getOperand(--n));
15454  ++Depth;
15455  break;
15456 
15457  default:
15458  // For all other instructions we will just have to take what we can get.
15459  Aliases.push_back(Chain);
15460  break;
15461  }
15462  }
15463 }
15464 
15465 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
15466 /// (aliasing node.)
15467 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
15468  SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
15469 
15470  // Accumulate all the aliases to this node.
15471  GatherAllAliases(N, OldChain, Aliases);
15472 
15473  // If no operands then chain to entry token.
15474  if (Aliases.size() == 0)
15475  return DAG.getEntryNode();
15476 
15477  // If a single operand then chain to it. We don't need to revisit it.
15478  if (Aliases.size() == 1)
15479  return Aliases[0];
15480 
15481  // Construct a custom tailored token factor.
15482  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
15483 }
15484 
15485 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
15486  // This holds the base pointer, index, and the offset in bytes from the base
15487  // pointer.
15488  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
15489 
15490  // We must have a base and an offset.
15491  if (!BasePtr.Base.getNode())
15492  return false;
15493 
15494  // Do not handle stores to undef base pointers.
15495  if (BasePtr.Base.isUndef())
15496  return false;
15497 
15498  SmallVector<StoreSDNode *, 8> ChainedStores;
15499  ChainedStores.push_back(St);
15500 
15501  // Walk up the chain and look for nodes with offsets from the same
15502  // base pointer. Stop when reaching an instruction of a different kind or
15503  // one with a different base pointer.
15504  StoreSDNode *Index = St;
15505  while (Index) {
15506  // If the chain has more than one use, then we can't reorder the mem ops.
15507  if (Index != St && !SDValue(Index, 0)->hasOneUse())
15508  break;
15509 
15510  if (Index->isVolatile() || Index->isIndexed())
15511  break;
15512 
15513  // Find the base pointer and offset for this memory node.
15514  BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);
15515 
15516  // Check that the base pointer is the same as the original one.
15517  if (!Ptr.equalBaseIndex(BasePtr))
15518  break;
15519 
15520  // Find the next memory operand in the chain. If the next operand in the
15521  // chain is a store then move up and continue the scan with the next
15522  // memory operand. If the next operand is a load, save it and use alias
15523  // information to check if it interferes with anything.
15524  SDNode *NextInChain = Index->getChain().getNode();
15525  while (true) {
15526  if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
15527  // We found a store node. Use it for the next iteration.
15528  if (STn->isVolatile() || STn->isIndexed()) {
15529  Index = nullptr;
15530  break;
15531  }
15532  ChainedStores.push_back(STn);
15533  Index = STn;
15534  break;
15535  } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
15536  NextInChain = Ldn->getChain().getNode();
15537  continue;
15538  } else {
15539  Index = nullptr;
15540  break;
15541  }
15542  }
15543  }
15544 
15545  bool MadeChangeToSt = false;
15546  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
15547 
15548  for (StoreSDNode *ChainedStore : ChainedStores) {
15549  SDValue Chain = ChainedStore->getChain();
15550  SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
15551 
15552  if (Chain != BetterChain) {
15553  if (ChainedStore == St)
15554  MadeChangeToSt = true;
15555  BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
15556  }
15557  }
15558 
15559  // Do all replacements after finding the replacements to make to avoid making
15560  // the chains more complicated by introducing new TokenFactors.
15561  for (auto Replacement : BetterChains)
15562  replaceStoreChain(Replacement.first, Replacement.second);
15563 
15564  return MadeChangeToSt;
15565 }
15566 
15567 /// This is the entry point for the file.
15568 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
15569  CodeGenOpt::Level OptLevel) {
15570  /// This is the main entry point to this class.
15571  DAGCombiner(*this, AA, OptLevel).Run(Level);
15572 }
MachineLoop * L
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
virtual bool hasBitPreservingFPLogic(EVT VT) const
Return true if it is safe to transform an integer-domain bitwise operation into the equivalent floati...
bool hasNoUnsignedWrap() const
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:500
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned char TargetFlags=0)
unsigned getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
bool use_empty() const
Return true if there are no uses of this node.
static MVT getIntegerVT(unsigned BitWidth)
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:888
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:762
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:524
SDValue getValue(unsigned R) const
const SDValue & getValue() const
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
static APInt getSignBit(unsigned BitWidth)
Get the SignBit for a specific bit width.
Definition: APInt.h:451
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position...
bool isTruncatingStore() const
Return true if the op does a truncation before store.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant, which is required to be operand #1) half of the integer or float value specified as operand #0.
Definition: ISDOpcodes.h:184
bool hasNoSignedZeros() const
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
Flags getFlags() const
Return the raw flags of the source value,.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:458
LLVMContext * getContext() const
Definition: SelectionDAG.h:333
LLVMContext & Context
uint64_t Token
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef...
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
DiagnosticInfoOptimizationBase::Argument NV
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:804
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SDValue getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:42
STATISTIC(NumFunctions,"Total number of functions")
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (an vector value) starting with the ...
Definition: ISDOpcodes.h:304
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
bool isKnownNeverNaN(SDValue Op) const
Test whether the given SDValue is known to never be NaN.
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:572
size_t i
bool isNON_TRUNCStore(const SDNode *N)
Returns true if the specified node is a non-truncating store.
bool hasOneUse() const
Return true if there is exactly one use of this node.
virtual bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:313
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDVTList getVTList() const
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:449
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:329
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI)
bool isExtended() const
isExtended - Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:113
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:699
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
Clients of various APIs that cause global effects on the DAG can optionally implement this interface...
Definition: SelectionDAG.h:215
iterator end() const
Definition: ArrayRef.h:130
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:329
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:327
static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, SDValue V, SelectionDAG &DAG)
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:330
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger)
Return the result of a logical AND between different comparisons of identical values: ((X op1 Y) & (X...
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:536
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:237
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
const SDValue & getSrc0() const
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
unsigned getSizeInBits() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
bool isMask(unsigned numBits, const APInt &APIVal)
Definition: APInt.h:1812
unsigned getNumOperands() const
Return the number of values used by this operation.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:736
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1203
unsigned getNumOperands() const
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
The two locations do not alias at all.
Definition: AliasAnalysis.h:79
const SDValue & getOperand(unsigned Num) const
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:817
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:440
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
[US]{MIN/MAX} - Binary minimum or maximum or signed or unsigned integers.
Definition: ISDOpcodes.h:330
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize)
static bool isCommutativeBinOp(unsigned Opcode)
Returns true if the opcode is a commutative binary operation.
virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT) const
Return true if the following transform is beneficial: (store (y (conv x)), y*)) -> (store x...
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
iv Induction Variable Users
Definition: IVUsers.cpp:51
static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N)
void changeSign()
Definition: APFloat.h:975
void reserve(size_type N)
Definition: SmallVector.h:377
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV)
Return true if base is a frame index, which is known not to alias with anything but itself...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
const SDValue & getBasePtr() const
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:299
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:999
SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector< SDNode * > *Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:180
static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo. ...
virtual bool isFPImmLegal(const APFloat &, EVT) const
Returns true if the target can instruction select the specified FP immediate natively.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:78
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:45
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
unsigned getResNo() const
get the index which selects a specific result in the SDNode
void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num)
Like ReplaceAllUsesOfValueWith, but for multiple values at once.
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:212
bool isUndef() const
Return true if the type of the node type undefined.
int64_t getSrcValueOffset() const
bool optForMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:461
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
bool isAllOnesValue() const
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist)
Returns true if N is a predecessor of any node in Worklist.
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:324
static bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N)
copysign(x, fp_extend(y)) -> copysign(x, y) copysign(x, fp_round(y)) -> copysign(x, y)
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:324
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:41
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
static bool isNullConstantOrNullSplatConstant(SDValue N)
A description of a memory reference used in the backend.
bool isRound() const
isRound - Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:188
struct fuzzer::@269 Flags
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:325
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:440
Shift and rotation operations.
Definition: ISDOpcodes.h:344
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:452
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG)
Base class for LoadSDNode and StoreSDNode.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO)
bool HonorSignDependentRoundingFPMath() const
HonorSignDependentRoundingFPMath - Return true if the codegen must assume that the rounding mode of t...
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.cpp:1122
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:190
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
static GCRegistry::Add< StatepointGC > D("statepoint-example","an example strategy for statepoint")
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:170
unsigned logBase2(const APInt &APIVal)
Returns the floor log base 2 of the specified APInt value.
Definition: APInt.h:1834
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
MachinePointerInfo getWithOffset(int64_t O) const
SimpleValueType SimpleTy
APInt bitcastToAPInt() const
Definition: APFloat.h:1012
The memory access is dereferenceable (i.e., doesn't trap).
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
bool bitsGE(EVT VT) const
bitsGE - Return true if this has no less bits than VT.
Definition: ValueTypes.h:206
int getMaskElt(unsigned Idx) const
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT's, which are used to parameterize some operations.
SDValue FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, const SDNodeFlags *Flags=nullptr)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:60
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:405
This class is used to represent an MSTORE node.
APInt lshr(const APInt &LHS, unsigned shiftAmt)
Logical right-shift function.
Definition: APInt.h:1892
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
static bool isSimple(Instruction *I)
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:241
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:850
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
Generate Min/Max node.
static unsigned getAlignment(GlobalVariable *GV)
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:410
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:136
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef< SDValue > Ops, MachineMemOperand *MMO)
APInt zextOrSelf(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1015
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
static bool isAnyConstantBuildVector(const SDNode *N)
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, const SDLoc &dl) const
Try to simplify a setcc built with the specified operands and cc.
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
Function Alias Analysis false
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:73
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1279
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:220
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:737
static GCRegistry::Add< OcamlGC > B("ocaml","ocaml 3.10-compatible GC")
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, bool IsTruncating=false, bool IsCompressing=false)
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
const APInt & getAPIntValue() const
virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:125
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
EVT getMemoryVT() const
Return the type of the in-memory value.
const ConstantInt * getConstantIntValue() const
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:875
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:141
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
bool bitsLE(EVT VT) const
bitsLE - Return true if this has no more bits than VT.
Definition: ValueTypes.h:218
bool isPow2VectorType() const
isPow2VectorType - Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:314
Maximum length of the test input libFuzzer tries to guess a good value based on the corpus and reports it always prefer smaller inputs during the corpus shuffle When libFuzzer itself reports a bug this exit code will be used If indicates the maximal total time in seconds to run the fuzzer minimizes the provided crash input Use with etc Experimental Use value profile to guide fuzzing Number of simultaneous worker processes to run the jobs If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, APInt &KnownZero)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:178
This class is used to represent ISD::STORE nodes.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:453
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
virtual bool hasAndNot(SDValue X) const
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:518
virtual const SelectionDAGTargetInfo * getSelectionDAGInfo() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
bool hasPredecessor(const SDNode *N) const
Return true if N is a predecessor of this node.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector< SDNode * > *Created) const
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators...
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:262
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:395
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI)
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned getStoreSizeInBits() const
getStoreSizeInBits - Return the number of bits overwritten by a store of the specified value type...
Definition: ValueTypes.h:274
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
virtual bool isShuffleMaskLegal(const SmallVectorImpl< int > &, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations, those with specific masks.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
const SDValue & getBasePtr() const
bool isZero() const
Return true if the value is positive or negative zero.
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:111
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:283
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:916
size_t use_size() const
Return the number of uses of this node.
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:399
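The underlying identity is that a power of two has exactly one set bit, so Value & (Value - 1) is zero. Arbitrary example values:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void powerOfTwoExample() {
  assert(llvm::isPowerOf2_32(64));
  assert(!llvm::isPowerOf2_32(0));  // zero is explicitly excluded
  assert(!llvm::isPowerOf2_32(96)); // 0b1100000 has two set bits
}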
MVT - Machine Value Type.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG)
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
constexpr bool isInt(int64_t x)
isInt - Checks if an integer fits into the given bit width.
Definition: MathExtras.h:264
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
bool isNonTemporal() const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together, or eliminating superfluous nodes.
This is an important base class in LLVM.
Definition: Constant.h:42
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:115
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:818
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1321
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:279
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:36
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1947
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:888
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:269
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
unsigned getScalarValueSizeInBits() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
CombineLevel
Definition: DAGCombine.h:16
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:939
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:73
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:654
constexpr bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two > 0 (64 bit edition...
Definition: MathExtras.h:405
This class provides iterator support for SDUse operands that use a specific SDNode.
unsigned getOriginalAlignment() const
Returns alignment and volatility of the memory access.
bool hasAllowReciprocal() const
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:743
uint32_t Offset
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
static EVT getFloatingPointVT(unsigned BitWidth)
getFloatingPointVT - Returns the EVT that represents a floating point type with the given number of b...
Definition: ValueTypes.h:55
EVT getVT() const
virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1255
iterator begin() const
Definition: ArrayRef.h:129
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1119
unsigned getOpcode() const
const SDValue & getBasePtr() const
virtual bool isNarrowingProfitable(EVT, EVT) const
Return true if it's profitable to narrow operations of type VT1 to VT2.
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:80
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1397
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::LoadExtType, bool IsExpanding=false)
bool isVolatile() const
const SDValue & getValue() const
ConstantSDNode * isConstOrConstSplat(SDValue V)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op: at this point we know that only the DemandedMask bits of its result are ever used downstream, which may allow Op to be simplified.
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:350
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
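A hedged sketch of how a combine consults this (assuming an i32-typed SDValue Op and a SelectionDAG &DAG in scope; the fold itself is illustrative):

// If the top 16 bits of Op are already known zero, Op behaves like an
// already-zero-extended i16 value and a following zext can be folded away.
llvm::APInt HighBits = llvm::APInt::getHighBitsSet(32, 16);
if (DAG.MaskedValueIsZero(Op, HighBits)) {
  // ... safe to treat Op as (zext i16) ...
}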
SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector< SDNode * > *Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
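The transformation this enables is the classic magic-number multiply. For flavor, division by 3 on int32_t in plain C++ (a sketch following Hacker's Delight, not LLVM's exact emission):

#include <cstdint>

int32_t sdivByThree(int32_t n) {
  // 0x55555556 == (2^32 + 2) / 3, the signed magic number for divisor 3.
  int32_t q = (int32_t)(((int64_t)n * 0x55555556LL) >> 32); // high half of the product
  return q + ((uint32_t)n >> 31);                           // correction for negative n
}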
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
EVT - Extended Value Type.
Definition: ValueTypes.h:31
uint64_t NextPowerOf2(uint64_t A)
NextPowerOf2 - Returns the next power of two (in 64-bits) that is strictly greater than A...
Definition: MathExtras.h:619
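Note the strictly-greater contract: an exact power of two is bumped to the next one. Arbitrary example values:

#include "llvm/Support/MathExtras.h"
#include <cassert>

void nextPowerOf2Example() {
  assert(llvm::NextPowerOf2(5) == 8);
  assert(llvm::NextPowerOf2(8) == 16); // not 8: the result must exceed the input
}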
iterator erase(const_iterator CI)
Definition: SmallVector.h:431
const SDValue & getMask() const
const APFloat & getValueAPF() const
const ConstantFP * getConstantFPValue() const
bool bitsEq(EVT VT) const
bitsEq - Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:194
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target pairing capabilities and the layout of the slices...
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
This class contains a discriminated union of information about pointers in memory operands...
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:391
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:659
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1118
static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth=0)
Return 1 if we can compute the negated form of the specified expression for the same cost as the expr...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
The memory access writes data.
const SDValue & getOffset() const
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:200
SmallBitVector & reset()
ArrayRef< int > getMask() const
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type...
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.cpp:703
Representation for a specific memory location.
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:689
bool isBuildVectorAllOnes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are ~0 or undef...
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI)
Return true if 'Use' is a load or a store that uses N as its base pointer and that N may be folded in...
int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a division of the given type based on the function's attri...
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:292
void setNoUnsignedWrap(bool b)
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:425
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset=0)
void dump() const
Dump this node, for debugging.
auto find(R &&Range, const T &Val) -> decltype(std::begin(Range))
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:757
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
bool isNegative() const
Definition: APFloat.h:1035
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:408
bool isInvariant() const
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part...
virtual bool isVectorClearMaskLegal(const SmallVectorImpl< int > &, EVT) const
Similar to isShuffleMaskLegal.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:482
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
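A sketch of typical use (assuming an SDValue Op of integer type, an SDLoc DL, a target EVT VT, and a SelectionDAG &DAG in scope): the helper picks ZERO_EXTEND, TRUNCATE, or a no-op based on the relative widths, so callers need not branch on them.

// Whatever integer width Op currently has, Result will have type VT.
SDValue Result = DAG.getZExtOrTrunc(Op, DL, VT);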
unsigned logBase2() const
Definition: APInt.h:1507
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:639
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:566
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
const SDValue & getChain() const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:625
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:347
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:510
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:382
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
bool hasNoSignedWrap() const
Represents one node in the SelectionDAG.
static SDNode * tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes, bool LegalOperations)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
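A one-line illustration for an integer comparison:

// The logical negation of (X < Y) is (X >= Y).
ISD::CondCode Inv = ISD::getSetCCInverse(ISD::SETLT, /*isInteger=*/true); // ISD::SETGE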
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
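A sketch of typical use inside a combine (assuming an SDNode *N and SelectionDAG &DAG in scope, as they are throughout this file):

SDValue V = N->getOperand(0);
// Build ~V as (xor V, -1), reusing V's type and N's debug location.
SDValue NotV = DAG.getNOT(SDLoc(N), V, V.getValueType());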
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:136
int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a square root of the given type based on the function's attribut...
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
static bool isOneConstantOrOneSplatConstant(SDValue N)
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
static std::pair< SDValue, SDValue > SplitVSETCC(const SDNode *N, SelectionDAG &DAG)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
unsigned Log2_32(uint32_t Value)
Log2_32 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:513
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.h:550
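A quick illustration with arbitrary values (not code from this file):

#include "llvm/ADT/APInt.h"
#include <cassert>

void splatExample() {
  // Broadcast the 8-bit pattern 0xAB across 32 bits: 0xABABABAB.
  llvm::APInt Splat = llvm::APInt::getSplat(32, llvm::APInt(8, 0xAB));
  assert(Splat.getZExtValue() == 0xABABABABu);
}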
const SDValue & getValue() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N)
Test whether the given value is a constant int or similar node.
Class for arbitrary precision integers.
Definition: APInt.h:77
const Value * getValue() const
Return the base address of the memory access.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
iterator_range< use_iterator > uses()
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
int64_t getSExtValue() const
op_iterator op_begin() const
static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, SDValue N1, SelectionDAG &DAG)
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth=0)
If isNegatibleForFree returns true, return the newly negated expression.
static use_iterator use_end()
bool isNullFPConstant(SDValue V)
Returns true if V is an FP constant with a value of positive zero.
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
AddrMode
ARM Addressing Modes.
Definition: ARMBaseInfo.h:235
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out...
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, int &RefinementSteps, bool &UseOneConstNR, bool Reciprocal) const
Hooks for building estimates in place of slower divisions and square roots.
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger)
Return the result of a logical OR between different comparisons of identical values: ((X op1 Y) | (X ...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:259
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
iterator_range< value_op_iterator > op_values() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1942
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:503
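The block spans [loBit, hiBit), i.e. hiBit itself is not set. Arbitrary example values:

#include "llvm/ADT/APInt.h"
#include <cassert>

void bitsSetExample() {
  // A 16-bit value with bits 4 through 7 set: 0x00F0.
  llvm::APInt Block = llvm::APInt::getBitsSet(/*numBits=*/16, /*loBit=*/4, /*hiBit=*/8);
  assert(Block.getZExtValue() == 0x00F0);
}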
Flags
Flags values. These may be or'd together.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const SDValue & getMask() const
The memory access reads data.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1724
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:342
ConstantFPSDNode * isConstOrConstSplatFP(SDValue V)
Returns the SDNode if it is a constant splat BuildVector or constant float.
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:172
These are IR-level optimization flags that may be propagated to SDNodes.
Represents a use of a SDNode.
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:119
uint64_t getConstantOperandVal(unsigned i) const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:226
bool isUndef() const
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes)
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
ArrayRef< SDUse > ops() const
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
The memory access always returns the same value (or traps).
const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B, const MVT::SimpleValueType SVT=MVT::SimpleValueType::Any) const
Find the largest common subclass of A and B.
op_iterator op_end() const
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:606
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
unsigned countTrailingOnes() const
Count the number of trailing one bits.
Definition: APInt.h:1385
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
Same for multiplication.
Definition: ISDOpcodes.h:243
static volatile int Zero
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
virtual bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
This class is used to represent an MSCATTER node.
const SDValue & getIndex() const
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
bool isScalarInteger() const
isScalarInteger - Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:128
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:175
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a carry value...
Definition: ISDOpcodes.h:383
void setUnsafeAlgebra(bool b)
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
int getDivRefinementSteps(EVT VT, MachineFunction &MF) const
Return the refinement step count for a division of the given type based on the function's attributes...
SDValue getIndexedStore(SDValue OrigStore, const SDLoc &dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM)
bool isByteSized() const
isByteSized - Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:183
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:291
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:118
bool writeMem() const
This class is used to form a handle around another node that is persistent and is updated across invo...
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, unsigned Alignment=1, bool *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
This class is used to represent an MLOAD node.
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:326
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ConstantSDNode with isOpaque() == false return it casted to a ConstantSDNode pointer else n...
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
LLVM Value Representation.
Definition: Value.h:71
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:249
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
const SDValue & getBasePtr() const
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
A vector that has set insertion semantics.
Definition: SetVector.h:41
static bool isShift(MachineInstr *MI, unsigned Opcode, int64_t Imm)
static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI)
Return true if divmod libcall is available.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isTruncatingStore() const
Return true if the op does a truncation before store.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:830
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
This class is used to represent an MGATHER node.
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:331
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:100
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1343
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:253
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:980
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
static SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1722
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:397
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:465
int * Ptr
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:381
SDNode * getUser()
This returns the SDNode that contains this Use.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT SrcTy)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
static void Split(std::vector< std::string > &V, StringRef S)
Split - Splits a string of comma separated items in to a vector of strings.
bool isUIntN(unsigned N, uint64_t x)
isUIntN - Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:360
unsigned getAlignment() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool isBigEndian() const
Definition: DataLayout.h:221
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
getIntegerVT - Returns the EVT that represents an integer with the given number of bits...
Definition: ValueTypes.h:61
unsigned getGatherAllAliasesMaxDepth() const
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
const fltSemantics & getSemantics() const
Definition: APFloat.h:1043
virtual bool hasPairedLoad(EVT, unsigned &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
unsigned Log2_64(uint64_t Value)
Log2_64 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:519
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:321
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
unsigned getResNo() const
Convenience function for get().getResNo().
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1107
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
const SDNodeFlags * getFlags() const
This could be defined as a virtual function and implemented more simply and directly, but it is not, in order to avoid creating a vtable for this class.
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:213
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap...
This file describes how to lower LLVM code to machine code.
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
uint64_t getBaseAlignment() const
Return the minimum known alignment in bytes of the base address, without the offset.
int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const
Return a ReciprocalEstimate enum value for a square root of the given type based on the function's at...
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
ISD::CondCode get() const
uint64_t getZExtValue() const
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques=false)
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:799
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:326
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:783