LLVM  3.7.0
DAGCombiner.cpp
Go to the documentation of this file.
1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
11 // both before and after the DAG is legalized.
12 //
13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14 // primarily intended to handle simplification opportunities that are implicit
15 // in the LLVM IR and exposed by the various codegen lowering phases.
16 //
17 //===----------------------------------------------------------------------===//
18 
20 #include "llvm/ADT/SetVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/Statistic.h"
27 #include "llvm/IR/DataLayout.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/LLVMContext.h"
32 #include "llvm/Support/Debug.h"
40 #include <algorithm>
41 using namespace llvm;
42 
43 #define DEBUG_TYPE "dagcombine"
44 
45 STATISTIC(NodesCombined , "Number of dag nodes combined");
46 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
47 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
48 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
49 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
50 STATISTIC(SlicedLoads, "Number of load sliced");
51 
52 namespace {
53  static cl::opt<bool>
54  CombinerAA("combiner-alias-analysis", cl::Hidden,
55  cl::desc("Enable DAG combiner alias-analysis heuristics"));
56 
57  static cl::opt<bool>
58  CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
59  cl::desc("Enable DAG combiner's use of IR alias analysis"));
60 
61  static cl::opt<bool>
62  UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
63  cl::desc("Enable DAG combiner's use of TBAA"));
64 
65 #ifndef NDEBUG
67  CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
68  cl::desc("Only use DAG-combiner alias analysis in this"
69  " function"));
70 #endif
71 
72  /// Hidden option to stress test load slicing, i.e., when this option
73  /// is enabled, load slicing bypasses most of its profitability guards.
74  static cl::opt<bool>
75  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
76  cl::desc("Bypass the profitability model of load "
77  "slicing"),
78  cl::init(false));
79 
80  static cl::opt<bool>
81  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
82  cl::desc("DAG combiner may split indexing from loads"));
83 
84 //------------------------------ DAGCombiner ---------------------------------//
85 
86  class DAGCombiner {
87  SelectionDAG &DAG;
88  const TargetLowering &TLI;
90  CodeGenOpt::Level OptLevel;
91  bool LegalOperations;
92  bool LegalTypes;
93  bool ForCodeSize;
94 
95  /// \brief Worklist of all of the nodes that need to be simplified.
96  ///
97  /// This must behave as a stack -- new nodes to process are pushed onto the
98  /// back and when processing we pop off of the back.
99  ///
100  /// The worklist will not contain duplicates but may contain null entries
101  /// due to nodes being deleted from the underlying DAG.
102  SmallVector<SDNode *, 64> Worklist;
103 
104  /// \brief Mapping from an SDNode to its position on the worklist.
105  ///
106  /// This is used to find and remove nodes from the worklist (by nulling
107  /// them) when they are deleted from the underlying DAG. It relies on
108  /// stable indices of nodes within the worklist.
109  DenseMap<SDNode *, unsigned> WorklistMap;
110 
111  /// \brief Set of nodes which have been combined (at least once).
112  ///
113  /// This is used to allow us to reliably add any operands of a DAG node
114  /// which have not yet been combined to the worklist.
115  SmallPtrSet<SDNode *, 64> CombinedNodes;
116 
117  // AA - Used for DAG load/store alias analysis.
118  AliasAnalysis &AA;
119 
120  /// When an instruction is simplified, add all users of the instruction to
121  /// the work lists because they might get more simplified now.
122  void AddUsersToWorklist(SDNode *N) {
123  for (SDNode *Node : N->uses())
124  AddToWorklist(Node);
125  }
126 
127  /// Call the node-specific routine that folds each particular type of node.
128  SDValue visit(SDNode *N);
129 
130  public:
131  /// Add to the worklist making sure its instance is at the back (next to be
132  /// processed.)
133  void AddToWorklist(SDNode *N) {
134  // Skip handle nodes as they can't usefully be combined and confuse the
135  // zero-use deletion strategy.
136  if (N->getOpcode() == ISD::HANDLENODE)
137  return;
138 
139  if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
140  Worklist.push_back(N);
141  }
142 
143  /// Remove all instances of N from the worklist.
144  void removeFromWorklist(SDNode *N) {
145  CombinedNodes.erase(N);
146 
147  auto It = WorklistMap.find(N);
148  if (It == WorklistMap.end())
149  return; // Not in the worklist.
150 
151  // Null out the entry rather than erasing it to avoid a linear operation.
152  Worklist[It->second] = nullptr;
153  WorklistMap.erase(It);
154  }
155 
156  void deleteAndRecombine(SDNode *N);
157  bool recursivelyDeleteUnusedNodes(SDNode *N);
158 
159  SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
160  bool AddTo = true);
161 
162  SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
163  return CombineTo(N, &Res, 1, AddTo);
164  }
165 
166  SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
167  bool AddTo = true) {
168  SDValue To[] = { Res0, Res1 };
169  return CombineTo(N, To, 2, AddTo);
170  }
171 
172  void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
173 
174  private:
175 
176  /// Check the specified integer node value to see if it can be simplified or
177  /// if things it uses can be simplified by bit propagation.
178  /// If so, return true.
179  bool SimplifyDemandedBits(SDValue Op) {
180  unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
181  APInt Demanded = APInt::getAllOnesValue(BitWidth);
182  return SimplifyDemandedBits(Op, Demanded);
183  }
184 
185  bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
186 
187  bool CombineToPreIndexedLoadStore(SDNode *N);
188  bool CombineToPostIndexedLoadStore(SDNode *N);
189  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
190  bool SliceUpLoad(SDNode *N);
191 
192  /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
193  /// load.
194  ///
195  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
196  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
197  /// \param EltNo index of the vector element to load.
198  /// \param OriginalLoad load that EVE came from to be replaced.
199  /// \returns EVE on success SDValue() on failure.
200  SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
201  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
202  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
203  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
204  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
205  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
206  SDValue PromoteIntBinOp(SDValue Op);
207  SDValue PromoteIntShiftOp(SDValue Op);
208  SDValue PromoteExtend(SDValue Op);
209  bool PromoteLoad(SDValue Op);
210 
211  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
212  SDValue Trunc, SDValue ExtLoad, SDLoc DL,
213  ISD::NodeType ExtType);
214 
215  /// Call the node-specific routine that knows how to fold each
216  /// particular type of node. If that doesn't do anything, try the
217  /// target-specific DAG combines.
218  SDValue combine(SDNode *N);
219 
220  // Visitation implementation - Implement dag node combining for different
221  // node types. The semantics are as follows:
222  // Return Value:
223  // SDValue.getNode() == 0 - No change was made
224  // SDValue.getNode() == N - N was replaced, is dead and has been handled.
225  // otherwise - N should be replaced by the returned Operand.
226  //
227  SDValue visitTokenFactor(SDNode *N);
228  SDValue visitMERGE_VALUES(SDNode *N);
229  SDValue visitADD(SDNode *N);
230  SDValue visitSUB(SDNode *N);
231  SDValue visitADDC(SDNode *N);
232  SDValue visitSUBC(SDNode *N);
233  SDValue visitADDE(SDNode *N);
234  SDValue visitSUBE(SDNode *N);
235  SDValue visitMUL(SDNode *N);
236  SDValue visitSDIV(SDNode *N);
237  SDValue visitUDIV(SDNode *N);
238  SDValue visitSREM(SDNode *N);
239  SDValue visitUREM(SDNode *N);
240  SDValue visitMULHU(SDNode *N);
241  SDValue visitMULHS(SDNode *N);
242  SDValue visitSMUL_LOHI(SDNode *N);
243  SDValue visitUMUL_LOHI(SDNode *N);
244  SDValue visitSMULO(SDNode *N);
245  SDValue visitUMULO(SDNode *N);
246  SDValue visitSDIVREM(SDNode *N);
247  SDValue visitUDIVREM(SDNode *N);
248  SDValue visitAND(SDNode *N);
249  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
250  SDValue visitOR(SDNode *N);
251  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
252  SDValue visitXOR(SDNode *N);
253  SDValue SimplifyVBinOp(SDNode *N);
254  SDValue visitSHL(SDNode *N);
255  SDValue visitSRA(SDNode *N);
256  SDValue visitSRL(SDNode *N);
257  SDValue visitRotate(SDNode *N);
258  SDValue visitBSWAP(SDNode *N);
259  SDValue visitCTLZ(SDNode *N);
260  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
261  SDValue visitCTTZ(SDNode *N);
262  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
263  SDValue visitCTPOP(SDNode *N);
264  SDValue visitSELECT(SDNode *N);
265  SDValue visitVSELECT(SDNode *N);
266  SDValue visitSELECT_CC(SDNode *N);
267  SDValue visitSETCC(SDNode *N);
268  SDValue visitSIGN_EXTEND(SDNode *N);
269  SDValue visitZERO_EXTEND(SDNode *N);
270  SDValue visitANY_EXTEND(SDNode *N);
271  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
272  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
273  SDValue visitTRUNCATE(SDNode *N);
274  SDValue visitBITCAST(SDNode *N);
275  SDValue visitBUILD_PAIR(SDNode *N);
276  SDValue visitFADD(SDNode *N);
277  SDValue visitFSUB(SDNode *N);
278  SDValue visitFMUL(SDNode *N);
279  SDValue visitFMA(SDNode *N);
280  SDValue visitFDIV(SDNode *N);
281  SDValue visitFREM(SDNode *N);
282  SDValue visitFSQRT(SDNode *N);
283  SDValue visitFCOPYSIGN(SDNode *N);
284  SDValue visitSINT_TO_FP(SDNode *N);
285  SDValue visitUINT_TO_FP(SDNode *N);
286  SDValue visitFP_TO_SINT(SDNode *N);
287  SDValue visitFP_TO_UINT(SDNode *N);
288  SDValue visitFP_ROUND(SDNode *N);
289  SDValue visitFP_ROUND_INREG(SDNode *N);
290  SDValue visitFP_EXTEND(SDNode *N);
291  SDValue visitFNEG(SDNode *N);
292  SDValue visitFABS(SDNode *N);
293  SDValue visitFCEIL(SDNode *N);
294  SDValue visitFTRUNC(SDNode *N);
295  SDValue visitFFLOOR(SDNode *N);
296  SDValue visitFMINNUM(SDNode *N);
297  SDValue visitFMAXNUM(SDNode *N);
298  SDValue visitBRCOND(SDNode *N);
299  SDValue visitBR_CC(SDNode *N);
300  SDValue visitLOAD(SDNode *N);
301  SDValue visitSTORE(SDNode *N);
302  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
303  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
304  SDValue visitBUILD_VECTOR(SDNode *N);
305  SDValue visitCONCAT_VECTORS(SDNode *N);
306  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
307  SDValue visitVECTOR_SHUFFLE(SDNode *N);
308  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
309  SDValue visitINSERT_SUBVECTOR(SDNode *N);
310  SDValue visitMLOAD(SDNode *N);
311  SDValue visitMSTORE(SDNode *N);
312  SDValue visitMGATHER(SDNode *N);
313  SDValue visitMSCATTER(SDNode *N);
314  SDValue visitFP_TO_FP16(SDNode *N);
315 
316  SDValue visitFADDForFMACombine(SDNode *N);
317  SDValue visitFSUBForFMACombine(SDNode *N);
318 
319  SDValue XformToShuffleWithZero(SDNode *N);
320  SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
321 
322  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
323 
324  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
325  SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
326  SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
327  SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
328  SDValue N3, ISD::CondCode CC,
329  bool NotExtCompare = false);
330  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
331  SDLoc DL, bool foldBooleans = true);
332 
333  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
334  SDValue &CC) const;
335  bool isOneUseSetCC(SDValue N) const;
336 
337  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
338  unsigned HiOp);
339  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
340  SDValue CombineExtLoad(SDNode *N);
341  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
342  SDValue BuildSDIV(SDNode *N);
343  SDValue BuildSDIVPow2(SDNode *N);
344  SDValue BuildUDIV(SDNode *N);
345  SDValue BuildReciprocalEstimate(SDValue Op);
346  SDValue BuildRsqrtEstimate(SDValue Op);
347  SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations);
348  SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations);
349  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
350  bool DemandHighBits = true);
351  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
352  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
353  SDValue InnerPos, SDValue InnerNeg,
354  unsigned PosOpcode, unsigned NegOpcode,
355  SDLoc DL);
356  SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
357  SDValue ReduceLoadWidth(SDNode *N);
358  SDValue ReduceLoadOpStoreWidth(SDNode *N);
359  SDValue TransformFPLoadStorePair(SDNode *N);
360  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
361  SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
362 
363  SDValue GetDemandedBits(SDValue V, const APInt &Mask);
364 
365  /// Walk up chain skipping non-aliasing memory nodes,
366  /// looking for aliasing nodes and adding them to the Aliases vector.
367  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
368  SmallVectorImpl<SDValue> &Aliases);
369 
370  /// Return true if there is any possibility that the two addresses overlap.
371  bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
372 
373  /// Walk up chain skipping non-aliasing memory nodes, looking for a better
374  /// chain (aliasing node.)
375  SDValue FindBetterChain(SDNode *N, SDValue Chain);
376 
377  /// Holds a pointer to an LSBaseSDNode as well as information on where it
378  /// is located in a sequence of memory operations connected by a chain.
379  struct MemOpLink {
380  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
381  MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
382  // Ptr to the mem node.
383  LSBaseSDNode *MemNode;
384  // Offset from the base ptr.
385  int64_t OffsetFromBase;
386  // What is the sequence number of this mem node.
387  // Lowest mem operand in the DAG starts at zero.
388  unsigned SequenceNum;
389  };
390 
391  /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
392  /// constant build_vector of the stored constant values in Stores.
393  SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
394  SDLoc SL,
395  ArrayRef<MemOpLink> Stores,
396  EVT Ty) const;
397 
398  /// This is a helper function for MergeConsecutiveStores. When the source
399  /// elements of the consecutive stores are all constants or all extracted
400  /// vector elements, try to merge them into one larger store.
401  /// \return True if a merged store was created.
402  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
403  EVT MemVT, unsigned NumElem,
404  bool IsConstantSrc, bool UseVector);
405 
406  /// This is a helper function for MergeConsecutiveStores.
407  /// Stores that may be merged are placed in StoreNodes.
408  /// Loads that may alias with those stores are placed in AliasLoadNodes.
409  void getStoreMergeAndAliasCandidates(
410  StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
411  SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
412 
413  /// Merge consecutive store operations into a wide store.
414  /// This optimization uses wide integers or vectors when possible.
415  /// \return True if some memory operations were changed.
416  bool MergeConsecutiveStores(StoreSDNode *N);
417 
418  /// \brief Try to transform a truncation where C is a constant:
419  /// (trunc (and X, C)) -> (and (trunc X), (trunc C))
420  ///
421  /// \p N needs to be a truncation and its first operand an AND. Other
422  /// requirements are checked by the function (e.g. that trunc is
423  /// single-use) and if missed an empty SDValue is returned.
424  SDValue distributeTruncateThroughAnd(SDNode *N);
425 
426  public:
427  DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
428  : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
429  OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
430  auto *F = DAG.getMachineFunction().getFunction();
431  ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) ||
432  F->hasFnAttribute(Attribute::MinSize);
433  }
434 
435  /// Runs the dag combiner on all nodes in the work list
436  void Run(CombineLevel AtLevel);
437 
438  SelectionDAG &getDAG() const { return DAG; }
439 
440  /// Returns a type large enough to hold any valid shift amount - before type
441  /// legalization these can be huge.
442  EVT getShiftAmountTy(EVT LHSTy) {
443  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
444  if (LHSTy.isVector())
445  return LHSTy;
446  auto &DL = DAG.getDataLayout();
447  return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
448  : TLI.getPointerTy(DL);
449  }
450 
451  /// This method returns true if we are running before type legalization or
452  /// if the specified VT is legal.
453  bool isTypeLegal(const EVT &VT) {
454  if (!LegalTypes) return true;
455  return TLI.isTypeLegal(VT);
456  }
457 
458  /// Convenience wrapper around TargetLowering::getSetCCResultType
459  EVT getSetCCResultType(EVT VT) const {
460  return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
461  }
462  };
463 }
464 
465 
466 namespace {
467 /// This class is a DAGUpdateListener that removes any deleted
468 /// nodes from the worklist.
469 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
470  DAGCombiner &DC;
471 public:
472  explicit WorklistRemover(DAGCombiner &dc)
473  : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
474 
475  void NodeDeleted(SDNode *N, SDNode *E) override {
476  DC.removeFromWorklist(N);
477  }
478 };
479 }
480 
481 //===----------------------------------------------------------------------===//
482 // TargetLowering::DAGCombinerInfo implementation
483 //===----------------------------------------------------------------------===//
484 
486  ((DAGCombiner*)DC)->AddToWorklist(N);
487 }
488 
490  ((DAGCombiner*)DC)->removeFromWorklist(N);
491 }
492 
494 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
495  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
496 }
497 
499 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
500  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
501 }
502 
503 
505 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
506  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
507 }
508 
511  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
512 }
513 
514 //===----------------------------------------------------------------------===//
515 // Helper Functions
516 //===----------------------------------------------------------------------===//
517 
518 void DAGCombiner::deleteAndRecombine(SDNode *N) {
519  removeFromWorklist(N);
520 
521  // If the operands of this node are only used by the node, they will now be
522  // dead. Make sure to re-visit them and recursively delete dead nodes.
523  for (const SDValue &Op : N->ops())
524  // For an operand generating multiple values, one of the values may
525  // become dead allowing further simplification (e.g. split index
526  // arithmetic from an indexed load).
527  if (Op->hasOneUse() || Op->getNumValues() > 1)
528  AddToWorklist(Op.getNode());
529 
530  DAG.DeleteNode(N);
531 }
532 
533 /// Return 1 if we can compute the negated form of the specified expression for
534 /// the same cost as the expression itself, or 2 if we can compute the negated
535 /// form more cheaply than the expression itself.
536 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
537  const TargetLowering &TLI,
538  const TargetOptions *Options,
539  unsigned Depth = 0) {
540  // fneg is removable even if it has multiple uses.
541  if (Op.getOpcode() == ISD::FNEG) return 2;
542 
543  // Don't allow anything with multiple uses.
544  if (!Op.hasOneUse()) return 0;
545 
546  // Don't recurse exponentially.
547  if (Depth > 6) return 0;
548 
549  switch (Op.getOpcode()) {
550  default: return false;
551  case ISD::ConstantFP:
552  // Don't invert constant FP values after legalize. The negated constant
553  // isn't necessarily legal.
554  return LegalOperations ? 0 : 1;
555  case ISD::FADD:
556  // FIXME: determine better conditions for this xform.
557  if (!Options->UnsafeFPMath) return 0;
558 
559  // After operation legalization, it might not be legal to create new FSUBs.
560  if (LegalOperations &&
562  return 0;
563 
564  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
565  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
566  Options, Depth + 1))
567  return V;
568  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
569  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
570  Depth + 1);
571  case ISD::FSUB:
572  // We can't turn -(A-B) into B-A when we honor signed zeros.
573  if (!Options->UnsafeFPMath) return 0;
574 
575  // fold (fneg (fsub A, B)) -> (fsub B, A)
576  return 1;
577 
578  case ISD::FMUL:
579  case ISD::FDIV:
580  if (Options->HonorSignDependentRoundingFPMath()) return 0;
581 
582  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
583  if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
584  Options, Depth + 1))
585  return V;
586 
587  return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
588  Depth + 1);
589 
590  case ISD::FP_EXTEND:
591  case ISD::FP_ROUND:
592  case ISD::FSIN:
593  return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
594  Depth + 1);
595  }
596 }
597 
598 /// If isNegatibleForFree returns true, return the newly negated expression.
600  bool LegalOperations, unsigned Depth = 0) {
601  const TargetOptions &Options = DAG.getTarget().Options;
602  // fneg is removable even if it has multiple uses.
603  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
604 
605  // Don't allow anything with multiple uses.
606  assert(Op.hasOneUse() && "Unknown reuse!");
607 
608  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
609  switch (Op.getOpcode()) {
610  default: llvm_unreachable("Unknown code");
611  case ISD::ConstantFP: {
612  APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
613  V.changeSign();
614  return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
615  }
616  case ISD::FADD:
617  // FIXME: determine better conditions for this xform.
618  assert(Options.UnsafeFPMath);
619 
620  // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
621  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
622  DAG.getTargetLoweringInfo(), &Options, Depth+1))
623  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
624  GetNegatedExpression(Op.getOperand(0), DAG,
625  LegalOperations, Depth+1),
626  Op.getOperand(1));
627  // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
628  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
629  GetNegatedExpression(Op.getOperand(1), DAG,
630  LegalOperations, Depth+1),
631  Op.getOperand(0));
632  case ISD::FSUB:
633  // We can't turn -(A-B) into B-A when we honor signed zeros.
634  assert(Options.UnsafeFPMath);
635 
636  // fold (fneg (fsub 0, B)) -> B
637  if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
638  if (N0CFP->isZero())
639  return Op.getOperand(1);
640 
641  // fold (fneg (fsub A, B)) -> (fsub B, A)
642  return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
643  Op.getOperand(1), Op.getOperand(0));
644 
645  case ISD::FMUL:
646  case ISD::FDIV:
647  assert(!Options.HonorSignDependentRoundingFPMath());
648 
649  // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
650  if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
651  DAG.getTargetLoweringInfo(), &Options, Depth+1))
652  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
653  GetNegatedExpression(Op.getOperand(0), DAG,
654  LegalOperations, Depth+1),
655  Op.getOperand(1));
656 
657  // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
658  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
659  Op.getOperand(0),
660  GetNegatedExpression(Op.getOperand(1), DAG,
661  LegalOperations, Depth+1));
662 
663  case ISD::FP_EXTEND:
664  case ISD::FSIN:
665  return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
666  GetNegatedExpression(Op.getOperand(0), DAG,
667  LegalOperations, Depth+1));
668  case ISD::FP_ROUND:
669  return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
670  GetNegatedExpression(Op.getOperand(0), DAG,
671  LegalOperations, Depth+1),
672  Op.getOperand(1));
673  }
674 }
675 
676 // Return true if this node is a setcc, or is a select_cc
677 // that selects between the target values used for true and false, making it
678 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
679 // the appropriate nodes based on the type of node we are checking. This
680 // simplifies life a bit for the callers.
681 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
682  SDValue &CC) const {
683  if (N.getOpcode() == ISD::SETCC) {
684  LHS = N.getOperand(0);
685  RHS = N.getOperand(1);
686  CC = N.getOperand(2);
687  return true;
688  }
689 
690  if (N.getOpcode() != ISD::SELECT_CC ||
691  !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
692  !TLI.isConstFalseVal(N.getOperand(3).getNode()))
693  return false;
694 
695  if (TLI.getBooleanContents(N.getValueType()) ==
697  return false;
698 
699  LHS = N.getOperand(0);
700  RHS = N.getOperand(1);
701  CC = N.getOperand(4);
702  return true;
703 }
704 
705 /// Return true if this is a SetCC-equivalent operation with only one use.
706 /// If this is true, it allows the users to invert the operation for free when
707 /// it is profitable to do so.
708 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
709  SDValue N0, N1, N2;
710  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
711  return true;
712  return false;
713 }
714 
715 /// Returns true if N is a BUILD_VECTOR node whose
716 /// elements are all the same constant or undefined.
717 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
719  if (!C)
720  return false;
721 
722  APInt SplatUndef;
723  unsigned SplatBitSize;
724  bool HasAnyUndefs;
725  EVT EltVT = N->getValueType(0).getVectorElementType();
726  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
727  HasAnyUndefs) &&
728  EltVT.getSizeInBits() >= SplatBitSize);
729 }
730 
731 // \brief Returns the SDNode if it is a constant integer BuildVector
732 // or constant integer.
734  if (isa<ConstantSDNode>(N))
735  return N.getNode();
737  return N.getNode();
738  return nullptr;
739 }
740 
741 // \brief Returns the SDNode if it is a constant float BuildVector
742 // or constant float.
744  if (isa<ConstantFPSDNode>(N))
745  return N.getNode();
747  return N.getNode();
748  return nullptr;
749 }
750 
751 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
752 // int.
754  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
755  return CN;
756 
757  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
758  BitVector UndefElements;
759  ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
760 
761  // BuildVectors can truncate their operands. Ignore that case here.
762  // FIXME: We blindly ignore splats which include undef which is overly
763  // pessimistic.
764  if (CN && UndefElements.none() &&
765  CN->getValueType(0) == N.getValueType().getScalarType())
766  return CN;
767  }
768 
769  return nullptr;
770 }
771 
772 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
773 // float.
775  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
776  return CN;
777 
778  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
779  BitVector UndefElements;
780  ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
781 
782  if (CN && UndefElements.none())
783  return CN;
784  }
785 
786  return nullptr;
787 }
788 
789 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
790  SDValue N0, SDValue N1) {
791  EVT VT = N0.getValueType();
792  if (N0.getOpcode() == Opc) {
795  // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
796  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
797  return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
798  return SDValue();
799  }
800  if (N0.hasOneUse()) {
801  // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
802  // use
803  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
804  if (!OpNode.getNode())
805  return SDValue();
806  AddToWorklist(OpNode.getNode());
807  return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
808  }
809  }
810  }
811 
812  if (N1.getOpcode() == Opc) {
815  // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
816  if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
817  return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
818  return SDValue();
819  }
820  if (N1.hasOneUse()) {
821  // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
822  // use
823  SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
824  if (!OpNode.getNode())
825  return SDValue();
826  AddToWorklist(OpNode.getNode());
827  return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
828  }
829  }
830  }
831 
832  return SDValue();
833 }
834 
835 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
836  bool AddTo) {
837  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
838  ++NodesCombined;
839  DEBUG(dbgs() << "\nReplacing.1 ";
840  N->dump(&DAG);
841  dbgs() << "\nWith: ";
842  To[0].getNode()->dump(&DAG);
843  dbgs() << " and " << NumTo-1 << " other values\n");
844  for (unsigned i = 0, e = NumTo; i != e; ++i)
845  assert((!To[i].getNode() ||
846  N->getValueType(i) == To[i].getValueType()) &&
847  "Cannot combine value to value of different type!");
848 
849  WorklistRemover DeadNodes(*this);
850  DAG.ReplaceAllUsesWith(N, To);
851  if (AddTo) {
852  // Push the new nodes and any users onto the worklist
853  for (unsigned i = 0, e = NumTo; i != e; ++i) {
854  if (To[i].getNode()) {
855  AddToWorklist(To[i].getNode());
856  AddUsersToWorklist(To[i].getNode());
857  }
858  }
859  }
860 
861  // Finally, if the node is now dead, remove it from the graph. The node
862  // may not be dead if the replacement process recursively simplified to
863  // something else needing this node.
864  if (N->use_empty())
865  deleteAndRecombine(N);
866  return SDValue(N, 0);
867 }
868 
/// Commit a replacement (TLO.Old -> TLO.New) computed by the target-lowering
/// simplification machinery, keeping the combiner worklist consistent and
/// deleting the old node if the replacement left it dead.
869 void DAGCombiner::
870 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
871  // Replace all uses. If any nodes become isomorphic to other nodes and
872  // are deleted, make sure to remove them from our worklist.
873  WorklistRemover DeadNodes(*this);
874  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
875 
876  // Push the new node and any (possibly new) users onto the worklist.
877  AddToWorklist(TLO.New.getNode());
878  AddUsersToWorklist(TLO.New.getNode());
879 
880  // Finally, if the node is now dead, remove it from the graph. The node
881  // may not be dead if the replacement process recursively simplified to
882  // something else needing this node.
883  if (TLO.Old.getNode()->use_empty())
884  deleteAndRecombine(TLO.Old.getNode());
885 }
886 
887 /// Check the specified integer node value to see if it can be simplified or if
888 /// things it uses can be simplified by bit propagation. If so, return true.
889 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// TLO collects the Old->New replacement (if any) that target lowering
// computes; it is committed below only on success.
890  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
891  APInt KnownZero, KnownOne;
892  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
893  return false;
894 
895  // Revisit the node.
896  AddToWorklist(Op.getNode());
897 
898  // Replace the old value with the new one.
899  ++NodesCombined;
900  DEBUG(dbgs() << "\nReplacing.2 ";
901  TLO.Old.getNode()->dump(&DAG);
902  dbgs() << "\nWith: ";
903  TLO.New.getNode()->dump(&DAG);
904  dbgs() << '\n');
905 
906  CommitTargetLoweringOpt(TLO);
907  return true;
908 }
909 
/// Rewire all uses of Load to go through the wider ExtLoad: value result 0 is
/// replaced by a TRUNCATE of ExtLoad back to the original type, and chain
/// result 1 is replaced by ExtLoad's chain. Load is then deleted.
910 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
911  SDLoc dl(Load);
912  EVT VT = Load->getValueType(0);
913  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
914 
915  DEBUG(dbgs() << "\nReplacing.9 ";
916  Load->dump(&DAG);
917  dbgs() << "\nWith: ";
918  Trunc.getNode()->dump(&DAG);
919  dbgs() << '\n');
920  WorklistRemover DeadNodes(*this);
921  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
922  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
923  deleteAndRecombine(Load);
924  AddToWorklist(Trunc.getNode());
925 }
926 
927 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
928  Replace = false;
929  SDLoc dl(Op);
930  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
931  EVT MemVT = LD->getMemoryVT();
933  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
934  : ISD::EXTLOAD)
935  : LD->getExtensionType();
936  Replace = true;
937  return DAG.getExtLoad(ExtType, dl, PVT,
938  LD->getChain(), LD->getBasePtr(),
939  MemVT, LD->getMemOperand());
940  }
941 
942  unsigned Opc = Op.getOpcode();
943  switch (Opc) {
944  default: break;
945  case ISD::AssertSext:
946  return DAG.getNode(ISD::AssertSext, dl, PVT,
947  SExtPromoteOperand(Op.getOperand(0), PVT),
948  Op.getOperand(1));
949  case ISD::AssertZext:
950  return DAG.getNode(ISD::AssertZext, dl, PVT,
951  ZExtPromoteOperand(Op.getOperand(0), PVT),
952  Op.getOperand(1));
953  case ISD::Constant: {
954  unsigned ExtOpc =
956  return DAG.getNode(ExtOpc, dl, PVT, Op);
957  }
958  }
959 
960  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
961  return SDValue();
962  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
963 }
964 
965 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
966  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
967  return SDValue();
968  EVT OldVT = Op.getValueType();
969  SDLoc dl(Op);
970  bool Replace = false;
971  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
972  if (!NewOp.getNode())
973  return SDValue();
974  AddToWorklist(NewOp.getNode());
975 
976  if (Replace)
977  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
978  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
979  DAG.getValueType(OldVT));
980 }
981 
982 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
983  EVT OldVT = Op.getValueType();
984  SDLoc dl(Op);
985  bool Replace = false;
986  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
987  if (!NewOp.getNode())
988  return SDValue();
989  AddToWorklist(NewOp.getNode());
990 
991  if (Replace)
992  ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
993  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
994 }
995 
996 /// Promote the specified integer binary operation if the target indicates it is
997 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
998 /// i32 since i16 instructions are longer.
999 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1000  if (!LegalOperations)
1001  return SDValue();
1002 
// Only scalar integer operations are candidates for promotion.
1003  EVT VT = Op.getValueType();
1004  if (VT.isVector() || !VT.isInteger())
1005  return SDValue();
1006 
1007  // If operation type is 'undesirable', e.g. i16 on x86, consider
1008  // promoting it.
1009  unsigned Opc = Op.getOpcode();
1010  if (TLI.isTypeDesirableForOp(Opc, VT))
1011  return SDValue();
1012 
1013  EVT PVT = VT;
1014  // Consult target whether it is a good idea to promote this operation and
1015  // what's the right type to promote it to.
1016  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1017  assert(PVT != VT && "Don't know what type to promote to!");
1018 
1019  bool Replace0 = false;
1020  SDValue N0 = Op.getOperand(0);
1021  SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1022  if (!NN0.getNode())
1023  return SDValue();
1024 
1025  bool Replace1 = false;
1026  SDValue N1 = Op.getOperand(1);
1027  SDValue NN1;
// Reuse the promoted value when both operands are the same node.
1028  if (N0 == N1)
1029  NN1 = NN0;
1030  else {
1031  NN1 = PromoteOperand(N1, PVT, Replace1);
1032  if (!NN1.getNode())
1033  return SDValue();
1034  }
1035 
1036  AddToWorklist(NN0.getNode());
1037  if (NN1.getNode())
1038  AddToWorklist(NN1.getNode());
1039 
// Promoted loads must have the original loads' users rewired through them.
1040  if (Replace0)
1041  ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1042  if (Replace1)
1043  ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1044 
1045  DEBUG(dbgs() << "\nPromoting ";
1046  Op.getNode()->dump(&DAG));
1047  SDLoc dl(Op);
// Perform the operation at the wider type and truncate the result back.
1048  return DAG.getNode(ISD::TRUNCATE, dl, VT,
1049  DAG.getNode(Opc, dl, PVT, NN0, NN1));
1050  }
1051  return SDValue();
1052 }
1053 
1054 /// Promote the specified integer shift operation if the target indicates it is
1055 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1056 /// i32 since i16 instructions are longer.
1057 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1058  if (!LegalOperations)
1059  return SDValue();
1060 
// Only scalar integer shifts are candidates for promotion.
1061  EVT VT = Op.getValueType();
1062  if (VT.isVector() || !VT.isInteger())
1063  return SDValue();
1064 
1065  // If operation type is 'undesirable', e.g. i16 on x86, consider
1066  // promoting it.
1067  unsigned Opc = Op.getOpcode();
1068  if (TLI.isTypeDesirableForOp(Opc, VT))
1069  return SDValue();
1070 
1071  EVT PVT = VT;
1072  // Consult target whether it is a good idea to promote this operation and
1073  // what's the right type to promote it to.
1074  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1075  assert(PVT != VT && "Don't know what type to promote to!");
1076 
1077  bool Replace = false;
1078  SDValue N0 = Op.getOperand(0);
// The promotion of the shifted value must match the shift kind: SRA needs
// the sign bits preserved, SRL needs the high bits cleared, SHL can take
// anything since the low bits are unaffected by garbage above them.
1079  if (Opc == ISD::SRA)
1080  N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
1081  else if (Opc == ISD::SRL)
1082  N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
1083  else
1084  N0 = PromoteOperand(N0, PVT, Replace);
1085  if (!N0.getNode())
1086  return SDValue();
1087 
1088  AddToWorklist(N0.getNode());
1089  if (Replace)
1090  ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1091 
1092  DEBUG(dbgs() << "\nPromoting ";
1093  Op.getNode()->dump(&DAG));
1094  SDLoc dl(Op);
// Shift at the wider type (shift amount unchanged), then truncate back.
1095  return DAG.getNode(ISD::TRUNCATE, dl, VT,
1096  DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
1097  }
1098  return SDValue();
1099 }
1100 
1101 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1102  if (!LegalOperations)
1103  return SDValue();
1104 
1105  EVT VT = Op.getValueType();
1106  if (VT.isVector() || !VT.isInteger())
1107  return SDValue();
1108 
1109  // If operation type is 'undesirable', e.g. i16 on x86, consider
1110  // promoting it.
1111  unsigned Opc = Op.getOpcode();
1112  if (TLI.isTypeDesirableForOp(Opc, VT))
1113  return SDValue();
1114 
1115  EVT PVT = VT;
1116  // Consult target whether it is a good idea to promote this operation and
1117  // what's the right type to promote it to.
1118  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1119  assert(PVT != VT && "Don't know what type to promote to!");
1120  // fold (aext (aext x)) -> (aext x)
1121  // fold (aext (zext x)) -> (zext x)
1122  // fold (aext (sext x)) -> (sext x)
1123  DEBUG(dbgs() << "\nPromoting ";
1124  Op.getNode()->dump(&DAG));
1125  return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1126  }
1127  return SDValue();
1128 }
1129 
/// Promote a load whose result type the target considers undesirable: replace
/// it with a wider extending load plus a TRUNCATE back to the original type.
/// Returns true and rewires all users (value and chain) on success.
1130 bool DAGCombiner::PromoteLoad(SDValue Op) {
1131  if (!LegalOperations)
1132  return false;
1133 
// Only scalar integer loads are candidates for promotion.
1134  EVT VT = Op.getValueType();
1135  if (VT.isVector() || !VT.isInteger())
1136  return false;
1137 
1138  // If operation type is 'undesirable', e.g. i16 on x86, consider
1139  // promoting it.
1140  unsigned Opc = Op.getOpcode();
1141  if (TLI.isTypeDesirableForOp(Opc, VT))
1142  return false;
1143 
1144  EVT PVT = VT;
1145  // Consult target whether it is a good idea to promote this operation and
1146  // what's the right type to promote it to.
1147  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1148  assert(PVT != VT && "Don't know what type to promote to!");
1149 
1150  SDLoc dl(Op);
1151  SDNode *N = Op.getNode();
1152  LoadSDNode *LD = cast<LoadSDNode>(N);
1153  EVT MemVT = LD->getMemoryVT();
// A plain load becomes a zext-load when legal at PVT (otherwise any-ext);
// an extending load keeps its extension kind.
1154  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
1155  ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
1156  : ISD::EXTLOAD)
1157  : LD->getExtensionType();
1158  SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
1159  LD->getChain(), LD->getBasePtr(),
1160  MemVT, LD->getMemOperand());
1161  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
1162 
1163  DEBUG(dbgs() << "\nPromoting ";
1164  N->dump(&DAG);
1165  dbgs() << "\nTo: ";
1166  Result.getNode()->dump(&DAG);
1167  dbgs() << '\n');
1168  WorklistRemover DeadNodes(*this);
// Rewire both results of the old load: value through the truncate, chain
// through the new load's chain.
1169  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1170  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1171  deleteAndRecombine(N);
1172  AddToWorklist(Result.getNode());
1173  return true;
1174  }
1175  return false;
1176 }
1177 
1178 /// \brief Recursively delete a node which has no uses and any operands for
1179 /// which it is the only use.
1180 ///
1181 /// Note that this both deletes the nodes and removes them from the worklist.
1182 /// It also adds any nodes who have had a user deleted to the worklist as they
1183 /// may now have only one use and subject to other combines.
1184 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1185  if (!N->use_empty())
1186  return false;
1187 
1189  Nodes.insert(N);
1190  do {
1191  N = Nodes.pop_back_val();
1192  if (!N)
1193  continue;
1194 
1195  if (N->use_empty()) {
1196  for (const SDValue &ChildN : N->op_values())
1197  Nodes.insert(ChildN.getNode());
1198 
1199  removeFromWorklist(N);
1200  DAG.DeleteNode(N);
1201  } else {
1202  AddToWorklist(N);
1203  }
1204  } while (!Nodes.empty());
1205  return true;
1206 }
1207 
1208 //===----------------------------------------------------------------------===//
1209 // Main DAG Combiner implementation
1210 //===----------------------------------------------------------------------===//
1211 
/// Top-level driver: seed the worklist with every node in the DAG, then
/// repeatedly pop nodes, (re)legalize them if needed, run combine() on them,
/// and splice any replacement back into the DAG until the worklist drains.
1212 void DAGCombiner::Run(CombineLevel AtLevel) {
1213  // set the instance variables, so that the various visit routines may use it.
1214  Level = AtLevel;
1215  LegalOperations = Level >= AfterLegalizeVectorOps;
1216  LegalTypes = Level >= AfterLegalizeTypes;
1217 
1218  // Add all the dag nodes to the worklist.
1219  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
1220  E = DAG.allnodes_end(); I != E; ++I)
1221  AddToWorklist(I);
1222 
1223  // Create a dummy node (which is not added to allnodes), that adds a reference
1224  // to the root node, preventing it from being deleted, and tracking any
1225  // changes of the root.
1226  HandleSDNode Dummy(DAG.getRoot());
1227 
1228  // while the worklist isn't empty, find a node and
1229  // try and combine it.
1230  while (!WorklistMap.empty()) {
1231  SDNode *N;
1232  // The Worklist holds the SDNodes in order, but it may contain null entries.
1233  do {
1234  N = Worklist.pop_back_val();
1235  } while (!N);
1236 
1237  bool GoodWorklistEntry = WorklistMap.erase(N);
1238  (void)GoodWorklistEntry;
1239  assert(GoodWorklistEntry &&
1240  "Found a worklist entry without a corresponding map entry!");
1241 
1242  // If N has no uses, it is dead. Make sure to revisit all N's operands once
1243  // N is deleted from the DAG, since they too may now be dead or may have a
1244  // reduced number of uses, allowing other xforms.
1245  if (recursivelyDeleteUnusedNodes(N))
1246  continue;
1247 
1248  WorklistRemover DeadNodes(*this);
1249 
1250  // If this combine is running after legalizing the DAG, re-legalize any
1251  // nodes pulled off the worklist.
1252  if (Level == AfterLegalizeDAG) {
1253  SmallSetVector<SDNode *, 16> UpdatedNodes;
1254  bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1255 
1256  for (SDNode *LN : UpdatedNodes) {
1257  AddToWorklist(LN);
1258  AddUsersToWorklist(LN);
1259  }
// If legalization replaced N itself, it is no longer a valid node to combine.
1260  if (!NIsValid)
1261  continue;
1262  }
1263 
1264  DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1265 
1266  // Add any operands of the new node which have not yet been combined to the
1267  // worklist as well. Because the worklist uniques things already, this
1268  // won't repeatedly process the same operand.
1269  CombinedNodes.insert(N);
1270  for (const SDValue &ChildN : N->op_values())
1271  if (!CombinedNodes.count(ChildN.getNode()))
1272  AddToWorklist(ChildN.getNode());
1273 
1274  SDValue RV = combine(N);
1275 
1276  if (!RV.getNode())
1277  continue;
1278 
1279  ++NodesCombined;
1280 
1281  // If we get back the same node we passed in, rather than a new node or
1282  // zero, we know that the node must have defined multiple values and
1283  // CombineTo was used. Since CombineTo takes care of the worklist
1284  // mechanics for us, we have no work to do in this case.
1285  if (RV.getNode() == N)
1286  continue;
1287 
1288  assert(N->getOpcode() != ISD::DELETED_NODE &&
1289  RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
1290  "Node was deleted but visit returned new node!");
1291 
1292  DEBUG(dbgs() << " ... into: ";
1293  RV.getNode()->dump(&DAG));
1294 
1295  // Transfer debug value.
1296  DAG.TransferDbgValues(SDValue(N, 0), RV);
1297  if (N->getNumValues() == RV.getNode()->getNumValues())
1298  DAG.ReplaceAllUsesWith(N, RV.getNode());
1299  else {
// Single-value replacement of a single-value node: RAUW via a one-element
// array form of ReplaceAllUsesWith.
1300  assert(N->getValueType(0) == RV.getValueType() &&
1301  N->getNumValues() == 1 && "Type mismatch");
1302  SDValue OpV = RV;
1303  DAG.ReplaceAllUsesWith(N, &OpV);
1304  }
1305 
1306  // Push the new node and any users onto the worklist
1307  AddToWorklist(RV.getNode());
1308  AddUsersToWorklist(RV.getNode());
1309 
1310  // Finally, if the node is now dead, remove it from the graph. The node
1311  // may not be dead if the replacement process recursively simplified to
1312  // something else needing this node. This will also take care of adding any
1313  // operands which have lost a user to the worklist.
1314  recursivelyDeleteUnusedNodes(N);
1315  }
1316 
1317  // If the root changed (e.g. it was a dead load, update the root).
1318  DAG.setRoot(Dummy.getValue());
1319  DAG.RemoveDeadNodes();
1320 }
1321 
/// Dispatch N to the opcode-specific visit routine. Returns the replacement
/// value (possibly SDValue(N, 0) when CombineTo was used inside a visitor),
/// or a null SDValue for opcodes with no handler or when no fold applied.
1322 SDValue DAGCombiner::visit(SDNode *N) {
1323  switch (N->getOpcode()) {
1324  default: break;
1325  case ISD::TokenFactor: return visitTokenFactor(N);
1326  case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1327  case ISD::ADD: return visitADD(N);
1328  case ISD::SUB: return visitSUB(N);
1329  case ISD::ADDC: return visitADDC(N);
1330  case ISD::SUBC: return visitSUBC(N);
1331  case ISD::ADDE: return visitADDE(N);
1332  case ISD::SUBE: return visitSUBE(N);
1333  case ISD::MUL: return visitMUL(N);
1334  case ISD::SDIV: return visitSDIV(N);
1335  case ISD::UDIV: return visitUDIV(N);
1336  case ISD::SREM: return visitSREM(N);
1337  case ISD::UREM: return visitUREM(N);
1338  case ISD::MULHU: return visitMULHU(N);
1339  case ISD::MULHS: return visitMULHS(N);
1340  case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1341  case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1342  case ISD::SMULO: return visitSMULO(N);
1343  case ISD::UMULO: return visitUMULO(N);
1344  case ISD::SDIVREM: return visitSDIVREM(N);
1345  case ISD::UDIVREM: return visitUDIVREM(N);
1346  case ISD::AND: return visitAND(N);
1347  case ISD::OR: return visitOR(N);
1348  case ISD::XOR: return visitXOR(N);
1349  case ISD::SHL: return visitSHL(N);
1350  case ISD::SRA: return visitSRA(N);
1351  case ISD::SRL: return visitSRL(N);
1352  case ISD::ROTR:
1353  case ISD::ROTL: return visitRotate(N);
1354  case ISD::BSWAP: return visitBSWAP(N);
1355  case ISD::CTLZ: return visitCTLZ(N);
1356  case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1357  case ISD::CTTZ: return visitCTTZ(N);
1358  case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1359  case ISD::CTPOP: return visitCTPOP(N);
1360  case ISD::SELECT: return visitSELECT(N);
1361  case ISD::VSELECT: return visitVSELECT(N);
1362  case ISD::SELECT_CC: return visitSELECT_CC(N);
1363  case ISD::SETCC: return visitSETCC(N);
1364  case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1365  case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1366  case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1367  case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1368  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1369  case ISD::TRUNCATE: return visitTRUNCATE(N);
1370  case ISD::BITCAST: return visitBITCAST(N);
1371  case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1372  case ISD::FADD: return visitFADD(N);
1373  case ISD::FSUB: return visitFSUB(N);
1374  case ISD::FMUL: return visitFMUL(N);
1375  case ISD::FMA: return visitFMA(N);
1376  case ISD::FDIV: return visitFDIV(N);
1377  case ISD::FREM: return visitFREM(N);
1378  case ISD::FSQRT: return visitFSQRT(N);
1379  case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1380  case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1381  case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1382  case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1383  case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1384  case ISD::FP_ROUND: return visitFP_ROUND(N);
1385  case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1386  case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1387  case ISD::FNEG: return visitFNEG(N);
1388  case ISD::FABS: return visitFABS(N);
1389  case ISD::FFLOOR: return visitFFLOOR(N);
1390  case ISD::FMINNUM: return visitFMINNUM(N);
1391  case ISD::FMAXNUM: return visitFMAXNUM(N);
1392  case ISD::FCEIL: return visitFCEIL(N);
1393  case ISD::FTRUNC: return visitFTRUNC(N);
1394  case ISD::BRCOND: return visitBRCOND(N);
1395  case ISD::BR_CC: return visitBR_CC(N);
1396  case ISD::LOAD: return visitLOAD(N);
1397  case ISD::STORE: return visitSTORE(N);
1398  case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1399  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1400  case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1401  case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1402  case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1403  case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1404  case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1405  case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1406  case ISD::MGATHER: return visitMGATHER(N);
1407  case ISD::MLOAD: return visitMLOAD(N);
1408  case ISD::MSCATTER: return visitMSCATTER(N);
1409  case ISD::MSTORE: return visitMSTORE(N);
1410  case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1411  }
1412  return SDValue();
1413 }
1414 
1416  SDValue RV = visit(N);
1417 
1418  // If nothing happened, try a target-specific DAG combine.
1419  if (!RV.getNode()) {
1420  assert(N->getOpcode() != ISD::DELETED_NODE &&
1421  "Node was deleted but visit returned NULL!");
1422 
1423  if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1424  TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1425 
1426  // Expose the DAG combiner to the target combiner impls.
1428  DagCombineInfo(DAG, Level, false, this);
1429 
1430  RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1431  }
1432  }
1433 
1434  // If nothing happened still, try promoting the operation.
1435  if (!RV.getNode()) {
1436  switch (N->getOpcode()) {
1437  default: break;
1438  case ISD::ADD:
1439  case ISD::SUB:
1440  case ISD::MUL:
1441  case ISD::AND:
1442  case ISD::OR:
1443  case ISD::XOR:
1444  RV = PromoteIntBinOp(SDValue(N, 0));
1445  break;
1446  case ISD::SHL:
1447  case ISD::SRA:
1448  case ISD::SRL:
1449  RV = PromoteIntShiftOp(SDValue(N, 0));
1450  break;
1451  case ISD::SIGN_EXTEND:
1452  case ISD::ZERO_EXTEND:
1453  case ISD::ANY_EXTEND:
1454  RV = PromoteExtend(SDValue(N, 0));
1455  break;
1456  case ISD::LOAD:
1457  if (PromoteLoad(SDValue(N, 0)))
1458  RV = SDValue(N, 0);
1459  break;
1460  }
1461  }
1462 
1463  // If N is a commutative binary node, try commuting it to enable more
1464  // sdisel CSE.
1465  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1466  N->getNumValues() == 1) {
1467  SDValue N0 = N->getOperand(0);
1468  SDValue N1 = N->getOperand(1);
1469 
1470  // Constant operands are canonicalized to RHS.
1471  if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1472  SDValue Ops[] = {N1, N0};
1473  SDNode *CSENode;
1474  if (const auto *BinNode = dyn_cast<BinaryWithFlagsSDNode>(N)) {
1475  CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1476  &BinNode->Flags);
1477  } else {
1478  CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
1479  }
1480  if (CSENode)
1481  return SDValue(CSENode, 0);
1482  }
1483  }
1484 
1485  return RV;
1486 }
1487 
1488 /// Given a node, return its input chain if it has one, otherwise return a null
1489 /// sd operand.
1491  if (unsigned NumOps = N->getNumOperands()) {
1492  if (N->getOperand(0).getValueType() == MVT::Other)
1493  return N->getOperand(0);
1494  if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1495  return N->getOperand(NumOps-1);
1496  for (unsigned i = 1; i < NumOps-1; ++i)
1497  if (N->getOperand(i).getValueType() == MVT::Other)
1498  return N->getOperand(i);
1499  }
1500  return SDValue();
1501 }
1502 
/// Simplify a TokenFactor: drop redundant chains, flatten nested single-use
/// TokenFactors into this one, and deduplicate repeated operands.
1503 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1504  // If N has two operands, where one has an input chain equal to the other,
1505  // the 'other' chain is redundant.
1506  if (N->getNumOperands() == 2) {
1507  if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1508  return N->getOperand(0);
1509  if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1510  return N->getOperand(1);
1511  }
1512 
1513  SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1514  SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1515  SmallPtrSet<SDNode*, 16> SeenOps;
1516  bool Changed = false; // If we should replace this token factor.
1517 
1518  // Start out with this token factor.
1519  TFs.push_back(N);
1520 
1521  // Iterate through token factors. The TFs grows when new token factors are
1522  // encountered.
1523  for (unsigned i = 0; i < TFs.size(); ++i) {
1524  SDNode *TF = TFs[i];
1525 
1526  // Check each of the operands.
1527  for (const SDValue &Op : TF->op_values()) {
1528 
1529  switch (Op.getOpcode()) {
1530  case ISD::EntryToken:
1531  // Entry tokens don't need to be added to the list. They are
1532  // redundant.
1533  Changed = true;
1534  break;
1535 
1536  case ISD::TokenFactor:
// Only merge a nested TokenFactor when this is its sole user and it has
// not already been queued.
1537  if (Op.hasOneUse() &&
1538  std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
1539  // Queue up for processing.
1540  TFs.push_back(Op.getNode());
1541  // Clean up in case the token factor is removed.
1542  AddToWorklist(Op.getNode());
1543  Changed = true;
1544  break;
1545  }
1546  // Fall thru
1547 
1548  default:
1549  // Only add if it isn't already in the list.
1550  if (SeenOps.insert(Op.getNode()).second)
1551  Ops.push_back(Op);
1552  else
1553  Changed = true;
1554  break;
1555  }
1556  }
1557  }
1558 
1559  SDValue Result;
1560 
1561  // If we've changed things around then replace token factor.
1562  if (Changed) {
1563  if (Ops.empty()) {
1564  // The entry token is the only possible outcome.
1565  Result = DAG.getEntryNode();
1566  } else {
1567  // New and improved token factor.
1568  Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
1569  }
1570 
1571  // Add users to worklist if AA is enabled, since it may introduce
1572  // a lot of new chained token factors while removing memory deps.
1573  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
1574  : DAG.getSubtarget().useAA();
1575  return CombineTo(N, Result, UseAA /*add to worklist*/);
1576  }
1577 
// No change: Result is still a null SDValue, telling the caller nothing folded.
1578  return Result;
1579 }
1580 
1581 /// MERGE_VALUES can always be eliminated.
1582 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1583  WorklistRemover DeadNodes(*this);
1584  // Replacing results may cause a different MERGE_VALUES to suddenly
1585  // be CSE'd with N, and carry its uses with it. Iterate until no
1586  // uses remain, to ensure that the node can be safely deleted.
1587  // First add the users of this node to the work list so that they
1588  // can be tried again once they have new operands.
1589  AddUsersToWorklist(N);
1590  do {
// Forward each result value of N to the corresponding operand.
1591  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1592  DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
1593  } while (!N->use_empty());
1594  deleteAndRecombine(N);
1595  return SDValue(N, 0); // Return N so it doesn't get rechecked!
1596 }
1597 
1598 static bool isNullConstant(SDValue V) {
1599  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
1600  return Const != nullptr && Const->isNullValue();
1601 }
1602 
1603 static bool isNullFPConstant(SDValue V) {
1605  return Const != nullptr && Const->isZero() && !Const->isNegative();
1606 }
1607 
1608 static bool isAllOnesConstant(SDValue V) {
1609  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
1610  return Const != nullptr && Const->isAllOnesValue();
1611 }
1612 
1613 static bool isOneConstant(SDValue V) {
1614  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
1615  return Const != nullptr && Const->isOne();
1616 }
1617 
1618 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a
1619 /// ContantSDNode pointer else nullptr.
1622  return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1623 }
1624 
/// Combine an ISD::ADD node: constant folding, canonicalization, add/sub
/// algebra, add->or when operands share no bits, and i1-extension rewrites.
/// Returns the replacement value or a null SDValue.
///
/// NOTE(review): several lines of this function were dropped by the HTML
/// export (the embedded numbering jumps at 1636/1638, 1648-1649 and
/// 1653-1654) — presumably the all-zeros-build-vector guards, the N0C/N1C
/// constant extractions, and the canonicalization condition. Verify against
/// the original tree before relying on the gapped regions.
1625 SDValue DAGCombiner::visitADD(SDNode *N) {
1626  SDValue N0 = N->getOperand(0);
1627  SDValue N1 = N->getOperand(1);
1628  EVT VT = N0.getValueType();
1629 
1630  // fold vector ops
1631  if (VT.isVector()) {
1632  if (SDValue FoldedVOp = SimplifyVBinOp(N))
1633  return FoldedVOp;
1634 
1635  // fold (add x, 0) -> x, vector edition
1637  return N0;
1639  return N1;
1640  }
1641 
1642  // fold (add x, undef) -> undef
1643  if (N0.getOpcode() == ISD::UNDEF)
1644  return N0;
1645  if (N1.getOpcode() == ISD::UNDEF)
1646  return N1;
1647  // fold (add c1, c2) -> c1+c2
1650  if (N0C && N1C)
1651  return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
1652  // canonicalize constant to RHS
1655  return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
1656  // fold (add x, 0) -> x
1657  if (isNullConstant(N1))
1658  return N0;
1659  // fold (add Sym, c) -> Sym+c
1660  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
1661  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
1662  GA->getOpcode() == ISD::GlobalAddress)
1663  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
1664  GA->getOffset() +
1665  (uint64_t)N1C->getSExtValue());
1666  // fold ((c1-A)+c2) -> (c1+c2)-A
1667  if (N1C && N0.getOpcode() == ISD::SUB)
1668  if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
1669  SDLoc DL(N);
1670  return DAG.getNode(ISD::SUB, DL, VT,
1671  DAG.getConstant(N1C->getAPIntValue()+
1672  N0C->getAPIntValue(), DL, VT),
1673  N0.getOperand(1));
1674  }
1675  // reassociate add
1676  if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
1677  return RADD;
1678  // fold ((0-A) + B) -> B-A
1679  if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
1680  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
1681  // fold (A + (0-B)) -> A-B
1682  if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
1683  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
1684  // fold (A+(B-A)) -> B
1685  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
1686  return N1.getOperand(0);
1687  // fold ((B-A)+A) -> B
1688  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
1689  return N0.getOperand(0);
1690  // fold (A+(B-(A+C))) to (B-C)
1691  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1692  N0 == N1.getOperand(1).getOperand(0))
1693  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
1694  N1.getOperand(1).getOperand(1));
1695  // fold (A+(B-(C+A))) to (B-C)
1696  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
1697  N0 == N1.getOperand(1).getOperand(1))
1698  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
1699  N1.getOperand(1).getOperand(0));
1700  // fold (A+((B-A)+or-C)) to (B+or-C)
1701  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
1702  N1.getOperand(0).getOpcode() == ISD::SUB &&
1703  N0 == N1.getOperand(0).getOperand(1))
1704  return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
1705  N1.getOperand(0).getOperand(0), N1.getOperand(1));
1706 
1707  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
1708  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
1709  SDValue N00 = N0.getOperand(0);
1710  SDValue N01 = N0.getOperand(1);
1711  SDValue N10 = N1.getOperand(0);
1712  SDValue N11 = N1.getOperand(1);
1713 
1714  if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
1715  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1716  DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
1717  DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
1718  }
1719 
// Let demanded-bits analysis simplify scalar adds in place.
1720  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
1721  return SDValue(N, 0);
1722 
1723  // fold (a+b) -> (a|b) iff a and b share no bits.
1724  if (VT.isInteger() && !VT.isVector()) {
1725  APInt LHSZero, LHSOne;
1726  APInt RHSZero, RHSOne;
1727  DAG.computeKnownBits(N0, LHSZero, LHSOne);
1728 
1729  if (LHSZero.getBoolValue()) {
1730  DAG.computeKnownBits(N1, RHSZero, RHSOne);
1731 
1732  // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1733  // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1734  if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
1735  if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
1736  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
1737  }
1738  }
1739  }
1740 
1741  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
1742  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
1743  isNullConstant(N1.getOperand(0).getOperand(0)))
1744  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
1745  DAG.getNode(ISD::SHL, SDLoc(N), VT,
1746  N1.getOperand(0).getOperand(1),
1747  N1.getOperand(1)));
1748  if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
1749  isNullConstant(N0.getOperand(0).getOperand(0)))
1750  return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
1751  DAG.getNode(ISD::SHL, SDLoc(N), VT,
1752  N0.getOperand(0).getOperand(1),
1753  N0.getOperand(1)));
1754 
1755  if (N1.getOpcode() == ISD::AND) {
1756  SDValue AndOp0 = N1.getOperand(0);
1757  unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
1758  unsigned DestBits = VT.getScalarType().getSizeInBits();
1759 
1760  // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
1761  // and similar xforms where the inner op is either ~0 or 0.
1762  if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
1763  SDLoc DL(N);
1764  return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
1765  }
1766  }
1767 
1768  // add (sext i1), X -> sub X, (zext i1)
1769  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
1770  N0.getOperand(0).getValueType() == MVT::i1 &&
1771  !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
1772  SDLoc DL(N);
1773  SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
1774  return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
1775  }
1776 
1777  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
1778  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1779  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1780  if (TN->getVT() == MVT::i1) {
1781  SDLoc DL(N);
1782  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1783  DAG.getConstant(1, DL, VT));
1784  return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
1785  }
1786  }
1787 
1788  return SDValue();
1789 }
1790 
1791 SDValue DAGCombiner::visitADDC(SDNode *N) {
1792  SDValue N0 = N->getOperand(0);
1793  SDValue N1 = N->getOperand(1);
1794  EVT VT = N0.getValueType();
1795 
1796  // If the flag result is dead, turn this into an ADD.
1797  if (!N->hasAnyUseOfValue(1))
1798  return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
1799  DAG.getNode(ISD::CARRY_FALSE,
1800  SDLoc(N), MVT::Glue));
1801 
1802  // canonicalize constant to RHS.
1805  if (N0C && !N1C)
1806  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
1807 
1808  // fold (addc x, 0) -> x + no carry out
1809  if (isNullConstant(N1))
1810  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1811  SDLoc(N), MVT::Glue));
1812 
1813  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1814  APInt LHSZero, LHSOne;
1815  APInt RHSZero, RHSOne;
1816  DAG.computeKnownBits(N0, LHSZero, LHSOne);
1817 
1818  if (LHSZero.getBoolValue()) {
1819  DAG.computeKnownBits(N1, RHSZero, RHSOne);
1820 
1821  // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1822  // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1823  if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
1824  return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
1825  DAG.getNode(ISD::CARRY_FALSE,
1826  SDLoc(N), MVT::Glue));
1827  }
1828 
1829  return SDValue();
1830 }
1831 
1832 SDValue DAGCombiner::visitADDE(SDNode *N) {
1833  SDValue N0 = N->getOperand(0);
1834  SDValue N1 = N->getOperand(1);
1835  SDValue CarryIn = N->getOperand(2);
1836 
1837  // canonicalize constant to RHS
1840  if (N0C && !N1C)
1841  return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
1842  N1, N0, CarryIn);
1843 
1844  // fold (adde x, y, false) -> (addc x, y)
1845  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1846  return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
1847 
1848  return SDValue();
1849 }
1850 
1851 // Since it may not be valid to emit a fold to zero for vector initializers
1852 // check if we can before folding.
1853 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
1854  SelectionDAG &DAG,
1855  bool LegalOperations, bool LegalTypes) {
1856  if (!VT.isVector())
1857  return DAG.getConstant(0, DL, VT);
1858  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
1859  return DAG.getConstant(0, DL, VT);
1860  return SDValue();
1861 }
1862 
1863 SDValue DAGCombiner::visitSUB(SDNode *N) {
1864  SDValue N0 = N->getOperand(0);
1865  SDValue N1 = N->getOperand(1);
1866  EVT VT = N0.getValueType();
1867 
1868  // fold vector ops
1869  if (VT.isVector()) {
1870  if (SDValue FoldedVOp = SimplifyVBinOp(N))
1871  return FoldedVOp;
1872 
1873  // fold (sub x, 0) -> x, vector edition
1875  return N0;
1876  }
1877 
1878  // fold (sub x, x) -> 0
1879  // FIXME: Refactor this and xor and other similar operations together.
1880  if (N0 == N1)
1881  return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
1882  // fold (sub c1, c2) -> c1-c2
1885  if (N0C && N1C)
1886  return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
1887  // fold (sub x, c) -> (add x, -c)
1888  if (N1C) {
1889  SDLoc DL(N);
1890  return DAG.getNode(ISD::ADD, DL, VT, N0,
1891  DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
1892  }
1893  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
1894  if (isAllOnesConstant(N0))
1895  return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
1896  // fold A-(A-B) -> B
1897  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
1898  return N1.getOperand(1);
1899  // fold (A+B)-A -> B
1900  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
1901  return N0.getOperand(1);
1902  // fold (A+B)-B -> A
1903  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
1904  return N0.getOperand(0);
1905  // fold C2-(A+C1) -> (C2-C1)-A
1906  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
1908  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
1909  SDLoc DL(N);
1910  SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
1911  DL, VT);
1912  return DAG.getNode(ISD::SUB, DL, VT, NewC,
1913  N1.getOperand(0));
1914  }
1915  // fold ((A+(B+or-C))-B) -> A+or-C
1916  if (N0.getOpcode() == ISD::ADD &&
1917  (N0.getOperand(1).getOpcode() == ISD::SUB ||
1918  N0.getOperand(1).getOpcode() == ISD::ADD) &&
1919  N0.getOperand(1).getOperand(0) == N1)
1920  return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
1921  N0.getOperand(0), N0.getOperand(1).getOperand(1));
1922  // fold ((A+(C+B))-B) -> A+C
1923  if (N0.getOpcode() == ISD::ADD &&
1924  N0.getOperand(1).getOpcode() == ISD::ADD &&
1925  N0.getOperand(1).getOperand(1) == N1)
1926  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
1927  N0.getOperand(0), N0.getOperand(1).getOperand(0));
1928  // fold ((A-(B-C))-C) -> A-B
1929  if (N0.getOpcode() == ISD::SUB &&
1930  N0.getOperand(1).getOpcode() == ISD::SUB &&
1931  N0.getOperand(1).getOperand(1) == N1)
1932  return DAG.getNode(ISD::SUB, SDLoc(N), VT,
1933  N0.getOperand(0), N0.getOperand(1).getOperand(0));
1934 
1935  // If either operand of a sub is undef, the result is undef
1936  if (N0.getOpcode() == ISD::UNDEF)
1937  return N0;
1938  if (N1.getOpcode() == ISD::UNDEF)
1939  return N1;
1940 
1941  // If the relocation model supports it, consider symbol offsets.
1942  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
1943  if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
1944  // fold (sub Sym, c) -> Sym-c
1945  if (N1C && GA->getOpcode() == ISD::GlobalAddress)
1946  return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
1947  GA->getOffset() -
1948  (uint64_t)N1C->getSExtValue());
1949  // fold (sub Sym+c1, Sym+c2) -> c1-c2
1950  if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
1951  if (GA->getGlobal() == GB->getGlobal())
1952  return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
1953  SDLoc(N), VT);
1954  }
1955 
1956  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
1957  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
1958  VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
1959  if (TN->getVT() == MVT::i1) {
1960  SDLoc DL(N);
1961  SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
1962  DAG.getConstant(1, DL, VT));
1963  return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
1964  }
1965  }
1966 
1967  return SDValue();
1968 }
1969 
1970 SDValue DAGCombiner::visitSUBC(SDNode *N) {
1971  SDValue N0 = N->getOperand(0);
1972  SDValue N1 = N->getOperand(1);
1973  EVT VT = N0.getValueType();
1974 
1975  // If the flag result is dead, turn this into an SUB.
1976  if (!N->hasAnyUseOfValue(1))
1977  return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
1978  DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1979  MVT::Glue));
1980 
1981  // fold (subc x, x) -> 0 + no borrow
1982  if (N0 == N1) {
1983  SDLoc DL(N);
1984  return CombineTo(N, DAG.getConstant(0, DL, VT),
1985  DAG.getNode(ISD::CARRY_FALSE, DL,
1986  MVT::Glue));
1987  }
1988 
1989  // fold (subc x, 0) -> x + no borrow
1990  if (isNullConstant(N1))
1991  return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1992  MVT::Glue));
1993 
1994  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
1995  if (isAllOnesConstant(N0))
1996  return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
1997  DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
1998  MVT::Glue));
1999 
2000  return SDValue();
2001 }
2002 
2003 SDValue DAGCombiner::visitSUBE(SDNode *N) {
2004  SDValue N0 = N->getOperand(0);
2005  SDValue N1 = N->getOperand(1);
2006  SDValue CarryIn = N->getOperand(2);
2007 
2008  // fold (sube x, y, false) -> (subc x, y)
2009  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2010  return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
2011 
2012  return SDValue();
2013 }
2014 
2015 SDValue DAGCombiner::visitMUL(SDNode *N) {
2016  SDValue N0 = N->getOperand(0);
2017  SDValue N1 = N->getOperand(1);
2018  EVT VT = N0.getValueType();
2019 
2020  // fold (mul x, undef) -> 0
2021  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2022  return DAG.getConstant(0, SDLoc(N), VT);
2023 
2024  bool N0IsConst = false;
2025  bool N1IsConst = false;
2026  bool N1IsOpaqueConst = false;
2027  bool N0IsOpaqueConst = false;
2028  APInt ConstValue0, ConstValue1;
2029  // fold vector ops
2030  if (VT.isVector()) {
2031  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2032  return FoldedVOp;
2033 
2034  N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
2035  N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
2036  } else {
2037  N0IsConst = isa<ConstantSDNode>(N0);
2038  if (N0IsConst) {
2039  ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
2040  N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
2041  }
2042  N1IsConst = isa<ConstantSDNode>(N1);
2043  if (N1IsConst) {
2044  ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
2045  N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
2046  }
2047  }
2048 
2049  // fold (mul c1, c2) -> c1*c2
2050  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
2051  return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
2052  N0.getNode(), N1.getNode());
2053 
2054  // canonicalize constant to RHS (vector doesn't have to splat)
2057  return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
2058  // fold (mul x, 0) -> 0
2059  if (N1IsConst && ConstValue1 == 0)
2060  return N1;
2061  // We require a splat of the entire scalar bit width for non-contiguous
2062  // bit patterns.
2063  bool IsFullSplat =
2064  ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
2065  // fold (mul x, 1) -> x
2066  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
2067  return N0;
2068  // fold (mul x, -1) -> 0-x
2069  if (N1IsConst && ConstValue1.isAllOnesValue()) {
2070  SDLoc DL(N);
2071  return DAG.getNode(ISD::SUB, DL, VT,
2072  DAG.getConstant(0, DL, VT), N0);
2073  }
2074  // fold (mul x, (1 << c)) -> x << c
2075  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
2076  IsFullSplat) {
2077  SDLoc DL(N);
2078  return DAG.getNode(ISD::SHL, DL, VT, N0,
2079  DAG.getConstant(ConstValue1.logBase2(), DL,
2081  }
2082  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
2083  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
2084  IsFullSplat) {
2085  unsigned Log2Val = (-ConstValue1).logBase2();
2086  SDLoc DL(N);
2087  // FIXME: If the input is something that is easily negated (e.g. a
2088  // single-use add), we should put the negate there.
2089  return DAG.getNode(ISD::SUB, DL, VT,
2090  DAG.getConstant(0, DL, VT),
2091  DAG.getNode(ISD::SHL, DL, VT, N0,
2092  DAG.getConstant(Log2Val, DL,
2093  getShiftAmountTy(N0.getValueType()))));
2094  }
2095 
2096  APInt Val;
2097  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
2098  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
2099  (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
2100  isa<ConstantSDNode>(N0.getOperand(1)))) {
2101  SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
2102  N1, N0.getOperand(1));
2103  AddToWorklist(C3.getNode());
2104  return DAG.getNode(ISD::MUL, SDLoc(N), VT,
2105  N0.getOperand(0), C3);
2106  }
2107 
2108  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
2109  // use.
2110  {
2111  SDValue Sh(nullptr,0), Y(nullptr,0);
2112  // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
2113  if (N0.getOpcode() == ISD::SHL &&
2114  (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
2115  isa<ConstantSDNode>(N0.getOperand(1))) &&
2116  N0.getNode()->hasOneUse()) {
2117  Sh = N0; Y = N1;
2118  } else if (N1.getOpcode() == ISD::SHL &&
2119  isa<ConstantSDNode>(N1.getOperand(1)) &&
2120  N1.getNode()->hasOneUse()) {
2121  Sh = N1; Y = N0;
2122  }
2123 
2124  if (Sh.getNode()) {
2125  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2126  Sh.getOperand(0), Y);
2127  return DAG.getNode(ISD::SHL, SDLoc(N), VT,
2128  Mul, Sh.getOperand(1));
2129  }
2130  }
2131 
2132  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
2133  if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
2134  (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
2135  isa<ConstantSDNode>(N0.getOperand(1))))
2136  return DAG.getNode(ISD::ADD, SDLoc(N), VT,
2137  DAG.getNode(ISD::MUL, SDLoc(N0), VT,
2138  N0.getOperand(0), N1),
2139  DAG.getNode(ISD::MUL, SDLoc(N1), VT,
2140  N0.getOperand(1), N1));
2141 
2142  // reassociate mul
2143  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
2144  return RMUL;
2145 
2146  return SDValue();
2147 }
2148 
2149 SDValue DAGCombiner::visitSDIV(SDNode *N) {
2150  SDValue N0 = N->getOperand(0);
2151  SDValue N1 = N->getOperand(1);
2152  EVT VT = N->getValueType(0);
2153 
2154  // fold vector ops
2155  if (VT.isVector())
2156  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2157  return FoldedVOp;
2158 
2159  // fold (sdiv c1, c2) -> c1/c2
2162  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
2163  return DAG.FoldConstantArithmetic(ISD::SDIV, SDLoc(N), VT, N0C, N1C);
2164  // fold (sdiv X, 1) -> X
2165  if (N1C && N1C->isOne())
2166  return N0;
2167  // fold (sdiv X, -1) -> 0-X
2168  if (N1C && N1C->isAllOnesValue()) {
2169  SDLoc DL(N);
2170  return DAG.getNode(ISD::SUB, DL, VT,
2171  DAG.getConstant(0, DL, VT), N0);
2172  }
2173  // If we know the sign bits of both operands are zero, strength reduce to a
2174  // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
2175  if (!VT.isVector()) {
2176  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2177  return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(),
2178  N0, N1);
2179  }
2180 
2181  // fold (sdiv X, pow2) -> simple ops after legalize
2182  // FIXME: We check for the exact bit here because the generic lowering gives
2183  // better results in that case. The target-specific lowering should learn how
2184  // to handle exact sdivs efficiently.
2185  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2186  !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
2187  (N1C->getAPIntValue().isPowerOf2() ||
2188  (-N1C->getAPIntValue()).isPowerOf2())) {
2189  // If dividing by powers of two is cheap, then don't perform the following
2190  // fold.
2191  if (TLI.isPow2SDivCheap())
2192  return SDValue();
2193 
2194  // Target-specific implementation of sdiv x, pow2.
2195  SDValue Res = BuildSDIVPow2(N);
2196  if (Res.getNode())
2197  return Res;
2198 
2199  unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
2200  SDLoc DL(N);
2201 
2202  // Splat the sign bit into the register
2203  SDValue SGN =
2204  DAG.getNode(ISD::SRA, DL, VT, N0,
2205  DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
2207  AddToWorklist(SGN.getNode());
2208 
2209  // Add (N0 < 0) ? abs2 - 1 : 0;
2210  SDValue SRL =
2211  DAG.getNode(ISD::SRL, DL, VT, SGN,
2212  DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
2213  getShiftAmountTy(SGN.getValueType())));
2214  SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
2215  AddToWorklist(SRL.getNode());
2216  AddToWorklist(ADD.getNode()); // Divide by pow2
2217  SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
2218  DAG.getConstant(lg2, DL,
2219  getShiftAmountTy(ADD.getValueType())));
2220 
2221  // If we're dividing by a positive value, we're done. Otherwise, we must
2222  // negate the result.
2223  if (N1C->getAPIntValue().isNonNegative())
2224  return SRA;
2225 
2226  AddToWorklist(SRA.getNode());
2227  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
2228  }
2229 
2230  // If integer divide is expensive and we satisfy the requirements, emit an
2231  // alternate sequence.
2232  if (N1C && !TLI.isIntDivCheap()) {
2233  SDValue Op = BuildSDIV(N);
2234  if (Op.getNode()) return Op;
2235  }
2236 
2237  // undef / X -> 0
2238  if (N0.getOpcode() == ISD::UNDEF)
2239  return DAG.getConstant(0, SDLoc(N), VT);
2240  // X / undef -> undef
2241  if (N1.getOpcode() == ISD::UNDEF)
2242  return N1;
2243 
2244  return SDValue();
2245 }
2246 
2247 SDValue DAGCombiner::visitUDIV(SDNode *N) {
2248  SDValue N0 = N->getOperand(0);
2249  SDValue N1 = N->getOperand(1);
2250  EVT VT = N->getValueType(0);
2251 
2252  // fold vector ops
2253  if (VT.isVector())
2254  if (SDValue FoldedVOp = SimplifyVBinOp(N))
2255  return FoldedVOp;
2256 
2257  // fold (udiv c1, c2) -> c1/c2
2260  if (N0C && N1C)
2261  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, SDLoc(N), VT,
2262  N0C, N1C))
2263  return Folded;
2264  // fold (udiv x, (1 << c)) -> x >>u c
2265  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) {
2266  SDLoc DL(N);
2267  return DAG.getNode(ISD::SRL, DL, VT, N0,
2268  DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
2270  }
2271  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
2272  if (N1.getOpcode() == ISD::SHL) {
2273  if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
2274  if (SHC->getAPIntValue().isPowerOf2()) {
2275  EVT ADDVT = N1.getOperand(1).getValueType();
2276  SDLoc DL(N);
2277  SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
2278  N1.getOperand(1),
2279  DAG.getConstant(SHC->getAPIntValue()
2280  .logBase2(),
2281  DL, ADDVT));
2282  AddToWorklist(Add.getNode());
2283  return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
2284  }
2285  }
2286  }
2287  // fold (udiv x, c) -> alternate
2288  if (N1C && !TLI.isIntDivCheap()) {
2289  SDValue Op = BuildUDIV(N);
2290  if (Op.getNode()) return Op;
2291  }
2292 
2293  // undef / X -> 0
2294  if (N0.getOpcode() == ISD::UNDEF)
2295  return DAG.getConstant(0, SDLoc(N), VT);
2296  // X / undef -> undef
2297  if (N1.getOpcode() == ISD::UNDEF)
2298  return N1;
2299 
2300  return SDValue();
2301 }
2302 
2303 SDValue DAGCombiner::visitSREM(SDNode *N) {
2304  SDValue N0 = N->getOperand(0);
2305  SDValue N1 = N->getOperand(1);
2306  EVT VT = N->getValueType(0);
2307 
2308  // fold (srem c1, c2) -> c1%c2
2311  if (N0C && N1C)
2312  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::SREM, SDLoc(N), VT,
2313  N0C, N1C))
2314  return Folded;
2315  // If we know the sign bits of both operands are zero, strength reduce to a
2316  // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
2317  if (!VT.isVector()) {
2318  if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
2319  return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
2320  }
2321 
2322  // If X/C can be simplified by the division-by-constant logic, lower
2323  // X%C to the equivalent of X-X/C*C.
2324  if (N1C && !N1C->isNullValue()) {
2325  SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
2326  AddToWorklist(Div.getNode());
2327  SDValue OptimizedDiv = combine(Div.getNode());
2328  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2329  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2330  OptimizedDiv, N1);
2331  SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
2332  AddToWorklist(Mul.getNode());
2333  return Sub;
2334  }
2335  }
2336 
2337  // undef % X -> 0
2338  if (N0.getOpcode() == ISD::UNDEF)
2339  return DAG.getConstant(0, SDLoc(N), VT);
2340  // X % undef -> undef
2341  if (N1.getOpcode() == ISD::UNDEF)
2342  return N1;
2343 
2344  return SDValue();
2345 }
2346 
2347 SDValue DAGCombiner::visitUREM(SDNode *N) {
2348  SDValue N0 = N->getOperand(0);
2349  SDValue N1 = N->getOperand(1);
2350  EVT VT = N->getValueType(0);
2351 
2352  // fold (urem c1, c2) -> c1%c2
2355  if (N0C && N1C)
2356  if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UREM, SDLoc(N), VT,
2357  N0C, N1C))
2358  return Folded;
2359  // fold (urem x, pow2) -> (and x, pow2-1)
2360  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
2361  N1C->getAPIntValue().isPowerOf2()) {
2362  SDLoc DL(N);
2363  return DAG.getNode(ISD::AND, DL, VT, N0,
2364  DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
2365  }
2366  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
2367  if (N1.getOpcode() == ISD::SHL) {
2368  if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
2369  if (SHC->getAPIntValue().isPowerOf2()) {
2370  SDLoc DL(N);
2371  SDValue Add =
2372  DAG.getNode(ISD::ADD, DL, VT, N1,
2373  DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
2374  VT));
2375  AddToWorklist(Add.getNode());
2376  return DAG.getNode(ISD::AND, DL, VT, N0, Add);
2377  }
2378  }
2379  }
2380 
2381  // If X/C can be simplified by the division-by-constant logic, lower
2382  // X%C to the equivalent of X-X/C*C.
2383  if (N1C && !N1C->isNullValue()) {
2384  SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1);
2385  AddToWorklist(Div.getNode());
2386  SDValue OptimizedDiv = combine(Div.getNode());
2387  if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
2388  SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
2389  OptimizedDiv, N1);
2390  SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
2391  AddToWorklist(Mul.getNode());
2392  return Sub;
2393  }
2394  }
2395 
2396  // undef % X -> 0
2397  if (N0.getOpcode() == ISD::UNDEF)
2398  return DAG.getConstant(0, SDLoc(N), VT);
2399  // X % undef -> undef
2400  if (N1.getOpcode() == ISD::UNDEF)
2401  return N1;
2402 
2403  return SDValue();
2404 }
2405 
2406 SDValue DAGCombiner::visitMULHS(SDNode *N) {
2407  SDValue N0 = N->getOperand(0);
2408  SDValue N1 = N->getOperand(1);
2409  EVT VT = N->getValueType(0);
2410  SDLoc DL(N);
2411 
2412  // fold (mulhs x, 0) -> 0
2413  if (isNullConstant(N1))
2414  return N1;
2415  // fold (mulhs x, 1) -> (sra x, size(x)-1)
2416  if (isOneConstant(N1)) {
2417  SDLoc DL(N);
2418  return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
2419  DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
2420  DL,
2422  }
2423  // fold (mulhs x, undef) -> 0
2424  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2425  return DAG.getConstant(0, SDLoc(N), VT);
2426 
2427  // If the type twice as wide is legal, transform the mulhs to a wider multiply
2428  // plus a shift.
2429  if (VT.isSimple() && !VT.isVector()) {
2430  MVT Simple = VT.getSimpleVT();
2431  unsigned SimpleSize = Simple.getSizeInBits();
2432  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2433  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2434  N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
2435  N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
2436  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2437  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2438  DAG.getConstant(SimpleSize, DL,
2440  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2441  }
2442  }
2443 
2444  return SDValue();
2445 }
2446 
2447 SDValue DAGCombiner::visitMULHU(SDNode *N) {
2448  SDValue N0 = N->getOperand(0);
2449  SDValue N1 = N->getOperand(1);
2450  EVT VT = N->getValueType(0);
2451  SDLoc DL(N);
2452 
2453  // fold (mulhu x, 0) -> 0
2454  if (isNullConstant(N1))
2455  return N1;
2456  // fold (mulhu x, 1) -> 0
2457  if (isOneConstant(N1))
2458  return DAG.getConstant(0, DL, N0.getValueType());
2459  // fold (mulhu x, undef) -> 0
2460  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2461  return DAG.getConstant(0, DL, VT);
2462 
2463  // If the type twice as wide is legal, transform the mulhu to a wider multiply
2464  // plus a shift.
2465  if (VT.isSimple() && !VT.isVector()) {
2466  MVT Simple = VT.getSimpleVT();
2467  unsigned SimpleSize = Simple.getSizeInBits();
2468  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2469  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2470  N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
2471  N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
2472  N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
2473  N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
2474  DAG.getConstant(SimpleSize, DL,
2476  return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
2477  }
2478  }
2479 
2480  return SDValue();
2481 }
2482 
2483 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
2484 /// give the opcodes for the two computations that are being performed. Return
2485 /// true if a simplification was made.
2486 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
2487  unsigned HiOp) {
2488  // If the high half is not needed, just compute the low half.
2489  bool HiExists = N->hasAnyUseOfValue(1);
2490  if (!HiExists &&
2491  (!LegalOperations ||
2492  TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
2493  SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2494  return CombineTo(N, Res, Res);
2495  }
2496 
2497  // If the low half is not needed, just compute the high half.
2498  bool LoExists = N->hasAnyUseOfValue(0);
2499  if (!LoExists &&
2500  (!LegalOperations ||
2501  TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
2502  SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2503  return CombineTo(N, Res, Res);
2504  }
2505 
2506  // If both halves are used, return as it is.
2507  if (LoExists && HiExists)
2508  return SDValue();
2509 
2510  // If the two computed results can be simplified separately, separate them.
2511  if (LoExists) {
2512  SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
2513  AddToWorklist(Lo.getNode());
2514  SDValue LoOpt = combine(Lo.getNode());
2515  if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
2516  (!LegalOperations ||
2517  TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
2518  return CombineTo(N, LoOpt, LoOpt);
2519  }
2520 
2521  if (HiExists) {
2522  SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
2523  AddToWorklist(Hi.getNode());
2524  SDValue HiOpt = combine(Hi.getNode());
2525  if (HiOpt.getNode() && HiOpt != Hi &&
2526  (!LegalOperations ||
2527  TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
2528  return CombineTo(N, HiOpt, HiOpt);
2529  }
2530 
2531  return SDValue();
2532 }
2533 
2534 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
2535  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
2536  if (Res.getNode()) return Res;
2537 
2538  EVT VT = N->getValueType(0);
2539  SDLoc DL(N);
2540 
2541  // If the type is twice as wide is legal, transform the mulhu to a wider
2542  // multiply plus a shift.
2543  if (VT.isSimple() && !VT.isVector()) {
2544  MVT Simple = VT.getSimpleVT();
2545  unsigned SimpleSize = Simple.getSizeInBits();
2546  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2547  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2548  SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
2549  SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
2550  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2551  // Compute the high part as N1.
2552  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2553  DAG.getConstant(SimpleSize, DL,
2555  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2556  // Compute the low part as N0.
2557  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2558  return CombineTo(N, Lo, Hi);
2559  }
2560  }
2561 
2562  return SDValue();
2563 }
2564 
2565 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
2566  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
2567  if (Res.getNode()) return Res;
2568 
2569  EVT VT = N->getValueType(0);
2570  SDLoc DL(N);
2571 
2572  // If the type is twice as wide is legal, transform the mulhu to a wider
2573  // multiply plus a shift.
2574  if (VT.isSimple() && !VT.isVector()) {
2575  MVT Simple = VT.getSimpleVT();
2576  unsigned SimpleSize = Simple.getSizeInBits();
2577  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
2578  if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
2579  SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
2580  SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
2581  Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
2582  // Compute the high part as N1.
2583  Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
2584  DAG.getConstant(SimpleSize, DL,
2586  Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
2587  // Compute the low part as N0.
2588  Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
2589  return CombineTo(N, Lo, Hi);
2590  }
2591  }
2592 
2593  return SDValue();
2594 }
2595 
2596 SDValue DAGCombiner::visitSMULO(SDNode *N) {
2597  // (smulo x, 2) -> (saddo x, x)
2598  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2599  if (C2->getAPIntValue() == 2)
2600  return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
2601  N->getOperand(0), N->getOperand(0));
2602 
2603  return SDValue();
2604 }
2605 
2606 SDValue DAGCombiner::visitUMULO(SDNode *N) {
2607  // (umulo x, 2) -> (uaddo x, x)
2608  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
2609  if (C2->getAPIntValue() == 2)
2610  return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
2611  N->getOperand(0), N->getOperand(0));
2612 
2613  return SDValue();
2614 }
2615 
2616 SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
2617  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
2618  if (Res.getNode()) return Res;
2619 
2620  return SDValue();
2621 }
2622 
2623 SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
2624  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
2625  if (Res.getNode()) return Res;
2626 
2627  return SDValue();
2628 }
2629 
/// If this is a binary operator with two operands of the same opcode, try to
/// simplify it.
/// Three transforms are attempted, for OP in {AND, OR, XOR}:
///   1. hoist OP above a pair of identical extend/bswap/truncate hands,
///   2. hoist OP above a pair of shifts/ANDs that share the same RHS,
///   3. hoist OP above matching bitcasts/scalar_to_vectors or identical-mask
///      vector shuffles.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       N0.getOpcode() == ISD::BSWAP ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    // Build the logical op on the narrow (pre-extend) type, then re-apply the
    // shared unary op on the result.
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorklist(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      // NOTE(review): the tail of this condition was lost in extraction;
      // upstream it appears to also gate on the combine level — confirm
      // against the original file.
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      // Apply the logical op on the un-bitcast operands, then re-cast.
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorklist(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  // NOTE(review): the guarding "if" (checking that both hands are
  // VECTOR_SHUFFLE nodes) was lost in extraction — confirm upstream.
  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

  assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
         "Inputs to shuffles are not the same type");

  // Check that both shuffles use the same mask. The masks are known to be of
  // the same length because the result vector type is the same.
  // Check also that shuffles have only one use to avoid introducing extra
  // instructions.
  if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
      SVN0->getMask().equals(SVN1->getMask())) {
    SDValue ShOp = N0->getOperand(1);

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
      if (!LegalTypes)
        ShOp = DAG.getConstant(0, SDLoc(N), VT);
      else
        ShOp = SDValue();
    }

    // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
    // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C)
    // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
    if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
      SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                    N0->getOperand(0), N1->getOperand(0));
      AddToWorklist(NewNode.getNode());
      return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                  &SVN0->getMask()[0]);
    }

    // Don't try to fold this node if it requires introducing a
    // build vector of all zeros that might be illegal at this stage.
    ShOp = N0->getOperand(0);
    if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
      if (!LegalTypes)
        ShOp = DAG.getConstant(0, SDLoc(N), VT);
      else
        ShOp = SDValue();
    }

    // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
    // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B))
    // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
    if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
      SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                    N0->getOperand(1), N1->getOperand(1));
      AddToWorklist(NewNode.getNode());
      return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                  &SVN0->getMask()[0]);
    }
  }

  return SDValue();
}
2782 
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
/// visitSELECT() already handles those cases.
/// \p LocReference supplies the debug location (and, for the ADD/SRL
/// transform, the node returned to signal "re-visit me").
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
                                  SDNode *LocReference) {
  EVT VT = N1.getValueType();

  // fold (and x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(LocReference), VT);
  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    // Both compares share the same RHS constant and the same predicate.
    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
      if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      if (isAllOnesConstant(LR)) {
        // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
        if (Op1 == ISD::SETEQ) {
          SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
                                        LR.getValueType(), LL, RL);
          AddToWorklist(ANDNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
        }
        // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
        if (Op1 == ISD::SETGT) {
          SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                       LR.getValueType(), LL, RL);
          AddToWorklist(ORNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
        }
      }
    }
    // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
    // i.e. "X is neither 0 nor -1" becomes an unsigned range check.
    if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
        Op0 == Op1 && LL.getValueType().isInteger() &&
        Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
                              (isAllOnesConstant(LR) && isNullConstant(RR)))) {
      SDLoc DL(N0);
      SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
                                    LL, DAG.getConstant(1, DL,
                                                        LL.getValueType()));
      AddToWorklist(ADDNode.getNode());
      return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
                          DAG.getConstant(2, DL, LL.getValueType()),
                          ISD::SETUGE);
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // Same operands on both sides: merge the two predicates into one setcc.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC,
            // NOTE(review): the argument line was lost in extraction;
            // upstream passes the setcc result type here — confirm.
        return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Look for (and (add x, c1), (lshr y, c2)) and widen c1's top bits when
  // that turns an illegal add-immediate into a legal one (the AND masks the
  // extra bits away anyway).
  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      APInt ADDC = ADDI->getAPIntValue();
      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          // NOTE(review): the line declaring 'Mask' (an APInt of the high c2
          // bits) was lost in extraction — confirm against the original.
                                          SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(LocReference, 0);
            }
          }
        }
      }
    }
  }

  return SDValue();
}
2889 
/// Combine an ISD::AND node. Applies constant folds, vector folds,
/// masked-bit simplifications, extload-to-zextload narrowing, and finally
/// delegates to visitANDLike / SimplifyBinOpWithSameOpcodeHands.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    // NOTE(review): the guards for these vector folds (all-zeros /
    // all-ones build-vector checks on N0 and N1) were lost in extraction —
    // confirm against the original file before relying on this section.
    // do not return N0, because undef node may exist in N0
    return DAG.getConstant(
    SDLoc(N), N0.getValueType());
    // do not return N1, because undef node may exist in N1
    return DAG.getConstant(
    SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    return N1;
    return N0;
  }

  // fold (and c1, c2) -> c1&c2
  // NOTE(review): the dyn_casts declaring N0C/N1C (ConstantSDNode*) were
  // lost in extraction — confirm upstream.
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  // NOTE(review): guarding condition lost in extraction — confirm upstream.
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getOperand(0).getOpcode() == ISD::LOAD) ||
      N0.getOpcode() == ISD::LOAD) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // NOTE(review): the declaration of 'Constant' (an APInt) was lost in
    // extraction — confirm upstream.
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getVectorElementType().getSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant =
      Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());

    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);
      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      // Fold the AND away, taking care not to fold to the old load node if we
      // replaced it.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (N1C && (N0.getOpcode() == ISD::LOAD ||
              (N0.getOpcode() == ISD::ANY_EXTEND &&
               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
      uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
        EVT LoadedVT = LN0->getMemoryVT();
        EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;

        // Mask exactly covers the loaded width: just flip to a zextload.
        if (ExtVT == LoadedVT &&
            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
                                                    ExtVT))) {

          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
                           LN0->getMemOperand());
          AddToWorklist(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }

        // Do not change the width of a volatile load.
        // Do not generate loads of non-round integer types since these can
        // be expensive (and would be wrong if the type is not byte sized).
        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy,
                                                    ExtVT))) {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes. For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
          if (DAG.getDataLayout().isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            SDLoc DL(LN0);
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorklist(NewPtr.getNode());

          SDValue Load =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), NewPtr,
                           LN0->getPointerInfo(),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                           LN0->isInvariant(), Alignment, LN0->getAAInfo());
          AddToWorklist(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (!VT.isVector() &&
      // NOTE(review): the SimplifyDemandedBits call closing this condition
      // was lost in extraction — confirm upstream.
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return BSwap;
  }

  return SDValue();
}
3205 
/// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// \p DemandHighBits is false when the caller has already masked the result
/// to 16 bits, so the bits above the low halfword need not be proven zero.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // BSWAP must be selectable for this to be profitable.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  // Canonicalize so that any AND-of-SRL form ends up in N1 and AND-of-SHL
  // in N0.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): the dyn_cast declaring N01C (mask constant of N0) was
    // lost in extraction — confirm upstream.
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): the dyn_cast declaring N11C (mask constant of N1) was
    // lost in extraction — confirm upstream.
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  // NOTE(review): the dyn_casts declaring N01C/N11C (shift amounts) were
  // lost in extraction — confirm upstream.
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): the dyn_cast declaring N001C was lost in extraction.
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    // NOTE(review): the dyn_cast declaring N101C was lost in extraction.
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both the shifted-left and shifted-right bytes must come from the same
  // source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  // Emit the bswap; for wider types the swapped halfword lands at the top,
  // so shift it back down into the low 16 bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16) {
    SDLoc DL(N);
    Res = DAG.getNode(ISD::SRL, DL, VT, Res,
                      DAG.getConstant(OpSizeInBits - 16, DL,
                                      getShiftAmountTy(VT)));
  }
  return Res;
}
3313 
/// Return true if the specified node is an element that makes up a 32-bit
/// packed halfword byteswap.
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// On success records the source node of the matched byte group in the
/// Parts slot corresponding to its mask (0xFF->0, 0xFF00->1, ...).
// NOTE(review): the function signature line was lost in extraction; it
// presumably is `static bool isBSwapHWordElement(SDValue N, ... Parts)` —
// confirm against the original file.
  if (!N.getNode()->hasOneUse())
    return false;

  // Each element is an AND combined with a single SHL or SRL by 8.
  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  // NOTE(review): the dyn_cast declaring N1C (the constant operand) was lost
  // in extraction — confirm upstream.
  if (!N1C)
    return false;

  // Map the mask constant to the byte-group index it selects.
  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      // NOTE(review): dyn_cast declaring C (shift amount) lost in extraction.
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      // NOTE(review): dyn_cast declaring C (shift amount) lost in extraction.
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    // NOTE(review): dyn_cast declaring C (shift amount) lost in extraction.
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    // NOTE(review): dyn_cast declaring C (shift amount) lost in extraction.
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each byte group may be claimed only once.
  if (Parts[Num])
    return false;

  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}
3386 
3387 /// Match a 32-bit packed halfword bswap. That is
3388 /// ((x & 0x000000ff) << 8) |
3389 /// ((x & 0x0000ff00) >> 8) |
3390 /// ((x & 0x00ff0000) << 8) |
3391 /// ((x & 0xff000000) >> 8)
3392 /// => (rotl (bswap x), 16)
3393 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
3394  if (!LegalOperations)
3395  return SDValue();
3396 
3397  EVT VT = N->getValueType(0);
3398  if (VT != MVT::i32)
3399  return SDValue();
3400  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
3401  return SDValue();
3402 
3403  // Look for either
3404  // (or (or (and), (and)), (or (and), (and)))
3405  // (or (or (or (and), (and)), (and)), (and))
3406  if (N0.getOpcode() != ISD::OR)
3407  return SDValue();
3408  SDValue N00 = N0.getOperand(0);
3409  SDValue N01 = N0.getOperand(1);
3410  SDNode *Parts[4] = {};
3411 
3412  if (N1.getOpcode() == ISD::OR &&
3413  N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
3414  // (or (or (and), (and)), (or (and), (and)))
3415  SDValue N000 = N00.getOperand(0);
3416  if (!isBSwapHWordElement(N000, Parts))
3417  return SDValue();
3418 
3419  SDValue N001 = N00.getOperand(1);
3420  if (!isBSwapHWordElement(N001, Parts))
3421  return SDValue();
3422  SDValue N010 = N01.getOperand(0);
3423  if (!isBSwapHWordElement(N010, Parts))
3424  return SDValue();
3425  SDValue N011 = N01.getOperand(1);
3426  if (!isBSwapHWordElement(N011, Parts))
3427  return SDValue();
3428  } else {
3429  // (or (or (or (and), (and)), (and)), (and))
3430  if (!isBSwapHWordElement(N1, Parts))
3431  return SDValue();
3432  if (!isBSwapHWordElement(N01, Parts))
3433  return SDValue();
3434  if (N00.getOpcode() != ISD::OR)
3435  return SDValue();
3436  SDValue N000 = N00.getOperand(0);
3437  if (!isBSwapHWordElement(N000, Parts))
3438  return SDValue();
3439  SDValue N001 = N00.getOperand(1);
3440  if (!isBSwapHWordElement(N001, Parts))
3441  return SDValue();
3442  }
3443 
3444  // Make sure the parts are all coming from the same node.
3445  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
3446  return SDValue();
3447 
3448  SDLoc DL(N);
3449  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
3450  SDValue(Parts[0], 0));
3451 
3452  // Result of the bswap should be rotated by 16. If it's not legal, then
3453  // do (x << 16) | (x >> 16).
3454  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
3455  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
3456  return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
3457  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
3458  return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
3459  return DAG.getNode(ISD::OR, DL, VT,
3460  DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
3461  DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
3462 }
3463 
3464 /// This contains all DAGCombine rules which reduce two values combined by
3465 /// an Or operation to a single value \see visitANDLike().
3466 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
3467  EVT VT = N1.getValueType();
3468  // fold (or x, undef) -> -1
3469  if (!LegalOperations &&
3470  (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
3471  EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
3472  return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
3473  SDLoc(LocReference), VT);
3474  }
3475  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
3476  SDValue LL, LR, RL, RR, CC0, CC1;
3477  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
3478  ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
3479  ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
3480 
3481  if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
3482  // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
3483  // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
3484  if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
3485  SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
3486  LR.getValueType(), LL, RL);
3487  AddToWorklist(ORNode.getNode());
3488  return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
3489  }
3490  // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
3491  // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
3492  if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
3493  SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
3494  LR.getValueType(), LL, RL);
3495  AddToWorklist(ANDNode.getNode());
3496  return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
3497  }
3498  }
3499  // canonicalize equivalent to ll == rl
3500  if (LL == RR && LR == RL) {
3501  Op1 = ISD::getSetCCSwappedOperands(Op1);
3502  std::swap(RL, RR);
3503  }
3504  if (LL == RL && LR == RR) {
3505  bool isInteger = LL.getValueType().isInteger();
3506  ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
3507  if (Result != ISD::SETCC_INVALID &&
3508  (!LegalOperations ||
3509  (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
3510  TLI.isOperationLegal(ISD::SETCC,
3512  return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
3513  LL, LR, Result);
3514  }
3515  }
3516 
3517  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
3518  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3519  // Don't increase # computations.
3520  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3521  // We can only do this xform if we know that bits from X that are set in C2
3522  // but not in C1 are already zero. Likewise for Y.
3523  if (const ConstantSDNode *N0O1C =
3525  if (const ConstantSDNode *N1O1C =
3527  // We can only do this xform if we know that bits from X that are set in
3528  // C2 but not in C1 are already zero. Likewise for Y.
3529  const APInt &LHSMask = N0O1C->getAPIntValue();
3530  const APInt &RHSMask = N1O1C->getAPIntValue();
3531 
3532  if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
3533  DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
3534  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3535  N0.getOperand(0), N1.getOperand(0));
3536  SDLoc DL(LocReference);
3537  return DAG.getNode(ISD::AND, DL, VT, X,
3538  DAG.getConstant(LHSMask | RHSMask, DL, VT));
3539  }
3540  }
3541  }
3542  }
3543 
3544  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
3545  if (N0.getOpcode() == ISD::AND &&
3546  N1.getOpcode() == ISD::AND &&
3547  N0.getOperand(0) == N1.getOperand(0) &&
3548  // Don't increase # computations.
3549  (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
3550  SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
3551  N0.getOperand(1), N1.getOperand(1));
3552  return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
3553  }
3554 
3555  return SDValue();
3556 }
3557 
3558 SDValue DAGCombiner::visitOR(SDNode *N) {
3559  SDValue N0 = N->getOperand(0);
3560  SDValue N1 = N->getOperand(1);
3561  EVT VT = N1.getValueType();
3562 
3563  // fold vector ops
3564  if (VT.isVector()) {
3565  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3566  return FoldedVOp;
3567 
3568  // fold (or x, 0) -> x, vector edition
3570  return N1;
3572  return N0;
3573 
3574  // fold (or x, -1) -> -1, vector edition
3576  // do not return N0, because undef node may exist in N0
3577  return DAG.getConstant(
3580  SDLoc(N), N0.getValueType());
3582  // do not return N1, because undef node may exist in N1
3583  return DAG.getConstant(
3586  SDLoc(N), N1.getValueType());
3587 
3588  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
3589  // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
3590  // Do this only if the resulting shuffle is legal.
3591  if (isa<ShuffleVectorSDNode>(N0) &&
3592  isa<ShuffleVectorSDNode>(N1) &&
3593  // Avoid folding a node with illegal type.
3594  TLI.isTypeLegal(VT) &&
3595  N0->getOperand(1) == N1->getOperand(1) &&
3597  bool CanFold = true;
3598  unsigned NumElts = VT.getVectorNumElements();
3599  const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
3600  const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
3601  // We construct two shuffle masks:
3602  // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
3603  // and N1 as the second operand.
3604  // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
3605  // and N0 as the second operand.
3606  // We do this because OR is commutable and therefore there might be
3607  // two ways to fold this node into a shuffle.
3608  SmallVector<int,4> Mask1;
3609  SmallVector<int,4> Mask2;
3610 
3611  for (unsigned i = 0; i != NumElts && CanFold; ++i) {
3612  int M0 = SV0->getMaskElt(i);
3613  int M1 = SV1->getMaskElt(i);
3614 
3615  // Both shuffle indexes are undef. Propagate Undef.
3616  if (M0 < 0 && M1 < 0) {
3617  Mask1.push_back(M0);
3618  Mask2.push_back(M0);
3619  continue;
3620  }
3621 
3622  if (M0 < 0 || M1 < 0 ||
3623  (M0 < (int)NumElts && M1 < (int)NumElts) ||
3624  (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
3625  CanFold = false;
3626  break;
3627  }
3628 
3629  Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
3630  Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
3631  }
3632 
3633  if (CanFold) {
3634  // Fold this sequence only if the resulting shuffle is 'legal'.
3635  if (TLI.isShuffleMaskLegal(Mask1, VT))
3636  return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
3637  N1->getOperand(0), &Mask1[0]);
3638  if (TLI.isShuffleMaskLegal(Mask2, VT))
3639  return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
3640  N0->getOperand(0), &Mask2[0]);
3641  }
3642  }
3643  }
3644 
3645  // fold (or c1, c2) -> c1|c2
3648  if (N0C && N1C && !N1C->isOpaque())
3649  return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
3650  // canonicalize constant to RHS
3653  return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
3654  // fold (or x, 0) -> x
3655  if (isNullConstant(N1))
3656  return N0;
3657  // fold (or x, -1) -> -1
3658  if (isAllOnesConstant(N1))
3659  return N1;
3660  // fold (or x, c) -> c iff (x & ~c) == 0
3661  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
3662  return N1;
3663 
3664  if (SDValue Combined = visitORLike(N0, N1, N))
3665  return Combined;
3666 
3667  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
3668  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
3669  if (BSwap.getNode())
3670  return BSwap;
3671  BSwap = MatchBSwapHWordLow(N, N0, N1);
3672  if (BSwap.getNode())
3673  return BSwap;
3674 
3675  // reassociate or
3676  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
3677  return ROR;
3678  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
3679  // iff (c1 & c2) == 0.
3680  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
3681  isa<ConstantSDNode>(N0.getOperand(1))) {
3682  ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
3683  if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
3684  if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
3685  N1C, C1))
3686  return DAG.getNode(
3687  ISD::AND, SDLoc(N), VT,
3688  DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
3689  return SDValue();
3690  }
3691  }
3692  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
3693  if (N0.getOpcode() == N1.getOpcode()) {
3694  SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
3695  if (Tmp.getNode()) return Tmp;
3696  }
3697 
3698  // See if this is some rotate idiom.
3699  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
3700  return SDValue(Rot, 0);
3701 
3702  // Simplify the operands using demanded-bits information.
3703  if (!VT.isVector() &&
3705  return SDValue(N, 0);
3706 
3707  return SDValue();
3708 }
3709 
3710 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
3711 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
3712  if (Op.getOpcode() == ISD::AND) {
3713  if (isa<ConstantSDNode>(Op.getOperand(1))) {
3714  Mask = Op.getOperand(1);
3715  Op = Op.getOperand(0);
3716  } else {
3717  return false;
3718  }
3719  }
3720 
3721  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
3722  Shift = Op;
3723  return true;
3724  }
3725 
3726  return false;
3727 }
3728 
3729 // Return true if we can prove that, whenever Neg and Pos are both in the
3730 // range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that
3731 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
3732 //
3733 // (or (shift1 X, Neg), (shift2 X, Pos))
3734 //
3735 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
3736 // in direction shift1 by Neg. The range [0, OpSize) means that we only need
3737 // to consider shift amounts with defined behavior.
3738 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) {
3739  // If OpSize is a power of 2 then:
3740  //
3741  // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1)
3742  // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize).
3743  //
3744  // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check
3745  // for the stronger condition:
3746  //
3747  // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A]
3748  //
3749  // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1)
3750  // we can just replace Neg with Neg' for the rest of the function.
3751  //
3752  // In other cases we check for the even stronger condition:
3753  //
3754  // Neg == OpSize - Pos [B]
3755  //
3756  // for all Neg and Pos. Note that the (or ...) then invokes undefined
3757  // behavior if Pos == 0 (and consequently Neg == OpSize).
3758  //
3759  // We could actually use [A] whenever OpSize is a power of 2, but the
3760  // only extra cases that it would match are those uninteresting ones
3761  // where Neg and Pos are never in range at the same time. E.g. for
3762  // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
3763  // as well as (sub 32, Pos), but:
3764  //
3765  // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
3766  //
3767  // always invokes undefined behavior for 32-bit X.
3768  //
3769  // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise.
3770  unsigned MaskLoBits = 0;
3771  if (Neg.getOpcode() == ISD::AND &&
3772  isPowerOf2_64(OpSize) &&
3773  Neg.getOperand(1).getOpcode() == ISD::Constant &&
3774  cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) {
3775  Neg = Neg.getOperand(0);
3776  MaskLoBits = Log2_64(OpSize);
3777  }
3778 
3779  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
3780  if (Neg.getOpcode() != ISD::SUB)
3781  return 0;
3783  if (!NegC)
3784  return 0;
3785  SDValue NegOp1 = Neg.getOperand(1);
3786 
3787  // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with
3788  // Pos'. The truncation is redundant for the purpose of the equality.
3789  if (MaskLoBits &&
3790  Pos.getOpcode() == ISD::AND &&
3791  Pos.getOperand(1).getOpcode() == ISD::Constant &&
3792  cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1)
3793  Pos = Pos.getOperand(0);
3794 
3795  // The condition we need is now:
3796  //
3797  // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask
3798  //
3799  // If NegOp1 == Pos then we need:
3800  //
3801  // OpSize & Mask == NegC & Mask
3802  //
3803  // (because "x & Mask" is a truncation and distributes through subtraction).
3804  APInt Width;
3805  if (Pos == NegOp1)
3806  Width = NegC->getAPIntValue();
3807  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
3808  // Then the condition we want to prove becomes:
3809  //
3810  // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask
3811  //
3812  // which, again because "x & Mask" is a truncation, becomes:
3813  //
3814  // NegC & Mask == (OpSize - PosC) & Mask
3815  // OpSize & Mask == (NegC + PosC) & Mask
3816  else if (Pos.getOpcode() == ISD::ADD &&
3817  Pos.getOperand(0) == NegOp1 &&
3818  Pos.getOperand(1).getOpcode() == ISD::Constant)
3819  Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() +
3820  NegC->getAPIntValue());
3821  else
3822  return false;
3823 
3824  // Now we just need to check that OpSize & Mask == Width & Mask.
3825  if (MaskLoBits)
3826  // Opsize & Mask is 0 since Mask is Opsize - 1.
3827  return Width.getLoBits(MaskLoBits) == 0;
3828  return Width == OpSize;
3829 }
3830 
// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// Returns the new rotate node, or null if the amounts cannot be proven to
// form a rotate.
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, SDLoc DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) {
    // Prefer the rotate direction the target handles natively; the opposite
    // direction uses the complementary (Neg) amount instead.
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg).getNode();
  }

  return nullptr;
}
3856 
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // The two shift amounts must together cover the whole value for the OR
    // to be a rotate.
    if ((LShVal + RShVal) != OpSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      if (LHSMask.getNode()) {
        // The low LShVal bits of the rotate came from the SRL half, which the
        // LHS mask never constrained, so keep them set in the combined mask.
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        // Likewise the high RShVal bits came from the SHL half, untouched by
        // the RHS mask.
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, DL, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Variable amounts: try both rotate directions via MatchRotatePosNeg.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
3963 
3964 SDValue DAGCombiner::visitXOR(SDNode *N) {
3965  SDValue N0 = N->getOperand(0);
3966  SDValue N1 = N->getOperand(1);
3967  EVT VT = N0.getValueType();
3968 
3969  // fold vector ops
3970  if (VT.isVector()) {
3971  if (SDValue FoldedVOp = SimplifyVBinOp(N))
3972  return FoldedVOp;
3973 
3974  // fold (xor x, 0) -> x, vector edition
3976  return N1;
3978  return N0;
3979  }
3980 
3981  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
3982  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
3983  return DAG.getConstant(0, SDLoc(N), VT);
3984  // fold (xor x, undef) -> undef
3985  if (N0.getOpcode() == ISD::UNDEF)
3986  return N0;
3987  if (N1.getOpcode() == ISD::UNDEF)
3988  return N1;
3989  // fold (xor c1, c2) -> c1^c2
3992  if (N0C && N1C)
3993  return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
3994  // canonicalize constant to RHS
3997  return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
3998  // fold (xor x, 0) -> x
3999  if (isNullConstant(N1))
4000  return N0;
4001  // reassociate xor
4002  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
4003  return RXOR;
4004 
4005  // fold !(x cc y) -> (x !cc y)
4006  SDValue LHS, RHS, CC;
4007  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
4008  bool isInt = LHS.getValueType().isInteger();
4009  ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
4010  isInt);
4011 
4012  if (!LegalOperations ||
4013  TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
4014  switch (N0.getOpcode()) {
4015  default:
4016  llvm_unreachable("Unhandled SetCC Equivalent!");
4017  case ISD::SETCC:
4018  return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
4019  case ISD::SELECT_CC:
4020  return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
4021  N0.getOperand(3), NotCC);
4022  }
4023  }
4024  }
4025 
4026  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
4027  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
4028  N0.getNode()->hasOneUse() &&
4029  isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
4030  SDValue V = N0.getOperand(0);
4031  SDLoc DL(N0);
4032  V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
4033  DAG.getConstant(1, DL, V.getValueType()));
4034  AddToWorklist(V.getNode());
4035  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
4036  }
4037 
4038  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
4039  if (isOneConstant(N1) && VT == MVT::i1 &&
4040  (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4041  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4042  if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
4043  unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4044  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4045  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4046  AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4047  return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4048  }
4049  }
4050  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
4051  if (isAllOnesConstant(N1) &&
4052  (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
4053  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
4054  if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
4055  unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
4056  LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
4057  RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
4058  AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
4059  return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
4060  }
4061  }
4062  // fold (xor (and x, y), y) -> (and (not x), y)
4063  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
4064  N0->getOperand(1) == N1) {
4065  SDValue X = N0->getOperand(0);
4066  SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
4067  AddToWorklist(NotX.getNode());
4068  return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
4069  }
4070  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
4071  if (N1C && N0.getOpcode() == ISD::XOR) {
4072  if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
4073  SDLoc DL(N);
4074  return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4075  DAG.getConstant(N1C->getAPIntValue() ^
4076  N00C->getAPIntValue(), DL, VT));
4077  }
4078  if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
4079  SDLoc DL(N);
4080  return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
4081  DAG.getConstant(N1C->getAPIntValue() ^
4082  N01C->getAPIntValue(), DL, VT));
4083  }
4084  }
4085  // fold (xor x, x) -> 0
4086  if (N0 == N1)
4087  return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
4088 
4089  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
4090  // Here is a concrete example of this equivalence:
4091  // i16 x == 14
4092  // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
4093  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
4094  //
4095  // =>
4096  //
4097  // i16 ~1 == 0b1111111111111110
4098  // i16 rol(~1, 14) == 0b1011111111111111
4099  //
4100  // Some additional tips to help conceptualize this transform:
4101  // - Try to see the operation as placing a single zero in a value of all ones.
4102  // - There exists no value for x which would allow the result to contain zero.
4103  // - Values of x larger than the bitwidth are undefined and do not require a
4104  // consistent result.
4105  // - Pushing the zero left requires shifting one bits in from the right.
4106  // A rotate left of ~1 is a nice way of achieving the desired result.
4107  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
4108  && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
4109  SDLoc DL(N);
4110  return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
4111  N0.getOperand(1));
4112  }
4113 
4114  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
4115  if (N0.getOpcode() == N1.getOpcode()) {
4116  SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
4117  if (Tmp.getNode()) return Tmp;
4118  }
4119 
4120  // Simplify the expression using non-local knowledge.
4121  if (!VT.isVector() &&
4123  return SDValue(N, 0);
4124 
4125  return SDValue();
4126 }
4127 
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// Attempts to rewrite (shift (binop X, C1), C2) as (binop (shift X, C2), C1')
/// so the binop sits outside the shift (helps address computation matching).
/// Returns the rewritten node, or an empty SDValue if the transform does not
/// apply. Note: the Amt parameter is not used in this body — the constant
/// amount is taken from N's operand 1; presumably Amt exists for the callers'
/// convenience — TODO confirm at call sites.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  // Pulling the binop through the shift duplicates work unless the binop has
  // a single user.
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the commute (e.g. to preserve an
  // addressing-mode-friendly form).
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
4201 
4202 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
4203  assert(N->getOpcode() == ISD::TRUNCATE);
4204  assert(N->getOperand(0).getOpcode() == ISD::AND);
4205 
4206  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
4207  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
4208  SDValue N01 = N->getOperand(0).getOperand(1);
4209 
4210  if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
4211  if (!N01C->isOpaque()) {
4212  EVT TruncVT = N->getValueType(0);
4213  SDValue N00 = N->getOperand(0).getOperand(0);
4214  APInt TruncC = N01C->getAPIntValue();
4215  TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
4216  SDLoc DL(N);
4217 
4218  return DAG.getNode(ISD::AND, DL, TruncVT,
4219  DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
4220  DAG.getConstant(TruncC, DL, TruncVT));
4221  }
4222  }
4223  }
4224 
4225  return SDValue();
4226 }
4227 
4228 SDValue DAGCombiner::visitRotate(SDNode *N) {
4229  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
4230  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
4231  N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
4232  SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
4233  if (NewOp1.getNode())
4234  return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
4235  N->getOperand(0), NewOp1);
4236  }
4237  return SDValue();
4238 }
4239 
4240 SDValue DAGCombiner::visitSHL(SDNode *N) {
4241  SDValue N0 = N->getOperand(0);
4242  SDValue N1 = N->getOperand(1);
4243  EVT VT = N0.getValueType();
4244  unsigned OpSizeInBits = VT.getScalarSizeInBits();
4245 
4246  // fold vector ops
4248  if (VT.isVector()) {
4249  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4250  return FoldedVOp;
4251 
4253  // If setcc produces all-one true value then:
4254  // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
4255  if (N1CV && N1CV->isConstant()) {
4256  if (N0.getOpcode() == ISD::AND) {
4257  SDValue N00 = N0->getOperand(0);
4258  SDValue N01 = N0->getOperand(1);
4260 
4261  if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
4262  TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
4264  if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
4265  N01CV, N1CV))
4266  return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
4267  }
4268  } else {
4269  N1C = isConstOrConstSplat(N1);
4270  }
4271  }
4272  }
4273 
4274  // fold (shl c1, c2) -> c1<<c2
4276  if (N0C && N1C && !N1C->isOpaque())
4277  return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
4278  // fold (shl 0, x) -> 0
4279  if (isNullConstant(N0))
4280  return N0;
4281  // fold (shl x, c >= size(x)) -> undef
4282  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
4283  return DAG.getUNDEF(VT);
4284  // fold (shl x, 0) -> x
4285  if (N1C && N1C->isNullValue())
4286  return N0;
4287  // fold (shl undef, x) -> 0
4288  if (N0.getOpcode() == ISD::UNDEF)
4289  return DAG.getConstant(0, SDLoc(N), VT);
4290  // if (shl x, c) is known to be zero, return 0
4291  if (DAG.MaskedValueIsZero(SDValue(N, 0),
4292  APInt::getAllOnesValue(OpSizeInBits)))
4293  return DAG.getConstant(0, SDLoc(N), VT);
4294  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
4295  if (N1.getOpcode() == ISD::TRUNCATE &&
4296  N1.getOperand(0).getOpcode() == ISD::AND) {
4297  SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
4298  if (NewOp1.getNode())
4299  return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
4300  }
4301 
4302  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4303  return SDValue(N, 0);
4304 
4305  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
4306  if (N1C && N0.getOpcode() == ISD::SHL) {
4307  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4308  uint64_t c1 = N0C1->getZExtValue();
4309  uint64_t c2 = N1C->getZExtValue();
4310  SDLoc DL(N);
4311  if (c1 + c2 >= OpSizeInBits)
4312  return DAG.getConstant(0, DL, VT);
4313  return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4314  DAG.getConstant(c1 + c2, DL, N1.getValueType()));
4315  }
4316  }
4317 
4318  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
4319  // For this to be valid, the second form must not preserve any of the bits
4320  // that are shifted out by the inner shift in the first form. This means
4321  // the outer shift size must be >= the number of bits added by the ext.
4322  // As a corollary, we don't care what kind of ext it is.
4323  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
4324  N0.getOpcode() == ISD::ANY_EXTEND ||
4325  N0.getOpcode() == ISD::SIGN_EXTEND) &&
4326  N0.getOperand(0).getOpcode() == ISD::SHL) {
4327  SDValue N0Op0 = N0.getOperand(0);
4328  if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4329  uint64_t c1 = N0Op0C1->getZExtValue();
4330  uint64_t c2 = N1C->getZExtValue();
4331  EVT InnerShiftVT = N0Op0.getValueType();
4332  uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
4333  if (c2 >= OpSizeInBits - InnerShiftSize) {
4334  SDLoc DL(N0);
4335  if (c1 + c2 >= OpSizeInBits)
4336  return DAG.getConstant(0, DL, VT);
4337  return DAG.getNode(ISD::SHL, DL, VT,
4338  DAG.getNode(N0.getOpcode(), DL, VT,
4339  N0Op0->getOperand(0)),
4340  DAG.getConstant(c1 + c2, DL, N1.getValueType()));
4341  }
4342  }
4343  }
4344 
4345  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
4346  // Only fold this if the inner zext has no other uses to avoid increasing
4347  // the total number of instructions.
4348  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
4349  N0.getOperand(0).getOpcode() == ISD::SRL) {
4350  SDValue N0Op0 = N0.getOperand(0);
4351  if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
4352  uint64_t c1 = N0Op0C1->getZExtValue();
4353  if (c1 < VT.getScalarSizeInBits()) {
4354  uint64_t c2 = N1C->getZExtValue();
4355  if (c1 == c2) {
4356  SDValue NewOp0 = N0.getOperand(0);
4357  EVT CountVT = NewOp0.getOperand(1).getValueType();
4358  SDLoc DL(N);
4359  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
4360  NewOp0,
4361  DAG.getConstant(c2, DL, CountVT));
4362  AddToWorklist(NewSHL.getNode());
4363  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
4364  }
4365  }
4366  }
4367  }
4368 
4369  // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
4370  // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
4371  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
4372  cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
4373  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4374  uint64_t C1 = N0C1->getZExtValue();
4375  uint64_t C2 = N1C->getZExtValue();
4376  SDLoc DL(N);
4377  if (C1 <= C2)
4378  return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4379  DAG.getConstant(C2 - C1, DL, N1.getValueType()));
4380  return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
4381  DAG.getConstant(C1 - C2, DL, N1.getValueType()));
4382  }
4383  }
4384 
4385  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
4386  // (and (srl x, (sub c1, c2), MASK)
4387  // Only fold this if the inner shift has no other uses -- if it does, folding
4388  // this will increase the total number of instructions.
4389  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4390  if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
4391  uint64_t c1 = N0C1->getZExtValue();
4392  if (c1 < OpSizeInBits) {
4393  uint64_t c2 = N1C->getZExtValue();
4394  APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
4395  SDValue Shift;
4396  if (c2 > c1) {
4397  Mask = Mask.shl(c2 - c1);
4398  SDLoc DL(N);
4399  Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
4400  DAG.getConstant(c2 - c1, DL, N1.getValueType()));
4401  } else {
4402  Mask = Mask.lshr(c1 - c2);
4403  SDLoc DL(N);
4404  Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4405  DAG.getConstant(c1 - c2, DL, N1.getValueType()));
4406  }
4407  SDLoc DL(N0);
4408  return DAG.getNode(ISD::AND, DL, VT, Shift,
4409  DAG.getConstant(Mask, DL, VT));
4410  }
4411  }
4412  }
4413  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
4414  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
4415  unsigned BitSize = VT.getScalarSizeInBits();
4416  SDLoc DL(N);
4417  SDValue HiBitsMask =
4418  DAG.getConstant(APInt::getHighBitsSet(BitSize,
4419  BitSize - N1C->getZExtValue()),
4420  DL, VT);
4421  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
4422  HiBitsMask);
4423  }
4424 
4425  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
4426  // Variant of version done on multiply, except mul by a power of 2 is turned
4427  // into a shift.
4428  APInt Val;
4429  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
4430  (isa<ConstantSDNode>(N0.getOperand(1)) ||
4431  isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
4432  SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
4433  SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
4434  return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
4435  }
4436 
4437  if (N1C && !N1C->isOpaque()) {
4438  SDValue NewSHL = visitShiftByConstant(N, N1C);
4439  if (NewSHL.getNode())
4440  return NewSHL;
4441  }
4442 
4443  return SDValue();
4444 }
4445 
4446 SDValue DAGCombiner::visitSRA(SDNode *N) {
 // Try to simplify an ISD::SRA (arithmetic shift right) node.
 // Returns the replacement value, or an empty SDValue if no fold applied.
 // NOTE(review): this listing is a doxygen dump; several original source
 // lines were lost in extraction and are flagged inline below.
4447  SDValue N0 = N->getOperand(0);
4448  SDValue N1 = N->getOperand(1);
4449  EVT VT = N0.getValueType();
4450  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
4451 
4452  // fold vector ops
 // NOTE(review): original line 4453 is missing here — presumably the
 // declaration of N1C (ConstantSDNode* for a constant/splat N1); confirm
 // against the LLVM 3.7 sources.
4454  if (VT.isVector()) {
4455  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4456  return FoldedVOp;
4457 
 // For vectors, a splat of a constant counts as a constant shift amount.
4458  N1C = isConstOrConstSplat(N1);
4459  }
4460 
4461  // fold (sra c1, c2) -> (sra c1, c2)
 // NOTE(review): original line 4462 is missing here — presumably the
 // declaration of N0C; confirm against the LLVM 3.7 sources.
4463  if (N0C && N1C && !N1C->isOpaque())
4464  return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
4465  // fold (sra 0, x) -> 0
4466  if (isNullConstant(N0))
4467  return N0;
4468  // fold (sra -1, x) -> -1
4469  if (isAllOnesConstant(N0))
4470  return N0;
4471  // fold (sra x, (setge c, size(x))) -> undef
4472  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
4473  return DAG.getUNDEF(VT);
4474  // fold (sra x, 0) -> x
4475  if (N1C && N1C->isNullValue())
4476  return N0;
4477  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
4478  // sext_inreg.
4479  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
 // The shl/sra pair sign-extends the low (size - c1) bits of x.
4480  unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
4481  EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
4482  if (VT.isVector())
4483  ExtVT = EVT::getVectorVT(*DAG.getContext(),
4484  ExtVT, VT.getVectorNumElements());
4485  if ((!LegalOperations ||
4486  TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
4487  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
4488  N0.getOperand(0), DAG.getValueType(ExtVT));
4489  }
4490 
4491  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
4492  if (N1C && N0.getOpcode() == ISD::SRA) {
4493  if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
4494  unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
 // An over-wide SRA saturates: shifting by >= width is the same as
 // shifting by width-1 (replicates the sign bit).
4495  if (Sum >= OpSizeInBits)
4496  Sum = OpSizeInBits - 1;
4497  SDLoc DL(N);
4498  return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
4499  DAG.getConstant(Sum, DL, N1.getValueType()));
4500  }
4501  }
4502 
4503  // fold (sra (shl X, m), (sub result_size, n))
4504  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
4505  // result_size - n != m.
4506  // If truncate is free for the target sext(shl) is likely to result in better
4507  // code.
4508  if (N0.getOpcode() == ISD::SHL && N1C) {
4509  // Get the two constanst of the shifts, CN0 = m, CN = n.
4510  const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
4511  if (N01C) {
4512  LLVMContext &Ctx = *DAG.getContext();
4513  // Determine what the truncate's result bitsize and type would be.
4514  EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
4515 
4516  if (VT.isVector())
4517  TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
4518 
4519  // Determine the residual right-shift amount.
4520  signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
4521 
4522  // If the shift is not a no-op (in which case this should be just a sign
4523  // extend already), the truncated to type is legal, sign_extend is legal
4524  // on that type, and the truncate to that type is both legal and free,
4525  // perform the transform.
4526  if ((ShiftAmt > 0) &&
4527  TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
4528  TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
4529  TLI.isTruncateFree(VT, TruncVT)) {
4530 
4531  SDLoc DL(N);
4532  SDValue Amt = DAG.getConstant(ShiftAmt, DL,
 // NOTE(review): original line 4533 is missing here — presumably the
 // continuation supplying the shift-amount type
 // (getShiftAmountTy(...)); confirm against the LLVM 3.7 sources.
4534  SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
4535  N0.getOperand(0), Amt);
4536  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
4537  Shift);
4538  return DAG.getNode(ISD::SIGN_EXTEND, DL,
4539  N->getValueType(0), Trunc);
4540  }
4541  }
4542  }
4543 
4544  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
4545  if (N1.getOpcode() == ISD::TRUNCATE &&
4546  N1.getOperand(0).getOpcode() == ISD::AND) {
4547  SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
4548  if (NewOp1.getNode())
4549  return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
4550  }
4551 
4552  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
4553  // if c1 is equal to the number of bits the trunc removes
4554  if (N0.getOpcode() == ISD::TRUNCATE &&
4555  (N0.getOperand(0).getOpcode() == ISD::SRL ||
4556  N0.getOperand(0).getOpcode() == ISD::SRA) &&
4557  N0.getOperand(0).hasOneUse() &&
4558  N0.getOperand(0).getOperand(1).hasOneUse() &&
4559  N1C) {
4560  SDValue N0Op0 = N0.getOperand(0);
4561  if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
4562  unsigned LargeShiftVal = LargeShift->getZExtValue();
4563  EVT LargeVT = N0Op0.getValueType();
4564 
4565  if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
4566  SDLoc DL(N);
4567  SDValue Amt =
4568  DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
 // NOTE(review): original line 4569 is missing here — presumably the
 // continuation supplying the shift-amount type; confirm against the
 // LLVM 3.7 sources.
4570  SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
4571  N0Op0.getOperand(0), Amt);
4572  return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
4573  }
4574  }
4575  }
4576 
4577  // Simplify, based on bits shifted out of the LHS.
4578  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4579  return SDValue(N, 0);
4580 
4581 
4582  // If the sign bit is known to be zero, switch this to a SRL.
4583  if (DAG.SignBitIsZero(N0))
4584  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
4585 
 // Generic shift-by-constant folds shared by SHL/SRA/SRL.
4586  if (N1C && !N1C->isOpaque()) {
4587  SDValue NewSRA = visitShiftByConstant(N, N1C);
4588  if (NewSRA.getNode())
4589  return NewSRA;
4590  }
4591 
4592  return SDValue();
4593 }
4594 
4595 SDValue DAGCombiner::visitSRL(SDNode *N) {
 // Try to simplify an ISD::SRL (logical shift right) node.
 // Returns the replacement value, or an empty SDValue if no fold applied.
 // NOTE(review): this listing is a doxygen dump; several original source
 // lines were lost in extraction and are flagged inline below.
4596  SDValue N0 = N->getOperand(0);
4597  SDValue N1 = N->getOperand(1);
4598  EVT VT = N0.getValueType();
4599  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
4600 
4601  // fold vector ops
 // NOTE(review): original line 4602 is missing here — presumably the
 // declaration of N1C; confirm against the LLVM 3.7 sources.
4603  if (VT.isVector()) {
4604  if (SDValue FoldedVOp = SimplifyVBinOp(N))
4605  return FoldedVOp;
4606 
 // For vectors, a splat of a constant counts as a constant shift amount.
4607  N1C = isConstOrConstSplat(N1);
4608  }
4609 
4610  // fold (srl c1, c2) -> c1 >>u c2
 // NOTE(review): original line 4611 is missing here — presumably the
 // declaration of N0C; confirm against the LLVM 3.7 sources.
4612  if (N0C && N1C && !N1C->isOpaque())
4613  return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
4614  // fold (srl 0, x) -> 0
4615  if (isNullConstant(N0))
4616  return N0;
4617  // fold (srl x, c >= size(x)) -> undef
4618  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
4619  return DAG.getUNDEF(VT);
4620  // fold (srl x, 0) -> x
4621  if (N1C && N1C->isNullValue())
4622  return N0;
4623  // if (srl x, c) is known to be zero, return 0
4624  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4625  APInt::getAllOnesValue(OpSizeInBits)))
4626  return DAG.getConstant(0, SDLoc(N), VT);
4627 
4628  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
4629  if (N1C && N0.getOpcode() == ISD::SRL) {
4630  if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
4631  uint64_t c1 = N01C->getZExtValue();
4632  uint64_t c2 = N1C->getZExtValue();
4633  SDLoc DL(N);
 // Unlike SRA, a combined logical shift >= width shifts in all zeros.
4634  if (c1 + c2 >= OpSizeInBits)
4635  return DAG.getConstant(0, DL, VT);
4636  return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
4637  DAG.getConstant(c1 + c2, DL, N1.getValueType()));
4638  }
4639  }
4640 
4641  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
4642  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
4643  N0.getOperand(0).getOpcode() == ISD::SRL &&
4644  isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
4645  uint64_t c1 =
4646  cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
4647  uint64_t c2 = N1C->getZExtValue();
4648  EVT InnerShiftVT = N0.getOperand(0).getValueType();
4649  EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
4650  uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
4651  // This is only valid if the OpSizeInBits + c1 = size of inner shift.
4652  if (c1 + OpSizeInBits == InnerShiftSize) {
4653  SDLoc DL(N0);
4654  if (c1 + c2 >= InnerShiftSize)
4655  return DAG.getConstant(0, DL, VT);
4656  return DAG.getNode(ISD::TRUNCATE, DL, VT,
4657  DAG.getNode(ISD::SRL, DL, InnerShiftVT,
4658  N0.getOperand(0)->getOperand(0),
4659  DAG.getConstant(c1 + c2, DL,
4660  ShiftCountVT)));
4661  }
4662  }
4663 
4664  // fold (srl (shl x, c), c) -> (and x, cst2)
4665  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
4666  unsigned BitSize = N0.getScalarValueSizeInBits();
 // Restrict to <= 64 bits so the mask fits in a uint64_t below.
4667  if (BitSize <= 64) {
4668  uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
4669  SDLoc DL(N);
4670  return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
4671  DAG.getConstant(~0ULL >> ShAmt, DL, VT));
4672  }
4673  }
4674 
4675  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
4676  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4677  // Shifting in all undef bits?
4678  EVT SmallVT = N0.getOperand(0).getValueType();
4679  unsigned BitSize = SmallVT.getScalarSizeInBits();
4680  if (N1C->getZExtValue() >= BitSize)
4681  return DAG.getUNDEF(VT);
4682 
4683  if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
4684  uint64_t ShiftAmt = N1C->getZExtValue();
4685  SDLoc DL0(N0);
4686  SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
4687  N0.getOperand(0),
4688  DAG.getConstant(ShiftAmt, DL0,
4689  getShiftAmountTy(SmallVT)));
4690  AddToWorklist(SmallShift.getNode());
 // Mask off the bits that were undef in the narrow value, so the
 // result matches what an SRL of the wide value would produce.
4691  APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
4692  SDLoc DL(N);
4693  return DAG.getNode(ISD::AND, DL, VT,
4694  DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
4695  DAG.getConstant(Mask, DL, VT));
4696  }
4697  }
4698 
4699  // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
4700  // bit, which is unmodified by sra.
4701  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
4702  if (N0.getOpcode() == ISD::SRA)
4703  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
4704  }
4705 
4706  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
4707  if (N1C && N0.getOpcode() == ISD::CTLZ &&
4708  N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
4709  APInt KnownZero, KnownOne;
4710  DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
4711 
4712  // If any of the input bits are KnownOne, then the input couldn't be all
4713  // zeros, thus the result of the srl will always be zero.
4714  if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
4715 
4716  // If all of the bits input the to ctlz node are known to be zero, then
4717  // the result of the ctlz is "32" and the result of the shift is one.
4718  APInt UnknownBits = ~KnownZero;
4719  if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
4720 
4721  // Otherwise, check to see if there is exactly one bit input to the ctlz.
4722  if ((UnknownBits & (UnknownBits - 1)) == 0) {
4723  // Okay, we know that only that the single bit specified by UnknownBits
4724  // could be set on input to the CTLZ node. If this bit is set, the SRL
4725  // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
4726  // to an SRL/XOR pair, which is likely to simplify more.
4727  unsigned ShAmt = UnknownBits.countTrailingZeros();
4728  SDValue Op = N0.getOperand(0);
4729 
4730  if (ShAmt) {
4731  SDLoc DL(N0);
4732  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4733  DAG.getConstant(ShAmt, DL,
 // NOTE(review): original line 4734 is missing here — presumably the
 // continuation supplying the shift-amount type
 // (getShiftAmountTy(...)); confirm against the LLVM 3.7 sources.
4735  AddToWorklist(Op.getNode());
4736  }
4737 
4738  SDLoc DL(N);
4739  return DAG.getNode(ISD::XOR, DL, VT,
4740  Op, DAG.getConstant(1, DL, VT));
4741  }
4742  }
4743 
4744  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
4745  if (N1.getOpcode() == ISD::TRUNCATE &&
4746  N1.getOperand(0).getOpcode() == ISD::AND) {
4747  SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
4748  if (NewOp1.getNode())
4749  return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
4750  }
4751 
4752  // fold operands of srl based on knowledge that the low bits are not
4753  // demanded.
4754  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
4755  return SDValue(N, 0);
4756 
 // Generic shift-by-constant folds shared by SHL/SRA/SRL.
4757  if (N1C && !N1C->isOpaque()) {
4758  SDValue NewSRL = visitShiftByConstant(N, N1C);
4759  if (NewSRL.getNode())
4760  return NewSRL;
4761  }
4762 
4763  // Attempt to convert a srl of a load into a narrower zero-extending load.
4764  SDValue NarrowLoad = ReduceLoadWidth(N);
4765  if (NarrowLoad.getNode())
4766  return NarrowLoad;
4767 
4768  // Here is a common situation. We want to optimize:
4769  //
4770  // %a = ...
4771  // %b = and i32 %a, 2
4772  // %c = srl i32 %b, 1
4773  // brcond i32 %c ...
4774  //
4775  // into
4776  //
4777  // %a = ...
4778  // %b = and %a, 2
4779  // %c = setcc eq %b, 0
4780  // brcond %c ...
4781  //
4782  // However when after the source operand of SRL is optimized into AND, the SRL
4783  // itself may not be optimized further. Look for it and add the BRCOND into
4784  // the worklist.
4785  if (N->hasOneUse()) {
4786  SDNode *Use = *N->use_begin();
4787  if (Use->getOpcode() == ISD::BRCOND)
4788  AddToWorklist(Use);
4789  else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
4790  // Also look pass the truncate.
4791  Use = *Use->use_begin();
4792  if (Use->getOpcode() == ISD::BRCOND)
4793  AddToWorklist(Use);
4794  }
4795  }
4796 
4797  return SDValue();
4798 }
4799 
4800 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
 // Try to simplify an ISD::BSWAP node. Returns the replacement value,
 // or an empty SDValue if no fold applied.
4801  SDValue N0 = N->getOperand(0);
4802  EVT VT = N->getValueType(0);
4803 
4804  // fold (bswap c1) -> c2
 // NOTE(review): original line 4805 is missing here — presumably the guard
 // checking that N0 is a constant (so this getNode constant-folds); confirm
 // against the LLVM 3.7 sources.
4806  return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
4807  // fold (bswap (bswap x)) -> x
 // Byte-swapping twice is the identity.
4808  if (N0.getOpcode() == ISD::BSWAP)
4809  return N0->getOperand(0);
4810  return SDValue();
4811 }
4812 
4813 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
 // Try to constant-fold an ISD::CTLZ (count leading zeros) node.
4814  SDValue N0 = N->getOperand(0);
4815  EVT VT = N->getValueType(0);
4816 
4817  // fold (ctlz c1) -> c2
 // NOTE(review): original line 4818 is missing here — presumably the guard
 // checking that N0 is a constant; confirm against the LLVM 3.7 sources.
4819  return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
4820  return SDValue();
4821 }
4822 
4823 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
 // Try to constant-fold an ISD::CTLZ_ZERO_UNDEF node (ctlz whose result is
 // undefined for a zero input).
4824  SDValue N0 = N->getOperand(0);
4825  EVT VT = N->getValueType(0);
4826 
4827  // fold (ctlz_zero_undef c1) -> c2
 // NOTE(review): original line 4828 is missing here — presumably the guard
 // checking that N0 is a constant; confirm against the LLVM 3.7 sources.
4829  return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
4830  return SDValue();
4831 }
4832 
4833 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
 // Try to constant-fold an ISD::CTTZ (count trailing zeros) node.
4834  SDValue N0 = N->getOperand(0);
4835  EVT VT = N->getValueType(0);
4836 
4837  // fold (cttz c1) -> c2
 // NOTE(review): original line 4838 is missing here — presumably the guard
 // checking that N0 is a constant; confirm against the LLVM 3.7 sources.
4839  return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
4840  return SDValue();
4841 }
4842 
4843 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
 // Try to constant-fold an ISD::CTTZ_ZERO_UNDEF node (cttz whose result is
 // undefined for a zero input).
4844  SDValue N0 = N->getOperand(0);
4845  EVT VT = N->getValueType(0);
4846 
4847  // fold (cttz_zero_undef c1) -> c2
 // NOTE(review): original line 4848 is missing here — presumably the guard
 // checking that N0 is a constant; confirm against the LLVM 3.7 sources.
4849  return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
4850  return SDValue();
4851 }
4852 
4853 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
 // Try to constant-fold an ISD::CTPOP (population count) node.
4854  SDValue N0 = N->getOperand(0);
4855  EVT VT = N->getValueType(0);
4856 
4857  // fold (ctpop c1) -> c2
 // NOTE(review): original line 4858 is missing here — presumably the guard
 // checking that N0 is a constant; confirm against the LLVM 3.7 sources.
4859  return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
4860  return SDValue();
4861 }
4862 
4863 
4864 /// \brief Generate Min/Max node
 /// Given a compare of LHS/RHS with condition CC selecting between True and
 /// False, emit FMINNUM/FMAXNUM when the select operands are exactly the
 /// compare operands (possibly swapped) and the target supports the opcode.
 /// Returns an empty SDValue when the pattern does not match or the opcode
 /// is not legal. Callers are expected to have ruled out NaN inputs (see the
 /// isKnownNeverNaN guards in visitSELECT).
 // NOTE(review): original line 4865 — the opening of the signature (function
 // name plus the DL/VT/LHS/RHS parameters) — is missing from this doxygen
 // dump; confirm against the LLVM 3.7 sources.
4866  SDValue True, SDValue False,
4867  ISD::CondCode CC, const TargetLowering &TLI,
4868  SelectionDAG &DAG) {
 // Only handle selects whose operands are the compared values themselves.
4869  if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
4870  return SDValue();
4871 
4872  switch (CC) {
4873  case ISD::SETOLT:
4874  case ISD::SETOLE:
4875  case ISD::SETLT:
4876  case ISD::SETLE:
4877  case ISD::SETULT:
4878  case ISD::SETULE: {
 // "Less than" compares: selecting the smaller operand is a min,
 // selecting the larger (operands swapped) is a max.
4879  unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
4880  if (TLI.isOperationLegal(Opcode, VT))
4881  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
4882  return SDValue();
4883  }
4884  case ISD::SETOGT:
4885  case ISD::SETOGE:
4886  case ISD::SETGT:
4887  case ISD::SETGE:
4888  case ISD::SETUGT:
4889  case ISD::SETUGE: {
 // "Greater than" compares: the mirror image of the case above.
4890  unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
4891  if (TLI.isOperationLegal(Opcode, VT))
4892  return DAG.getNode(Opcode, DL, VT, LHS, RHS);
4893  return SDValue();
4894  }
4895  default:
4896  return SDValue();
4897  }
4898 }
4899 
4900 SDValue DAGCombiner::visitSELECT(SDNode *N) {
 // Try to simplify an ISD::SELECT node (select C, N1, N2).
 // Returns the replacement value, or an empty SDValue if no fold applied.
 // NOTE(review): this listing is a doxygen dump; one original source line
 // was lost in extraction and is flagged inline below.
4901  SDValue N0 = N->getOperand(0);
4902  SDValue N1 = N->getOperand(1);
4903  SDValue N2 = N->getOperand(2);
4904  EVT VT = N->getValueType(0);
4905  EVT VT0 = N0.getValueType();
4906 
4907  // fold (select C, X, X) -> X
4908  if (N1 == N2)
4909  return N1;
4910  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
4911  // fold (select true, X, Y) -> X
4912  // fold (select false, X, Y) -> Y
4913  return !N0C->isNullValue() ? N1 : N2;
4914  }
4915  // fold (select C, 1, X) -> (or C, X)
4916  if (VT == MVT::i1 && isOneConstant(N1))
4917  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
4918  // fold (select C, 0, 1) -> (xor C, 1)
4919  // We can't do this reliably if integer based booleans have different contents
4920  // to floating point based booleans. This is because we can't tell whether we
4921  // have an integer-based boolean or a floating-point-based boolean unless we
4922  // can find the SETCC that produced it and inspect its operands. This is
4923  // fairly easy if C is the SETCC node, but it can potentially be
4924  // undiscoverable (or not reasonably discoverable). For example, it could be
4925  // in another basic block or it could require searching a complicated
4926  // expression.
4927  if (VT.isInteger() &&
4928  (VT0 == MVT::i1 || (VT0.isInteger() &&
4929  TLI.getBooleanContents(false, false) ==
4930  TLI.getBooleanContents(false, true) &&
4931  TLI.getBooleanContents(false, false) ==
 // NOTE(review): original line 4932 is missing here — presumably the
 // remaining getBooleanContents comparison closing this condition;
 // confirm against the LLVM 3.7 sources.
4933  isNullConstant(N1) && isOneConstant(N2)) {
4934  SDValue XORNode;
 // Same type: a single XOR with 1 flips the boolean directly.
4935  if (VT == VT0) {
4936  SDLoc DL(N);
4937  return DAG.getNode(ISD::XOR, DL, VT0,
4938  N0, DAG.getConstant(1, DL, VT0));
4939  }
 // Different widths: XOR in the condition's type, then extend/truncate.
4940  SDLoc DL0(N0);
4941  XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
4942  N0, DAG.getConstant(1, DL0, VT0));
4943  AddToWorklist(XORNode.getNode());
4944  if (VT.bitsGT(VT0))
4945  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
4946  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
4947  }
4948  // fold (select C, 0, X) -> (and (not C), X)
4949  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
4950  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
4951  AddToWorklist(NOTNode.getNode());
4952  return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
4953  }
4954  // fold (select C, X, 1) -> (or (not C), X)
4955  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
4956  SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
4957  AddToWorklist(NOTNode.getNode());
4958  return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
4959  }
4960  // fold (select C, X, 0) -> (and C, X)
4961  if (VT == MVT::i1 && isNullConstant(N2))
4962  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
4963  // fold (select X, X, Y) -> (or X, Y)
4964  // fold (select X, 1, Y) -> (or X, Y)
4965  if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
4966  return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
4967  // fold (select X, Y, X) -> (and X, Y)
4968  // fold (select X, Y, 0) -> (and X, Y)
4969  if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
4970  return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
4971 
4972  // If we can fold this based on the true/false value, do so.
4973  if (SimplifySelectOps(N, N1, N2))
4974  return SDValue(N, 0); // Don't revisit N.
4975 
4976  // fold selects based on a setcc into other things, such as min/max/abs
4977  if (N0.getOpcode() == ISD::SETCC) {
4978  // select x, y (fcmp lt x, y) -> fminnum x, y
4979  // select x, y (fcmp gt x, y) -> fmaxnum x, y
4980  //
4981  // This is OK if we don't care about what happens if either operand is a
4982  // NaN.
4983  //
4984 
4985  // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
4986  // no signed zeros as well as no nans.
4987  const TargetOptions &Options = DAG.getTarget().Options;
4988  if (Options.UnsafeFPMath &&
4989  VT.isFloatingPoint() && N0.hasOneUse() &&
4990  DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
4991  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4992 
4993  SDValue FMinMax =
4994  combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
4995  N1, N2, CC, TLI, DAG);
4996  if (FMinMax)
4997  return FMinMax;
4998  }
4999 
 // Prefer SELECT_CC when the target supports it; otherwise try the
 // generic select simplification.
5000  if ((!LegalOperations &&
5001  TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
5002  TLI.isOperationLegal(ISD::SELECT_CC, VT))
5003  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
5004  N0.getOperand(0), N0.getOperand(1),
5005  N1, N2, N0.getOperand(2));
5006  return SimplifySelect(SDLoc(N), N0, N1, N2);
5007  }
5008 
5009  if (VT0 == MVT::i1) {
5010  if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
5011  // select (and Cond0, Cond1), X, Y
5012  // -> select Cond0, (select Cond1, X, Y), Y
5013  if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
5014  SDValue Cond0 = N0->getOperand(0);
5015  SDValue Cond1 = N0->getOperand(1);
5016  SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5017  N1.getValueType(), Cond1, N1, N2);
5018  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
5019  InnerSelect, N2);
5020  }
5021  // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
5022  if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
5023  SDValue Cond0 = N0->getOperand(0);
5024  SDValue Cond1 = N0->getOperand(1);
5025  SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
5026  N1.getValueType(), Cond1, N1, N2);
5027  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
5028  InnerSelect);
5029  }
5030  }
5031 
5032  // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
5033  if (N1->getOpcode() == ISD::SELECT) {
5034  SDValue N1_0 = N1->getOperand(0);
5035  SDValue N1_1 = N1->getOperand(1);
5036  SDValue N1_2 = N1->getOperand(2);
5037  if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
5038  // Create the actual and node if we can generate good code for it.
5039  if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
5040  SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
5041  N0, N1_0);
5042  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
5043  N1_1, N2);
5044  }
5045  // Otherwise see if we can optimize the "and" to a better pattern.
5046  if (SDValue Combined = visitANDLike(N0, N1_0, N))
5047  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5048  N1_1, N2);
5049  }
5050  }
5051  // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
5052  if (N2->getOpcode() == ISD::SELECT) {
5053  SDValue N2_0 = N2->getOperand(0);
5054  SDValue N2_1 = N2->getOperand(1);
5055  SDValue N2_2 = N2->getOperand(2);
5056  if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
5057  // Create the actual or node if we can generate good code for it.
5058  if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) {
5059  SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
5060  N0, N2_0);
5061  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
5062  N1, N2_2);
5063  }
5064  // Otherwise see if we can optimize to a better pattern.
5065  if (SDValue Combined = visitORLike(N0, N2_0, N))
5066  return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
5067  N1, N2_2);
5068  }
5069  }
5070  }
5071 
5072  return SDValue();
5073 }
5074 
5075 static
5076 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
5077  SDLoc DL(N);
5078  EVT LoVT, HiVT;
5079  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
5080 
5081  // Split the inputs.
5082  SDValue Lo, Hi, LL, LH, RL, RH;
5083  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
5084  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
5085 
5086  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
5087  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
5088 
5089  return std::make_pair(Lo, Hi);
5090 }
5091 
5092 // This function assumes all the vselect's arguments are CONCAT_VECTOR
5093 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
 // If each half of the condition build_vector is uniform (ignoring undefs),
 // the vselect can be replaced by a concat of the matching halves of the
 // LHS/RHS concat_vectors. Returns an empty SDValue if either half mixes
 // different condition values.
 // NOTE(review): original line 5094 — the function signature (presumably
 // static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG))
 // — is missing from this doxygen dump; confirm against the LLVM 3.7 sources.
5095  SDLoc dl(N);
5096  SDValue Cond = N->getOperand(0);
5097  SDValue LHS = N->getOperand(1);
5098  SDValue RHS = N->getOperand(2);
5099  EVT VT = N->getValueType(0);
5100  int NumElems = VT.getVectorNumElements();
5101  assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
5102  RHS.getOpcode() == ISD::CONCAT_VECTORS &&
5103  Cond.getOpcode() == ISD::BUILD_VECTOR);
5104 
5105  // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
5106  // binary ones here.
5107  if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
5108  return SDValue();
5109 
5110  // We're sure we have an even number of elements due to the
5111  // concat_vectors we have as arguments to vselect.
5112  // Skip BV elements until we find one that's not an UNDEF
5113  // After we find an UNDEF element, keep looping until we get to half the
5114  // length of the BV and see if all the non-undef nodes are the same.
5115  ConstantSDNode *BottomHalf = nullptr;
5116  for (int i = 0; i < NumElems / 2; ++i) {
5117  if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
5118  continue;
5119 
5120  if (BottomHalf == nullptr)
5121  BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
 // Any second distinct non-undef value means the half is not uniform.
5122  else if (Cond->getOperand(i).getNode() != BottomHalf)
5123  return SDValue();
5124  }
5125 
5126  // Do the same for the second half of the BuildVector
5127  ConstantSDNode *TopHalf = nullptr;
5128  for (int i = NumElems / 2; i < NumElems; ++i) {
5129  if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
5130  continue;
5131 
5132  if (TopHalf == nullptr)
5133  TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
5134  else if (Cond->getOperand(i).getNode() != TopHalf)
5135  return SDValue();
5136  }
5137 
5138  assert(TopHalf && BottomHalf &&
5139  "One half of the selector was all UNDEFs and the other was all the "
5140  "same value. This should have been addressed before this function.");
 // A zero condition selects from RHS; nonzero selects from LHS.
5141  return DAG.getNode(
5142  ISD::CONCAT_VECTORS, dl, VT,
5143  BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
5144  TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
5145 }
5146 
5147 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
 // Pre-legalization split of a masked scatter whose mask comes from a
 // SETCC and whose data type will be split by type legalization. Returns
 // a TokenFactor of the two half scatters, or an empty SDValue.
 // NOTE(review): this listing is a doxygen dump; two original source lines
 // were lost in extraction and are flagged inline below.
5148 
5149  if (Level >= AfterLegalizeTypes)
5150  return SDValue();
5151 
5152  MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
5153  SDValue Mask = MSC->getMask();
5154  SDValue Data = MSC->getValue();
5155  SDLoc DL(N);
5156 
5157  // If the MSCATTER data type requires splitting and the mask is provided by a
5158  // SETCC, then split both nodes and its operands before legalization. This
5159  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5160  // and enables future optimizations (e.g. min/max pattern matching on X86).
5161  if (Mask.getOpcode() != ISD::SETCC)
5162  return SDValue();
5163 
5164  // Check if any splitting is required.
5165  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
 // NOTE(review): original line 5166 is missing here — presumably the
 // comparison against TargetLowering::TypeSplitVector; confirm against
 // the LLVM 3.7 sources.
5167  return SDValue();
5168  SDValue MaskLo, MaskHi, Lo, Hi;
5169  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5170 
5171  EVT LoVT, HiVT;
5172  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
5173 
5174  SDValue Chain = MSC->getChain();
5175 
5176  EVT MemoryVT = MSC->getMemoryVT();
5177  unsigned Alignment = MSC->getOriginalAlignment();
5178 
5179  EVT LoMemVT, HiMemVT;
5180  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5181 
5182  SDValue DataLo, DataHi;
5183  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5184 
5185  SDValue BasePtr = MSC->getBasePtr();
5186  SDValue IndexLo, IndexHi;
5187  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
5188 
 // Both half scatters share one memory operand describing the store.
5189  MachineMemOperand *MMO = DAG.getMachineFunction().
5190  getMachineMemOperand(MSC->getPointerInfo(),
 // NOTE(review): original line 5191 is missing here — presumably the
 // MachineMemOperand flags/size arguments (e.g. MOStore and the memory
 // size); confirm against the LLVM 3.7 sources.
5192  Alignment, MSC->getAAInfo(), MSC->getRanges());
5193 
5194  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
5195  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
5196  DL, OpsLo, MMO);
5197 
5198  SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
5199  Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
5200  DL, OpsHi, MMO);
5201 
5202  AddToWorklist(Lo.getNode());
5203  AddToWorklist(Hi.getNode());
5204 
 // Join the two half scatters back into a single chain result.
5205  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5206 }
5207 
5208 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
5209 
5210  if (Level >= AfterLegalizeTypes)
5211  return SDValue();
5212 
5214  SDValue Mask = MST->getMask();
5215  SDValue Data = MST->getValue();
5216  SDLoc DL(N);
5217 
5218  // If the MSTORE data type requires splitting and the mask is provided by a
5219  // SETCC, then split both nodes and its operands before legalization. This
5220  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5221  // and enables future optimizations (e.g. min/max pattern matching on X86).
5222  if (Mask.getOpcode() == ISD::SETCC) {
5223 
5224  // Check if any splitting is required.
5225  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
5227  return SDValue();
5228 
5229  SDValue MaskLo, MaskHi, Lo, Hi;
5230  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5231 
5232  EVT LoVT, HiVT;
5233  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
5234 
5235  SDValue Chain = MST->getChain();
5236  SDValue Ptr = MST->getBasePtr();
5237 
5238  EVT MemoryVT = MST->getMemoryVT();
5239  unsigned Alignment = MST->getOriginalAlignment();
5240 
5241  // if Alignment is equal to the vector size,
5242  // take the half of it for the second part
5243  unsigned SecondHalfAlignment =
5244  (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
5245  Alignment/2 : Alignment;
5246 
5247  EVT LoMemVT, HiMemVT;
5248  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5249 
5250  SDValue DataLo, DataHi;
5251  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
5252 
5253  MachineMemOperand *MMO = DAG.getMachineFunction().
5254  getMachineMemOperand(MST->getPointerInfo(),
5256  Alignment, MST->getAAInfo(), MST->getRanges());
5257 
5258  Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
5259  MST->isTruncatingStore());
5260 
5261  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5262  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5263  DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5264 
5265  MMO = DAG.getMachineFunction().
5266  getMachineMemOperand(MST->getPointerInfo(),
5268  SecondHalfAlignment, MST->getAAInfo(),
5269  MST->getRanges());
5270 
5271  Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
5272  MST->isTruncatingStore());
5273 
5274  AddToWorklist(Lo.getNode());
5275  AddToWorklist(Hi.getNode());
5276 
5277  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
5278  }
5279  return SDValue();
5280 }
5281 
5282 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
5283 
5284  if (Level >= AfterLegalizeTypes)
5285  return SDValue();
5286 
5288  SDValue Mask = MGT->getMask();
5289  SDLoc DL(N);
5290 
5291  // If the MGATHER result requires splitting and the mask is provided by a
5292  // SETCC, then split both nodes and its operands before legalization. This
5293  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5294  // and enables future optimizations (e.g. min/max pattern matching on X86).
5295 
5296  if (Mask.getOpcode() != ISD::SETCC)
5297  return SDValue();
5298 
5299  EVT VT = N->getValueType(0);
5300 
5301  // Check if any splitting is required.
5302  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5304  return SDValue();
5305 
5306  SDValue MaskLo, MaskHi, Lo, Hi;
5307  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5308 
5309  SDValue Src0 = MGT->getValue();
5310  SDValue Src0Lo, Src0Hi;
5311  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5312 
5313  EVT LoVT, HiVT;
5314  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
5315 
5316  SDValue Chain = MGT->getChain();
5317  EVT MemoryVT = MGT->getMemoryVT();
5318  unsigned Alignment = MGT->getOriginalAlignment();
5319 
5320  EVT LoMemVT, HiMemVT;
5321  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5322 
5323  SDValue BasePtr = MGT->getBasePtr();
5324  SDValue Index = MGT->getIndex();
5325  SDValue IndexLo, IndexHi;
5326  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
5327 
5328  MachineMemOperand *MMO = DAG.getMachineFunction().
5329  getMachineMemOperand(MGT->getPointerInfo(),
5331  Alignment, MGT->getAAInfo(), MGT->getRanges());
5332 
5333  SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
5334  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
5335  MMO);
5336 
5337  SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
5338  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
5339  MMO);
5340 
5341  AddToWorklist(Lo.getNode());
5342  AddToWorklist(Hi.getNode());
5343 
5344  // Build a factor node to remember that this load is independent of the
5345  // other one.
5346  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5347  Hi.getValue(1));
5348 
5349  // Legalized the chain result - switch anything that used the old chain to
5350  // use the new one.
5351  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
5352 
5353  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5354 
5355  SDValue RetOps[] = { GatherRes, Chain };
5356  return DAG.getMergeValues(RetOps, DL);
5357 }
5358 
5359 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
5360 
5361  if (Level >= AfterLegalizeTypes)
5362  return SDValue();
5363 
5365  SDValue Mask = MLD->getMask();
5366  SDLoc DL(N);
5367 
5368  // If the MLOAD result requires splitting and the mask is provided by a
5369  // SETCC, then split both nodes and its operands before legalization. This
5370  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5371  // and enables future optimizations (e.g. min/max pattern matching on X86).
5372 
5373  if (Mask.getOpcode() == ISD::SETCC) {
5374  EVT VT = N->getValueType(0);
5375 
5376  // Check if any splitting is required.
5377  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5379  return SDValue();
5380 
5381  SDValue MaskLo, MaskHi, Lo, Hi;
5382  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
5383 
5384  SDValue Src0 = MLD->getSrc0();
5385  SDValue Src0Lo, Src0Hi;
5386  std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
5387 
5388  EVT LoVT, HiVT;
5389  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
5390 
5391  SDValue Chain = MLD->getChain();
5392  SDValue Ptr = MLD->getBasePtr();
5393  EVT MemoryVT = MLD->getMemoryVT();
5394  unsigned Alignment = MLD->getOriginalAlignment();
5395 
5396  // if Alignment is equal to the vector size,
5397  // take the half of it for the second part
5398  unsigned SecondHalfAlignment =
5399  (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
5400  Alignment/2 : Alignment;
5401 
5402  EVT LoMemVT, HiMemVT;
5403  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
5404 
5405  MachineMemOperand *MMO = DAG.getMachineFunction().
5406  getMachineMemOperand(MLD->getPointerInfo(),
5408  Alignment, MLD->getAAInfo(), MLD->getRanges());
5409 
5410  Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
5412 
5413  unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
5414  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5415  DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
5416 
5417  MMO = DAG.getMachineFunction().
5418  getMachineMemOperand(MLD->getPointerInfo(),
5420  SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
5421 
5422  Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
5424 
5425  AddToWorklist(Lo.getNode());
5426  AddToWorklist(Hi.getNode());
5427 
5428  // Build a factor node to remember that this load is independent of the
5429  // other one.
5430  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
5431  Hi.getValue(1));
5432 
5433  // Legalized the chain result - switch anything that used the old chain to
5434  // use the new one.
5435  DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
5436 
5437  SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5438 
5439  SDValue RetOps[] = { LoadRes, Chain };
5440  return DAG.getMergeValues(RetOps, DL);
5441  }
5442  return SDValue();
5443 }
5444 
5445 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
5446  SDValue N0 = N->getOperand(0);
5447  SDValue N1 = N->getOperand(1);
5448  SDValue N2 = N->getOperand(2);
5449  SDLoc DL(N);
5450 
5451  // Canonicalize integer abs.
5452  // vselect (setg[te] X, 0), X, -X ->
5453  // vselect (setgt X, -1), X, -X ->
5454  // vselect (setl[te] X, 0), -X, X ->
5455  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
5456  if (N0.getOpcode() == ISD::SETCC) {
5457  SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
5458  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
5459  bool isAbs = false;
5460  bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
5461 
5462  if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
5463  (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
5464  N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
5466  else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
5467  N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
5468  isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5469 
5470  if (isAbs) {
5471  EVT VT = LHS.getValueType();
5472  SDValue Shift = DAG.getNode(
5473  ISD::SRA, DL, VT, LHS,
5474  DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
5475  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
5476  AddToWorklist(Shift.getNode());
5477  AddToWorklist(Add.getNode());
5478  return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
5479  }
5480  }
5481 
5482  if (SimplifySelectOps(N, N1, N2))
5483  return SDValue(N, 0); // Don't revisit N.
5484 
5485  // If the VSELECT result requires splitting and the mask is provided by a
5486  // SETCC, then split both nodes and its operands before legalization. This
5487  // prevents the type legalizer from unrolling SETCC into scalar comparisons
5488  // and enables future optimizations (e.g. min/max pattern matching on X86).
5489  if (N0.getOpcode() == ISD::SETCC) {
5490  EVT VT = N->getValueType(0);
5491 
5492  // Check if any splitting is required.
5493  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
5495  return SDValue();
5496 
5497  SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
5498  std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
5499  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
5500  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
5501 
5502  Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
5503  Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
5504 
5505  // Add the new VSELECT nodes to the work list in case they need to be split
5506  // again.
5507  AddToWorklist(Lo.getNode());
5508  AddToWorklist(Hi.getNode());
5509 
5510  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
5511  }
5512 
5513  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
5515  return N1;
5516  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
5518  return N2;
5519 
5520  // The ConvertSelectToConcatVector function is assuming both the above
5521  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
5522  // and addressed.
5523  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
5524  N2.getOpcode() == ISD::CONCAT_VECTORS &&
5526  SDValue CV = ConvertSelectToConcatVector(N, DAG);
5527  if (CV.getNode())
5528  return CV;
5529  }
5530 
5531  return SDValue();
5532 }
5533 
5534 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
5535  SDValue N0 = N->getOperand(0);
5536  SDValue N1 = N->getOperand(1);
5537  SDValue N2 = N->getOperand(2);
5538  SDValue N3 = N->getOperand(3);
5539  SDValue N4 = N->getOperand(4);
5540  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
5541 
5542  // fold select_cc lhs, rhs, x, x, cc -> x
5543  if (N2 == N3)
5544  return N2;
5545 
5546  // Determine if the condition we're dealing with is constant
5548  N0, N1, CC, SDLoc(N), false);
5549  if (SCC.getNode()) {
5550  AddToWorklist(SCC.getNode());
5551 
5552  if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
5553  if (!SCCC->isNullValue())
5554  return N2; // cond always true -> true val
5555  else
5556  return N3; // cond always false -> false val
5557  } else if (SCC->getOpcode() == ISD::UNDEF) {
5558  // When the condition is UNDEF, just return the first operand. This is
5559  // coherent the DAG creation, no setcc node is created in this case
5560  return N2;
5561  } else if (SCC.getOpcode() == ISD::SETCC) {
5562  // Fold to a simpler select_cc
5563  return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
5564  SCC.getOperand(0), SCC.getOperand(1), N2, N3,
5565  SCC.getOperand(2));
5566  }
5567  }
5568 
5569  // If we can fold this based on the true/false value, do so.
5570  if (SimplifySelectOps(N, N2, N3))
5571  return SDValue(N, 0); // Don't revisit N.
5572 
5573  // fold select_cc into other things, such as min/max/abs
5574  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
5575 }
5576 
5577 SDValue DAGCombiner::visitSETCC(SDNode *N) {
5578  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
5579  cast<CondCodeSDNode>(N->getOperand(2))->get(),
5580  SDLoc(N));
5581 }
5582 
5583 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
5584 /// a build_vector of constants.
5585 /// This function is called by the DAGCombiner when visiting sext/zext/aext
5586 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
5587 /// Vector extends are not folded if operations are legal; this is to
5588 /// avoid introducing illegal build_vector dag nodes.
5590  SelectionDAG &DAG, bool LegalTypes,
5591  bool LegalOperations) {
5592  unsigned Opcode = N->getOpcode();
5593  SDValue N0 = N->getOperand(0);
5594  EVT VT = N->getValueType(0);
5595 
5596  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
5597  Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
5598  && "Expected EXTEND dag node in input!");
5599 
5600  // fold (sext c1) -> c1
5601  // fold (zext c1) -> c1
5602  // fold (aext c1) -> c1
5603  if (isa<ConstantSDNode>(N0))
5604  return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
5605 
5606  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
5607  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
5608  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
5609  EVT SVT = VT.getScalarType();
5610  if (!(VT.isVector() &&
5611  (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
5613  return nullptr;
5614 
5615  // We can fold this node into a build_vector.
5616  unsigned VTBits = SVT.getSizeInBits();
5617  unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
5619  unsigned NumElts = VT.getVectorNumElements();
5620  SDLoc DL(N);
5621 
5622  for (unsigned i=0; i != NumElts; ++i) {
5623  SDValue Op = N0->getOperand(i);
5624  if (Op->getOpcode() == ISD::UNDEF) {
5625  Elts.push_back(DAG.getUNDEF(SVT));
5626  continue;
5627  }
5628 
5629  SDLoc DL(Op);
5630  // Get the constant value and if needed trunc it to the size of the type.
5631  // Nodes like build_vector might have constants wider than the scalar type.
5632  APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
5633  if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
5634  Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
5635  else
5636  Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
5637  }
5638 
5639  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
5640 }
5641 
5642 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
5643 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
5644 // transformation. Returns true if extension are possible and the above
5645 // mentioned transformation is profitable.
5647  unsigned ExtOpc,
5648  SmallVectorImpl<SDNode *> &ExtendNodes,
5649  const TargetLowering &TLI) {
5650  bool HasCopyToRegUses = false;
5651  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
5652  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
5653  UE = N0.getNode()->use_end();
5654  UI != UE; ++UI) {
5655  SDNode *User = *UI;
5656  if (User == N)
5657  continue;
5658  if (UI.getUse().getResNo() != N0.getResNo())
5659  continue;
5660  // FIXME: Only extend SETCC N, N and SETCC N, c for now.
5661  if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
5662  ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
5663  if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
5664  // Sign bits will be lost after a zext.
5665  return false;
5666  bool Add = false;
5667  for (unsigned i = 0; i != 2; ++i) {
5668  SDValue UseOp = User->getOperand(i);
5669  if (UseOp == N0)
5670  continue;
5671  if (!isa<ConstantSDNode>(UseOp))
5672  return false;
5673  Add = true;
5674  }
5675  if (Add)
5676  ExtendNodes.push_back(User);
5677  continue;
5678  }
5679  // If truncates aren't free and there are users we can't
5680  // extend, it isn't worthwhile.
5681  if (!isTruncFree)
5682  return false;
5683  // Remember if this value is live-out.
5684  if (User->getOpcode() == ISD::CopyToReg)
5685  HasCopyToRegUses = true;
5686  }
5687 
5688  if (HasCopyToRegUses) {
5689  bool BothLiveOut = false;
5690  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
5691  UI != UE; ++UI) {
5692  SDUse &Use = UI.getUse();
5693  if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
5694  BothLiveOut = true;
5695  break;
5696  }
5697  }
5698  if (BothLiveOut)
5699  // Both unextended and extended values are live out. There had better be
5700  // a good reason for the transformation.
5701  return ExtendNodes.size();
5702  }
5703  return true;
5704 }
5705 
5706 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
5707  SDValue Trunc, SDValue ExtLoad, SDLoc DL,
5708  ISD::NodeType ExtType) {
5709  // Extend SetCC uses if necessary.
5710  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
5711  SDNode *SetCC = SetCCs[i];
5713 
5714  for (unsigned j = 0; j != 2; ++j) {
5715  SDValue SOp = SetCC->getOperand(j);
5716  if (SOp == Trunc)
5717  Ops.push_back(ExtLoad);
5718  else
5719  Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
5720  }
5721 
5722  Ops.push_back(SetCC->getOperand(2));
5723  CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
5724  }
5725 }
5726 
5727 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
5728 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
5729  SDValue N0 = N->getOperand(0);
5730  EVT DstVT = N->getValueType(0);
5731  EVT SrcVT = N0.getValueType();
5732 
5733  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
5734  N->getOpcode() == ISD::ZERO_EXTEND) &&
5735  "Unexpected node type (not an extend)!");
5736 
5737  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
5738  // For example, on a target with legal v4i32, but illegal v8i32, turn:
5739  // (v8i32 (sext (v8i16 (load x))))
5740  // into:
5741  // (v8i32 (concat_vectors (v4i32 (sextload x)),
5742  // (v4i32 (sextload (x + 16)))))
5743  // Where uses of the original load, i.e.:
5744  // (v8i16 (load x))
5745  // are replaced with:
5746  // (v8i16 (truncate
5747  // (v8i32 (concat_vectors (v4i32 (sextload x)),
5748  // (v4i32 (sextload (x + 16)))))))
5749  //
5750  // This combine is only applicable to illegal, but splittable, vectors.
5751  // All legal types, and illegal non-vector types, are handled elsewhere.
5752  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
5753  //
5754  if (N0->getOpcode() != ISD::LOAD)
5755  return SDValue();
5756 
5757  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5758 
5759  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
5760  !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
5761  !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
5762  return SDValue();
5763 
5764  SmallVector<SDNode *, 4> SetCCs;
5765  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
5766  return SDValue();
5767 
5768  ISD::LoadExtType ExtType =
5770 
5771  // Try to split the vector types to get down to legal types.
5772  EVT SplitSrcVT = SrcVT;
5773  EVT SplitDstVT = DstVT;
5774  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
5775  SplitSrcVT.getVectorNumElements() > 1) {
5776  SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
5777  SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
5778  }
5779 
5780  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
5781  return SDValue();
5782 
5783  SDLoc DL(N);
5784  const unsigned NumSplits =
5785  DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
5786  const unsigned Stride = SplitSrcVT.getStoreSize();
5788  SmallVector<SDValue, 4> Chains;
5789 
5790  SDValue BasePtr = LN0->getBasePtr();
5791  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
5792  const unsigned Offset = Idx * Stride;
5793  const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
5794 
5795  SDValue SplitLoad = DAG.getExtLoad(
5796  ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
5797  LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
5798  LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
5799  Align, LN0->getAAInfo());
5800 
5801  BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
5802  DAG.getConstant(Stride, DL, BasePtr.getValueType()));
5803 
5804  Loads.push_back(SplitLoad.getValue(0));
5805  Chains.push_back(SplitLoad.getValue(1));
5806  }
5807 
5808  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
5809  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
5810 
5811  CombineTo(N, NewValue);
5812 
5813  // Replace uses of the original load (before extension)
5814  // with a truncate of the concatenated sextloaded vectors.
5815  SDValue Trunc =
5816  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
5817  CombineTo(N0.getNode(), Trunc, NewChain);
5818  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
5819  (ISD::NodeType)N->getOpcode());
5820  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5821 }
5822 
5823 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
5824  SDValue N0 = N->getOperand(0);
5825  EVT VT = N->getValueType(0);
5826 
5827  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
5828  LegalOperations))
5829  return SDValue(Res, 0);
5830 
5831  // fold (sext (sext x)) -> (sext x)
5832  // fold (sext (aext x)) -> (sext x)
5833  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
5834  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
5835  N0.getOperand(0));
5836 
5837  if (N0.getOpcode() == ISD::TRUNCATE) {
5838  // fold (sext (truncate (load x))) -> (sext (smaller load x))
5839  // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
5840  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
5841  if (NarrowLoad.getNode()) {
5842  SDNode* oye = N0.getNode()->getOperand(0).getNode();
5843  if (NarrowLoad.getNode() != N0.getNode()) {
5844  CombineTo(N0.getNode(), NarrowLoad);
5845  // CombineTo deleted the truncate, if needed, but not what's under it.
5846  AddToWorklist(oye);
5847  }
5848  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5849  }
5850 
5851  // See if the value being truncated is already sign extended. If so, just
5852  // eliminate the trunc/sext pair.
5853  SDValue Op = N0.getOperand(0);
5854  unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
5855  unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
5856  unsigned DestBits = VT.getScalarType().getSizeInBits();
5857  unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
5858 
5859  if (OpBits == DestBits) {
5860  // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
5861  // bits, it is already ready.
5862  if (NumSignBits > DestBits-MidBits)
5863  return Op;
5864  } else if (OpBits < DestBits) {
5865  // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
5866  // bits, just sext from i32.
5867  if (NumSignBits > OpBits-MidBits)
5868  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
5869  } else {
5870  // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
5871  // bits, just truncate to i32.
5872  if (NumSignBits > OpBits-MidBits)
5873  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
5874  }
5875 
5876  // fold (sext (truncate x)) -> (sextinreg x).
5877  if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
5878  N0.getValueType())) {
5879  if (OpBits < DestBits)
5880  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
5881  else if (OpBits > DestBits)
5882  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
5883  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
5884  DAG.getValueType(N0.getValueType()));
5885  }
5886  }
5887 
5888  // fold (sext (load x)) -> (sext (truncate (sextload x)))
5889  // Only generate vector extloads when 1) they're legal, and 2) they are
5890  // deemed desirable by the target.
5892  ((!LegalOperations && !VT.isVector() &&
5893  !cast<LoadSDNode>(N0)->isVolatile()) ||
5894  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
5895  bool DoXform = true;
5896  SmallVector<SDNode*, 4> SetCCs;
5897  if (!N0.hasOneUse())
5898  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
5899  if (VT.isVector())
5900  DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
5901  if (DoXform) {
5902  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5903  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
5904  LN0->getChain(),
5905  LN0->getBasePtr(), N0.getValueType(),
5906  LN0->getMemOperand());
5907  CombineTo(N, ExtLoad);
5908  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
5909  N0.getValueType(), ExtLoad);
5910  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
5911  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
5913  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5914  }
5915  }
5916 
5917  // fold (sext (load x)) to multiple smaller sextloads.
5918  // Only on illegal but splittable vectors.
5919  if (SDValue ExtLoad = CombineExtLoad(N))
5920  return ExtLoad;
5921 
5922  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
5923  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
5924  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
5925  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
5926  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5927  EVT MemVT = LN0->getMemoryVT();
5928  if ((!LegalOperations && !LN0->isVolatile()) ||
5929  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
5930  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
5931  LN0->getChain(),
5932  LN0->getBasePtr(), MemVT,
5933  LN0->getMemOperand());
5934  CombineTo(N, ExtLoad);
5935  CombineTo(N0.getNode(),
5936  DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
5937  N0.getValueType(), ExtLoad),
5938  ExtLoad.getValue(1));
5939  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5940  }
5941  }
5942 
5943  // fold (sext (and/or/xor (load x), cst)) ->
5944  // (and/or/xor (sextload x), (sext cst))
5945  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
5946  N0.getOpcode() == ISD::XOR) &&
5947  isa<LoadSDNode>(N0.getOperand(0)) &&
5948  N0.getOperand(1).getOpcode() == ISD::Constant &&
5949  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
5950  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
5951  LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
5952  if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
5953  bool DoXform = true;
5954  SmallVector<SDNode*, 4> SetCCs;
5955  if (!N0.hasOneUse())
5957  SetCCs, TLI);
5958  if (DoXform) {
5959  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
5960  LN0->getChain(), LN0->getBasePtr(),
5961  LN0->getMemoryVT(),
5962  LN0->getMemOperand());
5963  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
5964  Mask = Mask.sext(VT.getSizeInBits());
5965  SDLoc DL(N);
5966  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
5967  ExtLoad, DAG.getConstant(Mask, DL, VT));
5968  SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
5969  SDLoc(N0.getOperand(0)),
5970  N0.getOperand(0).getValueType(), ExtLoad);
5971  CombineTo(N, And);
5972  CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
5973  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
5975  return SDValue(N, 0); // Return N so it doesn't get rechecked!
5976  }
5977  }
5978  }
5979 
5980  if (N0.getOpcode() == ISD::SETCC) {
5981  EVT N0VT = N0.getOperand(0).getValueType();
5982  // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
5983  // Only do this before legalize for now.
5984  if (VT.isVector() && !LegalOperations &&
5985  TLI.getBooleanContents(N0VT) ==
5987  // On some architectures (such as SSE/NEON/etc) the SETCC result type is
5988  // of the same size as the compared operands. Only optimize sext(setcc())
5989  // if this is the case.
5990  EVT SVT = getSetCCResultType(N0VT);
5991 
5992  // We know that the # elements of the results is the same as the
5993  // # elements of the compare (and the # elements of the compare result
5994  // for that matter). Check to see that they are the same size. If so,
5995  // we know that the element size of the sext'd result matches the
5996  // element size of the compare operands.
5997  if (VT.getSizeInBits() == SVT.getSizeInBits())
5998  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
5999  N0.getOperand(1),
6000  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6001 
6002  // If the desired elements are smaller or larger than the source
6003  // elements we can use a matching integer vector type and then
6004  // truncate/sign extend
6005  EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6006  if (SVT == MatchingVectorType) {
6007  SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
6008  N0.getOperand(0), N0.getOperand(1),
6009  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6010  return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
6011  }
6012  }
6013 
6014  // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
6015  unsigned ElementWidth = VT.getScalarType().getSizeInBits();
6016  SDLoc DL(N);
6017  SDValue NegOne =
6018  DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
6019  SDValue SCC =
6020  SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
6021  NegOne, DAG.getConstant(0, DL, VT),
6022  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
6023  if (SCC.getNode()) return SCC;
6024 
6025  if (!VT.isVector()) {
6026  EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
6027  if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
6028  SDLoc DL(N);
6029  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
6030  SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
6031  N0.getOperand(0), N0.getOperand(1), CC);
6032  return DAG.getSelect(DL, VT, SetCC,
6033  NegOne, DAG.getConstant(0, DL, VT));
6034  }
6035  }
6036  }
6037 
6038  // fold (sext x) -> (zext x) if the sign bit is known zero.
6039  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
6040  DAG.SignBitIsZero(N0))
6041  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
6042 
6043  return SDValue();
6044 }
6045 
6046 // isTruncateOf - If N is a truncate of some other value, return true, record
6047 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
6048 // This function computes KnownZero to avoid a duplicated call to
6049 // computeKnownBits in the caller.
6050 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
6051  APInt &KnownZero) {
6052  APInt KnownOne;
6053  if (N->getOpcode() == ISD::TRUNCATE) {
6054  Op = N->getOperand(0);
6055  DAG.computeKnownBits(Op, KnownZero, KnownOne);
6056  return true;
6057  }
6058 
6059  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
6060  cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
6061  return false;
6062 
6063  SDValue Op0 = N->getOperand(0);
6064  SDValue Op1 = N->getOperand(1);
6065  assert(Op0.getValueType() == Op1.getValueType());
6066 
6067  if (isNullConstant(Op0))
6068  Op = Op1;
6069  else if (isNullConstant(Op1))
6070  Op = Op0;
6071  else
6072  return false;
6073 
6074  DAG.computeKnownBits(Op, KnownZero, KnownOne);
6075 
6076  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
6077  return false;
6078 
6079  return true;
6080 }
6081 
6082 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
6083  SDValue N0 = N->getOperand(0);
6084  EVT VT = N->getValueType(0);
6085 
6086  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6087  LegalOperations))
6088  return SDValue(Res, 0);
6089 
6090  // fold (zext (zext x)) -> (zext x)
6091  // fold (zext (aext x)) -> (zext x)
6092  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
6093  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
6094  N0.getOperand(0));
6095 
6096  // fold (zext (truncate x)) -> (zext x) or
6097  // (zext (truncate x)) -> (truncate x)
6098  // This is valid when the truncated bits of x are already zero.
6099  // FIXME: We should extend this to work for vectors too.
6100  SDValue Op;
6101  APInt KnownZero;
6102  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
6103  APInt TruncatedBits =
6104  (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
6105  APInt(Op.getValueSizeInBits(), 0) :
6107  N0.getValueSizeInBits(),
6109  VT.getSizeInBits()));
6110  if (TruncatedBits == (KnownZero & TruncatedBits)) {
6111  if (VT.bitsGT(Op.getValueType()))
6112  return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
6113  if (VT.bitsLT(Op.getValueType()))
6114  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6115 
6116  return Op;
6117  }
6118  }
6119 
6120  // fold (zext (truncate (load x))) -> (zext (smaller load x))
6121  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
6122  if (N0.getOpcode() == ISD::TRUNCATE) {
6123  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
6124  if (NarrowLoad.getNode()) {
6125  SDNode* oye = N0.getNode()->getOperand(0).getNode();
6126  if (NarrowLoad.getNode() != N0.getNode()) {
6127  CombineTo(N0.getNode(), NarrowLoad);
6128  // CombineTo deleted the truncate, if needed, but not what's under it.
6129  AddToWorklist(oye);
6130  }
6131  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6132  }
6133  }
6134 
6135  // fold (zext (truncate x)) -> (and x, mask)
6136  if (N0.getOpcode() == ISD::TRUNCATE &&
6137  (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
6138 
6139  // fold (zext (truncate (load x))) -> (zext (smaller load x))
6140  // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
6141  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
6142  if (NarrowLoad.getNode()) {
6143  SDNode* oye = N0.getNode()->getOperand(0).getNode();
6144  if (NarrowLoad.getNode() != N0.getNode()) {
6145  CombineTo(N0.getNode(), NarrowLoad);
6146  // CombineTo deleted the truncate, if needed, but not what's under it.
6147  AddToWorklist(oye);
6148  }
6149  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6150  }
6151 
6152  SDValue Op = N0.getOperand(0);
6153  if (Op.getValueType().bitsLT(VT)) {
6154  Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
6155  AddToWorklist(Op.getNode());
6156  } else if (Op.getValueType().bitsGT(VT)) {
6157  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
6158  AddToWorklist(Op.getNode());
6159  }
6160  return DAG.getZeroExtendInReg(Op, SDLoc(N),
6161  N0.getValueType().getScalarType());
6162  }
6163 
6164  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
6165  // if either of the casts is not free.
6166  if (N0.getOpcode() == ISD::AND &&
6167  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6168  N0.getOperand(1).getOpcode() == ISD::Constant &&
6169  (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6170  N0.getValueType()) ||
6171  !TLI.isZExtFree(N0.getValueType(), VT))) {
6172  SDValue X = N0.getOperand(0).getOperand(0);
6173  if (X.getValueType().bitsLT(VT)) {
6174  X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
6175  } else if (X.getValueType().bitsGT(VT)) {
6176  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6177  }
6178  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6179  Mask = Mask.zext(VT.getSizeInBits());
6180  SDLoc DL(N);
6181  return DAG.getNode(ISD::AND, DL, VT,
6182  X, DAG.getConstant(Mask, DL, VT));
6183  }
6184 
6185  // fold (zext (load x)) -> (zext (truncate (zextload x)))
6186  // Only generate vector extloads when 1) they're legal, and 2) they are
6187  // deemed desirable by the target.
6189  ((!LegalOperations && !VT.isVector() &&
6190  !cast<LoadSDNode>(N0)->isVolatile()) ||
6191  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
6192  bool DoXform = true;
6193  SmallVector<SDNode*, 4> SetCCs;
6194  if (!N0.hasOneUse())
6195  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
6196  if (VT.isVector())
6197  DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
6198  if (DoXform) {
6199  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6200  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6201  LN0->getChain(),
6202  LN0->getBasePtr(), N0.getValueType(),
6203  LN0->getMemOperand());
6204  CombineTo(N, ExtLoad);
6205  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6206  N0.getValueType(), ExtLoad);
6207  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6208 
6209  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6211  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6212  }
6213  }
6214 
6215  // fold (zext (load x)) to multiple smaller zextloads.
6216  // Only on illegal but splittable vectors.
6217  if (SDValue ExtLoad = CombineExtLoad(N))
6218  return ExtLoad;
6219 
6220  // fold (zext (and/or/xor (load x), cst)) ->
6221  // (and/or/xor (zextload x), (zext cst))
6222  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
6223  N0.getOpcode() == ISD::XOR) &&
6224  isa<LoadSDNode>(N0.getOperand(0)) &&
6225  N0.getOperand(1).getOpcode() == ISD::Constant &&
6226  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
6227  (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
6228  LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
6229  if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
6230  bool DoXform = true;
6231  SmallVector<SDNode*, 4> SetCCs;
6232  if (!N0.hasOneUse())
6234  SetCCs, TLI);
6235  if (DoXform) {
6236  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
6237  LN0->getChain(), LN0->getBasePtr(),
6238  LN0->getMemoryVT(),
6239  LN0->getMemOperand());
6240  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6241  Mask = Mask.zext(VT.getSizeInBits());
6242  SDLoc DL(N);
6243  SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
6244  ExtLoad, DAG.getConstant(Mask, DL, VT));
6245  SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
6246  SDLoc(N0.getOperand(0)),
6247  N0.getOperand(0).getValueType(), ExtLoad);
6248  CombineTo(N, And);
6249  CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
6250  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
6252  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6253  }
6254  }
6255  }
6256 
6257  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
6258  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
6259  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
6260  ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
6261  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6262  EVT MemVT = LN0->getMemoryVT();
6263  if ((!LegalOperations && !LN0->isVolatile()) ||
6264  TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
6265  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
6266  LN0->getChain(),
6267  LN0->getBasePtr(), MemVT,
6268  LN0->getMemOperand());
6269  CombineTo(N, ExtLoad);
6270  CombineTo(N0.getNode(),
6271  DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
6272  ExtLoad),
6273  ExtLoad.getValue(1));
6274  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6275  }
6276  }
6277 
6278  if (N0.getOpcode() == ISD::SETCC) {
6279  if (!LegalOperations && VT.isVector() &&
6281  EVT N0VT = N0.getOperand(0).getValueType();
6282  if (getSetCCResultType(N0VT) == N0.getValueType())
6283  return SDValue();
6284 
6285  // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
6286  // Only do this before legalize for now.
6287  EVT EltVT = VT.getVectorElementType();
6288  SDLoc DL(N);
6289  SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
6290  DAG.getConstant(1, DL, EltVT));
6291  if (VT.getSizeInBits() == N0VT.getSizeInBits())
6292  // We know that the # elements of the results is the same as the
6293  // # elements of the compare (and the # elements of the compare result
6294  // for that matter). Check to see that they are the same size. If so,
6295  // we know that the element size of the sext'd result matches the
6296  // element size of the compare operands.
6297  return DAG.getNode(ISD::AND, DL, VT,
6298  DAG.getSetCC(DL, VT, N0.getOperand(0),
6299  N0.getOperand(1),
6300  cast<CondCodeSDNode>(N0.getOperand(2))->get()),
6301  DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
6302  OneOps));
6303 
6304  // If the desired elements are smaller or larger than the source
6305  // elements we can use a matching integer vector type and then
6306  // truncate/sign extend
6307  EVT MatchingElementType =
6308  EVT::getIntegerVT(*DAG.getContext(),
6309  N0VT.getScalarType().getSizeInBits());
6310  EVT MatchingVectorType =
6311  EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
6312  N0VT.getVectorNumElements());
6313  SDValue VsetCC =
6314  DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
6315  N0.getOperand(1),
6316  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6317  return DAG.getNode(ISD::AND, DL, VT,
6318  DAG.getSExtOrTrunc(VsetCC, DL, VT),
6319  DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
6320  }
6321 
6322  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6323  SDLoc DL(N);
6324  SDValue SCC =
6325  SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
6326  DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
6327  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
6328  if (SCC.getNode()) return SCC;
6329  }
6330 
6331  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
6332  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
6333  isa<ConstantSDNode>(N0.getOperand(1)) &&
6334  N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
6335  N0.hasOneUse()) {
6336  SDValue ShAmt = N0.getOperand(1);
6337  unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
6338  if (N0.getOpcode() == ISD::SHL) {
6339  SDValue InnerZExt = N0.getOperand(0);
6340  // If the original shl may be shifting out bits, do not perform this
6341  // transformation.
6342  unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
6343  InnerZExt.getOperand(0).getValueType().getSizeInBits();
6344  if (ShAmtVal > KnownZeroBits)
6345  return SDValue();
6346  }
6347 
6348  SDLoc DL(N);
6349 
6350  // Ensure that the shift amount is wide enough for the shifted value.
6351  if (VT.getSizeInBits() >= 256)
6352  ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
6353 
6354  return DAG.getNode(N0.getOpcode(), DL, VT,
6355  DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
6356  ShAmt);
6357  }
6358 
6359  return SDValue();
6360 }
6361 
6362 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
6363  SDValue N0 = N->getOperand(0);
6364  EVT VT = N->getValueType(0);
6365 
6366  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6367  LegalOperations))
6368  return SDValue(Res, 0);
6369 
6370  // fold (aext (aext x)) -> (aext x)
6371  // fold (aext (zext x)) -> (zext x)
6372  // fold (aext (sext x)) -> (sext x)
6373  if (N0.getOpcode() == ISD::ANY_EXTEND ||
6374  N0.getOpcode() == ISD::ZERO_EXTEND ||
6375  N0.getOpcode() == ISD::SIGN_EXTEND)
6376  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
6377 
6378  // fold (aext (truncate (load x))) -> (aext (smaller load x))
6379  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
6380  if (N0.getOpcode() == ISD::TRUNCATE) {
6381  SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
6382  if (NarrowLoad.getNode()) {
6383  SDNode* oye = N0.getNode()->getOperand(0).getNode();
6384  if (NarrowLoad.getNode() != N0.getNode()) {
6385  CombineTo(N0.getNode(), NarrowLoad);
6386  // CombineTo deleted the truncate, if needed, but not what's under it.
6387  AddToWorklist(oye);
6388  }
6389  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6390  }
6391  }
6392 
6393  // fold (aext (truncate x))
6394  if (N0.getOpcode() == ISD::TRUNCATE) {
6395  SDValue TruncOp = N0.getOperand(0);
6396  if (TruncOp.getValueType() == VT)
6397  return TruncOp; // x iff x size == zext size.
6398  if (TruncOp.getValueType().bitsGT(VT))
6399  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
6400  return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
6401  }
6402 
6403  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
6404  // if the trunc is not free.
6405  if (N0.getOpcode() == ISD::AND &&
6406  N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6407  N0.getOperand(1).getOpcode() == ISD::Constant &&
6408  !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
6409  N0.getValueType())) {
6410  SDValue X = N0.getOperand(0).getOperand(0);
6411  if (X.getValueType().bitsLT(VT)) {
6412  X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
6413  } else if (X.getValueType().bitsGT(VT)) {
6414  X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
6415  }
6416  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
6417  Mask = Mask.zext(VT.getSizeInBits());
6418  SDLoc DL(N);
6419  return DAG.getNode(ISD::AND, DL, VT,
6420  X, DAG.getConstant(Mask, DL, VT));
6421  }
6422 
6423  // fold (aext (load x)) -> (aext (truncate (extload x)))
6424  // None of the supported targets knows how to perform load and any_ext
6425  // on vectors in one instruction. We only perform this transformation on
6426  // scalars.
6427  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
6428  ISD::isUNINDEXEDLoad(N0.getNode()) &&
6429  TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
6430  bool DoXform = true;
6431  SmallVector<SDNode*, 4> SetCCs;
6432  if (!N0.hasOneUse())
6433  DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
6434  if (DoXform) {
6435  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6436  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
6437  LN0->getChain(),
6438  LN0->getBasePtr(), N0.getValueType(),
6439  LN0->getMemOperand());
6440  CombineTo(N, ExtLoad);
6441  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6442  N0.getValueType(), ExtLoad);
6443  CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
6444  ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
6445  ISD::ANY_EXTEND);
6446  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6447  }
6448  }
6449 
6450  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
6451  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
6452  // fold (aext ( extload x)) -> (aext (truncate (extload x)))
6453  if (N0.getOpcode() == ISD::LOAD &&
6455  N0.hasOneUse()) {
6456  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6457  ISD::LoadExtType ExtType = LN0->getExtensionType();
6458  EVT MemVT = LN0->getMemoryVT();
6459  if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
6460  SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
6461  VT, LN0->getChain(), LN0->getBasePtr(),
6462  MemVT, LN0->getMemOperand());
6463  CombineTo(N, ExtLoad);
6464  CombineTo(N0.getNode(),
6465  DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
6466  N0.getValueType(), ExtLoad),
6467  ExtLoad.getValue(1));
6468  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6469  }
6470  }
6471 
6472  if (N0.getOpcode() == ISD::SETCC) {
6473  // For vectors:
6474  // aext(setcc) -> vsetcc
6475  // aext(setcc) -> truncate(vsetcc)
6476  // aext(setcc) -> aext(vsetcc)
6477  // Only do this before legalize for now.
6478  if (VT.isVector() && !LegalOperations) {
6479  EVT N0VT = N0.getOperand(0).getValueType();
6480  // We know that the # elements of the results is the same as the
6481  // # elements of the compare (and the # elements of the compare result
6482  // for that matter). Check to see that they are the same size. If so,
6483  // we know that the element size of the sext'd result matches the
6484  // element size of the compare operands.
6485  if (VT.getSizeInBits() == N0VT.getSizeInBits())
6486  return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
6487  N0.getOperand(1),
6488  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6489  // If the desired elements are smaller or larger than the source
6490  // elements we can use a matching integer vector type and then
6491  // truncate/any extend
6492  else {
6493  EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
6494  SDValue VsetCC =
6495  DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
6496  N0.getOperand(1),
6497  cast<CondCodeSDNode>(N0.getOperand(2))->get());
6498  return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
6499  }
6500  }
6501 
6502  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
6503  SDLoc DL(N);
6504  SDValue SCC =
6505  SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
6506  DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
6507  cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
6508  if (SCC.getNode())
6509  return SCC;
6510  }
6511 
6512  return SDValue();
6513 }
6514 
6515 /// See if the specified operand can be simplified with the knowledge that only
6516 /// the bits specified by Mask are used. If so, return the simpler operand,
6517 /// otherwise return a null SDValue.
6518 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
6519  switch (V.getOpcode()) {
6520  default: break;
6521  case ISD::Constant: {
6522  const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
6523  assert(CV && "Const value should be ConstSDNode.");
6524  const APInt &CVal = CV->getAPIntValue();
6525  APInt NewVal = CVal & Mask;
6526  if (NewVal != CVal)
6527  return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
6528  break;
6529  }
6530  case ISD::OR:
6531  case ISD::XOR:
6532  // If the LHS or RHS don't contribute bits to the or, drop them.
6533  if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
6534  return V.getOperand(1);
6535  if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
6536  return V.getOperand(0);
6537  break;
6538  case ISD::SRL:
6539  // Only look at single-use SRLs.
6540  if (!V.getNode()->hasOneUse())
6541  break;
6542  if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
6543  // See if we can recursively simplify the LHS.
6544  unsigned Amt = RHSC->getZExtValue();
6545 
6546  // Watch out for shift count overflow though.
6547  if (Amt >= Mask.getBitWidth()) break;
6548  APInt NewMask = Mask << Amt;
6549  SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
6550  if (SimplifyLHS.getNode())
6551  return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
6552  SimplifyLHS, V.getOperand(1));
6553  }
6554  }
6555  return SDValue();
6556 }
6557 
6558 /// If the result of a wider load is shifted to right of N bits and then
6559 /// truncated to a narrower type and where N is a multiple of number of bits of
6560 /// the narrower type, transform it to a narrower load from address + N / num of
6561 /// bits of new type. If the result is to be extended, also fold the extension
6562 /// to form a extending load.
6563 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
6564  unsigned Opc = N->getOpcode();
6565 
6567  SDValue N0 = N->getOperand(0);
6568  EVT VT = N->getValueType(0);
6569  EVT ExtVT = VT;
6570 
6571  // This transformation isn't valid for vector loads.
6572  if (VT.isVector())
6573  return SDValue();
6574 
6575  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
6576  // extended to VT.
6577  if (Opc == ISD::SIGN_EXTEND_INREG) {
6578  ExtType = ISD::SEXTLOAD;
6579  ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6580  } else if (Opc == ISD::SRL) {
6581  // Another special-case: SRL is basically zero-extending a narrower value.
6582  ExtType = ISD::ZEXTLOAD;
6583  N0 = SDValue(N, 0);
6585  if (!N01) return SDValue();
6586  ExtVT = EVT::getIntegerVT(*DAG.getContext(),
6587  VT.getSizeInBits() - N01->getZExtValue());
6588  }
6589  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
6590  return SDValue();
6591 
6592  unsigned EVTBits = ExtVT.getSizeInBits();
6593 
6594  // Do not generate loads of non-round integer types since these can
6595  // be expensive (and would be wrong if the type is not byte sized).
6596  if (!ExtVT.isRound())
6597  return SDValue();
6598 
6599  unsigned ShAmt = 0;
6600  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
6601  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6602  ShAmt = N01->getZExtValue();
6603  // Is the shift amount a multiple of size of VT?
6604  if ((ShAmt & (EVTBits-1)) == 0) {
6605  N0 = N0.getOperand(0);
6606  // Is the load width a multiple of size of VT?
6607  if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
6608  return SDValue();
6609  }
6610 
6611  // At this point, we must have a load or else we can't do the transform.
6612  if (!isa<LoadSDNode>(N0)) return SDValue();
6613 
6614  // Because a SRL must be assumed to *need* to zero-extend the high bits
6615  // (as opposed to anyext the high bits), we can't combine the zextload
6616  // lowering of SRL and an sextload.
6617  if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
6618  return SDValue();
6619 
6620  // If the shift amount is larger than the input type then we're not
6621  // accessing any of the loaded bytes. If the load was a zextload/extload
6622  // then the result of the shift+trunc is zero/undef (handled elsewhere).
6623  if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
6624  return SDValue();
6625  }
6626  }
6627 
6628  // If the load is shifted left (and the result isn't shifted back right),
6629  // we can fold the truncate through the shift.
6630  unsigned ShLeftAmt = 0;
6631  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
6632  ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
6633  if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6634  ShLeftAmt = N01->getZExtValue();
6635  N0 = N0.getOperand(0);
6636  }
6637  }
6638 
6639  // If we haven't found a load, we can't narrow it. Don't transform one with
6640  // multiple uses, this would require adding a new load.
6641  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
6642  return SDValue();
6643 
6644  // Don't change the width of a volatile load.
6645  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6646  if (LN0->isVolatile())
6647  return SDValue();
6648 
6649  // Verify that we are actually reducing a load width here.
6650  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
6651  return SDValue();
6652 
6653  // For the transform to be legal, the load must produce only two values
6654  // (the value loaded and the chain). Don't transform a pre-increment
6655  // load, for example, which produces an extra value. Otherwise the
6656  // transformation is not equivalent, and the downstream logic to replace
6657  // uses gets things wrong.
6658  if (LN0->getNumValues() > 2)
6659  return SDValue();
6660 
6661  // If the load that we're shrinking is an extload and we're not just
6662  // discarding the extension we can't simply shrink the load. Bail.
6663  // TODO: It would be possible to merge the extensions in some cases.
6664  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
6665  LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
6666  return SDValue();
6667 
6668  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
6669  return SDValue();
6670 
6671  EVT PtrType = N0.getOperand(1).getValueType();
6672 
6673  if (PtrType == MVT::Untyped || PtrType.isExtended())
6674  // It's not possible to generate a constant of extended or untyped type.
6675  return SDValue();
6676 
6677  // For big endian targets, we need to adjust the offset to the pointer to
6678  // load the correct bytes.
6679  if (DAG.getDataLayout().isBigEndian()) {
6680  unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
6681  unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
6682  ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
6683  }
6684 
6685  uint64_t PtrOff = ShAmt / 8;
6686  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
6687  SDLoc DL(LN0);
6688  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
6689  PtrType, LN0->getBasePtr(),
6690  DAG.getConstant(PtrOff, DL, PtrType));
6691  AddToWorklist(NewPtr.getNode());
6692 
6693  SDValue Load;
6694  if (ExtType == ISD::NON_EXTLOAD)
6695  Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
6696  LN0->getPointerInfo().getWithOffset(PtrOff),
6697  LN0->isVolatile(), LN0->isNonTemporal(),
6698  LN0->isInvariant(), NewAlign, LN0->getAAInfo());
6699  else
6700  Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
6701  LN0->getPointerInfo().getWithOffset(PtrOff),
6702  ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
6703  LN0->isInvariant(), NewAlign, LN0->getAAInfo());
6704 
6705  // Replace the old load's chain with the new load's chain.
6706  WorklistRemover DeadNodes(*this);
6707  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
6708 
6709  // Shift the result left, if we've swallowed a left shift.
6710  SDValue Result = Load;
6711  if (ShLeftAmt != 0) {
6712  EVT ShImmTy = getShiftAmountTy(Result.getValueType());
6713  if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
6714  ShImmTy = VT;
6715  // If the shift amount is as large as the result size (but, presumably,
6716  // no larger than the source) then the useful bits of the result are
6717  // zero; we can't simply return the shortened shift, because the result
6718  // of that operation is undefined.
6719  SDLoc DL(N0);
6720  if (ShLeftAmt >= VT.getSizeInBits())
6721  Result = DAG.getConstant(0, DL, VT);
6722  else
6723  Result = DAG.getNode(ISD::SHL, DL, VT,
6724  Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
6725  }
6726 
6727  // Return the new loaded value.
6728  return Result;
6729 }
6730 
6731 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
6732  SDValue N0 = N->getOperand(0);
6733  SDValue N1 = N->getOperand(1);
6734  EVT VT = N->getValueType(0);
6735  EVT EVT = cast<VTSDNode>(N1)->getVT();
6736  unsigned VTBits = VT.getScalarType().getSizeInBits();
6737  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
6738 
6739  // fold (sext_in_reg c1) -> c1
6740  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
6741  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
6742 
6743  // If the input is already sign extended, just drop the extension.
6744  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
6745  return N0;
6746 
6747  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
6748  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
6749  EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
6750  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
6751  N0.getOperand(0), N1);
6752 
6753  // fold (sext_in_reg (sext x)) -> (sext x)
6754  // fold (sext_in_reg (aext x)) -> (sext x)
6755  // if x is small enough.
6756  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
6757  SDValue N00 = N0.getOperand(0);
6758  if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
6759  (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
6760  return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
6761  }
6762 
6763  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
6764  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
6765  return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
6766 
6767  // fold operands of sext_in_reg based on knowledge that the top bits are not
6768  // demanded.
6769  if (SimplifyDemandedBits(SDValue(N, 0)))
6770  return SDValue(N, 0);
6771 
6772  // fold (sext_in_reg (load x)) -> (smaller sextload x)
6773  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
6774  SDValue NarrowLoad = ReduceLoadWidth(N);
6775  if (NarrowLoad.getNode())
6776  return NarrowLoad;
6777 
6778  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
6779  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
6780  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
6781  if (N0.getOpcode() == ISD::SRL) {
6782  if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
6783  if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
6784  // We can turn this into an SRA iff the input to the SRL is already sign
6785  // extended enough.
6786  unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
6787  if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
6788  return DAG.getNode(ISD::SRA, SDLoc(N), VT,
6789  N0.getOperand(0), N0.getOperand(1));
6790  }
6791  }
6792 
6793  // fold (sext_inreg (extload x)) -> (sextload x)
6794  if (ISD::isEXTLoad(N0.getNode()) &&
6795  ISD::isUNINDEXEDLoad(N0.getNode()) &&
6796  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
6797  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
6798  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
6799  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6800  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6801  LN0->getChain(),
6802  LN0->getBasePtr(), EVT,
6803  LN0->getMemOperand());
6804  CombineTo(N, ExtLoad);
6805  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6806  AddToWorklist(ExtLoad.getNode());
6807  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6808  }
6809  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
6810  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
6811  N0.hasOneUse() &&
6812  EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
6813  ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
6814  TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
6815  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
6816  SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
6817  LN0->getChain(),
6818  LN0->getBasePtr(), EVT,
6819  LN0->getMemOperand());
6820  CombineTo(N, ExtLoad);
6821  CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
6822  return SDValue(N, 0); // Return N so it doesn't get rechecked!
6823  }
6824 
6825  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
6826  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
6827  SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
6828  N0.getOperand(1), false);
6829  if (BSwap.getNode())
6830  return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
6831  BSwap, N1);
6832  }
6833 
6834  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
6835  // into a build_vector.
6838  unsigned NumElts = N0->getNumOperands();
6839  unsigned ShAmt = VTBits - EVTBits;
6840 
6841  for (unsigned i = 0; i != NumElts; ++i) {
6842  SDValue Op = N0->getOperand(i);
6843  if (Op->getOpcode() == ISD::UNDEF) {
6844  Elts.push_back(Op);
6845  continue;
6846  }
6847 
6848  ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
6849  const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
6850  Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
6851  SDLoc(Op), Op.getValueType()));
6852  }
6853 
6854  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
6855  }
6856 
6857  return SDValue();
6858 }
6859 
6860 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
6861  SDValue N0 = N->getOperand(0);
6862  EVT VT = N->getValueType(0);
6863 
6864  if (N0.getOpcode() == ISD::UNDEF)
6865  return DAG.getUNDEF(VT);
6866 
6867  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
6868  LegalOperations))
6869  return SDValue(Res, 0);
6870 
6871  return SDValue();
6872 }
6873 
6874 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
6875  SDValue N0 = N->getOperand(0);
6876  EVT VT = N->getValueType(0);
6877  bool isLE = DAG.getDataLayout().isLittleEndian();
6878 
6879  // noop truncate
6880  if (N0.getValueType() == N->getValueType(0))
6881  return N0;
6882  // fold (truncate c1) -> c1
6884  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
6885  // fold (truncate (truncate x)) -> (truncate x)
6886  if (N0.getOpcode() == ISD::TRUNCATE)
6887  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
6888  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
6889  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
6890  N0.getOpcode() == ISD::SIGN_EXTEND ||
6891  N0.getOpcode() == ISD::ANY_EXTEND) {
6892  if (N0.getOperand(0).getValueType().bitsLT(VT))
6893  // if the source is smaller than the dest, we still need an extend
6894  return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
6895  N0.getOperand(0));
6896  if (N0.getOperand(0).getValueType().bitsGT(VT))
6897  // if the source is larger than the dest, than we just need the truncate
6898  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
6899  // if the source and dest are the same type, we can drop both the extend
6900  // and the truncate.
6901  return N0.getOperand(0);
6902  }
6903 
6904  // Fold extract-and-trunc into a narrow extract. For example:
6905  // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
6906  // i32 y = TRUNCATE(i64 x)
6907  // -- becomes --
6908  // v16i8 b = BITCAST (v2i64 val)
6909  // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
6910  //
6911  // Note: We only run this optimization after type legalization (which often
6912  // creates this pattern) and before operation legalization after which
6913  // we need to be more careful about the vector instructions that we generate.
6914  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6915  LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
6916 
6917  EVT VecTy = N0.getOperand(0).getValueType();
6918  EVT ExTy = N0.getValueType();
6919  EVT TrTy = N->getValueType(0);
6920 
6921  unsigned NumElem = VecTy.getVectorNumElements();
6922  unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
6923 
6924  EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
6925  assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
6926 
6927  SDValue EltNo = N0->getOperand(1);
6928  if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
6929  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
6930  EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
6931  int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
6932 
6933  SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
6934  NVT, N0.getOperand(0));
6935 
6936  SDLoc DL(N);
6937  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
6938  DL, TrTy, V,
6939  DAG.getConstant(Index, DL, IndexTy));
6940  }
6941  }
6942 
6943  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
6944  if (N0.getOpcode() == ISD::SELECT) {
6945  EVT SrcVT = N0.getValueType();
6946  if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
6947  TLI.isTruncateFree(SrcVT, VT)) {
6948  SDLoc SL(N0);
6949  SDValue Cond = N0.getOperand(0);
6950  SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
6951  SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
6952  return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
6953  }
6954  }
6955 
6956  // Fold a series of buildvector, bitcast, and truncate if possible.
6957  // For example fold
6958  // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
6959  // (2xi32 (buildvector x, y)).
6960  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
6961  N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
6962  N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
6963  N0.getOperand(0).hasOneUse()) {
6964 
6965  SDValue BuildVect = N0.getOperand(0);
6966  EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
6967  EVT TruncVecEltTy = VT.getVectorElementType();
6968 
6969  // Check that the element types match.
6970  if (BuildVectEltTy == TruncVecEltTy) {
6971  // Now we only need to compute the offset of the truncated elements.
6972  unsigned BuildVecNumElts = BuildVect.getNumOperands();
6973  unsigned TruncVecNumElts = VT.getVectorNumElements();
6974  unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
6975 
6976  assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
6977  "Invalid number of elements");
6978 
6980  for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
6981  Opnds.push_back(BuildVect.getOperand(i));
6982 
6983  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
6984  }
6985  }
6986 
6987  // See if we can simplify the input to this truncate through knowledge that
6988  // only the low bits are being used.
6989  // For example "trunc (or (shl x, 8), y)" // -> trunc y
6990  // Currently we only perform this optimization on scalars because vectors
6991  // may have different active low bits.
6992  if (!VT.isVector()) {
6993  SDValue Shorter =
6994  GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
6995  VT.getSizeInBits()));
6996  if (Shorter.getNode())
6997  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
6998  }
6999  // fold (truncate (load x)) -> (smaller load x)
7000  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
7001  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
7002  SDValue Reduced = ReduceLoadWidth(N);
7003  if (Reduced.getNode())
7004  return Reduced;
7005  // Handle the case where the load remains an extending load even
7006  // after truncation.
7007  if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
7008  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7009  if (!LN0->isVolatile() &&
7010  LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
7011  SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
7012  VT, LN0->getChain(), LN0->getBasePtr(),
7013  LN0->getMemoryVT(),
7014  LN0->getMemOperand());
7015  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
7016  return NewLoad;
7017  }
7018  }
7019  }
7020  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
7021  // where ... are all 'undef'.
7022  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
7023  SmallVector<EVT, 8> VTs;
7024  SDValue V;
7025  unsigned Idx = 0;
7026  unsigned NumDefs = 0;
7027 
7028  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
7029  SDValue X = N0.getOperand(i);
7030  if (X.getOpcode() != ISD::UNDEF) {
7031  V = X;
7032  Idx = i;
7033  NumDefs++;
7034  }
7035  // Stop if more than one members are non-undef.
7036  if (NumDefs > 1)
7037  break;
7038  VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
7039  VT.getVectorElementType(),
7041  }
7042 
7043  if (NumDefs == 0)
7044  return DAG.getUNDEF(VT);
7045 
7046  if (NumDefs == 1) {
7047  assert(V.getNode() && "The single defined operand is empty!");
7049  for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
7050  if (i != Idx) {
7051  Opnds.push_back(DAG.getUNDEF(VTs[i]));
7052  continue;
7053  }
7054  SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
7055  AddToWorklist(NV.getNode());
7056  Opnds.push_back(NV);
7057  }
7058  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
7059  }
7060  }
7061 
7062  // Simplify the operands using demanded-bits information.
7063  if (!VT.isVector() &&
7065  return SDValue(N, 0);
7066 
7067  return SDValue();
7068 }
7069 
7070 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
7071  SDValue Elt = N->getOperand(i);
7072  if (Elt.getOpcode() != ISD::MERGE_VALUES)
7073  return Elt.getNode();
7074  return Elt.getOperand(Elt.getResNo()).getNode();
7075 }
7076 
7077 /// build_pair (load, load) -> load
7078 /// if load locations are consecutive.
7079 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
7080  assert(N->getOpcode() == ISD::BUILD_PAIR);
7081 
7084  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
7085  LD1->getAddressSpace() != LD2->getAddressSpace())
7086  return SDValue();
7087  EVT LD1VT = LD1->getValueType(0);
7088 
7089  if (ISD::isNON_EXTLoad(LD2) &&
7090  LD2->hasOneUse() &&
7091  // If both are volatile this would reduce the number of volatile loads.
7092  // If one is volatile it might be ok, but play conservative and bail out.
7093  !LD1->isVolatile() &&
7094  !LD2->isVolatile() &&
7095  DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
7096  unsigned Align = LD1->getAlignment();
7097  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
7098  VT.getTypeForEVT(*DAG.getContext()));
7099 
7100  if (NewAlign <= Align &&
7101  (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
7102  return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
7103  LD1->getBasePtr(), LD1->getPointerInfo(),
7104  false, false, false, Align);
7105  }
7106 
7107  return SDValue();
7108 }
7109 
7110 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
7111  SDValue N0 = N->getOperand(0);
7112  EVT VT = N->getValueType(0);
7113 
7114  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
7115  // Only do this before legalize, since afterward the target may be depending
7116  // on the bitconvert.
7117  // First check to see if this is all constant.
7118  if (!LegalTypes &&
7119  N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
7120  VT.isVector()) {
7121  bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
7122 
7123  EVT DestEltVT = N->getValueType(0).getVectorElementType();
7124  assert(!DestEltVT.isVector() &&
7125  "Element type of vector ValueType must not be vector!");
7126  if (isSimple)
7127  return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
7128  }
7129 
7130  // If the input is a constant, let getNode fold it.
7131  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
7132  // If we can't allow illegal operations, we need to check that this is just
7133  // a fp -> int or int -> conversion and that the resulting operation will
7134  // be legal.
7135  if (!LegalOperations ||
7136  (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
7137  TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
7138  (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
7139  TLI.isOperationLegal(ISD::Constant, VT)))
7140  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
7141  }
7142 
7143  // (conv (conv x, t1), t2) -> (conv x, t2)
7144  if (N0.getOpcode() == ISD::BITCAST)
7145  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
7146  N0.getOperand(0));
7147 
7148  // fold (conv (load x)) -> (load (conv*)x)
7149  // If the resultant load doesn't need a higher alignment than the original!
7150  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
7151  // Do not change the width of a volatile load.
7152  !cast<LoadSDNode>(N0)->isVolatile() &&
7153  // Do not remove the cast if the types differ in endian layout.
7154  TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
7155  TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
7156  (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
7157  TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
7158  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7159  unsigned Align = DAG.getDataLayout().getABITypeAlignment(
7160  VT.getTypeForEVT(*DAG.getContext()));
7161  unsigned OrigAlign = LN0->getAlignment();
7162 
7163  if (Align <= OrigAlign) {
7164  SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
7165  LN0->getBasePtr(), LN0->getPointerInfo(),
7166  LN0->isVolatile(), LN0->isNonTemporal(),
7167  LN0->isInvariant(), OrigAlign,
7168  LN0->getAAInfo());
7169  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
7170  return Load;
7171  }
7172  }
7173 
7174  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
7175  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
7176  // This often reduces constant pool loads.
7177  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
7178  (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
7179  N0.getNode()->hasOneUse() && VT.isInteger() &&
7180  !VT.isVector() && !N0.getValueType().isVector()) {
7181  SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
7182  N0.getOperand(0));
7183  AddToWorklist(NewConv.getNode());
7184 
7185  SDLoc DL(N);
7186  APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7187  if (N0.getOpcode() == ISD::FNEG)
7188  return DAG.getNode(ISD::XOR, DL, VT,
7189  NewConv, DAG.getConstant(SignBit, DL, VT));
7190  assert(N0.getOpcode() == ISD::FABS);
7191  return DAG.getNode(ISD::AND, DL, VT,
7192  NewConv, DAG.getConstant(~SignBit, DL, VT));
7193  }
7194 
7195  // fold (bitconvert (fcopysign cst, x)) ->
7196  // (or (and (bitconvert x), sign), (and cst, (not sign)))
7197  // Note that we don't handle (copysign x, cst) because this can always be
7198  // folded to an fneg or fabs.
7199  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
7200  isa<ConstantFPSDNode>(N0.getOperand(0)) &&
7201  VT.isInteger() && !VT.isVector()) {
7202  unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
7203  EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
7204  if (isTypeLegal(IntXVT)) {
7205  SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
7206  IntXVT, N0.getOperand(1));
7207  AddToWorklist(X.getNode());
7208 
7209  // If X has a different width than the result/lhs, sext it or truncate it.
7210  unsigned VTWidth = VT.getSizeInBits();
7211  if (OrigXWidth < VTWidth) {
7212  X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
7213  AddToWorklist(X.getNode());
7214  } else if (OrigXWidth > VTWidth) {
7215  // To get the sign bit in the right place, we have to shift it right
7216  // before truncating.
7217  SDLoc DL(X);
7218  X = DAG.getNode(ISD::SRL, DL,
7219  X.getValueType(), X,
7220  DAG.getConstant(OrigXWidth-VTWidth, DL,
7221  X.getValueType()));
7222  AddToWorklist(X.getNode());
7223  X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7224  AddToWorklist(X.getNode());
7225  }
7226 
7227  APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
7228  X = DAG.getNode(ISD::AND, SDLoc(X), VT,
7229  X, DAG.getConstant(SignBit, SDLoc(X), VT));
7230  AddToWorklist(X.getNode());
7231 
7232  SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
7233  VT, N0.getOperand(0));
7234  Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
7235  Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
7236  AddToWorklist(Cst.getNode());
7237 
7238  return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
7239  }
7240  }
7241 
7242  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
7243  if (N0.getOpcode() == ISD::BUILD_PAIR) {
7244  SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
7245  if (CombineLD.getNode())
7246  return CombineLD;
7247  }
7248 
7249  // Remove double bitcasts from shuffles - this is often a legacy of
7250  // XformToShuffleWithZero being used to combine bitmaskings (of
7251  // float vectors bitcast to integer vectors) into shuffles.
7252  // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
7253  if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
7254  N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
7257  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
7258 
7259  // If operands are a bitcast, peek through if it casts the original VT.
7260  // If operands are a UNDEF or constant, just bitcast back to original VT.
7261  auto PeekThroughBitcast = [&](SDValue Op) {
7262  if (Op.getOpcode() == ISD::BITCAST &&
7263  Op.getOperand(0)->getValueType(0) == VT)
7264  return SDValue(Op.getOperand(0));
7267  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7268  return SDValue();
7269  };
7270 
7271  SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
7272  SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
7273  if (!(SV0 && SV1))
7274  return SDValue();
7275 
7276  int MaskScale =
7278  SmallVector<int, 8> NewMask;
7279  for (int M : SVN->getMask())
7280  for (int i = 0; i != MaskScale; ++i)
7281  NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
7282 
7283  bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7284  if (!LegalMask) {
7285  std::swap(SV0, SV1);
7287  LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
7288  }
7289 
7290  if (LegalMask)
7291  return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
7292  }
7293 
7294  return SDValue();
7295 }
7296 
7297 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
7298  EVT VT = N->getValueType(0);
7299  return CombineConsecutiveLoads(N, VT);
7300 }
7301 
7302 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
7303 /// operands. DstEltVT indicates the destination element value type.
7304 SDValue DAGCombiner::
7305 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
7306  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
7307 
7308  // If this is already the right type, we're done.
7309  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
7310 
7311  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
7312  unsigned DstBitSize = DstEltVT.getSizeInBits();
7313 
7314  // If this is a conversion of N elements of one type to N elements of another
7315  // type, convert each element. This handles FP<->INT cases.
7316  if (SrcBitSize == DstBitSize) {
7317  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7319 
7320  // Due to the FP element handling below calling this routine recursively,
7321  // we can end up with a scalar-to-vector node here.
7322  if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
7323  return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
7324  DAG.getNode(ISD::BITCAST, SDLoc(BV),
7325  DstEltVT, BV->getOperand(0)));
7326 
7328  for (SDValue Op : BV->op_values()) {
7329  // If the vector element type is not legal, the BUILD_VECTOR operands
7330  // are promoted and implicitly truncated. Make that explicit here.
7331  if (Op.getValueType() != SrcEltVT)
7332  Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
7333  Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
7334  DstEltVT, Op));
7335  AddToWorklist(Ops.back().getNode());
7336  }
7337  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
7338  }
7339 
7340  // Otherwise, we're growing or shrinking the elements. To avoid having to
7341  // handle annoying details of growing/shrinking FP values, we convert them to
7342  // int first.
7343  if (SrcEltVT.isFloatingPoint()) {
7344  // Convert the input float vector to a int vector where the elements are the
7345  // same sizes.
7346  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
7347  BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
7348  SrcEltVT = IntVT;
7349  }
7350 
7351  // Now we know the input is an integer vector. If the output is a FP type,
7352  // convert to integer first, then to FP of the right size.
7353  if (DstEltVT.isFloatingPoint()) {
7354  EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
7355  SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
7356 
7357  // Next, convert to FP elements of the same size.
7358  return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
7359  }
7360 
7361  SDLoc DL(BV);
7362 
7363  // Okay, we know the src/dst types are both integers of differing types.
7364  // Handling growing first.
7365  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
7366  if (SrcBitSize < DstBitSize) {
7367  unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
7368 
7370  for (unsigned i = 0, e = BV->getNumOperands(); i != e;
7371  i += NumInputsPerOutput) {
7372  bool isLE = DAG.getDataLayout().isLittleEndian();
7373  APInt NewBits = APInt(DstBitSize, 0);
7374  bool EltIsUndef = true;
7375  for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
7376  // Shift the previously computed bits over.
7377  NewBits <<= SrcBitSize;
7378  SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
7379  if (Op.getOpcode() == ISD::UNDEF) continue;
7380  EltIsUndef = false;
7381 
7382  NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
7383  zextOrTrunc(SrcBitSize).zext(DstBitSize);
7384  }
7385 
7386  if (EltIsUndef)
7387  Ops.push_back(DAG.getUNDEF(DstEltVT));
7388  else
7389  Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
7390  }
7391 
7392  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
7393  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
7394  }
7395 
7396  // Finally, this must be the case where we are shrinking elements: each input
7397  // turns into multiple outputs.
7398  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
7399  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
7400  NumOutputsPerInput*BV->getNumOperands());
7402 
7403  for (const SDValue &Op : BV->op_values()) {
7404  if (Op.getOpcode() == ISD::UNDEF) {
7405  Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
7406  continue;
7407  }
7408 
7409  APInt OpVal = cast<ConstantSDNode>(Op)->
7410  getAPIntValue().zextOrTrunc(SrcBitSize);
7411 
7412  for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
7413  APInt ThisVal = OpVal.trunc(DstBitSize);
7414  Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
7415  OpVal = OpVal.lshr(DstBitSize);
7416  }
7417 
7418  // For big endian targets, swap the order of the pieces of each element.
7419  if (DAG.getDataLayout().isBigEndian())
7420  std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
7421  }
7422 
7423  return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
7424 }
7425 
/// Try to perform FMA combining on a given FADD node.
///
/// Looks for (fadd (fmul ...), ...) patterns (possibly through FP_EXTEND or
/// nested FMA nodes) and rewrites them as fused multiply-add nodes when the
/// target and the FP-math settings allow it. Returns the combined node, or an
/// empty SDValue if no combine applies.
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc SL(N);

  // Fusion is allowed either globally (unsafe-fp-math) or per-instruction
  // via the fp-contract=fast option.
  const TargetOptions &Options = DAG.getTarget().Options;
  bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
                       Options.UnsafeFPMath);

  // Floating-point multiply-add with intermediate rounding.
  bool HasFMAD = (LegalOperations &&
                  TLI.isOperationLegal(ISD::FMAD, VT));

  // Floating-point multiply-add without intermediate rounding.
  bool HasFMA = ((!LegalOperations ||
                  TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
                 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
                 UnsafeFPMath);

  // No valid opcode, do not combine.
  if (!HasFMAD && !HasFMA)
    return SDValue();

  // Always prefer FMAD to FMA for precision.
  unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
  // Aggressive mode fuses even when the fmul has other users (duplicating the
  // multiply); LookThroughFPExt additionally matches through free fpext nodes.
  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
  bool LookThroughFPExt = TLI.isFPExtFree(VT);

  // fold (fadd (fmul x, y), z) -> (fma x, y, z)
  if (N0.getOpcode() == ISD::FMUL &&
      (Aggressive || N0->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N0.getOperand(0), N0.getOperand(1), N1);
  }

  // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
  // Note: Commutes FADD operands.
  if (N1.getOpcode() == ISD::FMUL &&
      (Aggressive || N1->hasOneUse())) {
    return DAG.getNode(PreferredFusedOpcode, SL, VT,
                       N1.getOperand(0), N1.getOperand(1), N0);
  }

  // Look through FP_EXTEND nodes to do more combining.
  if (UnsafeFPMath && LookThroughFPExt) {
    // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
    if (N0.getOpcode() == ISD::FP_EXTEND) {
      SDValue N00 = N0.getOperand(0);
      if (N00.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N00.getOperand(1)), N1);
    }

    // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FP_EXTEND) {
      SDValue N10 = N1.getOperand(0);
      if (N10.getOpcode() == ISD::FMUL)
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(0)),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT,
                                       N10.getOperand(1)), N0);
    }
  }

  // More folding opportunities when target permits.
  if ((UnsafeFPMath || HasFMAD) && Aggressive) {
    // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
    if (N0.getOpcode() == PreferredFusedOpcode &&
        N0.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N0.getOperand(2).getOperand(0),
                                     N0.getOperand(2).getOperand(1),
                                     N1));
    }

    // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
    if (N1->getOpcode() == PreferredFusedOpcode &&
        N1.getOperand(2).getOpcode() == ISD::FMUL) {
      return DAG.getNode(PreferredFusedOpcode, SL, VT,
                         N1.getOperand(0), N1.getOperand(1),
                         DAG.getNode(PreferredFusedOpcode, SL, VT,
                                     N1.getOperand(2).getOperand(0),
                                     N1.getOperand(2).getOperand(1),
                                     N0));
    }

    if (UnsafeFPMath && LookThroughFPExt) {
      // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
      //   -> (fma x, y, (fma (fpext u), (fpext v), z))
      // Helper building that result from the matched sub-operands.
      auto FoldFAddFMAFPExtFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == PreferredFusedOpcode) {
        SDValue N02 = N0.getOperand(2);
        if (N02.getOpcode() == ISD::FP_EXTEND) {
          SDValue N020 = N02.getOperand(0);
          if (N020.getOpcode() == ISD::FMUL)
            return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
                                        N020.getOperand(0), N020.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
      //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      auto FoldFAddFPExtFMAFMul = [&] (
          SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
        return DAG.getNode(PreferredFusedOpcode, SL, VT,
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
                           DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
                           DAG.getNode(PreferredFusedOpcode, SL, VT,
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
                                       DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
                                       Z));
      };
      if (N0.getOpcode() == ISD::FP_EXTEND) {
        SDValue N00 = N0.getOperand(0);
        if (N00.getOpcode() == PreferredFusedOpcode) {
          SDValue N002 = N00.getOperand(2);
          if (N002.getOpcode() == ISD::FMUL)
            return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
                                        N002.getOperand(0), N002.getOperand(1),
                                        N1);
        }
      }

      // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
      //   -> (fma y, z, (fma (fpext u), (fpext v), x))
      if (N1.getOpcode() == PreferredFusedOpcode) {
        SDValue N12 = N1.getOperand(2);
        if (N12.getOpcode() == ISD::FP_EXTEND) {
          SDValue N120 = N12.getOperand(0);
          if (N120.getOpcode() == ISD::FMUL)
            return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
                                        N120.getOperand(0), N120.getOperand(1),
                                        N0);
        }
      }

      // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
      //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
      // FIXME: This turns two single-precision and one double-precision
      // operation into two double-precision operations, which might not be
      // interesting for all targets, especially GPUs.
      if (N1.getOpcode() == ISD::FP_EXTEND) {
        SDValue N10 = N1.getOperand(0);
        if (N10.getOpcode() == PreferredFusedOpcode) {
          SDValue N102 = N10.getOperand(2);
          if (N102.getOpcode() == ISD::FMUL)
            return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
                                        N102.getOperand(0), N102.getOperand(1),
                                        N0);
        }
      }
    }
  }

  return SDValue();
}
7602 
7603 /// Try to perform FMA combining on a given FSUB node.
/// Rewrites (fsub (fmul x, y), z) and related shapes into fused
/// multiply-add nodes (ISD::FMA or ISD::FMAD) when the target permits
/// fusion.  Optionally looks through FP_EXTEND and FNEG, and — when the
/// target enables aggressive fusion — recurses into operands that are
/// already fused nodes.  Returns SDValue() when no fold applies.
7604 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
7605  SDValue N0 = N->getOperand(0);
7606  SDValue N1 = N->getOperand(1);
7607  EVT VT = N->getValueType(0);
7608  SDLoc SL(N);
7609 
// Fusion is allowed either per-module via -enable-unsafe-fp-math or via
// -fp-contract=fast (FPOpFusion::Fast).
7610  const TargetOptions &Options = DAG.getTarget().Options;
7611  bool UnsafeFPMath = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
7612  Options.UnsafeFPMath);
7613 
7614  // Floating-point multiply-add with intermediate rounding.
7615  bool HasFMAD = (LegalOperations &&
7616  TLI.isOperationLegal(ISD::FMAD, VT));
7617 
7618  // Floating-point multiply-add without intermediate rounding.
7619  bool HasFMA = ((!LegalOperations ||
7620  TLI.isOperationLegalOrCustom(ISD::FMA, VT)) &&
7621  TLI.isFMAFasterThanFMulAndFAdd(VT) &&
7622  UnsafeFPMath);
7623 
7624  // No valid opcode, do not combine.
7625  if (!HasFMAD && !HasFMA)
7626  return SDValue();
7627 
7628  // Always prefer FMAD to FMA for precision.
7629  unsigned int PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
7630  bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
7631  bool LookThroughFPExt = TLI.isFPExtFree(VT);
7632 
7633  // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
// Without aggressive fusion the FMUL must have a single use, otherwise the
// multiply would still be emitted separately and nothing is saved.
7634  if (N0.getOpcode() == ISD::FMUL &&
7635  (Aggressive || N0->hasOneUse())) {
7636  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7637  N0.getOperand(0), N0.getOperand(1),
7638  DAG.getNode(ISD::FNEG, SL, VT, N1));
7639  }
7640 
7641  // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
7642  // Note: Commutes FSUB operands.
7643  if (N1.getOpcode() == ISD::FMUL &&
7644  (Aggressive || N1->hasOneUse()))
7645  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7646  DAG.getNode(ISD::FNEG, SL, VT,
7647  N1.getOperand(0)),
7648  N1.getOperand(1), N0);
7649 
7650  // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
7651  if (N0.getOpcode() == ISD::FNEG &&
7652  N0.getOperand(0).getOpcode() == ISD::FMUL &&
7653  (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
7654  SDValue N00 = N0.getOperand(0).getOperand(0);
7655  SDValue N01 = N0.getOperand(0).getOperand(1);
7656  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7657  DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
7658  DAG.getNode(ISD::FNEG, SL, VT, N1));
7659  }
7660 
7661  // Look through FP_EXTEND nodes to do more combining.
// These folds widen the multiply operands, so they are only tried when the
// target reports the extension is free and unsafe math allows reassociation.
7662  if (UnsafeFPMath && LookThroughFPExt) {
7663  // fold (fsub (fpext (fmul x, y)), z)
7664  // -> (fma (fpext x), (fpext y), (fneg z))
7665  if (N0.getOpcode() == ISD::FP_EXTEND) {
7666  SDValue N00 = N0.getOperand(0);
7667  if (N00.getOpcode() == ISD::FMUL)
7668  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7669  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7670  N00.getOperand(0)),
7671  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7672  N00.getOperand(1)),
7673  DAG.getNode(ISD::FNEG, SL, VT, N1));
7674  }
7675 
7676  // fold (fsub x, (fpext (fmul y, z)))
7677  // -> (fma (fneg (fpext y)), (fpext z), x)
7678  // Note: Commutes FSUB operands.
7679  if (N1.getOpcode() == ISD::FP_EXTEND) {
7680  SDValue N10 = N1.getOperand(0);
7681  if (N10.getOpcode() == ISD::FMUL)
7682  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7683  DAG.getNode(ISD::FNEG, SL, VT,
7684  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7685  N10.getOperand(0))),
7686  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7687  N10.getOperand(1)),
7688  N0);
7689  }
7690 
7691  // fold (fsub (fpext (fneg (fmul, x, y))), z)
7692  // -> (fneg (fma (fpext x), (fpext y), z))
7693  // Note: This could be removed with appropriate canonicalization of the
7694  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
7695  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
7696  // from implementing the canonicalization in visitFSUB.
7697  if (N0.getOpcode() == ISD::FP_EXTEND) {
7698  SDValue N00 = N0.getOperand(0);
7699  if (N00.getOpcode() == ISD::FNEG) {
7700  SDValue N000 = N00.getOperand(0);
7701  if (N000.getOpcode() == ISD::FMUL) {
7702  return DAG.getNode(ISD::FNEG, SL, VT,
7703  DAG.getNode(PreferredFusedOpcode, SL, VT,
7704  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7705  N000.getOperand(0)),
7706  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7707  N000.getOperand(1)),
7708  N1));
7709  }
7710  }
7711  }
7712 
7713  // fold (fsub (fneg (fpext (fmul, x, y))), z)
7714  // -> (fneg (fma (fpext x), (fpext y), z))
7715  // Note: This could be removed with appropriate canonicalization of the
7716  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
7717  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
7718  // from implementing the canonicalization in visitFSUB.
// Same pattern as above but with FNEG outside the FP_EXTEND.
7719  if (N0.getOpcode() == ISD::FNEG) {
7720  SDValue N00 = N0.getOperand(0);
7721  if (N00.getOpcode() == ISD::FP_EXTEND) {
7722  SDValue N000 = N00.getOperand(0);
7723  if (N000.getOpcode() == ISD::FMUL) {
7724  return DAG.getNode(ISD::FNEG, SL, VT,
7725  DAG.getNode(PreferredFusedOpcode, SL, VT,
7726  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7727  N000.getOperand(0)),
7728  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7729  N000.getOperand(1)),
7730  N1));
7731  }
7732  }
7733  }
7734 
7735  }
7736 
7737  // More folding opportunities when target permits.
// Aggressive mode additionally recurses into operands that are already
// fused nodes, producing chains of FMAs.
7738  if ((UnsafeFPMath || HasFMAD) && Aggressive) {
7739  // fold (fsub (fma x, y, (fmul u, v)), z)
7740  // -> (fma x, y (fma u, v, (fneg z)))
7741  if (N0.getOpcode() == PreferredFusedOpcode &&
7742  N0.getOperand(2).getOpcode() == ISD::FMUL) {
7743  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7744  N0.getOperand(0), N0.getOperand(1),
7745  DAG.getNode(PreferredFusedOpcode, SL, VT,
7746  N0.getOperand(2).getOperand(0),
7747  N0.getOperand(2).getOperand(1),
7748  DAG.getNode(ISD::FNEG, SL, VT,
7749  N1)));
7750  }
7751 
7752  // fold (fsub x, (fma y, z, (fmul u, v)))
7753  // -> (fma (fneg y), z, (fma (fneg u), v, x))
7754  if (N1.getOpcode() == PreferredFusedOpcode &&
7755  N1.getOperand(2).getOpcode() == ISD::FMUL) {
7756  SDValue N20 = N1.getOperand(2).getOperand(0);
7757  SDValue N21 = N1.getOperand(2).getOperand(1);
7758  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7759  DAG.getNode(ISD::FNEG, SL, VT,
7760  N1.getOperand(0)),
7761  N1.getOperand(1),
7762  DAG.getNode(PreferredFusedOpcode, SL, VT,
7763  DAG.getNode(ISD::FNEG, SL, VT, N20),
7764 
7765  N21, N0));
7766  }
7767 
7768  if (UnsafeFPMath && LookThroughFPExt) {
7769  // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
7770  // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
7771  if (N0.getOpcode() == PreferredFusedOpcode) {
7772  SDValue N02 = N0.getOperand(2);
7773  if (N02.getOpcode() == ISD::FP_EXTEND) {
7774  SDValue N020 = N02.getOperand(0);
7775  if (N020.getOpcode() == ISD::FMUL)
7776  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7777  N0.getOperand(0), N0.getOperand(1),
7778  DAG.getNode(PreferredFusedOpcode, SL, VT,
7779  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7780  N020.getOperand(0)),
7781  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7782  N020.getOperand(1)),
7783  DAG.getNode(ISD::FNEG, SL, VT,
7784  N1)));
7785  }
7786  }
7787 
7788  // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
7789  // -> (fma (fpext x), (fpext y),
7790  // (fma (fpext u), (fpext v), (fneg z)))
7791  // FIXME: This turns two single-precision and one double-precision
7792  // operation into two double-precision operations, which might not be
7793  // interesting for all targets, especially GPUs.
7794  if (N0.getOpcode() == ISD::FP_EXTEND) {
7795  SDValue N00 = N0.getOperand(0);
7796  if (N00.getOpcode() == PreferredFusedOpcode) {
7797  SDValue N002 = N00.getOperand(2);
7798  if (N002.getOpcode() == ISD::FMUL)
7799  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7800  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7801  N00.getOperand(0)),
7802  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7803  N00.getOperand(1)),
7804  DAG.getNode(PreferredFusedOpcode, SL, VT,
7805  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7806  N002.getOperand(0)),
7807  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7808  N002.getOperand(1)),
7809  DAG.getNode(ISD::FNEG, SL, VT,
7810  N1)));
7811  }
7812  }
7813 
7814  // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
7815  // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
7816  if (N1.getOpcode() == PreferredFusedOpcode &&
7817  N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
7818  SDValue N120 = N1.getOperand(2).getOperand(0);
7819  if (N120.getOpcode() == ISD::FMUL) {
7820  SDValue N1200 = N120.getOperand(0);
7821  SDValue N1201 = N120.getOperand(1);
7822  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7823  DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
7824  N1.getOperand(1),
7825  DAG.getNode(PreferredFusedOpcode, SL, VT,
7826  DAG.getNode(ISD::FNEG, SL, VT,
7827  DAG.getNode(ISD::FP_EXTEND, SL,
7828  VT, N1200)),
7829  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7830  N1201),
7831  N0));
7832  }
7833  }
7834 
7835  // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
7836  // -> (fma (fneg (fpext y)), (fpext z),
7837  // (fma (fneg (fpext u)), (fpext v), x))
7838  // FIXME: This turns two single-precision and one double-precision
7839  // operation into two double-precision operations, which might not be
7840  // interesting for all targets, especially GPUs.
7841  if (N1.getOpcode() == ISD::FP_EXTEND &&
7842  N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
7843  SDValue N100 = N1.getOperand(0).getOperand(0);
7844  SDValue N101 = N1.getOperand(0).getOperand(1);
7845  SDValue N102 = N1.getOperand(0).getOperand(2);
7846  if (N102.getOpcode() == ISD::FMUL) {
7847  SDValue N1020 = N102.getOperand(0);
7848  SDValue N1021 = N102.getOperand(1);
7849  return DAG.getNode(PreferredFusedOpcode, SL, VT,
7850  DAG.getNode(ISD::FNEG, SL, VT,
7851  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7852  N100)),
7853  DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
7854  DAG.getNode(PreferredFusedOpcode, SL, VT,
7855  DAG.getNode(ISD::FNEG, SL, VT,
7856  DAG.getNode(ISD::FP_EXTEND, SL,
7857  VT, N1020)),
7858  DAG.getNode(ISD::FP_EXTEND, SL, VT,
7859  N1021),
7860  N0));
7861  }
7862  }
7863  }
7864  }
7865 
// No pattern matched.
7866  return SDValue();
7867 }
7868 
/// Combine an ISD::FADD node: constant folding, canonicalization of a
/// constant operand to the RHS, fadd+fneg -> fsub, a family of
/// unsafe-math-only reassociation folds, and finally FADD -> FMA fusion.
/// Returns the replacement value, or SDValue() if no combine applies.
// NOTE(review): this HTML extraction dropped the lines that declare
// N0CFP/N1CFP and the CFP00/CFP01/CFP10/CFP11/CFP locals (original lines
// 7872-7873, 7932-7933, 7953-7954, 7974, 7984 — the content numbering
// jumps over them).  They are presumably the usual
// dyn_cast<ConstantFPSDNode>(...) helpers — verify against the full file.
7869 SDValue DAGCombiner::visitFADD(SDNode *N) {
7870  SDValue N0 = N->getOperand(0);
7871  SDValue N1 = N->getOperand(1);
7874  EVT VT = N->getValueType(0);
7875  SDLoc DL(N);
7876  const TargetOptions &Options = DAG.getTarget().Options;
7877 
7878  // fold vector ops
7879  if (VT.isVector())
7880  if (SDValue FoldedVOp = SimplifyVBinOp(N))
7881  return FoldedVOp;
7882 
7883  // fold (fadd c1, c2) -> c1 + c2
7884  if (N0CFP && N1CFP)
7885  return DAG.getNode(ISD::FADD, DL, VT, N0, N1);
7886 
7887  // canonicalize constant to RHS
7888  if (N0CFP && !N1CFP)
7889  return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
7890 
7891  // fold (fadd A, (fneg B)) -> (fsub A, B)
// isNegatibleForFree == 2 means the negation is free without further work.
7892  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
7893  isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
7894  return DAG.getNode(ISD::FSUB, DL, VT, N0,
7895  GetNegatedExpression(N1, DAG, LegalOperations));
7896 
7897  // fold (fadd (fneg A), B) -> (fsub B, A)
7898  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
7899  isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
7900  return DAG.getNode(ISD::FSUB, DL, VT, N1,
7901  GetNegatedExpression(N0, DAG, LegalOperations));
7902 
7903  // If 'unsafe math' is enabled, fold lots of things.
7904  if (Options.UnsafeFPMath) {
7905  // No FP constant should be created after legalization as Instruction
7906  // Selection pass has a hard time dealing with FP constants.
7907  bool AllowNewConst = (Level < AfterLegalizeDAG);
7908 
7909  // fold (fadd A, 0) -> A
7910  if (N1CFP && N1CFP->isZero())
7911  return N0;
7912 
7913  // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
7914  if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
7915  isa<ConstantFPSDNode>(N0.getOperand(1)))
7916  return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
7917  DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1));
7918 
7919  // If allowed, fold (fadd (fneg x), x) -> 0.0
7920  if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
7921  return DAG.getConstantFP(0.0, DL, VT);
7922 
7923  // If allowed, fold (fadd x, (fneg x)) -> 0.0
7924  if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
7925  return DAG.getConstantFP(0.0, DL, VT);
7926 
7927  // We can fold chains of FADD's of the same value into multiplications.
7928  // This transform is not safe in general because we are reducing the number
7929  // of rounding steps.
7930  if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
7931  if (N0.getOpcode() == ISD::FMUL) {
7934 
7935  // (fadd (fmul x, c), x) -> (fmul x, c+1)
7936  if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
7937  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
7938  DAG.getConstantFP(1.0, DL, VT));
7939  return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
7940  }
7941 
7942  // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
7943  if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
7944  N1.getOperand(0) == N1.getOperand(1) &&
7945  N0.getOperand(0) == N1.getOperand(0)) {
7946  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP01, 0),
7947  DAG.getConstantFP(2.0, DL, VT));
7948  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
7949  }
7950  }
7951 
// Mirror of the two folds above with the FMUL on the RHS.
7955 
7956  // (fadd x, (fmul x, c)) -> (fmul x, c+1)
7957  if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
7958  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
7959  DAG.getConstantFP(1.0, DL, VT));
7960  return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
7961  }
7962 
7963  // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
7964  if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
7965  N0.getOperand(0) == N0.getOperand(1) &&
7966  N1.getOperand(0) == N0.getOperand(0)) {
7967  SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, SDValue(CFP11, 0),
7968  DAG.getConstantFP(2.0, DL, VT));
7969  return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
7970  }
7971  }
7972 
7973  if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
7975  // (fadd (fadd x, x), x) -> (fmul x, 3.0)
7976  if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
7977  (N0.getOperand(0) == N1)) {
7978  return DAG.getNode(ISD::FMUL, DL, VT,
7979  N1, DAG.getConstantFP(3.0, DL, VT));
7980  }
7981  }
7982 
7983  if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
7985  // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
7986  if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
7987  N1.getOperand(0) == N0) {
7988  return DAG.getNode(ISD::FMUL, DL, VT,
7989  N0, DAG.getConstantFP(3.0, DL, VT));
7990  }
7991  }
7992 
7993  // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
7994  if (AllowNewConst &&
7995  N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
7996  N0.getOperand(0) == N0.getOperand(1) &&
7997  N1.getOperand(0) == N1.getOperand(1) &&
7998  N0.getOperand(0) == N1.getOperand(0)) {
7999  return DAG.getNode(ISD::FMUL, DL, VT,
8000  N0.getOperand(0), DAG.getConstantFP(4.0, DL, VT));
8001  }
8002  }
8003  } // enable-unsafe-fp-math
8004 
8005  // FADD -> FMA combines:
8006  SDValue Fused = visitFADDForFMACombine(N);
8007  if (Fused) {
8008  AddToWorklist(Fused.getNode());
8009  return Fused;
8010  }
8011 
8012  return SDValue();
8013 }
8014 
/// Combine an ISD::FSUB node: constant folding, fsub+fneg -> fadd,
/// several unsafe-math identities (x-0, 0-x, x-x, x-(x+y)), and finally
/// FSUB -> FMA fusion.  Returns SDValue() when nothing applies.
// NOTE(review): the extraction dropped original lines 8018-8019, which
// presumably declare N0CFP/N1CFP (ConstantFPSDNode casts of N0/N1) —
// confirm against the full file.
8015 SDValue DAGCombiner::visitFSUB(SDNode *N) {
8016  SDValue N0 = N->getOperand(0);
8017  SDValue N1 = N->getOperand(1);
8020  EVT VT = N->getValueType(0);
8021  SDLoc dl(N);
8022  const TargetOptions &Options = DAG.getTarget().Options;
8023 
8024  // fold vector ops
8025  if (VT.isVector())
8026  if (SDValue FoldedVOp = SimplifyVBinOp(N))
8027  return FoldedVOp;
8028 
8029  // fold (fsub c1, c2) -> c1-c2
8030  if (N0CFP && N1CFP)
8031  return DAG.getNode(ISD::FSUB, dl, VT, N0, N1);
8032 
8033  // fold (fsub A, (fneg B)) -> (fadd A, B)
8034  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8035  return DAG.getNode(ISD::FADD, dl, VT, N0,
8036  GetNegatedExpression(N1, DAG, LegalOperations));
8037 
8038  // If 'unsafe math' is enabled, fold lots of things.
8039  if (Options.UnsafeFPMath) {
8040  // (fsub A, 0) -> A
8041  if (N1CFP && N1CFP->isZero())
8042  return N0;
8043 
8044  // (fsub 0, B) -> -B
// Prefer a free negation of B; otherwise emit an explicit FNEG if legal.
8045  if (N0CFP && N0CFP->isZero()) {
8046  if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
8047  return GetNegatedExpression(N1, DAG, LegalOperations);
8048  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8049  return DAG.getNode(ISD::FNEG, dl, VT, N1);
8050  }
8051 
8052  // (fsub x, x) -> 0.0
8053  if (N0 == N1)
8054  return DAG.getConstantFP(0.0f, dl, VT);
8055 
8056  // (fsub x, (fadd x, y)) -> (fneg y)
8057  // (fsub x, (fadd y, x)) -> (fneg y)
8058  if (N1.getOpcode() == ISD::FADD) {
8059  SDValue N10 = N1->getOperand(0);
8060  SDValue N11 = N1->getOperand(1);
8061 
8062  if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
8063  return GetNegatedExpression(N11, DAG, LegalOperations);
8064 
8065  if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
8066  return GetNegatedExpression(N10, DAG, LegalOperations);
8067  }
8068  }
8069 
8070  // FSUB -> FMA combines:
8071  SDValue Fused = visitFSUBForFMACombine(N);
8072  if (Fused) {
8073  AddToWorklist(Fused.getNode());
8074  return Fused;
8075  }
8076 
8077  return SDValue();
8078 }
8079 
/// Combine an ISD::FMUL node: constant folding, identities (x*1, x*2,
/// x*-1), unsafe-math reassociation of nested FMULs, and the
/// fneg*fneg -> mul fold.  Returns SDValue() when nothing applies.
// NOTE(review): the extraction dropped original lines 8083-8084
// (presumably the N0CFP/N1CFP declarations) and lines 8101-8102 — the
// condition guarding the "canonicalize constant to RHS" return below —
// confirm against the full file before relying on this listing.
8080 SDValue DAGCombiner::visitFMUL(SDNode *N) {
8081  SDValue N0 = N->getOperand(0);
8082  SDValue N1 = N->getOperand(1);
8085  EVT VT = N->getValueType(0);
8086  SDLoc DL(N);
8087  const TargetOptions &Options = DAG.getTarget().Options;
8088 
8089  // fold vector ops
8090  if (VT.isVector()) {
8091  // This just handles C1 * C2 for vectors. Other vector folds are below.
8092  if (SDValue FoldedVOp = SimplifyVBinOp(N))
8093  return FoldedVOp;
8094  }
8095 
8096  // fold (fmul c1, c2) -> c1*c2
8097  if (N0CFP && N1CFP)
8098  return DAG.getNode(ISD::FMUL, DL, VT, N0, N1);
8099 
8100  // canonicalize constant to RHS
8103  return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
8104 
8105  // fold (fmul A, 1.0) -> A
8106  if (N1CFP && N1CFP->isExactlyValue(1.0))
8107  return N0;
8108 
8109  if (Options.UnsafeFPMath) {
8110  // fold (fmul A, 0) -> 0
// Only valid under unsafe math: A could be NaN/Inf, where A*0 != 0.
8111  if (N1CFP && N1CFP->isZero())
8112  return N1;
8113 
8114  // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
8115  if (N0.getOpcode() == ISD::FMUL) {
8116  // Fold scalars or any vector constants (not just splats).
8117  // This fold is done in general by InstCombine, but extra fmul insts
8118  // may have been generated during lowering.
8119  SDValue N00 = N0.getOperand(0);
8120  SDValue N01 = N0.getOperand(1);
8121  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
8122  auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
8123  auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
8124 
8125  // Check 1: Make sure that the first operand of the inner multiply is NOT
8126  // a constant. Otherwise, we may induce infinite looping.
8127  if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
8128  // Check 2: Make sure that the second operand of the inner multiply and
8129  // the second operand of the outer multiply are constants.
8130  if ((N1CFP && isConstOrConstSplatFP(N01)) ||
8131  (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
8132  SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
8133  return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
8134  }
8135  }
8136  }
8137 
8138  // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
8139  // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
8140  // during an early run of DAGCombiner can prevent folding with fmuls
8141  // inserted during lowering.
8142  if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) {
8143  const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
8144  SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
8145  return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
8146  }
8147  }
8148 
8149  // fold (fmul X, 2.0) -> (fadd X, X)
8150  if (N1CFP && N1CFP->isExactlyValue(+2.0))
8151  return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
8152 
8153  // fold (fmul X, -1.0) -> (fneg X)
8154  if (N1CFP && N1CFP->isExactlyValue(-1.0))
8155  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8156  return DAG.getNode(ISD::FNEG, DL, VT, N0);
8157 
8158  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
8159  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
8160  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
8161  // Both can be negated for free, check to see if at least one is cheaper
8162  // negated.
8163  if (LHSNeg == 2 || RHSNeg == 2)
8164  return DAG.getNode(ISD::FMUL, DL, VT,
8165  GetNegatedExpression(N0, DAG, LegalOperations),
8166  GetNegatedExpression(N1, DAG, LegalOperations));
8167  }
8168  }
8169 
8170  return SDValue();
8171 }
8172 
/// Combine an ISD::FMA node: constant folding, identities involving 0.0,
/// 1.0 and -1.0 multiplicands, canonicalizing a constant multiplicand to
/// operand 1, and unsafe-math merges with adjacent FMUL/FNEG nodes.
/// Returns SDValue() when nothing applies.
// NOTE(review): the extraction dropped original lines 8177-8178, which
// presumably declare N0CFP/N1CFP — confirm against the full file.
8173 SDValue DAGCombiner::visitFMA(SDNode *N) {
8174  SDValue N0 = N->getOperand(0);
8175  SDValue N1 = N->getOperand(1);
8176  SDValue N2 = N->getOperand(2);
8179  EVT VT = N->getValueType(0);
8180  SDLoc dl(N);
8181  const TargetOptions &Options = DAG.getTarget().Options;
8182 
8183  // Constant fold FMA.
8184  if (isa<ConstantFPSDNode>(N0) &&
8185  isa<ConstantFPSDNode>(N1) &&
8186  isa<ConstantFPSDNode>(N2)) {
8187  return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
8188  }
8189 
// fma(0, y, z) / fma(x, 0, z) -> z only under unsafe math (x or y could
// be NaN/Inf, where the product is NaN rather than 0).
8190  if (Options.UnsafeFPMath) {
8191  if (N0CFP && N0CFP->isZero())
8192  return N2;
8193  if (N1CFP && N1CFP->isZero())
8194  return N2;
8195  }
8196  if (N0CFP && N0CFP->isExactlyValue(1.0))
8197  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
8198  if (N1CFP && N1CFP->isExactlyValue(1.0))
8199  return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
8200 
8201  // Canonicalize (fma c, x, y) -> (fma x, c, y)
8202  if (N0CFP && !N1CFP)
8203  return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
8204 
8205  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
8206  if (Options.UnsafeFPMath && N1CFP &&
8207  N2.getOpcode() == ISD::FMUL &&
8208  N0 == N2.getOperand(0) &&
8209  N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
8210  return DAG.getNode(ISD::FMUL, dl, VT, N0,
8211  DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
8212  }
8213 
8214 
8215  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
8216  if (Options.UnsafeFPMath &&
8217  N0.getOpcode() == ISD::FMUL && N1CFP &&
8218  N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
8219  return DAG.getNode(ISD::FMA, dl, VT,
8220  N0.getOperand(0),
8221  DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
8222  N2);
8223  }
8224 
8225  // (fma x, 1, y) -> (fadd x, y)
8226  // (fma x, -1, y) -> (fadd (fneg x), y)
8227  if (N1CFP) {
8228  if (N1CFP->isExactlyValue(1.0))
8229  return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
8230 
8231  if (N1CFP->isExactlyValue(-1.0) &&
8232  (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
8233  SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
8234  AddToWorklist(RHSNeg.getNode());
8235  return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
8236  }
8237  }
8238 
8239  // (fma x, c, x) -> (fmul x, (c+1))
8240  if (Options.UnsafeFPMath && N1CFP && N0 == N2)
8241  return DAG.getNode(ISD::FMUL, dl, VT, N0,
8242  DAG.getNode(ISD::FADD, dl, VT,
8243  N1, DAG.getConstantFP(1.0, dl, VT)));
8244 
8245  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
8246  if (Options.UnsafeFPMath && N1CFP &&
8247  N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
8248  return DAG.getNode(ISD::FMUL, dl, VT, N0,
8249  DAG.getNode(ISD::FADD, dl, VT,
8250  N1, DAG.getConstantFP(-1.0, dl, VT)));
8251 
8252 
8253  return SDValue();
8254 }
8255 
/// Combine an ISD::FDIV node: constant folding, division-by-constant ->
/// multiply-by-reciprocal, reciprocal-sqrt estimate folds, fneg/fneg
/// cancellation, and replacing repeated divisions by the same divisor
/// with one reciprocal plus multiplies.  Returns SDValue() when nothing
/// applies (SDValue(N, 0) when N itself was replaced via CombineTo).
// NOTE(review): the extraction dropped original lines 8259-8260
// (presumably the N0CFP/N1CFP declarations), line 8280 (the statement
// producing `st`, presumably `APFloat::opStatus st = Recip.divide(N1APF,
// APFloat::rmNearestTiesToEven);`) and line 8370 (the declaration of
// `Users`, presumably a SetVector<SDNode *>) — confirm against the full
// file.
8256 SDValue DAGCombiner::visitFDIV(SDNode *N) {
8257  SDValue N0 = N->getOperand(0);
8258  SDValue N1 = N->getOperand(1);
8261  EVT VT = N->getValueType(0);
8262  SDLoc DL(N);
8263  const TargetOptions &Options = DAG.getTarget().Options;
8264 
8265  // fold vector ops
8266  if (VT.isVector())
8267  if (SDValue FoldedVOp = SimplifyVBinOp(N))
8268  return FoldedVOp;
8269 
8270  // fold (fdiv c1, c2) -> c1/c2
8271  if (N0CFP && N1CFP)
8272  return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);
8273 
8274  if (Options.UnsafeFPMath) {
8275  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
8276  if (N1CFP) {
8277  // Compute the reciprocal 1.0 / c2.
8278  APFloat N1APF = N1CFP->getValueAPF();
8279  APFloat Recip(N1APF.getSemantics(), 1); // 1.0
8281  // Only do the transform if the reciprocal is a legal fp immediate that
8282  // isn't too nasty (eg NaN, denormal, ...).
8283  if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
8284  (!LegalOperations ||
8285  // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
8286  // backend)... we should handle this gracefully after Legalize.
8287  // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
8288  TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
8289  TLI.isFPImmLegal(Recip, VT)))
8290  return DAG.getNode(ISD::FMUL, DL, VT, N0,
8291  DAG.getConstantFP(Recip, DL, VT));
8292  }
8293 
8294  // If this FDIV is part of a reciprocal square root, it may be folded
8295  // into a target-specific square root estimate instruction.
// Four shapes are matched below: 1/sqrt(z), 1/fpext(sqrt(z)),
// 1/fpround(sqrt(z)), and x / (y * sqrt(z)).
8296  if (N1.getOpcode() == ISD::FSQRT) {
8297  if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) {
8298  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
8299  }
8300  } else if (N1.getOpcode() == ISD::FP_EXTEND &&
8301  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8302  if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
8303  RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
8304  AddToWorklist(RV.getNode());
8305  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
8306  }
8307  } else if (N1.getOpcode() == ISD::FP_ROUND &&
8308  N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8309  if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) {
8310  RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
8311  AddToWorklist(RV.getNode());
8312  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
8313  }
8314  } else if (N1.getOpcode() == ISD::FMUL) {
8315  // Look through an FMUL. Even though this won't remove the FDIV directly,
8316  // it's still worthwhile to get rid of the FSQRT if possible.
8317  SDValue SqrtOp;
8318  SDValue OtherOp;
8319  if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
8320  SqrtOp = N1.getOperand(0);
8321  OtherOp = N1.getOperand(1);
8322  } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
8323  SqrtOp = N1.getOperand(1);
8324  OtherOp = N1.getOperand(0);
8325  }
8326  if (SqrtOp.getNode()) {
8327  // We found a FSQRT, so try to make this fold:
8328  // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
8329  if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) {
8330  RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp);
8331  AddToWorklist(RV.getNode());
8332  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
8333  }
8334  }
8335  }
8336 
8337  // Fold into a reciprocal estimate and multiply instead of a real divide.
8338  if (SDValue RV = BuildReciprocalEstimate(N1)) {
8339  AddToWorklist(RV.getNode());
8340  return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
8341  }
8342  }
8343 
8344  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
8345  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
8346  if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
8347  // Both can be negated for free, check to see if at least one is cheaper
8348  // negated.
8349  if (LHSNeg == 2 || RHSNeg == 2)
8350  return DAG.getNode(ISD::FDIV, SDLoc(N),  VT,
8351  GetNegatedExpression(N0, DAG, LegalOperations),
8352  GetNegatedExpression(N1, DAG, LegalOperations));
8353  }
8354  }
8355 
8356  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
8357  // reciprocal.
8358  // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
8359  // Notice that this is not always beneficial. One reason is different target
8360  // may have different costs for FDIV and FMUL, so sometimes the cost of two
8361  // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
8362  // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
8363  if (Options.UnsafeFPMath) {
8364  // Skip if current node is a reciprocal.
8365  if (N0CFP && N0CFP->isExactlyValue(1.0))
8366  return SDValue();
8367 
8368  // Find all FDIV users of the same divisor.
8369  // Use a set because duplicates may be present in the user list.
8371  for (auto *U : N1->uses())
8372  if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1)
8373  Users.insert(U);
8374 
// The target decides whether N distinct divisions justify the rewrite.
8375  if (TLI.combineRepeatedFPDivisors(Users.size())) {
8376  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
8377  // FIXME: This optimization requires some level of fast-math, so the
8378  // created reciprocal node should at least have the 'allowReciprocal'
8379  // fast-math-flag set.
8380  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1);
8381 
8382  // Dividend / Divisor -> Dividend * Reciprocal
8383  for (auto *U : Users) {
8384  SDValue Dividend = U->getOperand(0);
8385  if (Dividend != FPOne) {
8386  SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
8387  Reciprocal);
8388  CombineTo(U, NewNode);
8389  } else if (U != Reciprocal.getNode()) {
8390  // In the absence of fast-math-flags, this user node is always the
8391  // same node as Reciprocal, but with FMF they may be different nodes.
8392  CombineTo(U, Reciprocal);
8393  }
8394  }
8395  return SDValue(N, 0); // N was replaced.
8396  }
8397  }
8398 
8399  return SDValue();
8400 }
8401 
/// Combine an ISD::FREM node.  The only fold is constant folding of
/// frem(c1, c2); everything else is left alone.
// NOTE(review): the extraction dropped original lines 8405-8406, which
// presumably declare N0CFP/N1CFP — confirm against the full file.
8402 SDValue DAGCombiner::visitFREM(SDNode *N) {
8403  SDValue N0 = N->getOperand(0);
8404  SDValue N1 = N->getOperand(1);
8407  EVT VT = N->getValueType(0);
8408 
8409  // fold (frem c1, c2) -> fmod(c1,c2)
8410  if (N0CFP && N1CFP)
8411  return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1);
8412 
8413  return SDValue();
8414 }
8415 
8416 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
8417  if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
8418  return SDValue();
8419 
8420  // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
8421  SDValue RV = BuildRsqrtEstimate(N->getOperand(0));
8422  if (!RV)
8423  return SDValue();
8424 
8425  EVT VT = RV.getValueType();
8426  SDLoc DL(N);
8427  RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV);
8428  AddToWorklist(RV.getNode());
8429 
8430  // Unfortunately, RV is now NaN if the input was exactly 0.
8431  // Select out this case and force the answer to 0.
8432  SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
8433  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8434  SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
8435  AddToWorklist(ZeroCmp.getNode());
8436  AddToWorklist(RV.getNode());
8437 
8438  return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
8439  ZeroCmp, Zero, RV);
8440 }
8441 
/// Combine an ISD::FCOPYSIGN node: constant folding, lowering a constant
/// sign operand to fabs/fneg(fabs), and stripping operand wrappers
/// (fabs/fneg/copysign on the magnitude, fabs/copysign/fp_extend/fp_round
/// on the sign source).  Returns SDValue() when nothing applies.
// NOTE(review): the extraction dropped original lines 8445-8446, which
// presumably declare N0CFP/N1CFP — confirm against the full file.
8442 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
8443  SDValue N0 = N->getOperand(0);
8444  SDValue N1 = N->getOperand(1);
8447  EVT VT = N->getValueType(0);
8448 
8449  if (N0CFP && N1CFP) // Constant fold
8450  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
8451 
8452  if (N1CFP) {
8453  const APFloat& V = N1CFP->getValueAPF();
8454  // copysign(x, c1) -> fabs(x) iff ispos(c1)
8455  // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
8456  if (!V.isNegative()) {
8457  if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
8458  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
8459  } else {
8460  if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
8461  return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
8462  DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
8463  }
8464  }
8465 
8466  // copysign(fabs(x), y) -> copysign(x, y)
8467  // copysign(fneg(x), y) -> copysign(x, y)
8468  // copysign(copysign(x,z), y) -> copysign(x, y)
// The sign of operand 0 is irrelevant, so strip anything that only
// changes the sign bit.
8469  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
8470  N0.getOpcode() == ISD::FCOPYSIGN)
8471  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
8472  N0.getOperand(0), N1);
8473 
8474  // copysign(x, abs(y)) -> abs(x)
8475  if (N1.getOpcode() == ISD::FABS)
8476  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
8477 
8478  // copysign(x, copysign(y,z)) -> copysign(x, z)
8479  if (N1.getOpcode() == ISD::FCOPYSIGN)
8480  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
8481  N0, N1.getOperand(1));
8482 
8483  // copysign(x, fp_extend(y)) -> copysign(x, y)
8484  // copysign(x, fp_round(y)) -> copysign(x, y)
// Precision conversions preserve the sign bit, so look through them.
8485  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
8486  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
8487  N0, N1.getOperand(0));
8488 
8489  return SDValue();
8490 }
8491 
// Combine SINT_TO_FP: constant-fold, prefer UINT_TO_FP when the sign bit is
// known zero, and fold conversions of boolean setcc results into SELECT_CC of
// FP constants.
// NOTE(review): the guarding condition line for the constant fold (original
// line 8498, presumably an isConstantFPBuildVectorOrConstantFP-style check on
// N0) is missing from this extraction — confirm against upstream.
8492 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
8493  SDValue N0 = N->getOperand(0);
8494  EVT VT = N->getValueType(0);
8495  EVT OpVT = N0.getValueType();
8496 
8497  // fold (sint_to_fp c1) -> c1fp
8499  // ...but only if the target supports immediate floating-point values
8500  (!LegalOperations ||
8501  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
8502  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
8503 
8504  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
8505  // but UINT_TO_FP is legal on this target, try to convert.
8506  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
8507  TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
8508  // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
       // With a clear sign bit the signed and unsigned interpretations agree.
8509  if (DAG.SignBitIsZero(N0))
8510  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
8511  }
8512 
8513  // The next optimizations are desirable only if SELECT_CC can be lowered.
8514  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
8515  // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
      // An i1 setcc sign-extends to 0 or -1, hence the -1.0/0.0 select arms.
8516  if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
8517  !VT.isVector() &&
8518  (!LegalOperations ||
8519  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
8520  SDLoc DL(N);
8521  SDValue Ops[] =
8522  { N0.getOperand(0), N0.getOperand(1),
8523  DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
8524  N0.getOperand(2) };
8525  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
8526  }
8527 
8528  // fold (sint_to_fp (zext (setcc x, y, cc))) ->
8529  // (select_cc x, y, 1.0, 0.0,, cc)
      // Zero-extension makes the value 0 or 1, so the true arm is 1.0 here.
8530  if (N0.getOpcode() == ISD::ZERO_EXTEND &&
8531  N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
8532  (!LegalOperations ||
8533  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
8534  SDLoc DL(N);
8535  SDValue Ops[] =
8536  { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
8537  DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
8538  N0.getOperand(0).getOperand(2) };
8539  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
8540  }
8541  }
8542 
8543  return SDValue();
8544 }
8545 
// Combine UINT_TO_FP: constant-fold, prefer SINT_TO_FP when the sign bit is
// known zero, and fold a setcc input into a SELECT_CC of 1.0/0.0.
// NOTE(review): the guarding condition line for the constant fold (original
// line 8552) is missing from this extraction — confirm against upstream.
8546 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
8547  SDValue N0 = N->getOperand(0);
8548  EVT VT = N->getValueType(0);
8549  EVT OpVT = N0.getValueType();
8550 
8551  // fold (uint_to_fp c1) -> c1fp
8553  // ...but only if the target supports immediate floating-point values
8554  (!LegalOperations ||
8555  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
8556  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
8557 
8558  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
8559  // but SINT_TO_FP is legal on this target, try to convert.
8560  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
8561  TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
8562  // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
       // With a clear sign bit both conversions produce the same value.
8563  if (DAG.SignBitIsZero(N0))
8564  return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
8565  }
8566 
8567  // The next optimizations are desirable only if SELECT_CC can be lowered.
8568  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
8569  // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
8570 
8571  if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
8572  (!LegalOperations ||
8573  TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
8574  SDLoc DL(N);
8575  SDValue Ops[] =
8576  { N0.getOperand(0), N0.getOperand(1),
8577  DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
8578  N0.getOperand(2) };
8579  return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
8580  }
8581  }
8582 
8583  return SDValue();
8584 }
8585 
8586 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
// Eliminates a round-trip through floating point when the integer range is
// exactly representable in the FP type, replacing it with an integer
// extend/truncate (or the value itself).
// NOTE(review): the function signature line (original line 8587, presumably
// `static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {`) is
// missing from this extraction — confirm against upstream.
8588  SDValue N0 = N->getOperand(0);
8589  EVT VT = N->getValueType(0);
8590 
8591  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
8592  return SDValue();
8593 
8594  SDValue Src = N0.getOperand(0);
8595  EVT SrcVT = Src.getValueType();
8596  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
8597  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
8598 
8599  // We can safely assume the conversion won't overflow the output range,
8600  // because (for example) (uint8_t)18293.f is undefined behavior.
8601 
8602  // Since we can assume the conversion won't overflow, our decision as to
8603  // whether the input will fit in the float should depend on the minimum
8604  // of the input range and output range.
8605 
8606  // This means this is also safe for a signed input and unsigned output, since
8607  // a negative input would lead to undefined behavior.
      // A signed type contributes one fewer magnitude bit (the sign bit).
8608  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
8609  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
8610  unsigned ActualSize = std::min(InputSize, OutputSize);
8611  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
8612 
8613  // We can only fold away the float conversion if the input range can be
8614  // represented exactly in the float range.
8615  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
8616  if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: sign-extend only when both ends are signed, else zero-extend.
8617  unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
8618  : ISD::ZERO_EXTEND;
8619  return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
8620  }
8621  if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
8622  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
8623  if (SrcVT == VT)
8624  return Src;
      // Same width but different types (e.g. vector layout): bitcast.
8625  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
8626  }
8627  return SDValue();
8628 }
8629 
// Combine FP_TO_SINT: constant-fold, then try to eliminate an
// int->fp->int round trip via FoldIntToFPToInt.
// NOTE(review): the constant-check condition line (original line 8635) is
// missing from this extraction — confirm against upstream.
8630 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
8631  SDValue N0 = N->getOperand(0);
8632  EVT VT = N->getValueType(0);
8633 
8634  // fold (fp_to_sint c1fp) -> c1
8636  return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
8637 
8638  return FoldIntToFPToInt(N, DAG);
8639 }
8640 
// Combine FP_TO_UINT: constant-fold, then try to eliminate an
// int->fp->int round trip via FoldIntToFPToInt.
// NOTE(review): the constant-check condition line (original line 8646) is
// missing from this extraction — confirm against upstream.
8641 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
8642  SDValue N0 = N->getOperand(0);
8643  EVT VT = N->getValueType(0);
8644 
8645  // fold (fp_to_uint c1fp) -> c1
8647  return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
8648 
8649  return FoldIntToFPToInt(N, DAG);
8650 }
8651 
// Combine FP_ROUND: constant-fold, cancel with FP_EXTEND, merge double
// rounding where safe, and hoist the round above FCOPYSIGN.
// NOTE(review): the declaration of N0CFP (original line 8655, presumably a
// ConstantFPSDNode dyn_cast of N0) is missing from this extraction — confirm
// against upstream.
8652 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
8653  SDValue N0 = N->getOperand(0);
8654  SDValue N1 = N->getOperand(1);
8656  EVT VT = N->getValueType(0);
8657 
8658  // fold (fp_round c1fp) -> c1fp
8659  if (N0CFP)
8660  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
8661 
8662  // fold (fp_round (fp_extend x)) -> x
8663  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
8664  return N0.getOperand(0);
8665 
8666  // fold (fp_round (fp_round x)) -> (fp_round x)
8667  if (N0.getOpcode() == ISD::FP_ROUND) {
      // Operand 1 == 1 means the fp_round is known value-preserving ("trunc").
8668  const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
8669  const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
8670  // If the first fp_round isn't a value preserving truncation, it might
8671  // introduce a tie in the second fp_round, that wouldn't occur in the
8672  // single-step fp_round we want to fold to.
8673  // In other words, double rounding isn't the same as rounding.
8674  // Also, this is a value preserving truncation iff both fp_round's are.
8675  if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
8676  SDLoc DL(N);
8677  return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
8678  DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
8679  }
8680  }
8681 
8682  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
      // Rounding the magnitude first lets the copysign survive at the narrower
      // type; only done when the copysign has no other users.
8683  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
8684  SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
8685  N0.getOperand(0), N1);
8686  AddToWorklist(Tmp.getNode());
8687  return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
8688  Tmp, N0.getOperand(1));
8689  }
8690 
8691  return SDValue();
8692 }
8693 
// Combine FP_ROUND_INREG: constant-fold by materializing the constant at the
// in-register type and extending back to the result type.
// NOTE(review): the declaration of N0CFP (original line 8698) is missing from
// this extraction — confirm against upstream.
8694 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
8695  SDValue N0 = N->getOperand(0);
8696  EVT VT = N->getValueType(0);
8697  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
8699 
8700  // fold (fp_round_inreg c1fp) -> c1fp
8701  if (N0CFP && isTypeLegal(EVT)) {
8702  SDLoc DL(N);
8703  SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
8704  return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
8705  }
8706 
8707  return SDValue();
8708 }
8709 
// Combine FP_EXTEND: cancel with value-preserving FP_ROUND, widen FP16_TO_FP,
// and turn fpext(load) into an extending load.
// NOTE(review): the constant-check condition line (original line 8720) is
// missing from this extraction — confirm against upstream.
8710 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
8711  SDValue N0 = N->getOperand(0);
8712  EVT VT = N->getValueType(0);
8713 
8714  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
8715  if (N->hasOneUse() &&
8716  N->use_begin()->getOpcode() == ISD::FP_ROUND)
8717  return SDValue();
8718 
8719  // fold (fp_extend c1fp) -> c1fp
8721  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
8722 
8723  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
8724  if (N0.getOpcode() == ISD::FP16_TO_FP &&
8725  TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
8726  return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
8727 
8728  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
8729  // value of X.
8730  if (N0.getOpcode() == ISD::FP_ROUND
8731  && N0.getNode()->getConstantOperandVal(1) == 1) {
8732  SDValue In = N0.getOperand(0);
8733  if (In.getValueType() == VT) return In;
      // Result narrower than X: re-round X directly to VT.
8734  if (VT.bitsLT(In.getValueType()))
8735  return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
8736  In, N0.getOperand(1));
      // Result wider than X: extend X directly to VT.
8737  return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
8738  }
8739 
8740  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
8741  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
8742  TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
8743  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8744  SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
8745  LN0->getChain(),
8746  LN0->getBasePtr(), N0.getValueType(),
8747  LN0->getMemOperand());
8748  CombineTo(N, ExtLoad);
      // Give the original load's other users a value-preserving round of the
      // extending load, keeping its chain result alive.
8749  CombineTo(N0.getNode(),
8750  DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
8751  N0.getValueType(), ExtLoad,
8752  DAG.getIntPtrConstant(1, SDLoc(N0))),
8753  ExtLoad.getValue(1));
8754  return SDValue(N, 0); // Return N so it doesn't get rechecked!
8755  }
8756 
8757  return SDValue();
8758 }
8759 
// Combine FCEIL: constant-fold only.
// NOTE(review): the constant-check condition line (original line 8765) is
// missing from this extraction — confirm against upstream.
8760 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
8761  SDValue N0 = N->getOperand(0);
8762  EVT VT = N->getValueType(0);
8763 
8764  // fold (fceil c1) -> fceil(c1)
8766  return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
8767 
8768  return SDValue();
8769 }
8770 
// Combine FTRUNC: constant-fold only.
// NOTE(review): the constant-check condition line (original line 8776) is
// missing from this extraction — confirm against upstream.
8771 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
8772  SDValue N0 = N->getOperand(0);
8773  EVT VT = N->getValueType(0);
8774 
8775  // fold (ftrunc c1) -> ftrunc(c1)
8777  return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
8778 
8779  return SDValue();
8780 }
8781 
// Combine FFLOOR: constant-fold only.
// NOTE(review): the constant-check condition line (original line 8787) is
// missing from this extraction — confirm against upstream.
8782 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
8783  SDValue N0 = N->getOperand(0);
8784  EVT VT = N->getValueType(0);
8785 
8786  // fold (ffloor c1) -> ffloor(c1)
8788  return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
8789 
8790  return SDValue();
8791 }
8792 
8793 // FIXME: FNEG and FABS have a lot in common; refactor.
// Combine FNEG: constant-fold, push the negation into free sub-expressions,
// rewrite as an integer XOR of the sign bit, and fold fneg(fmul c, x).
// NOTE(review): three lines are missing from this extraction — the constant
// check for the fold at 8800 (orig. line 8799), the per-element SignMask
// computation in the vector branch (orig. line 8818), and the CFP1 dyn_cast
// of N0.getOperand(1) (orig. line 8835). Confirm against upstream.
8794 SDValue DAGCombiner::visitFNEG(SDNode *N) {
8795  SDValue N0 = N->getOperand(0);
8796  EVT VT = N->getValueType(0);
8797 
8798  // Constant fold FNEG.
8800  return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
8801 
8802  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
8803  &DAG.getTarget().Options))
8804  return GetNegatedExpression(N0, DAG, LegalOperations);
8805 
8806  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
8807  // constant pool values.
8808  if (!TLI.isFNegFree(VT) &&
8809  N0.getOpcode() == ISD::BITCAST &&
8810  N0.getNode()->hasOneUse()) {
8811  SDValue Int = N0.getOperand(0);
8812  EVT IntVT = Int.getValueType();
8813  if (IntVT.isInteger() && !IntVT.isVector()) {
8814  APInt SignMask;
8815  if (N0.getValueType().isVector()) {
8816  // For a vector, get a mask such as 0x80... per scalar element
8817  // and splat it.
8819  SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
8820  } else {
8821  // For a scalar, just generate 0x80...
8822  SignMask = APInt::getSignBit(IntVT.getSizeInBits());
8823  }
8824  SDLoc DL0(N0);
      // XOR with the sign mask flips exactly the FP sign bit(s).
8825  Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
8826  DAG.getConstant(SignMask, DL0, IntVT));
8827  AddToWorklist(Int.getNode());
8828  return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
8829  }
8830  }
8831 
8832  // (fneg (fmul c, x)) -> (fmul -c, x)
8833  if (N0.getOpcode() == ISD::FMUL &&
8834  (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
8836  if (CFP1) {
8837  APFloat CVal = CFP1->getValueAPF();
8838  CVal.changeSign();
      // Only profitable after legalization when the negated constant is still
      // cheap to materialize (legal FP immediate or legal ConstantFP node).
8839  if (Level >= AfterLegalizeDAG &&
8840  (TLI.isFPImmLegal(CVal, N->getValueType(0)) ||
8841  TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0))))
8842  return DAG.getNode(
8843  ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
8844  DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)));
8845  }
8846  }
8847 
8848  return SDValue();
8849 }
8850 
8851 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
8852  SDValue N0 = N->getOperand(0);
8853  SDValue N1 = N->getOperand(1);
8854  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8855  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8856 
8857  if (N0CFP && N1CFP) {
8858  const APFloat &C0 = N0CFP->getValueAPF();
8859  const APFloat &C1 = N1CFP->getValueAPF();
8860  return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), N->getValueType(0));
8861  }
8862 
8863  if (N0CFP) {
8864  EVT VT = N->getValueType(0);
8865  // Canonicalize to constant on RHS.
8866  return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
8867  }
8868 
8869  return SDValue();
8870 }
8871 
8872 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
8873  SDValue N0 = N->getOperand(0);
8874  SDValue N1 = N->getOperand(1);
8875  const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
8876  const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
8877 
8878  if (N0CFP && N1CFP) {
8879  const APFloat &C0 = N0CFP->getValueAPF();
8880  const APFloat &C1 = N1CFP->getValueAPF();
8881  return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), N->getValueType(0));
8882  }
8883 
8884  if (N0CFP) {
8885  EVT VT = N->getValueType(0);
8886  // Canonicalize to constant on RHS.
8887  return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
8888  }
8889 
8890  return SDValue();
8891 }
8892 
// Combine FABS: constant-fold, collapse nested fabs/fneg/fcopysign, and
// rewrite as an integer AND that clears the sign bit.
// NOTE(review): the constant-check condition line for the fold at 8899
// (original line 8898) is missing from this extraction — confirm upstream.
8893 SDValue DAGCombiner::visitFABS(SDNode *N) {
8894  SDValue N0 = N->getOperand(0);
8895  EVT VT = N->getValueType(0);
8896 
8897  // fold (fabs c1) -> fabs(c1)
8899  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
8900 
8901  // fold (fabs (fabs x)) -> (fabs x)
8902  if (N0.getOpcode() == ISD::FABS)
8903  return N->getOperand(0);
8904 
8905  // fold (fabs (fneg x)) -> (fabs x)
8906  // fold (fabs (fcopysign x, y)) -> (fabs x)
      // fabs discards the sign anyway, so sign-only wrappers can be removed.
8907  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
8908  return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
8909 
8910  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
8911  // constant pool values.
8912  if (!TLI.isFAbsFree(VT) &&
8913  N0.getOpcode() == ISD::BITCAST &&
8914  N0.getNode()->hasOneUse()) {
8915  SDValue Int = N0.getOperand(0);
8916  EVT IntVT = Int.getValueType();
8917  if (IntVT.isInteger() && !IntVT.isVector()) {
8918  APInt SignMask;
8919  if (N0.getValueType().isVector()) {
8920  // For a vector, get a mask such as 0x7f... per scalar element
8921  // and splat it.
8922  SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
8923  SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
8924  } else {
8925  // For a scalar, just generate 0x7f...
8926  SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
8927  }
8928  SDLoc DL(N0);
      // AND with ~sign clears exactly the FP sign bit(s).
8929  Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
8930  DAG.getConstant(SignMask, DL, IntVT));
8931  AddToWorklist(Int.getNode());
8932  return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
8933  }
8934  }
8935 
8936  return SDValue();
8937 }
8938 
// Combine BRCOND: fold setcc conditions into BR_CC, simplify
// brcond((x & 2^c) >> c) into brcond(setcc(x & 2^c, 0, ne)), and rewrite
// brcond(xor ...) conditions as setcc comparisons.
// NOTE(review): the getSetCCResultType(...) argument line of the getSetCC
// call below (original line 9003) is missing from this extraction — confirm
// against upstream.
8939 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
8940  SDValue Chain = N->getOperand(0);
8941  SDValue N1 = N->getOperand(1);
8942  SDValue N2 = N->getOperand(2);
8943 
8944  // If N is a constant we could fold this into a fallthrough or unconditional
8945  // branch. However that doesn't happen very often in normal code, because
8946  // Instcombine/SimplifyCFG should have handled the available opportunities.
8947  // If we did this folding here, it would be necessary to update the
8948  // MachineBasicBlock CFG, which is awkward.
8949 
8950  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
8951  // on the target.
8952  if (N1.getOpcode() == ISD::SETCC &&
8953  TLI.isOperationLegalOrCustom(ISD::BR_CC,
8954  N1.getOperand(0).getValueType())) {
8955  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
8956  Chain, N1.getOperand(2),
8957  N1.getOperand(0), N1.getOperand(1), N2);
8958  }
8959 
8960  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
8961  ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
8962  (N1.getOperand(0).hasOneUse() &&
8963  N1.getOperand(0).getOpcode() == ISD::SRL))) {
8964  SDNode *Trunc = nullptr;
8965  if (N1.getOpcode() == ISD::TRUNCATE) {
8966  // Look past the truncate.
8967  Trunc = N1.getNode();
8968  N1 = N1.getOperand(0);
8969  }
8970 
8971  // Match this pattern so that we can generate simpler code:
8972  //
8973  // %a = ...
8974  // %b = and i32 %a, 2
8975  // %c = srl i32 %b, 1
8976  // brcond i32 %c ...
8977  //
8978  // into
8979  //
8980  // %a = ...
8981  // %b = and i32 %a, 2
8982  // %c = setcc eq %b, 0
8983  // brcond %c ...
8984  //
8985  // This applies only when the AND constant value has one bit set and the
8986  // SRL constant is equal to the log2 of the AND constant. The back-end is
8987  // smart enough to convert the result into a TEST/JMP sequence.
8988  SDValue Op0 = N1.getOperand(0);
8989  SDValue Op1 = N1.getOperand(1);
8990 
8991  if (Op0.getOpcode() == ISD::AND &&
8992  Op1.getOpcode() == ISD::Constant) {
8993  SDValue AndOp1 = Op0.getOperand(1);
8994 
8995  if (AndOp1.getOpcode() == ISD::Constant) {
8996  const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
8997 
8998  if (AndConst.isPowerOf2() &&
8999  cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
9000  SDLoc DL(N);
9001  SDValue SetCC =
9002  DAG.getSetCC(DL,
9004  Op0, DAG.getConstant(0, DL, Op0.getValueType()),
9005  ISD::SETNE);
9006 
9007  SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
9008  MVT::Other, Chain, SetCC, N2);
9009  // Don't add the new BRCond into the worklist or else SimplifySelectCC
9010  // will convert it back to (X & C1) >> C2.
9011  CombineTo(N, NewBRCond, false);
9012  // Truncate is dead.
9013  if (Trunc)
9014  deleteAndRecombine(Trunc);
9015  // Replace the uses of SRL with SETCC
9016  WorklistRemover DeadNodes(*this);
9017  DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9018  deleteAndRecombine(N1.getNode());
9019  return SDValue(N, 0); // Return N so it doesn't get rechecked!
9020  }
9021  }
9022  }
9023 
9024  if (Trunc)
9025  // Restore N1 if the above transformation doesn't match.
9026  N1 = N->getOperand(1);
9027  }
9028 
9029  // Transform br(xor(x, y)) -> br(x != y)
9030  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
9031  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
9032  SDNode *TheXor = N1.getNode();
9033  SDValue Op0 = TheXor->getOperand(0);
9034  SDValue Op1 = TheXor->getOperand(1);
9035  if (Op0.getOpcode() == Op1.getOpcode()) {
9036  // Avoid missing important xor optimizations.
9037  SDValue Tmp = visitXOR(TheXor);
9038  if (Tmp.getNode()) {
9039  if (Tmp.getNode() != TheXor) {
9040  DEBUG(dbgs() << "\nReplacing.8 ";
9041  TheXor->dump(&DAG);
9042  dbgs() << "\nWith: ";
9043  Tmp.getNode()->dump(&DAG);
9044  dbgs() << '\n');
9045  WorklistRemover DeadNodes(*this);
9046  DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
9047  deleteAndRecombine(TheXor);
9048  return DAG.getNode(ISD::BRCOND, SDLoc(N),
9049  MVT::Other, Chain, Tmp, N2);
9050  }
9051 
9052  // visitXOR has changed XOR's operands or replaced the XOR completely,
9053  // bail out.
9054  return SDValue(N, 0);
9055  }
9056  }
9057 
9058  if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
9059  bool Equal = false;
      // xor(xor(x,y), 1) negates the comparison: branch on x == y instead.
9060  if (isOneConstant(Op0) && Op0.hasOneUse() &&
9061  Op0.getOpcode() == ISD::XOR) {
9062  TheXor = Op0.getNode();
9063  Equal = true;
9064  }
9065 
9066  EVT SetCCVT = N1.getValueType();
9067  if (LegalTypes)
9068  SetCCVT = getSetCCResultType(SetCCVT);
9069  SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
9070  SetCCVT,
9071  Op0, Op1,
9072  Equal ? ISD::SETEQ : ISD::SETNE);
9073  // Replace the uses of XOR with SETCC
9074  WorklistRemover DeadNodes(*this);
9075  DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
9076  deleteAndRecombine(N1.getNode());
9077  return DAG.getNode(ISD::BRCOND, SDLoc(N),
9078  MVT::Other, Chain, SetCC, N2);
9079  }
9080  }
9081 
9082  return SDValue();
9083 }
9084 
9085 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
9086 //
9087 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
9088  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
9089  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
9090 
9091  // If N is a constant we could fold this into a fallthrough or unconditional
9092  // branch. However that doesn't happen very often in normal code, because
9093  // Instcombine/SimplifyCFG should have handled the available opportunities.
9094  // If we did this folding here, it would be necessary to update the
9095  // MachineBasicBlock CFG, which is awkward.
9096 
9097  // Use SimplifySetCC to simplify SETCC's.
9098  SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
9099  CondLHS, CondRHS, CC->get(), SDLoc(N),
9100  false);
9101  if (Simp.getNode()) AddToWorklist(Simp.getNode());
9102 
9103  // fold to a simpler setcc
9104  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
9105  return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
9106  N->getOperand(0), Simp.getOperand(2),
9107  Simp.getOperand(0), Simp.getOperand(1),
9108  N->getOperand(4));
9109 
9110  return SDValue();
9111 }
9112 
9113 /// Return true if 'Use' is a load or a store that uses N as its base pointer
9114 /// and that N may be folded in the load / store addressing mode.
// NOTE(review): this extraction is missing the function signature line
// (original line 9115, presumably `static bool canFoldInAddressingMode(
// SDNode *N, SDNode *Use, ...)`), the AddrMode declaration (orig. 9134), and
// the ConstantSDNode dyn_casts defining Offset in both the ADD and SUB
// branches (orig. 9136 and 9144). Confirm against upstream.
9116  SelectionDAG &DAG,
9117  const TargetLowering &TLI) {
9118  EVT VT;
9119  unsigned AS;
9120 
      // Only an un-indexed load/store whose base pointer is exactly N counts.
9121  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
9122  if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
9123  return false;
9124  VT = LD->getMemoryVT();
9125  AS = LD->getAddressSpace();
9126  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
9127  if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
9128  return false;
9129  VT = ST->getMemoryVT();
9130  AS = ST->getAddressSpace();
9131  } else
9132  return false;
9133 
      // Translate the ADD/SUB base expression into an AddrMode query for TLI.
9135  if (N->getOpcode() == ISD::ADD) {
9137  if (Offset)
9138  // [reg +/- imm]
9139  AM.BaseOffs = Offset->getSExtValue();
9140  else
9141  // [reg +/- reg]
9142  AM.Scale = 1;
9143  } else if (N->getOpcode() == ISD::SUB) {
9145  if (Offset)
9146  // [reg +/- imm]
9147  AM.BaseOffs = -Offset->getSExtValue();
9148  else
9149  // [reg +/- reg]
9150  AM.Scale = 1;
9151  } else
9152  return false;
9153 
9154  return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
9155  VT.getTypeForEVT(*DAG.getContext()), AS);
9156 }
9157 
9158 /// Try turning a load/store into a pre-indexed load/store when the base
9159 /// pointer is an add or subtract and it has other uses besides the load/store.
9160 /// After the transformation, the new indexed load/store has effectively folded
9161 /// the add/subtract in and all of its other uses are redirected to the
9162 /// new load/store.
// NOTE(review): this extraction is missing the MemIndexedMode declaration for
// AM (original line 9200) and the Visited/Worklist cache declarations used by
// hasPredecessorHelper (orig. lines 9282-9283). Confirm against upstream.
9163 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
9164  if (Level < AfterLegalizeDAG)
9165  return false;
9166 
9167  bool isLoad = true;
9168  SDValue Ptr;
9169  EVT VT;
      // Only un-indexed loads/stores on targets with pre-inc/pre-dec forms.
9170  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9171  if (LD->isIndexed())
9172  return false;
9173  VT = LD->getMemoryVT();
9174  if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
9175  !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
9176  return false;
9177  Ptr = LD->getBasePtr();
9178  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
9179  if (ST->isIndexed())
9180  return false;
9181  VT = ST->getMemoryVT();
9182  if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
9183  !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
9184  return false;
9185  Ptr = ST->getBasePtr();
9186  isLoad = false;
9187  } else {
9188  return false;
9189  }
9190 
9191  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
9192  // out. There is no reason to make this a preinc/predec.
9193  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
9194  Ptr.getNode()->hasOneUse())
9195  return false;
9196 
9197  // Ask the target to do addressing mode selection.
9198  SDValue BasePtr;
9199  SDValue Offset;
9201  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
9202  return false;
9203 
9204  // Backends without true r+i pre-indexed forms may need to pass a
9205  // constant base with a variable offset so that constant coercion
9206  // will work with the patterns in canonical form.
9207  bool Swapped = false;
9208  if (isa<ConstantSDNode>(BasePtr)) {
9209  std::swap(BasePtr, Offset);
9210  Swapped = true;
9211  }
9212 
9213  // Don't create a indexed load / store with zero offset.
9214  if (isNullConstant(Offset))
9215  return false;
9216 
9217  // Try turning it into a pre-indexed load / store except when:
9218  // 1) The new base ptr is a frame index.
9219  // 2) If N is a store and the new base ptr is either the same as or is a
9220  // predecessor of the value being stored.
9221  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
9222  // that would create a cycle.
9223  // 4) All uses are load / store ops that use it as old base ptr.
9224 
9225  // Check #1. Preinc'ing a frame index would require copying the stack pointer
9226  // (plus the implicit offset) to a register to preinc anyway.
9227  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
9228  return false;
9229 
9230  // Check #2.
9231  if (!isLoad) {
9232  SDValue Val = cast<StoreSDNode>(N)->getValue();
9233  if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
9234  return false;
9235  }
9236 
9237  // If the offset is a constant, there may be other adds of constants that
9238  // can be folded with this one. We should do this to avoid having to keep
9239  // a copy of the original base pointer.
9240  SmallVector<SDNode *, 16> OtherUses;
9241  if (isa<ConstantSDNode>(Offset))
9242  for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
9243  UE = BasePtr.getNode()->use_end();
9244  UI != UE; ++UI) {
9245  SDUse &Use = UI.getUse();
9246  // Skip the use that is Ptr and uses of other results from BasePtr's
9247  // node (important for nodes that return multiple results).
9248  if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
9249  continue;
9250 
9251  if (Use.getUser()->isPredecessorOf(N))
9252  continue;
9253 
      // Only ADD/SUB-of-constant uses can be rewritten; anything else makes
      // the whole "fold other uses" effort pointless, so give up on it.
9254  if (Use.getUser()->getOpcode() != ISD::ADD &&
9255  Use.getUser()->getOpcode() != ISD::SUB) {
9256  OtherUses.clear();
9257  break;
9258  }
9259 
9260  SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
9261  if (!isa<ConstantSDNode>(Op1)) {
9262  OtherUses.clear();
9263  break;
9264  }
9265 
9266  // FIXME: In some cases, we can be smarter about this.
9267  if (Op1.getValueType() != Offset.getValueType()) {
9268  OtherUses.clear();
9269  break;
9270  }
9271 
9272  OtherUses.push_back(Use.getUser());
9273  }
9274 
9275  if (Swapped)
9276  std::swap(BasePtr, Offset);
9277 
9278  // Now check for #3 and #4.
9279  bool RealUse = false;
9280 
9281  // Caches for hasPredecessorHelper
9284 
9285  for (SDNode *Use : Ptr.getNode()->uses()) {
9286  if (Use == N)
9287  continue;
9288  if (N->hasPredecessorHelper(Use, Visited, Worklist))
9289  return false;
9290 
9291  // If Ptr may be folded in addressing mode of other use, then it's
9292  // not profitable to do this transformation.
9293  if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
9294  RealUse = true;
9295  }
9296 
9297  if (!RealUse)
9298  return false;
9299 
9300  SDValue Result;
9301  if (isLoad)
9302  Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
9303  BasePtr, Offset, AM);
9304  else
9305  Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
9306  BasePtr, Offset, AM);
9307  ++PreIndexedNodes;
9308  ++NodesCombined;
9309  DEBUG(dbgs() << "\nReplacing.4 ";
9310  N->dump(&DAG);
9311  dbgs() << "\nWith: ";
9312  Result.getNode()->dump(&DAG);
9313  dbgs() << '\n');
9314  WorklistRemover DeadNodes(*this);
9315  if (isLoad) {
      // Indexed load results: 0 = value, 1 = updated base, 2 = chain.
9316  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
9317  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
9318  } else {
      // Indexed store results: 0 = updated base, 1 = chain.
9319  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
9320  }
9321 
9322  // Finally, since the node is now dead, remove it from the graph.
9323  deleteAndRecombine(N);
9324 
9325  if (Swapped)
9326  std::swap(BasePtr, Offset);
9327 
9328  // Replace other uses of BasePtr that can be updated to use Ptr
9329  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
9330  unsigned OffsetIdx = 1;
9331  if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
9332  OffsetIdx = 0;
9333  assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
9334  BasePtr.getNode() && "Expected BasePtr operand");
9335 
9336  // We need to replace ptr0 in the following expression:
9337  // x0 * offset0 + y0 * ptr0 = t0
9338  // knowing that
9339  // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
9340  //
9341  // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
9342  // indexed load/store and the expression that needs to be re-written.
9343  //
9344  // Therefore, we have:
9345  // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
9346 
9347  ConstantSDNode *CN =
9348  cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
9349  int X0, X1, Y0, Y1;
9350  APInt Offset0 = CN->getAPIntValue();
9351  APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
9352 
9353  X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
9354  Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
9355  X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
9356  Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
9357 
9358  unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
9359 
9360  APInt CNV = Offset0;
9361  if (X0 < 0) CNV = -CNV;
9362  if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
9363  else CNV = CNV - Offset1;
9364 
9365  SDLoc DL(OtherUses[i]);
9366 
9367  // We can now generate the new expression.
9368  SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
9369  SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
9370 
9371  SDValue NewUse = DAG.getNode(Opcode,
9372  DL,
9373  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
9374  DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
9375  deleteAndRecombine(OtherUses[i]);
9376  }
9377 
9378  // Replace the uses of Ptr with uses of the updated base value.
9379  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
9380  deleteAndRecombine(Ptr.getNode());
9381 
9382  return true;
9383 }
9384 
9385 /// Try to combine a load/store with a add/sub of the base pointer node into a
9386 /// post-indexed load/store. The transformation folded the add/subtract into the
9387 /// new indexed load/store effectively and all of its uses are redirected to the
9388 /// new load/store.
9389 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
9390  if (Level < AfterLegalizeDAG)
9391  return false;
9392 
9393  bool isLoad = true;
9394  SDValue Ptr;
9395  EVT VT;
9396  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9397  if (LD->isIndexed())
9398  return false;
9399  VT = LD->getMemoryVT();
9400  if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
9401  !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
9402  return false;
9403  Ptr = LD->getBasePtr();
9404  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
9405  if (ST->isIndexed())
9406  return false;
9407  VT = ST->getMemoryVT();
9408  if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
9409  !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
9410  return false;
9411  Ptr = ST->getBasePtr();
9412  isLoad = false;
9413  } else {
9414  return false;
9415  }
9416 
9417  if (Ptr.getNode()->hasOneUse())
9418  return false;
9419 
9420  for (SDNode *Op : Ptr.getNode()->uses()) {
9421  if (Op == N ||
9422  (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
9423  continue;
9424 
9425  SDValue BasePtr;
9426  SDValue Offset;
9428  if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
9429  // Don't create a indexed load / store with zero offset.
9430  if (isNullConstant(Offset))
9431  continue;
9432 
9433  // Try turning it into a post-indexed load / store except when
9434  // 1) All uses are load / store ops that use it as base ptr (and
9435  // it may be folded as addressing mmode).
9436  // 2) Op must be independent of N, i.e. Op is neither a predecessor
9437  // nor a successor of N. Otherwise, if Op is folded that would
9438  // create a cycle.
9439 
9440  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
9441  continue;
9442 
9443  // Check for #1.
9444  bool TryNext = false;
9445  for (SDNode *Use : BasePtr.getNode()->uses()) {
9446  if (Use == Ptr.getNode())
9447  continue;
9448 
9449  // If all the uses are load / store addresses, then don't do the
9450  // transformation.
9451  if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
9452  bool RealUse = false;
9453  for (SDNode *UseUse : Use->uses()) {
9454  if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
9455  RealUse = true;
9456  }
9457 
9458  if (!RealUse) {
9459  TryNext = true;
9460  break;
9461  }
9462  }
9463  }
9464 
9465  if (TryNext)
9466  continue;
9467 
9468  // Check for #2
9469  if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
9470  SDValue Result = isLoad
9471  ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
9472  BasePtr, Offset, AM)
9473  : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
9474  BasePtr, Offset, AM);
9475  ++PostIndexedNodes;
9476  ++NodesCombined;
9477  DEBUG(dbgs() << "\nReplacing.5 ";
9478  N->dump(&DAG);
9479  dbgs() << "\nWith: ";
9480  Result.getNode()->dump(&DAG);
9481  dbgs() << '\n');
9482  WorklistRemover DeadNodes(*this);
9483  if (isLoad) {
9484  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
9485  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
9486  } else {
9487  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
9488  }
9489 
9490  // Finally, since the node is now dead, remove it from the graph.
9491  deleteAndRecombine(N);
9492 
9493  // Replace the uses of Use with uses of the updated base value.
9494  DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
9495  Result.getValue(isLoad ? 1 : 0));
9496  deleteAndRecombine(Op);
9497  return true;
9498  }
9499  }
9500  }
9501 
9502  return false;
9503 }
9504 
9505 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
9506 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
9508  assert(AM != ISD::UNINDEXED);
9509  SDValue BP = LD->getOperand(1);
9510  SDValue Inc = LD->getOperand(2);
9511 
9512  // Some backends use TargetConstants for load offsets, but don't expect
9513  // TargetConstants in general ADD nodes. We can convert these constants into
9514  // regular Constants (if the constant is not opaque).
9515  assert((Inc.getOpcode() != ISD::TargetConstant ||
9516  !cast<ConstantSDNode>(Inc)->isOpaque()) &&
9517  "Cannot split out indexing using opaque target constants");
9518  if (Inc.getOpcode() == ISD::TargetConstant) {
9519  ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
9520  Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
9521  ConstInc->getValueType(0));
9522  }
9523 
9524  unsigned Opc =
9525  (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
9526  return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
9527 }
9528 
/// Main combine entry point for LOAD nodes: deletes dead loads, forwards
/// stored values, improves alignment, finds better chains, forms indexed
/// loads, and slices loads used only in pieces.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load is dead if its value is unused and either the updated
      // pointer is also unused or we can split the indexing arithmetic out
      // into a standalone add/sub that replaces the pointer result.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Store-to-load forwarding: same address and same type means the
      // loaded value is exactly the value just stored.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        // Rebuild the load with the stronger alignment.
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(), Align, LD->getAAInfo());
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // Decide whether to use alias analysis: the command-line flag wins if
  // given, otherwise defer to the subtarget.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
9677 
9678 namespace {
9679 /// \brief Helper structure used to slice a load in smaller loads.
9680 /// Basically a slice is obtained from the following sequence:
9681 /// Origin = load Ty1, Base
9682 /// Shift = srl Ty1 Origin, CstTy Amount
9683 /// Inst = trunc Shift to Ty2
9684 ///
9685 /// Then, it will be rewriten into:
9686 /// Slice = load SliceTy, Base + SliceOffset
9687 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
9688 ///
9689 /// SliceTy is deduced from the number of bits that are actually used to
9690 /// build Inst.
9691 struct LoadedSlice {
9692  /// \brief Helper structure used to compute the cost of a slice.
9693  struct Cost {
9694  /// Are we optimizing for code size.
9695  bool ForCodeSize;
9696  /// Various cost.
9697  unsigned Loads;
9698  unsigned Truncates;
9699  unsigned CrossRegisterBanksCopies;
9700  unsigned ZExts;
9701  unsigned Shift;
9702 
9703  Cost(bool ForCodeSize = false)
9704  : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
9705  CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
9706 
9707  /// \brief Get the cost of one isolated slice.
9708  Cost(const LoadedSlice &LS, bool ForCodeSize = false)
9709  : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
9710  CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
9711  EVT TruncType = LS.Inst->getValueType(0);
9712  EVT LoadedType = LS.getLoadedType();
9713  if (TruncType != LoadedType &&
9714  !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
9715  ZExts = 1;
9716  }
9717 
9718  /// \brief Account for slicing gain in the current cost.
9719  /// Slicing provide a few gains like removing a shift or a
9720  /// truncate. This method allows to grow the cost of the original
9721  /// load with the gain from this slice.
9722  void addSliceGain(const LoadedSlice &LS) {
9723  // Each slice saves a truncate.
9724  const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
9725  if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
9726  LS.Inst->getOperand(0).getValueType()))
9727  ++Truncates;
9728  // If there is a shift amount, this slice gets rid of it.
9729  if (LS.Shift)
9730  ++Shift;
9731  // If this slice can merge a cross register bank copy, account for it.
9732  if (LS.canMergeExpensiveCrossRegisterBankCopy())
9733  ++CrossRegisterBanksCopies;
9734  }
9735 
9736  Cost &operator+=(const Cost &RHS) {
9737  Loads += RHS.Loads;
9738  Truncates += RHS.Truncates;
9739  CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
9740  ZExts += RHS.ZExts;
9741  Shift += RHS.Shift;
9742  return *this;
9743  }
9744 
9745  bool operator==(const Cost &RHS) const {
9746  return Loads == RHS.Loads && Truncates == RHS.Truncates &&
9747  CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
9748  ZExts == RHS.ZExts && Shift == RHS.Shift;
9749  }
9750 
9751  bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
9752 
9753  bool operator<(const Cost &RHS) const {
9754  // Assume cross register banks copies are as expensive as loads.
9755  // FIXME: Do we want some more target hooks?
9756  unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
9757  unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
9758  // Unless we are optimizing for code size, consider the
9759  // expensive operation first.
9760  if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
9761  return ExpensiveOpsLHS < ExpensiveOpsRHS;
9762  return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
9763  (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
9764  }
9765 
9766  bool operator>(const Cost &RHS) const { return RHS < *this; }
9767 
9768  bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
9769 
9770  bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
9771  };
9772  // The last instruction that represent the slice. This should be a
9773  // truncate instruction.
9774  SDNode *Inst;
9775  // The original load instruction.
9776  LoadSDNode *Origin;
9777  // The right shift amount in bits from the original load.
9778  unsigned Shift;
9779  // The DAG from which Origin came from.
9780  // This is used to get some contextual information about legal types, etc.
9781  SelectionDAG *DAG;
9782 
9783  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
9784  unsigned Shift = 0, SelectionDAG *DAG = nullptr)
9785  : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
9786 
9787  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
9788  /// \return Result is \p BitWidth and has used bits set to 1 and
9789  /// not used bits set to 0.
9790  APInt getUsedBits() const {
9791  // Reproduce the trunc(lshr) sequence:
9792  // - Start from the truncated value.
9793  // - Zero extend to the desired bit width.
9794  // - Shift left.
9795  assert(Origin && "No original load to compare against.");
9796  unsigned BitWidth = Origin->getValueSizeInBits(0);
9797  assert(Inst && "This slice is not bound to an instruction");
9798  assert(Inst->getValueSizeInBits(0) <= BitWidth &&
9799  "Extracted slice is bigger than the whole type!");
9800  APInt UsedBits(Inst->getValueSizeInBits(0), 0);
9801  UsedBits.setAllBits();
9802  UsedBits = UsedBits.zext(BitWidth);
9803  UsedBits <<= Shift;
9804  return UsedBits;
9805  }
9806 
9807  /// \brief Get the size of the slice to be loaded in bytes.
9808  unsigned getLoadedSize() const {
9809  unsigned SliceSize = getUsedBits().countPopulation();
9810  assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
9811  return SliceSize / 8;
9812  }
9813 
9814  /// \brief Get the type that will be loaded for this slice.
9815  /// Note: This may not be the final type for the slice.
9816  EVT getLoadedType() const {
9817  assert(DAG && "Missing context");
9818  LLVMContext &Ctxt = *DAG->getContext();
9819  return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
9820  }
9821 
9822  /// \brief Get the alignment of the load used for this slice.
9823  unsigned getAlignment() const {
9824  unsigned Alignment = Origin->getAlignment();
9825  unsigned Offset = getOffsetFromBase();
9826  if (Offset != 0)
9827  Alignment = MinAlign(Alignment, Alignment + Offset);
9828  return Alignment;
9829  }
9830 
9831  /// \brief Check if this slice can be rewritten with legal operations.
9832  bool isLegal() const {
9833  // An invalid slice is not legal.
9834  if (!Origin || !Inst || !DAG)
9835  return false;
9836 
9837  // Offsets are for indexed load only, we do not handle that.
9838  if (Origin->getOffset().getOpcode() != ISD::UNDEF)
9839  return false;
9840 
9841  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
9842 
9843  // Check that the type is legal.
9844  EVT SliceType = getLoadedType();
9845  if (!TLI.isTypeLegal(SliceType))
9846  return false;
9847 
9848  // Check that the load is legal for this type.
9849  if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
9850  return false;
9851 
9852  // Check that the offset can be computed.
9853  // 1. Check its type.
9854  EVT PtrType = Origin->getBasePtr().getValueType();
9855  if (PtrType == MVT::Untyped || PtrType.isExtended())
9856  return false;
9857 
9858  // 2. Check that it fits in the immediate.
9859  if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
9860  return false;
9861 
9862  // 3. Check that the computation is legal.
9863  if (!TLI.isOperationLegal(ISD::ADD, PtrType))
9864  return false;
9865 
9866  // Check that the zext is legal if it needs one.
9867  EVT TruncateType = Inst->getValueType(0);
9868  if (TruncateType != SliceType &&
9869  !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
9870  return false;
9871 
9872  return true;
9873  }
9874 
9875  /// \brief Get the offset in bytes of this slice in the original chunk of
9876  /// bits.
9877  /// \pre DAG != nullptr.
9878  uint64_t getOffsetFromBase() const {
9879  assert(DAG && "Missing context.");
9880  bool IsBigEndian = DAG->getDataLayout().isBigEndian();
9881  assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
9882  uint64_t Offset = Shift / 8;
9883  unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
9884  assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
9885  "The size of the original loaded type is not a multiple of a"
9886  " byte.");
9887  // If Offset is bigger than TySizeInBytes, it means we are loading all
9888  // zeros. This should have been optimized before in the process.
9889  assert(TySizeInBytes > Offset &&
9890  "Invalid shift amount for given loaded size");
9891  if (IsBigEndian)
9892  Offset = TySizeInBytes - Offset - getLoadedSize();
9893  return Offset;
9894  }
9895 
9896  /// \brief Generate the sequence of instructions to load the slice
9897  /// represented by this object and redirect the uses of this slice to
9898  /// this new sequence of instructions.
9899  /// \pre this->Inst && this->Origin are valid Instructions and this
9900  /// object passed the legal check: LoadedSlice::isLegal returned true.
9901  /// \return The last instruction of the sequence used to load the slice.
9902  SDValue loadSlice() const {
9903  assert(Inst && Origin && "Unable to replace a non-existing slice.");
9904  const SDValue &OldBaseAddr = Origin->getBasePtr();
9905  SDValue BaseAddr = OldBaseAddr;
9906  // Get the offset in that chunk of bytes w.r.t. the endianess.
9907  int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
9908  assert(Offset >= 0 && "Offset too big to fit in int64_t!");
9909  if (Offset) {
9910  // BaseAddr = BaseAddr + Offset.
9911  EVT ArithType = BaseAddr.getValueType();
9912  SDLoc DL(Origin);
9913  BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
9914  DAG->getConstant(Offset, DL, ArithType));
9915  }
9916 
9917  // Create the type of the loaded slice according to its size.
9918  EVT SliceType = getLoadedType();
9919 
9920  // Create the load for the slice.
9921  SDValue LastInst = DAG->getLoad(
9922  SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
9923  Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
9924  Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
9925  // If the final type is not the same as the loaded type, this means that
9926  // we have to pad with zero. Create a zero extend for that.
9927  EVT FinalType = Inst->getValueType(0);
9928  if (SliceType != FinalType)
9929  LastInst =
9930  DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
9931  return LastInst;
9932  }
9933 
9934  /// \brief Check if this slice can be merged with an expensive cross register
9935  /// bank copy. E.g.,
9936  /// i = load i32
9937  /// f = bitcast i32 i to float
9938  bool canMergeExpensiveCrossRegisterBankCopy() const {
9939  if (!Inst || !Inst->hasOneUse())
9940  return false;
9941  SDNode *Use = *Inst->use_begin();
9942  if (Use->getOpcode() != ISD::BITCAST)
9943  return false;
9944  assert(DAG && "Missing context");
9945  const TargetLowering &TLI = DAG->getTargetLoweringInfo();
9946  EVT ResVT = Use->getValueType(0);
9947  const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
9948  const TargetRegisterClass *ArgRC =
9950  if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
9951  return false;
9952 
9953  // At this point, we know that we perform a cross-register-bank copy.
9954  // Check if it is expensive.
9955  const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
9956  // Assume bitcasts are cheap, unless both register classes do not
9957  // explicitly share a common sub class.
9958  if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
9959  return false;
9960 
9961  // Check if it will be merged with the load.
9962  // 1. Check the alignment constraint.
9963  unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
9964  ResVT.getTypeForEVT(*DAG->getContext()));
9965 
9966  if (RequiredAlignment > getAlignment())
9967  return false;
9968 
9969  // 2. Check that the load is a legal operation for that type.
9970  if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
9971  return false;
9972 
9973  // 3. Check that we do not have a zext in the way.
9974  if (Inst->getValueType(0) != getLoadedType())
9975  return false;
9976 
9977  return true;
9978  }
9979 };
9980 }
9981 
9982 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
9983 /// \p UsedBits looks like 0..0 1..1 0..0.
9984 static bool areUsedBitsDense(const APInt &UsedBits) {
9985  // If all the bits are one, this is dense!
9986  if (UsedBits.isAllOnesValue())
9987  return true;
9988 
9989  // Get rid of the unused bits on the right.
9990  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
9991  // Get rid of the unused bits on the left.
9992  if (NarrowedUsedBits.countLeadingZeros())
9993  NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
9994  // Check that the chunk of bits is completely used.
9995  return NarrowedUsedBits.isAllOnesValue();
9996 }
9997 
9998 /// \brief Check whether or not \p First and \p Second are next to each other
9999 /// in memory. This means that there is no hole between the bits loaded
10000 /// by \p First and the bits loaded by \p Second.
10001 static bool areSlicesNextToEachOther(const LoadedSlice &First,
10002  const LoadedSlice &Second) {
10003  assert(First.Origin == Second.Origin && First.Origin &&
10004  "Unable to match different memory origins.");
10005  APInt UsedBits = First.getUsedBits();
10006  assert((UsedBits & Second.getUsedBits()) == 0 &&
10007  "Slices are not supposed to overlap.");
10008  UsedBits |= Second.getUsedBits();
10009  return areUsedBitsDense(UsedBits);
10010 }
10011 
10012 /// \brief Adjust the \p GlobalLSCost according to the target
10013 /// paring capabilities and the layout of the slices.
10014 /// \pre \p GlobalLSCost should account for at least as many loads as
10015 /// there is in the slices in \p LoadedSlices.
10017  LoadedSlice::Cost &GlobalLSCost) {
10018  unsigned NumberOfSlices = LoadedSlices.size();
10019  // If there is less than 2 elements, no pairing is possible.
10020  if (NumberOfSlices < 2)
10021  return;
10022 
10023  // Sort the slices so that elements that are likely to be next to each
10024  // other in memory are next to each other in the list.
10025  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
10026  [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
10027  assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
10028  return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
10029  });
10030  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
10031  // First (resp. Second) is the first (resp. Second) potentially candidate
10032  // to be placed in a paired load.
10033  const LoadedSlice *First = nullptr;
10034  const LoadedSlice *Second = nullptr;
10035  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
10036  // Set the beginning of the pair.
10037  First = Second) {
10038 
10039  Second = &LoadedSlices[CurrSlice];
10040 
10041  // If First is NULL, it means we start a new pair.
10042  // Get to the next slice.
10043  if (!First)
10044  continue;
10045 
10046  EVT LoadedType = First->getLoadedType();
10047 
10048  // If the types of the slices are different, we cannot pair them.
10049  if (LoadedType != Second->getLoadedType())
10050  continue;
10051 
10052  // Check if the target supplies paired loads for this type.
10053  unsigned RequiredAlignment = 0;
10054  if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
10055  // move to the next pair, this type is hopeless.
10056  Second = nullptr;
10057  continue;
10058  }
10059  // Check if we meet the alignment requirement.
10060  if (RequiredAlignment > First->getAlignment())
10061  continue;
10062 
10063  // Check that both loads are next to each other in memory.
10064  if (!areSlicesNextToEachOther(*First, *Second))
10065  continue;
10066 
10067  assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
10068  --GlobalLSCost.Loads;
10069  // Move to the next pair.
10070  Second = nullptr;
10071  }
10072 }
10073 
10074 /// \brief Check the profitability of all involved LoadedSlice.
10075 /// Currently, it is considered profitable if there is exactly two
10076 /// involved slices (1) which are (2) next to each other in memory, and
10077 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
10078 ///
10079 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
10080 /// the elements themselves.
10081 ///
10082 /// FIXME: When the cost model will be mature enough, we can relax
10083 /// constraints (1) and (2).
10085  const APInt &UsedBits, bool ForCodeSize) {
10086  unsigned NumberOfSlices = LoadedSlices.size();
10087  if (StressLoadSlicing)
10088  return NumberOfSlices > 1;
10089 
10090  // Check (1).
10091  if (NumberOfSlices != 2)
10092  return false;
10093 
10094  // Check (2).
10095  if (!areUsedBitsDense(UsedBits))
10096  return false;
10097 
10098  // Check (3).
10099  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
10100  // The original code has one big load.
10101  OrigCost.Loads = 1;
10102  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
10103  const LoadedSlice &LS = LoadedSlices[CurrSlice];
10104  // Accumulate the cost of all the slices.
10105  LoadedSlice::Cost SliceCost(LS, ForCodeSize);
10106  GlobalSlicingCost += SliceCost;
10107 
10108  // Account as cost in the original configuration the gain obtained
10109  // with the current slices.
10110  OrigCost.addSliceGain(LS);
10111  }
10112 
10113  // If the target supports paired load, adjust the cost accordingly.
10114  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
10115  return OrigCost > GlobalSlicingCost;
10116 }
10117 
10118 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
10119 /// operations, split it in the various pieces being extracted.
10120 ///
10121 /// This sort of thing is introduced by SROA.
10122 /// This slicing takes care not to insert overlapping loads.
10123 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
10124 bool DAGCombiner::SliceUpLoad(SDNode *N) {
10125  if (Level < AfterLegalizeDAG)
10126  return false;
10127 
10128  LoadSDNode *LD = cast<LoadSDNode>(N);
10129  if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
10130  !LD->getValueType(0).isInteger())
10131  return false;
10132 
10133  // Keep track of already used bits to detect overlapping values.
10134  // In that case, we will just abort the transformation.
10135  APInt UsedBits(LD->getValueSizeInBits(0), 0);
10136 
10137  SmallVector<LoadedSlice, 4> LoadedSlices;
10138 
10139  // Check if this load is used as several smaller chunks of bits.
10140  // Basically, look for uses in trunc or trunc(lshr) and record a new chain
10141  // of computation for each trunc.
10142  for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
10143  UI != UIEnd; ++UI) {
10144  // Skip the uses of the chain.
10145  if (UI.getUse().getResNo() != 0)
10146  continue;
10147 
10148  SDNode *User = *UI;
10149  unsigned Shift = 0;
10150 
10151  // Check if this is a trunc(lshr).
10152  if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
10153  isa<ConstantSDNode>(User->getOperand(1))) {
10154  Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
10155  User = *User->use_begin();
10156  }
10157 
10158  // At this point, User is a Truncate, iff we encountered, trunc or
10159  // trunc(lshr).
10160  if (User->getOpcode() != ISD::TRUNCATE)
10161  return false;
10162 
10163  // The width of the type must be a power of 2 and greater than 8-bits.
10164  // Otherwise the load cannot be represented in LLVM IR.
10165  // Moreover, if we shifted with a non-8-bits multiple, the slice
10166  // will be across several bytes. We do not support that.
10167  unsigned Width = User->getValueSizeInBits(0);
10168  if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
10169  return 0;
10170 
10171  // Build the slice for this chain of computations.
10172  LoadedSlice LS(User, LD, Shift, &DAG);
10173  APInt CurrentUsedBits = LS.getUsedBits();
10174 
10175  // Check if this slice overlaps with another.
10176  if ((CurrentUsedBits & UsedBits) != 0)
10177  return false;
10178  // Update the bits used globally.
10179  UsedBits |= CurrentUsedBits;
10180 
10181  // Check if the new slice would be legal.
10182  if (!LS.isLegal())
10183  return false;
10184 
10185  // Record the slice.
10186  LoadedSlices.push_back(LS);
10187  }
10188 
10189  // Abort slicing if it does not seem to be profitable.
10190  if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
10191  return false;
10192 
10193  ++SlicedLoads;
10194 
10195  // Rewrite each chain to use an independent load.
10196  // By construction, each chain can be represented by a unique load.
10197 
10198  // Prepare the argument for the new token factor for all the slices.
10199  SmallVector<SDValue, 8> ArgChains;
10201  LSIt = LoadedSlices.begin(),
10202  LSItEnd = LoadedSlices.end();
10203  LSIt != LSItEnd; ++LSIt) {
10204  SDValue SliceInst = LSIt->loadSlice();
10205  CombineTo(LSIt->Inst, SliceInst, true);
10206  if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
10207  SliceInst = SliceInst.getOperand(0);
10208  assert(SliceInst->getOpcode() == ISD::LOAD &&
10209  "It takes more than a zext to get to the loaded slice!!");
10210  ArgChains.push_back(SliceInst.getValue(1));
10211  }
10212 
10213  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
10214  ArgChains);
10215  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
10216  return true;
10217 }
10218 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
// NOTE(review): the parameter-list line is not visible in this view of the
// file; per the header comment and the call sites the parameters are
// (SDValue V, SDValue Ptr, SDValue Chain).
  // {0, 0} is the failure value returned by every rejecting path below.
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for: (and (load P), constant).
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      // NOTE(review): a third clause of this condition (a normal-load check
      // on operand 0) appears truncated from this view.
      return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Chain is a TokenFactor: the load must appear among its operands.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64 - V.getValueSizeInBits();

  // Width of the cleared region, in bytes; only power-of-two widths with a
  // matching narrow store type (i8/i16/i32) are handled.
  unsigned MaskedBytes = (V.getValueSizeInBits() - NotMaskLZ - NotMaskTZ) / 8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;     // Number of bytes being replaced.
  Result.second = NotMaskTZ/8;    // Byte offset of the replaced region.
  return Result;
}
10292 
10293 
10294 /// Check to see if IVal is something that provides a value as specified by
10295 /// MaskInfo. If so, replace the specified store with a narrower store of
10296 /// truncated IVal.
10297 static SDNode *
10298 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
10299  SDValue IVal, StoreSDNode *St,
10300  DAGCombiner *DC) {
10301  unsigned NumBytes = MaskInfo.first;
10302  unsigned ByteShift = MaskInfo.second;
10303  SelectionDAG &DAG = DC->getDAG();
10304 
10305  // Check to see if IVal is all zeros in the part being masked in by the 'or'
10306  // that uses this. If not, this is not a replacement.
10307  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
10308  ByteShift*8, (ByteShift+NumBytes)*8);
10309  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
10310 
10311  // Check that it is legal on the target to do this. It is legal if the new
10312  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
10313  // legalization.
10314  MVT VT = MVT::getIntegerVT(NumBytes*8);
10315  if (!DC->isTypeLegal(VT))
10316  return nullptr;
10317 
10318  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
10319  // shifted by ByteShift and truncated down to NumBytes.
10320  if (ByteShift) {
10321  SDLoc DL(IVal);
10322  IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
10323  DAG.getConstant(ByteShift*8, DL,
10324  DC->getShiftAmountTy(IVal.getValueType())));
10325  }
10326 
10327  // Figure out the offset for the store and the alignment of the access.
10328  unsigned StOffset;
10329  unsigned NewAlign = St->getAlignment();
10330 
10331  if (DAG.getDataLayout().isLittleEndian())
10332  StOffset = ByteShift;
10333  else
10334  StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
10335 
10336  SDValue Ptr = St->getBasePtr();
10337  if (StOffset) {
10338  SDLoc DL(IVal);
10339  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
10340  Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
10341  NewAlign = MinAlign(NewAlign, StOffset);
10342  }
10343 
10344  // Truncate down to the new size.
10345  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
10346 
10347  ++OpsNarrowed;
10348  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
10349  St->getPointerInfo().getWithOffset(StOffset),
10350  false, false, NewAlign).getNode();
10351 }
10352 
10353 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates.  If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.  Returns the new (narrow) store on success, or an empty
/// SDValue if no transformation was performed.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain, full-width scalar stores whose value is used solely by this
  // store are candidates.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // The remaining transformation only handles or/xor/and with a constant RHS.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  // The op's other operand must be a normal load from the same pointer (and
  // address space) whose chain feeds this store directly.
  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the immediate so that in all three cases the set bits
    // of Imm mark exactly the bits the op may change.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // Nothing changes, or everything changes: narrowing cannot help.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // [ShAmt, MSB] delimits the run of affected bits; start with the smallest
    // power-of-two width that can cover it.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // All changed bits must fit inside the NewBW-wide window at ShAmt,
    // otherwise a single narrow op cannot express the change.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      // Narrow the immediate to the window; undo the AND inversion above.
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Do not perform the transformation if it would under-align the access.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Emit the narrow load / op / store sequence at the adjusted pointer.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      // Redirect the old load's chain users to the new load's chain; the old
      // load becomes dead.
      WorklistRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
10482 
/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
/// Returns the new integer store on success, or an empty SDValue otherwise.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  // Only a normal store of a normal load, where the load's sole use is this
  // store and its chain result feeds the store directly.
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse() &&
      Chain == SDValue(Value.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    // Reject non-FP types, mismatched load/store types, non-temporal
    // accesses, and non-default address spaces.
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();

    // An integer type of the same width must be legal for both operations.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        // NOTE(review): two further clauses of this condition (target
        // desirability/profitability queries) appear truncated from this
        // view of the file.
      return SDValue();

    // Both accesses must already be at least ABI-aligned for the integer
    // type, or the transformation could pessimize the code.
    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();

    // Build the integer load, and chain the new store after it.
    SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
                                LD->getChain(), LD->getBasePtr(),
                                LD->getPointerInfo(),
                                false, false, false, LDAlign);

    SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
                                 NewLD, ST->getBasePtr(),
                                 ST->getPointerInfo(),
                                 false, false, STAlign);

    AddToWorklist(NewLD.getNode());
    AddToWorklist(NewST.getNode());
    // Redirect users of the FP load's chain to the integer load's chain.
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
    ++LdStFP2Int;
    return NewST;
  }

  return SDValue();
}
10537 
10538 namespace {
10539 /// Helper struct to parse and store a memory address as base + index + offset.
10540 /// We ignore sign extensions when it is safe to do so.
10541 /// The following two expressions are not equivalent. To differentiate we need
10542 /// to store whether there was a sign extension involved in the index
10543 /// computation.
10544 /// (load (i64 add (i64 copyfromreg %c)
10545 /// (i64 signextend (add (i8 load %index)
10546 /// (i8 1))))
10547 /// vs
10548 ///
10549 /// (load (i64 add (i64 copyfromreg %c)
10550 /// (i64 signextend (i32 add (i32 signextend (i8 load %index))
10551 /// (i32 1)))))
10552 struct BaseIndexOffset {
10553  SDValue Base;
10554  SDValue Index;
10555  int64_t Offset;
10556  bool IsIndexSignExt;
10557 
10558  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
10559 
10560  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
10561  bool IsIndexSignExt) :
10562  Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
10563 
10564  bool equalBaseIndex(const BaseIndexOffset &Other) {
10565  return Other.Base == Base && Other.Index == Index &&
10566  Other.IsIndexSignExt == IsIndexSignExt;
10567  }
10568 
10569  /// Parses tree in Ptr for base, index, offset addresses.
10570  static BaseIndexOffset match(SDValue Ptr) {
10571  bool IsIndexSignExt = false;
10572 
10573  // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
10574  // instruction, then it could be just the BASE or everything else we don't
10575  // know how to handle. Just use Ptr as BASE and give up.
10576  if (Ptr->getOpcode() != ISD::ADD)
10577  return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
10578 
10579  // We know that we have at least an ADD instruction. Try to pattern match
10580  // the simple case of BASE + OFFSET.
10581  if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
10582  int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
10583  return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
10584  IsIndexSignExt);
10585  }
10586 
10587  // Inside a loop the current BASE pointer is calculated using an ADD and a
10588  // MUL instruction. In this case Ptr is the actual BASE pointer.
10589  // (i64 add (i64 %array_ptr)
10590  // (i64 mul (i64 %induction_var)
10591  // (i64 %element_size)))
10592  if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
10593  return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
10594 
10595  // Look at Base + Index + Offset cases.
10596  SDValue Base = Ptr->getOperand(0);
10597  SDValue IndexOffset = Ptr->getOperand(1);
10598 
10599  // Skip signextends.
10600  if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
10601  IndexOffset = IndexOffset->getOperand(0);
10602  IsIndexSignExt = true;
10603  }
10604 
10605  // Either the case of Base + Index (no offset) or something else.
10606  if (IndexOffset->getOpcode() != ISD::ADD)
10607  return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
10608 
10609  // Now we have the case of Base + Index + offset.
10610  SDValue Index = IndexOffset->getOperand(0);
10611  SDValue Offset = IndexOffset->getOperand(1);
10612 
10613  if (!isa<ConstantSDNode>(Offset))
10614  return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
10615 
10616  // Ignore signextends.
10617  if (Index->getOpcode() == ISD::SIGN_EXTEND) {
10618  Index = Index->getOperand(0);
10619  IsIndexSignExt = true;
10620  } else IsIndexSignExt = false;
10621 
10622  int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
10623  return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
10624  }
10625 };
10626 } // namespace
10627 
10628 SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
10629  SDLoc SL,
10630  ArrayRef<MemOpLink> Stores,
10631  EVT Ty) const {
10632  SmallVector<SDValue, 8> BuildVector;
10633 
10634  for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I)
10635  BuildVector.push_back(cast<StoreSDNode>(Stores[I].MemNode)->getValue());
10636 
10637  return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
10638 }
10639 
/// Merge the first NumElem stores in StoreNodes into one wide store of their
/// combined value.  IsConstantSrc selects constant inputs (vs extracted
/// vector elements); UseVector selects a vector-typed wide store instead of
/// a single wide integer.  Returns true if the merge was performed.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
    SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
    unsigned NumElem, bool IsConstantSrc, bool UseVector) {
  // Make sure we have something to merge.
  if (NumElem < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned LatestNodeUsed = 0;

  for (unsigned i=0; i < NumElem; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    // (SequenceNum increases as the candidate scan walks backwards along the
    // chain, so the smallest SequenceNum is the latest store.)
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }

  // The latest Node in the DAG.
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  SDLoc DL(StoreNodes[0].MemNode);

  SDValue StoredVal;  // The single wide value to be stored.
  if (UseVector) {
    // Find a legal type for the vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
    if (IsConstantSrc) {
      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Ty);
    } else {
      // NOTE(review): the declaration of 'Ops' (per its uses below, a small
      // vector of SDValue) appears truncated from this view of the file.
      for (unsigned i = 0; i < NumElem ; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All of the operands of a BUILD_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(ISD::BUILD_VECTOR, DL, Ty, Ops);
    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumElem ; ++i) {
      // The element processed first ends up in the most significant bits, so
      // on little-endian targets visit elements in reverse order to put the
      // lowest-addressed element in the least significant bits.
      unsigned Idx = IsLE ? (NumElem - 1 - i) : i;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zext(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  // The wide store goes at the lowest address, chained after the latest
  // replaced store's chain.
  SDValue NewStore = DAG.getStore(LatestOp->getChain(), DL, StoredVal,
                                  FirstInChain->getBasePtr(),
                                  FirstInChain->getPointerInfo(),
                                  false, false,
                                  FirstInChain->getAlignment());

  // Replace the last store with the new store
  CombineTo(LatestOp, NewStore);
  // Erase all other stores.
  for (unsigned i = 0; i < NumElem ; ++i) {
    if (StoreNodes[i].MemNode == LatestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // ReplaceAllUsesWith will replace all uses that existed when it was
    // called, but graph optimizations may cause new ones to appear. For
    // example, the case in pr14333 looks like
    //
    //  St's chain -> St -> another store -> X
    //
    // And the only difference from St to the other store is the chain.
    // When we change it's chain to be St's chain they become identical,
    // get CSEed and the net result is that X is now a use of St.
    // Since we know that St is redundant, just iterate.
    while (!St->use_empty())
      DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
    deleteAndRecombine(St);
  }

  return true;
}
10745 
10746 static bool allowableAlignment(const SelectionDAG &DAG,
10747  const TargetLowering &TLI, EVT EVTTy,
10748  unsigned AS, unsigned Align) {
10749  if (TLI.allowsMisalignedMemoryAccesses(EVTTy, AS, Align))
10750  return true;
10751 
10752  Type *Ty = EVTTy.getTypeForEVT(*DAG.getContext());
10753  unsigned ABIAlignment = DAG.getDataLayout().getPrefTypeAlignment(Ty);
10754  return (Align >= ABIAlignment);
10755 }
10756 
10757 void DAGCombiner::getStoreMergeAndAliasCandidates(
10758  StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
10759  SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
10760  // This holds the base pointer, index, and the offset in bytes from the base
10761  // pointer.
10762  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
10763 
10764  // We must have a base and an offset.
10765  if (!BasePtr.Base.getNode())
10766  return;
10767 
10768  // Do not handle stores to undef base pointers.
10769  if (BasePtr.Base.getOpcode() == ISD::UNDEF)
10770  return;
10771 
10772  // Walk up the chain and look for nodes with offsets from the same
10773  // base pointer. Stop when reaching an instruction with a different kind
10774  // or instruction which has a different base pointer.
10775  EVT MemVT = St->getMemoryVT();
10776  unsigned Seq = 0;
10777  StoreSDNode *Index = St;
10778  while (Index) {
10779  // If the chain has more than one use, then we can't reorder the mem ops.
10780  if (Index != St && !SDValue(Index, 0)->hasOneUse())
10781  break;
10782 
10783  // Find the base pointer and offset for this memory node.
10784  BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
10785 
10786  // Check that the base pointer is the same as the original one.
10787  if (!Ptr.equalBaseIndex(BasePtr))
10788  break;
10789 
10790  // The memory operands must not be volatile.
10791  if (Index->isVolatile() || Index->isIndexed())
10792  break;
10793 
10794  // No truncation.
10795  if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
10796  if (St->isTruncatingStore())
10797  break;
10798 
10799  // The stored memory type must be the same.
10800  if (Index->getMemoryVT() != MemVT)
10801  break;
10802 
10803  // We found a potential memory operand to merge.
10804  StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
10805 
10806  // Find the next memory operand in the chain. If the next operand in the
10807  // chain is a store then move up and continue the scan with the next
10808  // memory operand. If the next operand is a load save it and use alias
10809  // information to check if it interferes with anything.
10810  SDNode *NextInChain = Index->getChain().getNode();
10811  while (1) {
10812  if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
10813  // We found a store node. Use it for the next iteration.
10814  Index = STn;
10815  break;
10816  } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
10817  if (Ldn->isVolatile()) {
10818  Index = nullptr;
10819  break;
10820  }
10821 
10822  // Save the load node for later. Continue the scan.
10823  AliasLoadNodes.push_back(Ldn);
10824  NextInChain = Ldn->getChain().getNode();
10825  continue;
10826  } else {
10827  Index = nullptr;
10828  break;
10829  }
10830  }
10831  }
10832 }
10833 
10834 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
10835  if (OptLevel == CodeGenOpt::None)
10836  return false;
10837 
10838  EVT MemVT = St->getMemoryVT();
10839  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
10840  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
10842 
10843  // This function cannot currently deal with non-byte-sized memory sizes.
10844  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
10845  return false;
10846 
10847  // Don't merge vectors into wider inputs.
10848  if (MemVT.isVector() || !MemVT.isSimple())
10849  return false;
10850 
10851  // Perform an early exit check. Do not bother looking at stored values that
10852  // are not constants, loads, or extracted vector elements.
10853  SDValue StoredVal = St->getValue();
10854  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
10855  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
10856  isa<ConstantFPSDNode>(StoredVal);
10857  bool IsExtractVecEltSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT);
10858 
10859  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecEltSrc)
10860  return false;
10861 
10862  // Only look at ends of store sequences.
10863  SDValue Chain = SDValue(St, 0);
10864  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
10865  return false;
10866 
10867  // Save the LoadSDNodes that we find in the chain.
10868  // We need to make sure that these nodes do not interfere with
10869  // any of the store nodes.
10870  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
10871 
10872  // Save the StoreSDNodes that we find in the chain.
10873  SmallVector<MemOpLink, 8> StoreNodes;
10874 
10875  getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
10876 
10877  // Check if there is anything to merge.
10878  if (StoreNodes.size() < 2)
10879  return false;
10880 
10881  // Sort the memory operands according to their distance from the base pointer.
10882  std::sort(StoreNodes.begin(), StoreNodes.end(),
10883  [](MemOpLink LHS, MemOpLink RHS) {
10884  return LHS.OffsetFromBase < RHS.OffsetFromBase ||
10885  (LHS.OffsetFromBase == RHS.OffsetFromBase &&
10886  LHS.SequenceNum > RHS.SequenceNum);
10887  });
10888 
10889  // Scan the memory operations on the chain and find the first non-consecutive
10890  // store memory address.
10891  unsigned LastConsecutiveStore = 0;
10892  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
10893  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
10894 
10895  // Check that the addresses are consecutive starting from the second
10896  // element in the list of stores.
10897  if (i > 0) {
10898  int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
10899  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
10900  break;
10901  }
10902 
10903  bool Alias = false;
10904  // Check if this store interferes with any of the loads that we found.
10905  for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
10906  if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
10907  Alias = true;
10908  break;
10909  }
10910  // We found a load that alias with this store. Stop the sequence.
10911  if (Alias)
10912  break;
10913 
10914  // Mark this node as useful.
10915  LastConsecutiveStore = i;
10916  }
10917 
10918  // The node with the lowest store address.
10919  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
10920  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
10921  unsigned FirstStoreAlign = FirstInChain->getAlignment();
10922 
10923  // Store the constants into memory as one consecutive store.
10924  if (IsConstantSrc) {
10925  unsigned LastLegalType = 0;
10926  unsigned LastLegalVectorType = 0;
10927  bool NonZero = false;
10928  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
10929  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
10930  SDValue StoredVal = St->getValue();
10931 
10932  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
10933  NonZero |= !C->isNullValue();
10934  } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
10935  NonZero |= !C->getConstantFPValue()->isNullValue();
10936  } else {
10937  // Non-constant.
10938  break;
10939  }
10940 
10941  // Find a legal type for the constant store.
10942  unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
10943  EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
10944  if (TLI.isTypeLegal(StoreTy) &&
10945  allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS,
10946  FirstStoreAlign)) {
10947  LastLegalType = i+1;
10948  // Or check whether a truncstore is legal.
10949  } else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
10951  EVT LegalizedStoredValueTy =
10952  TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
10953  if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
10954  allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
10955  FirstStoreAlign)) {
10956  LastLegalType = i + 1;
10957  }
10958  }
10959 
10960  // Find a legal type for the vector store.
10961  EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
10962  if (TLI.isTypeLegal(Ty) &&
10963  allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign)) {
10964  LastLegalVectorType = i + 1;
10965  }
10966  }
10967 
10968 
10969  // We only use vectors if the constant is known to be zero or the target
10970  // allows it and the function is not marked with the noimplicitfloat
10971  // attribute.
10972  if (NoVectors) {
10973  LastLegalVectorType = 0;
10974  } else if (NonZero && !TLI.storeOfVectorConstantIsCheap(MemVT,
10975  LastLegalVectorType,
10976  FirstStoreAS)) {
10977  LastLegalVectorType = 0;
10978  }
10979 
10980  // Check if we found a legal integer type to store.
10981  if (LastLegalType == 0 && LastLegalVectorType == 0)
10982  return false;
10983 
10984  bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
10985  unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
10986 
10987  return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
10988  true, UseVector);
10989  }
10990 
10991  // When extracting multiple vector elements, try to store them
10992  // in one vector store rather than a sequence of scalar stores.
10993  if (IsExtractVecEltSrc) {
10994  unsigned NumElem = 0;
10995  for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
10996  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
10997  SDValue StoredVal = St->getValue();
10998  // This restriction could be loosened.
10999  // Bail out if any stored values are not elements extracted from a vector.
11000  // It should be possible to handle mixed sources, but load sources need
11001  // more careful handling (see the block of code below that handles
11002  // consecutive loads).
11003  if (StoredVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
11004  return false;
11005 
11006  // Find a legal type for the vector store.
11007  EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
11008  if (TLI.isTypeLegal(Ty) &&
11009  allowableAlignment(DAG, TLI, Ty, FirstStoreAS, FirstStoreAlign))
11010  NumElem = i + 1;
11011  }
11012 
11013  return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
11014  false, true);
11015  }
11016 
11017  // Below we handle the case of multiple consecutive stores that
11018  // come from multiple consecutive loads. We merge them into a single
11019  // wide load and a single wide store.
11020 
11021  // Look for load nodes which are used by the stored values.
11022  SmallVector<MemOpLink, 8> LoadNodes;
11023 
11024  // Find acceptable loads. Loads need to have the same chain (token factor),
11025  // must not be zext, volatile, indexed, and they must be consecutive.
11026  BaseIndexOffset LdBasePtr;
11027  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
11028  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11029  LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
11030  if (!Ld) break;
11031 
11032  // Loads must only have one use.
11033  if (!Ld->hasNUsesOfValue(1, 0))
11034  break;
11035 
11036  // The memory operands must not be volatile.
11037  if (Ld->isVolatile() || Ld->isIndexed())
11038  break;
11039 
11040  // We do not accept ext loads.
11041  if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
11042  break;
11043 
11044  // The stored memory type must be the same.
11045  if (Ld->getMemoryVT() != MemVT)
11046  break;
11047 
11048  BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
11049  // If this is not the first ptr that we check.
11050  if (LdBasePtr.Base.getNode()) {
11051  // The base ptr must be the same.
11052  if (!LdPtr.equalBaseIndex(LdBasePtr))
11053  break;
11054  } else {
11055  // Check that all other base pointers are the same as this one.
11056  LdBasePtr = LdPtr;
11057  }
11058 
11059  // We found a potential memory operand to merge.
11060  LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
11061  }
11062 
11063  if (LoadNodes.size() < 2)
11064  return false;
11065 
11066  // If we have load/store pair instructions and we only have two values,
11067  // don't bother.
11068  unsigned RequiredAlignment;
11069  if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
11070  St->getAlignment() >= RequiredAlignment)
11071  return false;
11072 
11073  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
11074  unsigned FirstLoadAS = FirstLoad->getAddressSpace();
11075  unsigned FirstLoadAlign = FirstLoad->getAlignment();
11076 
11077  // Scan the memory operations on the chain and find the first non-consecutive
11078  // load memory address. These variables hold the index in the store node
11079  // array.
11080  unsigned LastConsecutiveLoad = 0;
11081  // This variable refers to the size and not index in the array.
11082  unsigned LastLegalVectorType = 0;
11083  unsigned LastLegalIntegerType = 0;
11084  StartAddress = LoadNodes[0].OffsetFromBase;
11085  SDValue FirstChain = FirstLoad->getChain();
11086  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
11087  // All loads much share the same chain.
11088  if (LoadNodes[i].MemNode->getChain() != FirstChain)
11089  break;
11090 
11091  int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
11092  if (CurrAddress - StartAddress != (ElementSizeBytes * i))
11093  break;
11094  LastConsecutiveLoad = i;
11095 
11096  // Find a legal type for the vector store.
11097  EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
11098  if (TLI.isTypeLegal(StoreTy) &&
11099  allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
11100  allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign)) {
11101  LastLegalVectorType = i + 1;
11102  }
11103 
11104  // Find a legal type for the integer store.
11105  unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
11106  StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
11107  if (TLI.isTypeLegal(StoreTy) &&
11108  allowableAlignment(DAG, TLI, StoreTy, FirstStoreAS, FirstStoreAlign) &&
11109  allowableAlignment(DAG, TLI, StoreTy, FirstLoadAS, FirstLoadAlign))
11110  LastLegalIntegerType = i + 1;
11111  // Or check whether a truncstore and extload is legal.
11112  else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
11114  EVT LegalizedStoredValueTy =
11115  TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy);
11116  if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
11117  TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11118  TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11119  TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
11120  allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstStoreAS,
11121  FirstStoreAlign) &&
11122  allowableAlignment(DAG, TLI, LegalizedStoredValueTy, FirstLoadAS,
11123  FirstLoadAlign))
11124  LastLegalIntegerType = i+1;
11125  }
11126  }
11127 
11128  // Only use vector types if the vector type is larger than the integer type.
11129  // If they are the same, use integers.
11130  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
11131  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
11132 
11133  // We add +1 here because the LastXXX variables refer to location while
11134  // the NumElem refers to array/index size.
11135  unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
11136  NumElem = std::min(LastLegalType, NumElem);
11137 
11138  if (NumElem < 2)
11139  return false;
11140 
11141  // The latest Node in the DAG.
11142  unsigned LatestNodeUsed = 0;
11143  for (unsigned i=1; i<NumElem; ++i) {
11144  // Find a chain for the new wide-store operand. Notice that some
11145  // of the store nodes that we found may not be selected for inclusion
11146  // in the wide store. The chain we use needs to be the chain of the
11147  // latest store node which is *used* and replaced by the wide store.
11148  if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
11149  LatestNodeUsed = i;
11150  }
11151 
11152  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
11153 
11154  // Find if it is better to use vectors or integers to load and store
11155  // to memory.
11156  EVT JointMemOpVT;
11157  if (UseVectorTy) {
11158  JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
11159  } else {
11160  unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
11161  JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
11162  }
11163 
11164  SDLoc LoadDL(LoadNodes[0].MemNode);
11165  SDLoc StoreDL(StoreNodes[0].MemNode);
11166 
11167  SDValue NewLoad = DAG.getLoad(
11168  JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
11169  FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
11170 
11171  SDValue NewStore = DAG.getStore(
11172  LatestOp->getChain(), StoreDL, NewLoad, FirstInChain->getBasePtr(),
11173  FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
11174 
11175  // Replace one of the loads with the new load.
11176  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode);
11178  SDValue(NewLoad.getNode(), 1));
11179 
11180  // Remove the rest of the load chains.
11181  for (unsigned i = 1; i < NumElem ; ++i) {
11182  // Replace all chain users of the old load nodes with the chain of the new
11183  // load node.
11184  LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
11185  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
11186  }
11187 
11188  // Replace the last store with the new store.
11189  CombineTo(LatestOp, NewStore);
11190  // Erase all other stores.
11191  for (unsigned i = 0; i < NumElem ; ++i) {
11192  // Remove all Store nodes.
11193  if (StoreNodes[i].MemNode == LatestOp)
11194  continue;
11195  StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
11196  DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
11197  deleteAndRecombine(St);
11198  }
11199 
11200  return true;
11201 }
11202 
/// visitSTORE - Combine a StoreSDNode. The transforms attempted here, in
/// order: fold a bitcast into the store when alignment permits; delete
/// stores of undef; rewrite FP-constant stores as integer stores (splitting
/// f64 into two i32 stores when needed); infer better pointer alignment;
/// turn an FP load/store pair into an integer pair; re-chain the store past
/// non-aliasing memory ops (combiner AA); form pre/post-indexed stores;
/// narrow truncating stores via demanded bits; remove dead stores (store
/// after identical load, or store after identical store); fold
/// FP_ROUND/TRUNCATE into a truncating store; and merge consecutive stores.
///
/// NOTE(review): this listing is a doxygen dump. Original source lines that
/// consisted only of hyperlinked identifiers were dropped (numbering gaps at
/// 11219, 11247, 11259, 11268, 11374-11376, 11385-11387 below); the code is
/// annotated but left byte-identical.
11203 SDValue DAGCombiner::visitSTORE(SDNode *N) {
11204  StoreSDNode *ST = cast<StoreSDNode>(N);
11205  SDValue Chain = ST->getChain();
11206  SDValue Value = ST->getValue();
11207  SDValue Ptr = ST->getBasePtr();
11208 
11209  // If this is a store of a bit convert, store the input value if the
11210  // resultant store does not need a higher alignment than the original.
11211  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
11212  ST->isUnindexed()) {
11213  unsigned OrigAlign = ST->getAlignment();
11214  EVT SVT = Value.getOperand(0).getValueType();
11215  unsigned Align = DAG.getDataLayout().getABITypeAlignment(
11216  SVT.getTypeForEVT(*DAG.getContext()));
11217  if (Align <= OrigAlign &&
11218  ((!LegalOperations && !ST->isVolatile()) ||
       // NOTE(review): original line 11219 is missing from this dump — it is
       // the second operand of the '||' (a TLI store-legality check for SVT).
11220  return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
11221  Ptr, ST->getPointerInfo(), ST->isVolatile(),
11222  ST->isNonTemporal(), OrigAlign,
11223  ST->getAAInfo());
11224  }
11225 
11226  // Turn 'store undef, Ptr' -> nothing.
11227  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
11228  return Chain;
11229 
11230  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
11231  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
11232  // NOTE: If the original store is volatile, this transform must not increase
11233  // the number of stores. For example, on x86-32 an f64 can be stored in one
11234  // processor operation but an i64 (which is not legal) requires two. So the
11235  // transform should not be done in this case.
11236  if (Value.getOpcode() != ISD::TargetConstantFP) {
11237  SDValue Tmp;
11238  switch (CFP->getSimpleValueType(0).SimpleTy) {
11239  default: llvm_unreachable("Unknown FP type");
11240  case MVT::f16: // We don't do this for these yet.
11241  case MVT::f80:
11242  case MVT::f128:
11243  case MVT::ppcf128:
11244  break;
11245  case MVT::f32:
       // Store the f32 bit pattern as an i32 when that is clearly profitable
       // or when an i32 store is legal/custom.
11246  if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
       // NOTE(review): original line 11247 is missing from this dump — it is
       // the second '||' operand (i32 store legality check) plus the '{'.
11248  ;
11249  Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
11250  bitcastToAPInt().getZExtValue(), SDLoc(CFP),
11251  MVT::i32);
11252  return DAG.getStore(Chain, SDLoc(N), Tmp,
11253  Ptr, ST->getMemOperand());
11254  }
11255  break;
11256  case MVT::f64:
       // Same idea for f64 -> i64 when a single i64 store is available.
11257  if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
11258  !ST->isVolatile()) ||
       // NOTE(review): original line 11259 is missing from this dump — it is
       // the second '||' operand (i64 store legality check) plus the '{'.
11260  ;
11261  Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
11262  getZExtValue(), SDLoc(CFP), MVT::i64);
11263  return DAG.getStore(Chain, SDLoc(N), Tmp,
11264  Ptr, ST->getMemOperand());
11265  }
11266 
11267  if (!ST->isVolatile() &&
       // NOTE(review): original line 11268 is missing from this dump — it is
       // the remaining guard of this 'if' (a legality check enabling the
       // two-halves split below).
11269  // Many FP stores are not made apparent until after legalize, e.g. for
11270  // argument passing. Since this is so common, custom legalize the
11271  // 64-bit integer store into two 32-bit stores.
11272  uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
11273  SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
11274  SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
       // Low word goes at the lower address on little-endian targets.
11275  if (DAG.getDataLayout().isBigEndian())
11276  std::swap(Lo, Hi);
11277 
11278  unsigned Alignment = ST->getAlignment();
11279  bool isVolatile = ST->isVolatile();
11280  bool isNonTemporal = ST->isNonTemporal();
11281  AAMDNodes AAInfo = ST->getAAInfo();
11282 
11283  SDLoc DL(N);
11284 
11285  SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
11286  Ptr, ST->getPointerInfo(),
11287  isVolatile, isNonTemporal,
11288  ST->getAlignment(), AAInfo);
       // Second store is at +4 bytes; its alignment is capped accordingly.
11289  Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
11290  DAG.getConstant(4, DL, Ptr.getValueType()));
11291  Alignment = MinAlign(Alignment, 4U);
11292  SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
11293  Ptr, ST->getPointerInfo().getWithOffset(4),
11294  isVolatile, isNonTemporal,
11295  Alignment, AAInfo);
11296  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
11297  St0, St1);
11298  }
11299 
11300  break;
11301  }
11302  }
11303  }
11304 
11305  // Try to infer better alignment information than the store already has.
11306  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
11307  if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
11308  if (Align > ST->getAlignment()) {
11309  SDValue NewStore =
11310  DAG.getTruncStore(Chain, SDLoc(N), Value,
11311  Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
11312  ST->isVolatile(), ST->isNonTemporal(), Align,
11313  ST->getAAInfo());
11314  if (NewStore.getNode() != N)
11315  return CombineTo(ST, NewStore, true);
11316  }
11317  }
11318  }
11319 
11320  // Try transforming a pair floating point load / store ops to integer
11321  // load / store ops.
11322  SDValue NewST = TransformFPLoadStorePair(N);
11323  if (NewST.getNode())
11324  return NewST;
11325 
       // Command-line flag overrides the subtarget's default AA preference.
11326  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
11327  : DAG.getSubtarget().useAA();
11328 #ifndef NDEBUG
       // Debug-only: restrict combiner AA to a single named function.
11329  if (CombinerAAOnlyFunc.getNumOccurrences() &&
11330  CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
11331  UseAA = false;
11332 #endif
11333  if (UseAA && ST->isUnindexed()) {
11334  // Walk up chain skipping non-aliasing memory nodes.
11335  SDValue BetterChain = FindBetterChain(N, Chain);
11336 
11337  // If there is a better chain.
11338  if (Chain != BetterChain) {
11339  SDValue ReplStore;
11340 
11341  // Replace the chain to avoid dependency.
11342  if (ST->isTruncatingStore()) {
11343  ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
11344  ST->getMemoryVT(), ST->getMemOperand());
11345  } else {
11346  ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
11347  ST->getMemOperand());
11348  }
11349 
11350  // Create token to keep both nodes around.
11351  SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
11352  MVT::Other, Chain, ReplStore);
11353 
11354  // Make sure the new and old chains are cleaned up.
11355  AddToWorklist(Token.getNode());
11356 
11357  // Don't add users to work list.
11358  return CombineTo(N, Token, false);
11359  }
11360  }
11361 
11362  // Try transforming N to an indexed store.
11363  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11364  return SDValue(N, 0);
11365 
11366  // FIXME: is there such a thing as a truncating indexed store?
11367  if (ST->isTruncatingStore() && ST->isUnindexed() &&
11368  Value.getValueType().isInteger()) {
11369  // See if we can simplify the input to this truncstore with knowledge that
11370  // only the low bits are being used. For example:
11371  // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
11372  SDValue Shorter =
11373  GetDemandedBits(Value,
       // NOTE(review): original lines 11374-11376 are missing from this dump —
       // they supply the demanded-bits mask (low MemoryVT bits) and close the
       // call.
11377  AddToWorklist(Value.getNode());
11378  if (Shorter.getNode())
11379  return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
11380  Ptr, ST->getMemoryVT(), ST->getMemOperand());
11381 
11382  // Otherwise, see if we can simplify the operation with
11383  // SimplifyDemandedBits, which only works if the value has a single use.
11384  if (SimplifyDemandedBits(Value,
       // NOTE(review): original lines 11385-11387 are missing from this dump —
       // they supply the same low-bits mask and close the call.
11388  return SDValue(N, 0);
11389  }
11390 
11391  // If this is a load followed by a store to the same location, then the store
11392  // is dead/noop.
11393  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
11394  if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
11395  ST->isUnindexed() && !ST->isVolatile() &&
11396  // There can't be any side effects between the load and store, such as
11397  // a call or store.
11398  Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
11399  // The store is dead, remove it.
11400  return Chain;
11401  }
11402  }
11403 
11404  // If this is a store followed by a store with the same value to the same
11405  // location, then the store is dead/noop.
11406  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
11407  if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
11408  ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
11409  ST1->isUnindexed() && !ST1->isVolatile()) {
11410  // The store is dead, remove it.
11411  return Chain;
11412  }
11413  }
11414 
11415  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
11416  // truncating store. We can do this even if this is already a truncstore.
11417  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
11418  && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
11419  TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
11420  ST->getMemoryVT())) {
11421  return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
11422  Ptr, ST->getMemoryVT(), ST->getMemOperand());
11423  }
11424 
11425  // Only perform this optimization before the types are legal, because we
11426  // don't want to perform this optimization on every DAGCombine invocation.
11427  if (!LegalTypes) {
11428  bool EverChanged = false;
11429 
11430  do {
11431  // There can be multiple store sequences on the same chain.
11432  // Keep trying to merge store sequences until we are unable to do so
11433  // or until we merge the last store on the chain.
11434  bool Changed = MergeConsecutiveStores(ST);
11435  EverChanged |= Changed;
11436  if (!Changed) break;
       // Stop if the merge deleted ST itself out from under us.
11437  } while (ST->getOpcode() != ISD::DELETED_NODE);
11438 
11439  if (EverChanged)
11440  return SDValue(N, 0);
11441  }
11442 
       // Finally, try narrowing a load/op/store sequence.
11443  return ReduceLoadOpStoreWidth(N);
11444 }
11445 
/// visitINSERT_VECTOR_ELT - Combine an INSERT_VECTOR_ELT node: drop inserts
/// of undef, canonicalize chains of inserts by descending constant index,
/// and fold an insert into a one-use BUILD_VECTOR (or UNDEF) operand by
/// rebuilding the BUILD_VECTOR with the new element in place.
11446 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
11447  SDValue InVec = N->getOperand(0);
11448  SDValue InVal = N->getOperand(1);
11449  SDValue EltNo = N->getOperand(2);
11450  SDLoc dl(N);
11451 
11452  // If the inserted element is an UNDEF, just use the input vector.
11453  if (InVal.getOpcode() == ISD::UNDEF)
11454  return InVec;
11455 
11456  EVT VT = InVec.getValueType();
11457 
11458  // If we can't generate a legal BUILD_VECTOR, exit
11459  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
11460  return SDValue();
11461 
11462  // Check that we know which element is being inserted
11463  if (!isa<ConstantSDNode>(EltNo))
11464  return SDValue();
11465  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11466 
11467  // Canonicalize insert_vector_elt dag nodes.
11468  // Example:
11469  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
11470  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
11471  //
11472  // Do this only if the child insert_vector node has one use; also
11473  // do this only if indices are both constants and Idx1 < Idx0.
11474  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
11475  && isa<ConstantSDNode>(InVec.getOperand(2))) {
11476  unsigned OtherElt =
11477  cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
11478  if (Elt < OtherElt) {
11479  // Swap nodes.
11480  SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
11481  InVec.getOperand(0), InVal, EltNo);
11482  AddToWorklist(NewOp.getNode());
11483  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
11484  VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
11485  }
11486  }
11487 
11488  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
11489  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
11490  // vector elements.
       // NOTE(review): original line 11491 is missing from this dump — it is
       // the declaration of 'Ops' (a SmallVector of SDValue) used below.
11492  // Do not combine these two vectors if the output vector will not replace
11493  // the input vector.
11494  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
11495  Ops.append(InVec.getNode()->op_begin(),
11496  InVec.getNode()->op_end());
11497  } else if (InVec.getOpcode() == ISD::UNDEF) {
11498  unsigned NElts = VT.getVectorNumElements();
11499  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
11500  } else {
11501  return SDValue();
11502  }
11503 
11504  // Insert the element
11505  if (Elt < Ops.size()) {
11506  // All the operands of BUILD_VECTOR must have the same type;
11507  // we enforce that here.
11508  EVT OpVT = Ops[0].getValueType();
11509  if (InVal.getValueType() != OpVT)
11510  InVal = OpVT.bitsGT(InVal.getValueType()) ?
11511  DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
11512  DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
11513  Ops[Elt] = InVal;
11514  }
11515 
11516  // Return the new vector
11517  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
11518 }
11519 
11520 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
11521  SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
11522  EVT ResultVT = EVE->getValueType(0);
11523  EVT VecEltVT = InVecVT.getVectorElementType();
11524  unsigned Align = OriginalLoad->getAlignment();
11525  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
11526  VecEltVT.getTypeForEVT(*DAG.getContext()));
11527 
11528  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
11529  return SDValue();
11530 
11531  Align = NewAlign;
11532 
11533  SDValue NewPtr = OriginalLoad->getBasePtr();
11534  SDValue Offset;
11535  EVT PtrType = NewPtr.getValueType();
11536  MachinePointerInfo MPI;
11537  SDLoc DL(EVE);
11538  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
11539  int Elt = ConstEltNo->getZExtValue();
11540  unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
11541  Offset = DAG.getConstant(PtrOff, DL, PtrType);
11542  MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
11543  } else {
11544  Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
11545  Offset = DAG.getNode(
11546  ISD::MUL, DL, PtrType, Offset,
11547  DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
11548  MPI = OriginalLoad->getPointerInfo();
11549  }
11550  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
11551 
11552  // The replacement we need to do here is a little tricky: we need to
11553  // replace an extractelement of a load with a load.
11554  // Use ReplaceAllUsesOfValuesWith to do the replacement.
11555  // Note that this replacement assumes that the extractvalue is the only
11556  // use of the load; that's okay because we don't want to perform this
11557  // transformation in other cases anyway.
11558  SDValue Load;
11559  SDValue Chain;
11560  if (ResultVT.bitsGT(VecEltVT)) {
11561  // If the result type of vextract is wider than the load, then issue an
11562  // extending load instead.
11563  ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
11564  VecEltVT)
11565  ? ISD::ZEXTLOAD
11566  : ISD::EXTLOAD;
11567  Load = DAG.getExtLoad(
11568  ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
11569  VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
11570  OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
11571  Chain = Load.getValue(1);
11572  } else {
11573  Load = DAG.getLoad(
11574  VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
11575  OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
11576  OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
11577  Chain = Load.getValue(1);
11578  if (ResultVT.bitsLT(VecEltVT))
11579  Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
11580  else
11581  Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
11582  }
11583  WorklistRemover DeadNodes(*this);
11584  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
11585  SDValue To[] = { Load, Chain };
11586  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
11587  // Since we're explicitly calling ReplaceAllUses, add the new node to the
11588  // worklist explicitly as well.
11589  AddToWorklist(Load.getNode());
11590  AddUsersToWorklist(Load.getNode()); // Add users too
11591  // Make sure to revisit this node to clean it up; it will usually be dead.
11592  AddToWorklist(EVE);
11593  ++OpsNarrowed;
11594  return SDValue(EVE, 0);
11595 }
11596 
/// visitEXTRACT_VECTOR_ELT - Combine an EXTRACT_VECTOR_ELT node: fold
/// extracts of SCALAR_TO_VECTOR, look through VECTOR_SHUFFLE to the
/// underlying operand/element, look through BITCAST, and replace extracts
/// of one-use normal loads with a narrow scalar load of the element.
11597 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
11598  // (vextract (scalar_to_vector val, 0) -> val
11599  SDValue InVec = N->getOperand(0);
11600  EVT VT = InVec.getValueType();
11601  EVT NVT = N->getValueType(0);
11602 
11603  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
11604  // Check if the result type doesn't match the inserted element type. A
11605  // SCALAR_TO_VECTOR may truncate the inserted element and the
11606  // EXTRACT_VECTOR_ELT may widen the extracted vector.
11607  SDValue InOp = InVec.getOperand(0);
11608  if (InOp.getValueType() != NVT) {
11609  assert(InOp.getValueType().isInteger() && NVT.isInteger());
11610  return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
11611  }
11612  return InOp;
11613  }
11614 
11615  SDValue EltNo = N->getOperand(1);
11616  bool ConstEltNo = isa<ConstantSDNode>(EltNo);
11617 
11618  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
11619  // We only perform this optimization before the op legalization phase because
11620  // we may introduce new vector instructions which are not backed by TD
11621  // patterns. For example on AVX, extracting elements from a wide vector
11622  // without using extract_subvector. However, if we can find an underlying
11623  // scalar value, then we can always use that.
11624  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
11625  && ConstEltNo) {
11626  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11627  int NumElem = VT.getVectorNumElements();
11628  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
11629  // Find the new index to extract from.
11630  int OrigElt = SVOp->getMaskElt(Elt);
11631 
11632  // Extracting an undef index is undef.
11633  if (OrigElt == -1)
11634  return DAG.getUNDEF(NVT);
11635 
11636  // Select the right vector half to extract from.
       // Mask indices >= NumElem refer to the second shuffle operand.
11637  SDValue SVInVec;
11638  if (OrigElt < NumElem) {
11639  SVInVec = InVec->getOperand(0);
11640  } else {
11641  SVInVec = InVec->getOperand(1);
11642  OrigElt -= NumElem;
11643  }
11644 
11645  if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
11646  SDValue InOp = SVInVec.getOperand(OrigElt);
11647  if (InOp.getValueType() != NVT) {
11648  assert(InOp.getValueType().isInteger() && NVT.isInteger());
11649  InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
11650  }
11651 
11652  return InOp;
11653  }
11654 
11655  // FIXME: We should handle recursing on other vector shuffles and
11656  // scalar_to_vector here as well.
11657 
11658  if (!LegalOperations) {
11659  EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
11660  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
11661  DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
11662  }
11663  }
11664 
11665  bool BCNumEltsChanged = false;
11666  EVT ExtVT = VT.getVectorElementType();
11667  EVT LVT = ExtVT;
11668 
11669  // If the result of load has to be truncated, then it's not necessarily
11670  // profitable.
11671  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
11672  return SDValue();
11673 
11674  if (InVec.getOpcode() == ISD::BITCAST) {
11675  // Don't duplicate a load with other uses.
11676  if (!InVec.hasOneUse())
11677  return SDValue();
11678 
11679  EVT BCVT = InVec.getOperand(0).getValueType();
11680  if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
11681  return SDValue();
       // Remember if the cast changed the element count; if so the shuffle
       // mask below cannot be trusted.
11682  if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
11683  BCNumEltsChanged = true;
11684  InVec = InVec.getOperand(0);
11685  ExtVT = BCVT.getVectorElementType();
11686  }
11687 
11688  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
       // Variable-index case: guard against the index depending on the load
       // itself, which would create a cycle.
11689  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
11690  ISD::isNormalLoad(InVec.getNode()) &&
11691  !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
11692  SDValue Index = N->getOperand(1);
11693  if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
11694  return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
11695  OrigLoad);
11696  }
11697 
11698  // Perform only after legalization to ensure build_vector / vector_shuffle
11699  // optimizations have already been done.
11700  if (!LegalOperations) return SDValue();
11701 
11702  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
11703  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
11704  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
11705 
11706  if (ConstEltNo) {
11707  int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
11708 
11709  LoadSDNode *LN0 = nullptr;
11710  const ShuffleVectorSDNode *SVN = nullptr;
11711  if (ISD::isNormalLoad(InVec.getNode())) {
11712  LN0 = cast<LoadSDNode>(InVec);
11713  } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
11714  InVec.getOperand(0).getValueType() == ExtVT &&
11715  ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
11716  // Don't duplicate a load with other uses.
11717  if (!InVec.hasOneUse())
11718  return SDValue();
11719 
11720  LN0 = cast<LoadSDNode>(InVec.getOperand(0));
11721  } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
11722  // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
11723  // =>
11724  // (load $addr+1*size)
11725 
11726  // Don't duplicate a load with other uses.
11727  if (!InVec.hasOneUse())
11728  return SDValue();
11729 
11730  // If the bit convert changed the number of elements, it is unsafe
11731  // to examine the mask.
11732  if (BCNumEltsChanged)
11733  return SDValue();
11734 
11735  // Select the input vector, guarding against out of range extract vector.
11736  unsigned NumElems = VT.getVectorNumElements();
       // NOTE(review): the bound check uses '>' — for Elt == NumElems this
       // still calls getMaskElt(Elt), which looks out of range; seemingly it
       // should be '>='. Flagged for confirmation, not changed here.
11737  int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
11738  InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
11739 
11740  if (InVec.getOpcode() == ISD::BITCAST) {
11741  // Don't duplicate a load with other uses.
11742  if (!InVec.hasOneUse())
11743  return SDValue();
11744 
11745  InVec = InVec.getOperand(0);
11746  }
11747  if (ISD::isNormalLoad(InVec.getNode())) {
11748  LN0 = cast<LoadSDNode>(InVec);
       // Rebase the index into the selected shuffle operand.
11749  Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
11750  EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
11751  }
11752  }
11753 
11754  // Make sure we found a non-volatile load and the extractelement is
11755  // the only use.
11756  if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
11757  return SDValue();
11758 
11759  // If Idx was -1 above, Elt is going to be -1, so just return undef.
11760  if (Elt == -1)
11761  return DAG.getUNDEF(LVT);
11762 
11763  return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
11764  }
11765 
11766  return SDValue();
11767 }
11768 
11769 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
// Purpose: rewrite a BUILD_VECTOR whose defined operands are all ANY_EXTEND
// or ZERO_EXTEND of a common narrower scalar type (undefs allowed) into a
// wider BUILD_VECTOR of the un-extended source scalars, padded with a filler
// element, followed by a single BITCAST back to the original vector type.
// Returns the replacement value, or an empty SDValue() when no change applies.
11770 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
11771  // We perform this optimization post type-legalization because
11772  // the type-legalizer often scalarizes integer-promoted vectors.
11773  // Performing this optimization before may create bit-casts which
11774  // will be type-legalized to complex code sequences.
11775  // We perform this optimization only before the operation legalizer because we
11776  // may introduce illegal operations.
// NOTE(review): the guard condition implementing the early-out described in
// the comment above (original line 11777) was lost in this extraction —
// confirm against upstream LLVM 3.7 DAGCombiner.cpp before relying on this
// listing.
11778  return SDValue();
11779 
11780  unsigned NumInScalars = N->getNumOperands();
11781  SDLoc dl(N);
11782  EVT VT = N->getValueType(0);
11783 
11784  // Check to see if this is a BUILD_VECTOR of a bunch of values
11785  // which come from any_extend or zero_extend nodes. If so, we can create
11786  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
11787  // optimizations. We do not handle sign-extend because we can't fill the sign
11788  // using shuffles.
// SourceType stays MVT::Other until the first extension operand fixes the
// common source type; it is reset to MVT::Other on any mismatch/abort.
11789  EVT SourceType = MVT::Other;
11790  bool AllAnyExt = true;
11791 
11792  for (unsigned i = 0; i != NumInScalars; ++i) {
11793  SDValue In = N->getOperand(i);
11794  // Ignore undef inputs.
11795  if (In.getOpcode() == ISD::UNDEF) continue;
11796 
11797  bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
11798  bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
11799 
11800  // Abort if the element is not an extension.
11801  if (!ZeroExt && !AnyExt) {
11802  SourceType = MVT::Other;
11803  break;
11804  }
11805 
11806  // The input is a ZeroExt or AnyExt. Check the original type.
11807  EVT InTy = In.getOperand(0).getValueType();
11808 
11809  // Check that all of the widened source types are the same.
11810  if (SourceType == MVT::Other)
11811  // First time.
11812  SourceType = InTy;
11813  else if (InTy != SourceType) {
11814  // Multiple income types. Abort.
11815  SourceType = MVT::Other;
11816  break;
11817  }
11818 
11819  // Check if all of the extends are ANY_EXTENDs.
11820  AllAnyExt &= AnyExt;
11821  }
11822 
11823  // In order to have valid types, all of the inputs must be extended from the
11824  // same source type and all of the inputs must be any or zero extend.
11825  // Scalar sizes must be a power of two.
11826  EVT OutScalarTy = VT.getScalarType();
11827  bool ValidTypes = SourceType != MVT::Other &&
11828  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
11829  isPowerOf2_32(SourceType.getSizeInBits());
11830 
11831  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
11832  // turn into a single shuffle instruction.
11833  if (!ValidTypes)
11834  return SDValue();
11835 
11836  bool isLE = DAG.getDataLayout().isLittleEndian();
11837  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
11838  assert(ElemRatio > 1 && "Invalid element size ratio");
// Filler occupies the extended (high) lanes: undef if every input was
// ANY_EXTEND (bits unspecified), otherwise zero to preserve ZERO_EXTEND
// semantics.
11839  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
11840  DAG.getConstant(0, SDLoc(N), SourceType);
11841 
11842  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
11843  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
11844 
11845  // Populate the new build_vector
11846  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
11847  SDValue Cast = N->getOperand(i);
11848  assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
11849  Cast.getOpcode() == ISD::ZERO_EXTEND ||
11850  Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
11851  SDValue In;
11852  if (Cast.getOpcode() == ISD::UNDEF)
11853  In = DAG.getUNDEF(SourceType);
11854  else
11855  In = Cast->getOperand(0);
// On little-endian the narrow payload is the lowest sub-element of each
// group of ElemRatio lanes; on big-endian it is the highest.
11856  unsigned Index = isLE ? (i * ElemRatio) :
11857  (i * ElemRatio + (ElemRatio - 1));
11858 
11859  assert(Index < Ops.size() && "Invalid index");
11860  Ops[Index] = In;
11861  }
11862 
11863  // The type of the new BUILD_VECTOR node.
11864  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
11865  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
11866  "Invalid vector size");
11867  // Check if the new vector type is legal.
11868  if (!isTypeLegal(VecVT)) return SDValue();
11869 
11870  // Make the new BUILD_VECTOR.
11871  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
11872 
11873  // The new BUILD_VECTOR node has the potential to be further optimized.
11874  AddToWorklist(BV.getNode());
11875  // Bitcast to the desired type.
11876  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
11877 }
11878 
// Purpose: rewrite (build_vector (uint_to_fp a), (uint_to_fp b), ...) —
// or the SINT_TO_FP equivalent — into a single vector conversion of a
// BUILD_VECTOR of the integer sources: (uint_to_fp (build_vector a, b, ...)).
// All defined operands must use the same conversion opcode and the same
// integer source type.  Returns the replacement, or SDValue() on no change.
11879 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
11880  EVT VT = N->getValueType(0);
11881 
11882  unsigned NumInScalars = N->getNumOperands();
11883  SDLoc dl(N);
11884 
// SrcVT/Opcode act as "not yet seen" sentinels until the first defined
// operand fixes them.
11885  EVT SrcVT = MVT::Other;
11886  unsigned Opcode = ISD::DELETED_NODE;
11887  unsigned NumDefs = 0;
11888 
11889  for (unsigned i = 0; i != NumInScalars; ++i) {
11890  SDValue In = N->getOperand(i);
11891  unsigned Opc = In.getOpcode();
11892 
11893  if (Opc == ISD::UNDEF)
11894  continue;
11895 
11896  // If all scalar values are floats and converted from integers.
11897  if (Opcode == ISD::DELETED_NODE &&
11898  (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
11899  Opcode = Opc;
11900  }
11901 
// Mixed opcodes (or a non-conversion first operand) cannot be merged.
11902  if (Opc != Opcode)
11903  return SDValue();
11904 
11905  EVT InVT = In.getOperand(0).getValueType();
11906 
11907  // If all scalar values are typed differently, bail out. It's chosen to
11908  // simplify BUILD_VECTOR of integer types.
11909  if (SrcVT == MVT::Other)
11910  SrcVT = InVT;
11911  if (SrcVT != InVT)
11912  return SDValue();
11913  NumDefs++;
11914  }
11915 
11916  // If the vector has just one element defined, it's not worth to fold it into
11917  // a vectorized one.
11918  if (NumDefs < 2)
11919  return SDValue();
11920 
11921  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
11922  && "Should only handle conversion from integer to float.");
11923  assert(SrcVT != MVT::Other && "Cannot determine source type!");
11924 
// NVT is the integer vector type feeding the single vector conversion.
11925  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
11926 
11927  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
11928  return SDValue();
11929 
11930  // Just because the floating-point vector type is legal does not necessarily
11931  // mean that the corresponding integer vector type is.
11932  if (!isTypeLegal(NVT))
11933  return SDValue();
11934 
// NOTE(review): the declaration of Opnds (original line 11935, per upstream
// `SmallVector<SDValue, 8> Opnds;`) was lost in this extraction — it is used
// immediately below; confirm against upstream LLVM 3.7 DAGCombiner.cpp.
11936  for (unsigned i = 0; i != NumInScalars; ++i) {
11937  SDValue In = N->getOperand(i);
11938 
11939  if (In.getOpcode() == ISD::UNDEF)
11940  Opnds.push_back(DAG.getUNDEF(SrcVT));
11941  else
11942  Opnds.push_back(In.getOperand(0));
11943  }
11944  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
11945  AddToWorklist(BV.getNode());
11946 
11947  return DAG.getNode(Opcode, dl, VT, BV);
11948 }
11949 
// Purpose: main BUILD_VECTOR combine.  In order it tries:
//   1. all-undef operands  -> UNDEF
//   2. reduceBuildVecExtToExtBuildVec (ext elements -> bitcast of build_vec)
//   3. reduceBuildVecConvertToConvertBuildVec (int->fp elements -> vector cvt)
//   4. build_vector of EXTRACT_VECTOR_ELTs (plus optional zeros/undefs) from
//      at most two distinct source vectors -> a single VECTOR_SHUFFLE.
// Returns the replacement value or SDValue() when nothing applies.
11950 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
11951  unsigned NumInScalars = N->getNumOperands();
11952  SDLoc dl(N);
11953  EVT VT = N->getValueType(0);
11954 
11955  // A vector built entirely of undefs is undef.
11956  if (ISD::allOperandsUndef(N))
11957  return DAG.getUNDEF(VT);
11958 
11959  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
11960  return V;
11961 
11962  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
11963  return V;
11964 
11965  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
11966  // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
11967  // at most two distinct vectors, turn this into a shuffle node.
11968 
11969  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
11970  if (!isTypeLegal(VT))
11971  return SDValue();
11972 
11973  // May only combine to shuffle after legalize if shuffle is legal.
11974  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
11975  return SDValue();
11976 
// VecIn1/VecIn2 collect the (at most two) distinct extract sources; a null
// VecIn1 after the scan means the pattern did not match.  UsesZeroVector
// records constant-zero elements, which are later fed from an all-zeros
// second shuffle input.
11977  SDValue VecIn1, VecIn2;
11978  bool UsesZeroVector = false;
11979  for (unsigned i = 0; i != NumInScalars; ++i) {
11980  SDValue Op = N->getOperand(i);
11981  // Ignore undef inputs.
11982  if (Op.getOpcode() == ISD::UNDEF) continue;
11983 
11984  // See if we can combine this build_vector into a blend with a zero vector.
11985  if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
11986  UsesZeroVector = true;
11987  continue;
11988  }
11989 
11990  // If this input is something other than a EXTRACT_VECTOR_ELT with a
11991  // constant index, bail out.
11992  if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
11993  !isa<ConstantSDNode>(Op.getOperand(1))) {
11994  VecIn1 = VecIn2 = SDValue(nullptr, 0);
11995  break;
11996  }
11997 
11998  // We allow up to two distinct input vectors.
11999  SDValue ExtractedFromVec = Op.getOperand(0);
12000  if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
12001  continue;
12002 
12003  if (!VecIn1.getNode()) {
12004  VecIn1 = ExtractedFromVec;
12005  } else if (!VecIn2.getNode() && !UsesZeroVector) {
12006  VecIn2 = ExtractedFromVec;
12007  } else {
12008  // Too many inputs.
12009  VecIn1 = VecIn2 = SDValue(nullptr, 0);
12010  break;
12011  }
12012  }
12013 
12014  // If everything is good, we can make a shuffle operation.
12015  if (VecIn1.getNode()) {
12016  unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
12017  SmallVector<int, 8> Mask;
12018  for (unsigned i = 0; i != NumInScalars; ++i) {
12019  unsigned Opcode = N->getOperand(i).getOpcode();
12020  if (Opcode == ISD::UNDEF) {
12021  Mask.push_back(-1);
12022  continue;
12023  }
12024 
12025  // Operands can also be zero.
12026  if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
12027  assert(UsesZeroVector &&
12028  (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
12029  "Unexpected node found!");
// Zero elements index into the second operand (the zero vector appended
// below), hence the NumInScalars offset.
12030  Mask.push_back(NumInScalars+i);
12031  continue;
12032  }
12033 
12034  // If extracting from the first vector, just use the index directly.
12035  SDValue Extract = N->getOperand(i);
12036  SDValue ExtVal = Extract.getOperand(1);
12037  unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
12038  if (Extract.getOperand(0) == VecIn1) {
12039  Mask.push_back(ExtIndex);
12040  continue;
12041  }
12042 
12043  // Otherwise, use InIdx + InputVecSize
12044  Mask.push_back(InNumElements + ExtIndex);
12045  }
12047  // Avoid introducing illegal shuffles with zero.
12048  if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
12049  return SDValue();
12050 
12051  // We can't generate a shuffle node with mismatched input and output types.
12052  // Attempt to transform a single input vector to the correct type.
12053  if ((VT != VecIn1.getValueType())) {
12054  // If the input vector type has a different base type to the output
12055  // vector type, bail out.
12056  EVT VTElemType = VT.getVectorElementType();
12057  if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
12058  (VecIn2.getNode() &&
12059  (VecIn2.getValueType().getVectorElementType() != VTElemType)))
12060  return SDValue();
12061 
12062  // If the input vector is too small, widen it.
12063  // We only support widening of vectors which are half the size of the
12064  // output registers. For example XMM->YMM widening on X86 with AVX.
12065  EVT VecInT = VecIn1.getValueType();
12066  if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
12067  // If we only have one small input, widen it by adding undef values.
12068  if (!VecIn2.getNode())
12069  VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
12070  DAG.getUNDEF(VecIn1.getValueType()));
12071  else if (VecIn1.getValueType() == VecIn2.getValueType()) {
12072  // If we have two small inputs of the same type, try to concat them.
12073  VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
12074  VecIn2 = SDValue(nullptr, 0);
12075  } else
12076  return SDValue();
12077  } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
12078  // If the input vector is too large, try to split it.
12079  // We don't support having two input vectors that are too large.
12080  // If the zero vector was used, we can not split the vector,
12081  // since we'd need 3 inputs.
12082  if (UsesZeroVector || VecIn2.getNode())
12083  return SDValue();
12084 
12085  if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
12086  return SDValue();
12087 
12088  // Try to replace VecIn1 with two extract_subvectors
12089  // No need to update the masks, they should still be correct.
// Order matters: VecIn2 (upper half) is taken first because the second
// extract overwrites VecIn1 with its own lower half.
12090  VecIn2 = DAG.getNode(
12091  ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
12092  DAG.getConstant(VT.getVectorNumElements(), dl,
12093  TLI.getVectorIdxTy(DAG.getDataLayout())));
12094  VecIn1 = DAG.getNode(
12095  ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
12096  DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
12097  } else
12098  return SDValue();
12099  }
12100 
12101  if (UsesZeroVector)
12102  VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
12103  DAG.getConstantFP(0.0, dl, VT);
12104  else
12105  // If VecIn2 is unused then change it to undef.
12106  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
12107 
12108  // Check that we were able to transform all incoming values to the same
12109  // type.
12110  if (VecIn2.getValueType() != VecIn1.getValueType() ||
12111  VecIn1.getValueType() != VT)
12112  return SDValue();
12113 
12114  // Return the new VECTOR_SHUFFLE node.
12115  SDValue Ops[2];
12116  Ops[0] = VecIn1;
12117  Ops[1] = VecIn2;
12118  return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
12119  }
12120 
12121  return SDValue();
12122 }
12123 
// Purpose: fold a CONCAT_VECTORS whose operands are all bitcasts of scalars
// (or undef) into a BUILD_VECTOR of those scalars, bitcast to the result
// type.  If any operand is a floating-point scalar, everything is converted
// to a common FP scalar type first.
// NOTE(review): the function signature (original line 12124) was lost in this
// extraction — per upstream LLVM 3.7 it is
// `static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)`;
// confirm against upstream before relying on this listing.
12125  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12126  EVT OpVT = N->getOperand(0).getValueType();
12127 
12128  // If the operands are legal vectors, leave them alone.
12129  if (TLI.isTypeLegal(OpVT))
12130  return SDValue();
12131 
12132  SDLoc DL(N);
12133  EVT VT = N->getValueType(0);
// NOTE(review): the declaration of Ops (original line 12134, per upstream
// `SmallVector<SDValue, 8> Ops;`) was lost in this extraction — it is used
// below; confirm against upstream LLVM 3.7 DAGCombiner.cpp.
12136  EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
12137  SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
12138 
12139  // Keep track of what we encounter.
12140  bool AnyInteger = false;
12141  bool AnyFP = false;
12142  for (const SDValue &Op : N->ops()) {
12143  if (ISD::BITCAST == Op.getOpcode() &&
12144  !Op.getOperand(0).getValueType().isVector())
12145  Ops.push_back(Op.getOperand(0));
12146  else if (ISD::UNDEF == Op.getOpcode())
12147  Ops.push_back(ScalarUndef);
12148  else
12149  return SDValue();
12150 
12151  // Note whether we encounter an integer or floating point scalar.
12152  // If it's neither, bail out, it could be something weird like x86mmx.
12153  EVT LastOpVT = Ops.back().getValueType();
12154  if (LastOpVT.isFloatingPoint())
12155  AnyFP = true;
12156  else if (LastOpVT.isInteger())
12157  AnyInteger = true;
12158  else
12159  return SDValue();
12160  }
12161 
12162  // If any of the operands is a floating point scalar bitcast to a vector,
12163  // use floating point types throughout, and bitcast everything.
12164  // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
12165  if (AnyFP) {
12166  SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
12167  ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
12168  if (AnyInteger) {
12169  for (SDValue &Op : Ops) {
12170  if (Op.getValueType() == SVT)
12171  continue;
12172  if (Op.getOpcode() == ISD::UNDEF)
12173  Op = ScalarUndef;
12174  else
12175  Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
12176  }
12177  }
12178  }
12179 
// Element count is derived from total bit width so it stays correct whether
// SVT ended up the integer or the floating-point scalar type.
12180  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
12181  VT.getSizeInBits() / SVT.getSizeInBits());
12182  return DAG.getNode(ISD::BITCAST, DL, VT,
12183  DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
12184 }
12185 
// Purpose: main CONCAT_VECTORS combine.  In order it tries:
//   1. single operand            -> that operand
//   2. all operands undef        -> UNDEF
//   3. concat(bitcast(scalar), undef...) -> bitcast(scalar_to_vector)
//   4. all operands BUILD_VECTOR/undef  -> one flat BUILD_VECTOR
//   5. combineConcatVectorOfScalars
//   6. concat of identity EXTRACT_SUBVECTORs of one source -> that source
// Returns the replacement value or SDValue() when nothing applies.
12186 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
12187  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
12188  // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
12189  // inputs come from at most two distinct vectors, turn this into a shuffle
12190  // node.
12191 
12192  // If we only have one input vector, we don't need to do any concatenation.
12193  if (N->getNumOperands() == 1)
12194  return N->getOperand(0);
12195 
12196  // Check if all of the operands are undefs.
12197  EVT VT = N->getValueType(0);
12198  if (ISD::allOperandsUndef(N))
12199  return DAG.getUNDEF(VT);
12200 
12201  // Optimize concat_vectors where all but the first of the vectors are undef.
12202  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
12203  return Op.getOpcode() == ISD::UNDEF;
12204  })) {
12205  SDValue In = N->getOperand(0);
12206  assert(In.getValueType().isVector() && "Must concat vectors");
12207 
12208  // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
12209  if (In->getOpcode() == ISD::BITCAST &&
12210  !In->getOperand(0)->getValueType(0).isVector()) {
12211  SDValue Scalar = In->getOperand(0);
12212 
12213  // If the bitcast type isn't legal, it might be a trunc of a legal type;
12214  // look through the trunc so we can still do the transform:
12215  // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
12216  if (Scalar->getOpcode() == ISD::TRUNCATE &&
12217  !TLI.isTypeLegal(Scalar.getValueType()) &&
12218  TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
12219  Scalar = Scalar->getOperand(0);
12220 
12221  EVT SclTy = Scalar->getValueType(0);
12222 
12223  if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
12224  return SDValue();
12225 
12226  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
12227  VT.getSizeInBits() / SclTy.getSizeInBits());
12228  if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
12229  return SDValue();
12230 
12231  SDLoc dl = SDLoc(N);
12232  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
12233  return DAG.getNode(ISD::BITCAST, dl, VT, Res);
12234  }
12235  }
12236 
12237  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
12238  // We have already tested above for an UNDEF only concatenation.
12239  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
12240  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
12241  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
12242  return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
12243  };
12244  bool AllBuildVectorsOrUndefs =
12245  std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
12246  if (AllBuildVectorsOrUndefs) {
// NOTE(review): the declaration of Opnds (original line 12247, per upstream
// `SmallVector<SDValue, 8> Opnds;`) was lost in this extraction — it is used
// below; confirm against upstream LLVM 3.7 DAGCombiner.cpp.
12248  EVT SVT = VT.getScalarType();
12249 
12250  EVT MinVT = SVT;
12251  if (!SVT.isFloatingPoint()) {
12252  // If BUILD_VECTOR are from built from integer, they may have different
12253  // operand types. Get the smallest type and truncate all operands to it.
12254  bool FoundMinVT = false;
12255  for (const SDValue &Op : N->ops())
12256  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
12257  EVT OpSVT = Op.getOperand(0)->getValueType(0);
12258  MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
12259  FoundMinVT = true;
12260  }
12261  assert(FoundMinVT && "Concat vector type mismatch");
12262  }
12263 
12264  for (const SDValue &Op : N->ops()) {
12265  EVT OpVT = Op.getValueType();
12266  unsigned NumElts = OpVT.getVectorNumElements();
12267 
12268  if (ISD::UNDEF == Op.getOpcode())
12269  Opnds.append(NumElts, DAG.getUNDEF(MinVT));
12270 
12271  if (ISD::BUILD_VECTOR == Op.getOpcode()) {
12272  if (SVT.isFloatingPoint()) {
12273  assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
12274  Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
12275  } else {
// Integer BUILD_VECTOR operands may be wider than MinVT; truncate each
// element so the merged BUILD_VECTOR has uniform operand types.
12276  for (unsigned i = 0; i != NumElts; ++i)
12277  Opnds.push_back(
12278  DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
12279  }
12280  }
12281  }
12282 
12283  assert(VT.getVectorNumElements() == Opnds.size() &&
12284  "Concat vector type mismatch");
12285  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
12286  }
12287 
12288  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
12289  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
12290  return V;
12291 
12292  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
12293  // nodes often generate nop CONCAT_VECTOR nodes.
12294  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
12295  // place the incoming vectors at the exact same location.
12296  SDValue SingleSource = SDValue();
12297  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
12298 
12299  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
12300  SDValue Op = N->getOperand(i);
12301 
12302  if (Op.getOpcode() == ISD::UNDEF)
12303  continue;
12304 
12305  // Check if this is the identity extract:
12306  if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
12307  return SDValue();
12308 
12309  // Find the single incoming vector for the extract_subvector.
12310  if (SingleSource.getNode()) {
12311  if (Op.getOperand(0) != SingleSource)
12312  return SDValue();
12313  } else {
12314  SingleSource = Op.getOperand(0);
12315 
12316  // Check the source type is the same as the type of the result.
12317  // If not, this concat may extend the vector, so we can not
12318  // optimize it away.
12319  if (SingleSource.getValueType() != N->getValueType(0))
12320  return SDValue();
12321  }
12322 
12323  unsigned IdentityIndex = i * PartNumElem;
// NOTE(review): the declaration of CS (original line 12324, presumably
// `ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));`) was
// lost in this extraction — confirm against upstream LLVM 3.7 DAGCombiner.cpp.
12325  // The extract index must be constant.
12326  if (!CS)
12327  return SDValue();
12328 
12329  // Check that we are reading from the identity index.
12330  if (CS->getZExtValue() != IdentityIndex)
12331  return SDValue();
12332  }
12333 
12334  if (SingleSource.getNode())
12335  return SingleSource;
12336 
12337  return SDValue();
12338 }
12339 
// Purpose: EXTRACT_SUBVECTOR combine.  Folds
//   (extract_subvec (concat V1, V2, ...), i)           -> Vi, and
//   (extract_subvec (insert_subvec V1, V2, Ins), Ext)  -> V2 or a cheaper
// extract of V1, for the half-size case.  Returns SDValue() on no change.
12340 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
12341  EVT NVT = N->getValueType(0);
12342  SDValue V = N->getOperand(0);
12343 
12344  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
12345  // Combine:
12346  // (extract_subvec (concat V1, V2, ...), i)
12347  // Into:
12348  // Vi if possible
12349  // Only operand 0 is checked as 'concat' assumes all inputs of the same
12350  // type.
12351  if (V->getOperand(0).getValueType() != NVT)
12352  return SDValue();
12352  unsigned Idx = N->getConstantOperandVal(1);
12354  unsigned NumElems = NVT.getVectorNumElements();
12355  assert((Idx % NumElems) == 0 &&
12356  "IDX in concat is not a multiple of the result vector length.");
12357  return V->getOperand(Idx / NumElems);
12358  }
12359 
12360  // Skip bitcasting
12361  if (V->getOpcode() == ISD::BITCAST)
12362  V = V.getOperand(0);
12363 
12364  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
12365  SDLoc dl(N);
12366  // Handle only simple case where vector being inserted and vector
12367  // being extracted are of same type, and are half size of larger vectors.
12368  EVT BigVT = V->getOperand(0).getValueType();
12369  EVT SmallVT = V->getOperand(1).getValueType();
12370  if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
12371  return SDValue();
12372 
12373  // Only handle cases where both indexes are constants with the same type.
// NOTE(review): the declarations of InsIdx and ExtIdx (original lines
// 12374-12375, presumably dyn_casts of V->getOperand(2) and N->getOperand(1)
// to ConstantSDNode) were lost in this extraction — confirm against upstream
// LLVM 3.7 DAGCombiner.cpp.
12376 
12377  if (InsIdx && ExtIdx &&
12378  InsIdx->getValueType(0).getSizeInBits() <= 64 &&
12379  ExtIdx->getValueType(0).getSizeInBits() <= 64) {
12380  // Combine:
12381  // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
12382  // Into:
12383  // indices are equal or bit offsets are equal => V1
12384  // otherwise => (extract_subvec V1, ExtIdx)
// Bit offsets are compared (index * scalar width) because a BITCAST may have
// been skipped above, so the two indices can be in different element units.
12385  if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
12386  ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
12387  return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
12388  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
12389  DAG.getNode(ISD::BITCAST, dl,
12390  N->getOperand(0).getValueType(),
12391  V->getOperand(0)), N->getOperand(1));
12392  }
12393  }
12394 
12395  return SDValue();
12396 }
12397 
// Purpose: recursively replace unused components of a shuffle operand V with
// undef, guided by the UsedElements bitmask of lanes the shuffle actually
// reads.  Handles CONCAT_VECTORS (per-piece) and INSERT_SUBVECTOR (base and
// inserted subvector separately); everything else is returned unchanged.
// NOTE(review): the first line of the signature (original line 12398,
// presumably `static SDValue simplifyShuffleOperandRecursively(
// SmallBitVector &UsedElements,`) was lost in this extraction — the body
// below clearly consumes a `UsedElements` bitmask; confirm against upstream
// LLVM 3.7 DAGCombiner.cpp.
12399  SDValue V, SelectionDAG &DAG) {
12400  SDLoc DL(V);
12401  EVT VT = V.getValueType();
12402 
12403  switch (V.getOpcode()) {
12404  default:
12405  return V;
12406 
12407  case ISD::CONCAT_VECTORS: {
12408  EVT OpVT = V->getOperand(0).getValueType();
12409  int OpSize = OpVT.getVectorNumElements();
12410  SmallBitVector OpUsedElements(OpSize, false);
12411  bool FoundSimplification = false;
12412  SmallVector<SDValue, 4> NewOps;
12413  NewOps.reserve(V->getNumOperands());
12414  for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
12415  SDValue Op = V->getOperand(i);
12416  bool OpUsed = false;
// Project the global used-lane mask onto this concat piece's local lanes.
12417  for (int j = 0; j < OpSize; ++j)
12418  if (UsedElements[i * OpSize + j]) {
12419  OpUsedElements[j] = true;
12420  OpUsed = true;
12421  }
12422  NewOps.push_back(
12423  OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
12424  : DAG.getUNDEF(OpVT));
// NOTE(review): as written this sets the flag when the operand is UNCHANGED
// (`Op == NewOps.back()`), which looks inverted relative to the name
// `FoundSimplification` and to the guard below — possible upstream quirk or
// extraction artifact; verify against upstream LLVM 3.7 before relying on it.
12425  FoundSimplification |= Op == NewOps.back();
12426  OpUsedElements.reset();
12427  }
12428  if (FoundSimplification)
12429  V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
12430  return V;
12431  }
12432 
12433  case ISD::INSERT_SUBVECTOR: {
12434  SDValue BaseV = V->getOperand(0);
12435  SDValue SubV = V->getOperand(1);
12436  auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
// A non-constant insertion index defeats lane tracking; keep V as-is.
12437  if (!IdxN)
12438  return V;
12439 
12440  int SubSize = SubV.getValueType().getVectorNumElements();
12441  int Idx = IdxN->getZExtValue();
12442  bool SubVectorUsed = false;
12443  SmallBitVector SubUsedElements(SubSize, false);
// Lanes covered by the inserted subvector are cleared from UsedElements so
// the recursion on the base vector does not see them as used.
12444  for (int i = 0; i < SubSize; ++i)
12445  if (UsedElements[i + Idx]) {
12446  SubVectorUsed = true;
12447  SubUsedElements[i] = true;
12448  UsedElements[i + Idx] = false;
12449  }
12450 
12451  // Now recurse on both the base and sub vectors.
12452  SDValue SimplifiedSubV =
12453  SubVectorUsed
12454  ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
12455  : DAG.getUNDEF(SubV.getValueType());
12456  SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
12457  if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
12458  V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
12459  SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
12460  return V;
12461  }
12462  }
12463 }
12464 
// Purpose: compute, from the shuffle mask of SVN, which lanes of each input
// (N0: mask values [0, NumElts); N1: [NumElts, 2*NumElts)) are actually read,
// then let simplifyShuffleOperandRecursively replace unused pieces with undef.
// Returns a rebuilt shuffle, or SDValue() if neither operand changed.
// NOTE(review): the first line of the signature (original line 12465,
// presumably `static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN,
// SDValue N0,`) was lost in this extraction — confirm against upstream
// LLVM 3.7 DAGCombiner.cpp.
12466  SDValue N1, SelectionDAG &DAG) {
12467  EVT VT = SVN->getValueType(0);
12468  int NumElts = VT.getVectorNumElements();
12469  SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
// Mask entries < 0 are undef lanes and mark nothing as used.
12470  for (int M : SVN->getMask())
12471  if (M >= 0 && M < NumElts)
12472  N0UsedElements[M] = true;
12473  else if (M >= NumElts)
12474  N1UsedElements[M - NumElts] = true;
12475 
12476  SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
12477  SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
12478  if (S0 == N0 && S1 == N1)
12479  return SDValue();
12480 
12481  return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
12483 
12484 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
12485 // or turn a shuffle of a single concat into simpler shuffle then concat.
// Each group of NumElemsPerConcat mask lanes must be either all-undef or a
// contiguous, aligned copy of one concat piece; otherwise returns SDValue().
// NOTE(review): the function signature (original line 12486, presumably
// `static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {`)
// was lost in this extraction — confirm against upstream LLVM 3.7
// DAGCombiner.cpp.
12487  EVT VT = N->getValueType(0);
12488  unsigned NumElts = VT.getVectorNumElements();
12489 
12490  SDValue N0 = N->getOperand(0);
12491  SDValue N1 = N->getOperand(1);
12492  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
// NOTE(review): the declaration of Ops (original line 12494, per upstream
// `SmallVector<SDValue, 4> Ops;`) was lost in this extraction — it is used
// below; confirm against upstream LLVM 3.7 DAGCombiner.cpp.
12495  EVT ConcatVT = N0.getOperand(0).getValueType();
12496  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
12497  unsigned NumConcats = NumElts / NumElemsPerConcat;
12498 
12499  // Special case: shuffle(concat(A,B)) can be more efficiently represented
12500  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
12501  // half vector elements.
12502  if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
12503  std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
12504  SVN->getMask().end(), [](int i) { return i == -1; })) {
12505  N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
12506  ArrayRef<int>(SVN->getMask().begin(), NumElemsPerConcat));
12507  N1 = DAG.getUNDEF(ConcatVT);
12508  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
12509  }
12510 
12511  // Look at every vector that's inserted. We're looking for exact
12512  // subvector-sized copies from a concatenated vector
12513  for (unsigned I = 0; I != NumConcats; ++I) {
12514  // Make sure we're dealing with a copy.
12515  unsigned Begin = I * NumElemsPerConcat;
12516  bool AllUndef = true, NoUndef = true;
12517  for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
12518  if (SVN->getMaskElt(J) >= 0)
12519  AllUndef = false;
12520  else
12521  NoUndef = false;
12522  }
12523 
12524  if (NoUndef) {
// The group must start at a piece boundary and be strictly consecutive —
// i.e. an exact copy of one source concat piece.
12525  if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
12526  return SDValue();
12527 
12528  for (unsigned J = 1; J != NumElemsPerConcat; ++J)
12529  if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
12530  return SDValue();
12531 
// Pieces [0, N0 ops) come from N0; higher piece numbers index into N1.
12532  unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
12533  if (FirstElt < N0.getNumOperands())
12534  Ops.push_back(N0.getOperand(FirstElt));
12535  else
12536  Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
12537 
12538  } else if (AllUndef) {
12539  Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
12540  } else { // Mixed with general masks and undefs, can't do optimization.
12541  return SDValue();
12542  }
12543  }
12544 
12545  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
12546 }
12547 
12548 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
12549  EVT VT = N->getValueType(0);
12550  unsigned NumElts = VT.getVectorNumElements();
12551 
12552  SDValue N0 = N->getOperand(0);
12553  SDValue N1 = N->getOperand(1);
12554 
12555  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
12556 
12557  // Canonicalize shuffle undef, undef -> undef
12558  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
12559  return DAG.getUNDEF(VT);
12560 
12561  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
12562 
12563  // Canonicalize shuffle v, v -> v, undef
12564  if (N0 == N1) {
12565  SmallVector<int, 8> NewMask;
12566  for (unsigned i = 0; i != NumElts; ++i) {
12567  int Idx = SVN->getMaskElt(i);
12568  if (Idx >= (int)NumElts) Idx -= NumElts;
12569  NewMask.push_back(Idx);
12570  }
12571  return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
12572  &NewMask[0]);
12573  }
12574 
12575  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
12576  if (N0.getOpcode() == ISD::UNDEF) {
12577  SmallVector<int, 8> NewMask;
12578  for (unsigned i = 0; i != NumElts; ++i) {
12579  int Idx = SVN->getMaskElt(i);
12580  if (Idx >= 0) {
12581  if (Idx >= (int)NumElts)
12582  Idx -= NumElts;
12583  else
12584  Idx = -1; // remove reference to lhs
12585  }
12586  NewMask.push_back(Idx);
12587  }
12588  return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
12589  &NewMask[0]);
12590  }
12591 
12592  // Remove references to rhs if it is undef
12593  if (N1.getOpcode() == ISD::UNDEF) {
12594  bool Changed = false;
12595  SmallVector<int, 8> NewMask;
12596  for (unsigned i = 0; i != NumElts; ++i) {
12597  int Idx = SVN->getMaskElt(i);
12598  if (Idx >= (int)NumElts) {
12599  Idx = -1;
12600  Changed = true;
12601  }
12602  NewMask.push_back(Idx);
12603  }
12604  if (Changed)
12605  return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
12606  }
12607 
12608  // If it is a splat, check if the argument vector is another splat or a
12609  // build_vector.
12610  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
12611  SDNode *V = N0.getNode();
12612 
12613  // If this is a bit convert that changes the element type of the vector but
12614  // not the number of vector elements, look through it. Be careful not to
12615  // look though conversions that change things like v4f32 to v2f64.
12616  if (V->getOpcode() == ISD::BITCAST) {
12617  SDValue ConvInput = V->getOperand(0);
12618  if (ConvInput.getValueType().isVector() &&
12619  ConvInput.getValueType().getVectorNumElements() == NumElts)
12620  V = ConvInput.getNode();
12621  }
12622 
12623  if (V->getOpcode() == ISD::BUILD_VECTOR) {
12624  assert(V->getNumOperands() == NumElts &&
12625  "BUILD_VECTOR has wrong number of operands");
12626  SDValue Base;
12627  bool AllSame = true;
12628  for (unsigned i = 0; i != NumElts; ++i) {
12629  if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
12630  Base = V->getOperand(i);
12631  break;
12632  }
12633  }
12634  // Splat of <u, u, u, u>, return <u, u, u, u>
12635  if (!Base.getNode())
12636  return N0;
12637  for (unsigned i = 0; i != NumElts; ++i) {
12638  if (V->getOperand(i) != Base) {
12639  AllSame = false;
12640  break;
12641  }
12642  }
12643  // Splat of <x, x, x, x>, return <x, x, x, x>
12644  if (AllSame)
12645  return N0;
12646 
12647  // Canonicalize any other splat as a build_vector.
12648  const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
12649  SmallVector<SDValue, 8> Ops(NumElts, Splatted);
12650  SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
12651  V->getValueType(0), Ops);
12652 
12653  // We may have jumped through bitcasts, so the type of the
12654  // BUILD_VECTOR may not match the type of the shuffle.
12655  if (V->getValueType(0) != VT)
12656  NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
12657  return NewBV;
12658  }
12659  }
12660 
12661  // There are various patterns used to build up a vector from smaller vectors,
12662  // subvectors, or elements. Scan chains of these and replace unused insertions
12663  // or components with undef.
12664  if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
12665  return S;
12666 
12667  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
12669  (N1.getOpcode() == ISD::UNDEF ||
12670  (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12671  N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
12672  SDValue V = partitionShuffleOfConcats(N, DAG);
12673 
12674  if (V.getNode())
12675  return V;
12676  }
12677 
12678  // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
12679  // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
12680  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
12682  for (int M : SVN->getMask()) {
12683  SDValue Op = DAG.getUNDEF(VT.getScalarType());
12684  if (M >= 0) {
12685  int Idx = M % NumElts;
12686  SDValue &S = (M < (int)NumElts ? N0 : N1);
12687  if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
12688  Op = S.getOperand(Idx);
12689  } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
12690  if (Idx == 0)
12691  Op = S.getOperand(0);
12692  } else {
12693  // Operand can't be combined - bail out.
12694  break;
12695  }
12696  }
12697  Ops.push_back(Op);
12698  }
12699  if (Ops.size() == VT.getVectorNumElements()) {
12700  // BUILD_VECTOR requires all inputs to be of the same type, find the
12701  // maximum type and extend them all.
12702  EVT SVT = VT.getScalarType();
12703  if (SVT.isInteger())
12704  for (SDValue &Op : Ops)
12705  SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
12706  if (SVT != VT.getScalarType())
12707  for (SDValue &Op : Ops)
12708  Op = TLI.isZExtFree(Op.getValueType(), SVT)
12709  ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
12710  : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
12711  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
12712  }
12713  }
12714 
12715  // If this shuffle only has a single input that is a bitcasted shuffle,
12716  // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
12717  // back to their original types.
12718  if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
12719  N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
12720  TLI.isTypeLegal(VT)) {
12721 
12722  // Peek through the bitcast only if there is one user.
12723  SDValue BC0 = N0;
12724  while (BC0.getOpcode() == ISD::BITCAST) {
12725  if (!BC0.hasOneUse())
12726  break;
12727  BC0 = BC0.getOperand(0);
12728  }
12729 
12730  auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
12731  if (Scale == 1)
12732  return SmallVector<int, 8>(Mask.begin(), Mask.end());
12733 
12734  SmallVector<int, 8> NewMask;
12735  for (int M : Mask)
12736  for (int s = 0; s != Scale; ++s)
12737  NewMask.push_back(M < 0 ? -1 : Scale * M + s);
12738  return NewMask;
12739  };
12740 
12741  if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
12742  EVT SVT = VT.getScalarType();
12743  EVT InnerVT = BC0->getValueType(0);
12744  EVT InnerSVT = InnerVT.getScalarType();
12745 
12746  // Determine which shuffle works with the smaller scalar type.
12747  EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
12748  EVT ScaleSVT = ScaleVT.getScalarType();
12749 
12750  if (TLI.isTypeLegal(ScaleVT) &&
12751  0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
12752  0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
12753 
12754  int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
12755  int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
12756 
12757  // Scale the shuffle masks to the smaller scalar type.
12758  ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
12759  SmallVector<int, 8> InnerMask =
12760  ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
12761  SmallVector<int, 8> OuterMask =
12762  ScaleShuffleMask(SVN->getMask(), OuterScale);
12763 
12764  // Merge the shuffle masks.
12765  SmallVector<int, 8> NewMask;
12766  for (int M : OuterMask)
12767  NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
12768 
12769  // Test for shuffle mask legality over both commutations.
12770  SDValue SV0 = BC0->getOperand(0);
12771  SDValue SV1 = BC0->getOperand(1);
12772  bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
12773  if (!LegalMask) {
12774  std::swap(SV0, SV1);
12776  LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
12777  }
12778 
12779  if (LegalMask) {
12780  SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
12781  SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
12782  return DAG.getNode(
12783  ISD::BITCAST, SDLoc(N), VT,
12784  DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
12785  }
12786  }
12787  }
12788  }
12789 
12790  // Canonicalize shuffles according to rules:
12791  // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
12792  // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
12793  // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
12794  if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
12796  TLI.isTypeLegal(VT)) {
12797  // The incoming shuffle must be of the same type as the result of the
12798  // current shuffle.
12799  assert(N1->getOperand(0).getValueType() == VT &&
12800  "Shuffle types don't match");
12801 
12802  SDValue SV0 = N1->getOperand(0);
12803  SDValue SV1 = N1->getOperand(1);
12804  bool HasSameOp0 = N0 == SV0;
12805  bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
12806  if (HasSameOp0 || IsSV1Undef || N0 == SV1)
12807  // Commute the operands of this shuffle so that next rule
12808  // will trigger.
12809  return DAG.getCommutedVectorShuffle(*SVN);
12810  }
12811 
12812  // Try to fold according to rules:
12813  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
12814  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
12815  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
12816  // Don't try to fold shuffles with illegal type.
12817  // Only fold if this shuffle is the only user of the other shuffle.
12818  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
12819  Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
12820  ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
12821 
12822  // The incoming shuffle must be of the same type as the result of the
12823  // current shuffle.
12824  assert(OtherSV->getOperand(0).getValueType() == VT &&
12825  "Shuffle types don't match");
12826 
12827  SDValue SV0, SV1;
12828  SmallVector<int, 4> Mask;
12829  // Compute the combined shuffle mask for a shuffle with SV0 as the first
12830  // operand, and SV1 as the second operand.
12831  for (unsigned i = 0; i != NumElts; ++i) {
12832  int Idx = SVN->getMaskElt(i);
12833  if (Idx < 0) {
12834  // Propagate Undef.
12835  Mask.push_back(Idx);
12836  continue;
12837  }
12838 
12839  SDValue CurrentVec;
12840  if (Idx < (int)NumElts) {
12841  // This shuffle index refers to the inner shuffle N0. Lookup the inner
12842  // shuffle mask to identify which vector is actually referenced.
12843  Idx = OtherSV->getMaskElt(Idx);
12844  if (Idx < 0) {
12845  // Propagate Undef.
12846  Mask.push_back(Idx);
12847  continue;
12848  }
12849 
12850  CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
12851  : OtherSV->getOperand(1);
12852  } else {
12853  // This shuffle index references an element within N1.
12854  CurrentVec = N1;
12855  }
12856 
12857  // Simple case where 'CurrentVec' is UNDEF.
12858  if (CurrentVec.getOpcode() == ISD::UNDEF) {
12859  Mask.push_back(-1);
12860  continue;
12861  }
12862 
12863  // Canonicalize the shuffle index. We don't know yet if CurrentVec
12864  // will be the first or second operand of the combined shuffle.
12865  Idx = Idx % NumElts;
12866  if (!SV0.getNode() || SV0 == CurrentVec) {
12867  // Ok. CurrentVec is the left hand side.
12868  // Update the mask accordingly.
12869  SV0 = CurrentVec;
12870  Mask.push_back(Idx);
12871  continue;
12872  }
12873 
12874  // Bail out if we cannot convert the shuffle pair into a single shuffle.
12875  if (SV1.getNode() && SV1 != CurrentVec)
12876  return SDValue();
12877 
12878  // Ok. CurrentVec is the right hand side.
12879  // Update the mask accordingly.
12880  SV1 = CurrentVec;
12881  Mask.push_back(Idx + NumElts);
12882  }
12883 
12884  // Check if all indices in Mask are Undef. In case, propagate Undef.
12885  bool isUndefMask = true;
12886  for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
12887  isUndefMask &= Mask[i] < 0;
12888 
12889  if (isUndefMask)
12890  return DAG.getUNDEF(VT);
12891 
12892  if (!SV0.getNode())
12893  SV0 = DAG.getUNDEF(VT);
12894  if (!SV1.getNode())
12895  SV1 = DAG.getUNDEF(VT);
12896 
12897  // Avoid introducing shuffles with illegal mask.
12898  if (!TLI.isShuffleMaskLegal(Mask, VT)) {
12900 
12901  if (!TLI.isShuffleMaskLegal(Mask, VT))
12902  return SDValue();
12903 
12904  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
12905  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
12906  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
12907  std::swap(SV0, SV1);
12908  }
12909 
12910  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
12911  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
12912  // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
12913  return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
12914  }
12915 
12916  return SDValue();
12917 }
12918 
12919 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
12920  SDValue InVal = N->getOperand(0);
12921  EVT VT = N->getValueType(0);
12922 
12923  // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
12924  // with a VECTOR_SHUFFLE.
12925  if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
12926  SDValue InVec = InVal->getOperand(0);
12927  SDValue EltNo = InVal->getOperand(1);
12928 
12929  // FIXME: We could support implicit truncation if the shuffle can be
12930  // scaled to a smaller vector scalar type.
12931  ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
12932  if (C0 && VT == InVec.getValueType() &&
12933  VT.getScalarType() == InVal.getValueType()) {
12934  SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
12935  int Elt = C0->getZExtValue();
12936  NewMask[0] = Elt;
12937 
12938  if (TLI.isShuffleMaskLegal(NewMask, VT))
12939  return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
12940  NewMask);
12941  }
12942  }
12943 
12944  return SDValue();
12945 }
12946 
12947 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
12948  SDValue N0 = N->getOperand(0);
12949  SDValue N2 = N->getOperand(2);
12950 
12951  // If the input vector is a concatenation, and the insert replaces
12952  // one of the halves, we can optimize into a single concat_vectors.
12953  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
12954  N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
12955  APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
12956  EVT VT = N->getValueType(0);
12957 
12958  // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
12959  // (concat_vectors Z, Y)
12960  if (InsIdx == 0)
12961  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
12962  N->getOperand(1), N0.getOperand(1));
12963 
12964  // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
12965  // (concat_vectors X, Z)
12966  if (InsIdx == VT.getVectorNumElements()/2)
12967  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
12968  N0.getOperand(0), N->getOperand(1));
12969  }
12970 
12971  return SDValue();
12972 }
12973 
12974 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
12975  SDValue N0 = N->getOperand(0);
12976 
12977  // fold (fp_to_fp16 (fp16_to_fp op)) -> op
12978  if (N0->getOpcode() == ISD::FP16_TO_FP)
12979  return N0->getOperand(0);
12980 
12981  return SDValue();
12982 }
12983 
12984 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
12985 /// with the destination vector and a zero vector.
12986 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
12987 /// vector_shuffle V, Zero, <0, 4, 2, 4>
12988 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
12989  EVT VT = N->getValueType(0);
12990  SDValue LHS = N->getOperand(0);
12991  SDValue RHS = N->getOperand(1);
12992  SDLoc dl(N);
12993 
12994  // Make sure we're not running after operation legalization where it
12995  // may have custom lowered the vector shuffles.
12996  if (LegalOperations)
12997  return SDValue();
12998 
12999  if (N->getOpcode() != ISD::AND)
13000  return SDValue();
13001 
13002  if (RHS.getOpcode() == ISD::BITCAST)
13003  RHS = RHS.getOperand(0);
13004 
13005  if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
13006  SmallVector<int, 8> Indices;
13007  unsigned NumElts = RHS.getNumOperands();
13008 
13009  for (unsigned i = 0; i != NumElts; ++i) {
13010  SDValue Elt = RHS.getOperand(i);
13011  if (isAllOnesConstant(Elt))
13012  Indices.push_back(i);
13013  else if (isNullConstant(Elt))
13014  Indices.push_back(NumElts+i);
13015  else
13016  return SDValue();
13017  }
13018 
13019  // Let's see if the target supports this vector_shuffle.
13020  EVT RVT = RHS.getValueType();
13021  if (!TLI.isVectorClearMaskLegal(Indices, RVT))
13022  return SDValue();
13023 
13024  // Return the new VECTOR_SHUFFLE node.
13025  EVT EltVT = RVT.getVectorElementType();
13027  DAG.getConstant(0, dl, EltVT));
13028  SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, RVT, ZeroOps);
13029  LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
13030  SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
13031  return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
13032  }
13033 
13034  return SDValue();
13035 }
13036 
13037 /// Visit a binary vector operation, like ADD.
13038 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
13039  assert(N->getValueType(0).isVector() &&
13040  "SimplifyVBinOp only works on vectors!");
13041 
13042  SDValue LHS = N->getOperand(0);
13043  SDValue RHS = N->getOperand(1);
13044 
13045  if (SDValue Shuffle = XformToShuffleWithZero(N))
13046  return Shuffle;
13047 
13048  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
13049  // this operation.
13050  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
13051  RHS.getOpcode() == ISD::BUILD_VECTOR) {
13052  // Check if both vectors are constants. If not bail out.
13053  if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
13054  cast<BuildVectorSDNode>(RHS)->isConstant()))
13055  return SDValue();
13056 
13058  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
13059  SDValue LHSOp = LHS.getOperand(i);
13060  SDValue RHSOp = RHS.getOperand(i);
13061 
13062  // Can't fold divide by zero.
13063  if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
13064  N->getOpcode() == ISD::FDIV) {
13065  if (isNullConstant(RHSOp) || (RHSOp.getOpcode() == ISD::ConstantFP &&
13066  cast<ConstantFPSDNode>(RHSOp.getNode())->isZero()))
13067  break;
13068  }
13069 
13070  EVT VT = LHSOp.getValueType();
13071  EVT RVT = RHSOp.getValueType();
13072  if (RVT != VT) {
13073  // Integer BUILD_VECTOR operands may have types larger than the element
13074  // size (e.g., when the element type is not legal). Prior to type
13075  // legalization, the types may not match between the two BUILD_VECTORS.
13076  // Truncate one of the operands to make them match.
13077  if (RVT.getSizeInBits() > VT.getSizeInBits()) {
13078  RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
13079  } else {
13080  LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
13081  VT = RVT;
13082  }
13083  }
13084  SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
13085  LHSOp, RHSOp);
13086  if (FoldOp.getOpcode() != ISD::UNDEF &&
13087  FoldOp.getOpcode() != ISD::Constant &&
13088  FoldOp.getOpcode() != ISD::ConstantFP)
13089  break;
13090  Ops.push_back(FoldOp);
13091  AddToWorklist(FoldOp.getNode());
13092  }
13093 
13094  if (Ops.size() == LHS.getNumOperands())
13095  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
13096  }
13097 
13098  // Type legalization might introduce new shuffles in the DAG.
13099  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
13100  // -> (shuffle (VBinOp (A, B)), Undef, Mask).
13101  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
13102  isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
13103  LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
13104  RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
13105  ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
13106  ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
13107 
13108  if (SVN0->getMask().equals(SVN1->getMask())) {
13109  EVT VT = N->getValueType(0);
13110  SDValue UndefVector = LHS.getOperand(1);
13111  SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
13112  LHS.getOperand(0), RHS.getOperand(0));
13113  AddUsersToWorklist(N);
13114  return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
13115  &SVN0->getMask()[0]);
13116  }
13117  }
13118 
13119  return SDValue();
13120 }
13121 
13122 SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
13123  SDValue N1, SDValue N2){
13124  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
13125 
13126  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
13127  cast<CondCodeSDNode>(N0.getOperand(2))->get());
13128 
13129  // If we got a simplified select_cc node back from SimplifySelectCC, then
13130  // break it down into a new SETCC node, and a new SELECT node, and then return
13131  // the SELECT node, since we were called with a SELECT node.
13132  if (SCC.getNode()) {
13133  // Check to see if we got a select_cc back (to turn into setcc/select).
13134  // Otherwise, just return whatever node we got back, like fabs.
13135  if (SCC.getOpcode() == ISD::SELECT_CC) {
13136  SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
13137  N0.getValueType(),
13138  SCC.getOperand(0), SCC.getOperand(1),
13139  SCC.getOperand(4));
13140  AddToWorklist(SETCC.getNode());
13141  return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
13142  SCC.getOperand(2), SCC.getOperand(3));
13143  }
13144 
13145  return SCC;
13146  }
13147  return SDValue();
13148 }
13149 
/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
/// being selected between, see if we can simplify the select. Callers of this
/// should assume that TheSelect is deleted if this returns true. As such, they
/// should return the appropriate thing (e.g. the node) back to the top-level of
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {

  // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
  // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
  if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
    if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
      // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
      SDValue Sqrt = RHS;
      ISD::CondCode CC;
      SDValue CmpLHS;
      const ConstantFPSDNode *NegZero = nullptr;

      // Extract the compare operands/predicate from either select form.
      if (TheSelect->getOpcode() == ISD::SELECT_CC) {
        CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
        CmpLHS = TheSelect->getOperand(0);
        NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
      } else {
        // SELECT or VSELECT
        SDValue Cmp = TheSelect->getOperand(0);
        if (Cmp.getOpcode() == ISD::SETCC) {
          CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
          CmpLHS = Cmp.getOperand(0);
          NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
        }
      }
      // NOTE: if the condition was not a SETCC above, CC is uninitialized,
      // but NegZero stays null so the short-circuit below never reads CC.
      if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
          Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
          CC == ISD::SETULT || CC == ISD::SETLT)) {
        // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
        CombineTo(TheSelect, Sqrt);
        return true;
      }
    }
  }
  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // FIXME: If either is a pre/post inc/dec load,
        // we'd need to split out the address adjustment.
        LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information. This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations. Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // The select-of-addresses node below must itself be legal.
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load. If so,
    // folding this will induce a cycle into the DAG. If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      // Only a used chain result (value 1) can feed the condition and thus
      // create a cycle; an unused chain is harmless.
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      // Same cycle check, but SELECT_CC has two condition operands.
      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must be the minimum (most restrictive) alignment of the
    // inputs.
    // NOTE: bitwise '&' on the two bools is intentional and equivalent to '&&'
    // here; the load is invariant only if both inputs are.
    bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
    unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      Load = DAG.getLoad(TheSelect->getValueType(0),
                         SDLoc(TheSelect),
                         // FIXME: Discards pointer and AA info.
                         LLD->getChain(), Addr, MachinePointerInfo(),
                         LLD->isVolatile(), LLD->isNonTemporal(),
                         isInvariant, Alignment);
    } else {
      // If one side is EXTLOAD (anyext), use the other side's stricter
      // extension kind; the match check above guarantees compatibility.
      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
                            RLD->getExtensionType() : LLD->getExtensionType(),
                            SDLoc(TheSelect),
                            TheSelect->getValueType(0),
                            // FIXME: Discards pointer and AA info.
                            LLD->getChain(), Addr, MachinePointerInfo(),
                            LLD->getMemoryVT(), LLD->isVolatile(),
                            LLD->isNonTemporal(), isInvariant, Alignment);
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain. We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}
13303 
13304 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
13305 /// where 'cond' is the comparison specified by CC.
13306 SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
13307  SDValue N2, SDValue N3,
13308  ISD::CondCode CC, bool NotExtCompare) {
13309  // (x ? y : y) -> y.
13310  if (N2 == N3) return N2;
13311 
13312  EVT VT = N2.getValueType();
13315 
13316  // Determine if the condition we're dealing with is constant
13318  N0, N1, CC, DL, false);
13319  if (SCC.getNode()) AddToWorklist(SCC.getNode());
13320 
13321  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
13322  // fold select_cc true, x, y -> x
13323  // fold select_cc false, x, y -> y
13324  return !SCCC->isNullValue() ? N2 : N3;
13325  }
13326 
13327  // Check to see if we can simplify the select into an fabs node
13328  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
13329  // Allow either -0.0 or 0.0
13330  if (CFP->isZero()) {
13331  // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
13332  if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
13333  N0 == N2 && N3.getOpcode() == ISD::FNEG &&
13334  N2 == N3.getOperand(0))
13335  return DAG.getNode(ISD::FABS, DL, VT, N0);
13336 
13337  // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
13338  if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
13339  N0 == N3 && N2.getOpcode() == ISD::FNEG &&
13340  N2.getOperand(0) == N3)
13341  return DAG.getNode(ISD::FABS, DL, VT, N3);
13342  }
13343  }
13344 
13345  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
13346  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
13347  // in it. This is a win when the constant is not otherwise available because
13348  // it replaces two constant pool loads with one. We only do this if the FP
13349  // type is known to be legal, because if it isn't, then we are before legalize
13350  // types an we want the other legalization to happen first (e.g. to avoid
13351  // messing with soft float) and if the ConstantFP is not legal, because if
13352  // it is legal, we may not need to store the FP constant in a constant pool.
13353  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
13354  if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
13355  if (TLI.isTypeLegal(N2.getValueType()) &&
13356  (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
13358  !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
13359  !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
13360  // If both constants have multiple uses, then we won't need to do an
13361  // extra load, they are likely around in registers for other users.
13362  (TV->hasOneUse() || FV->hasOneUse())) {
13363  Constant *Elts[] = {
13364  const_cast<ConstantFP*>(FV->getConstantFPValue()),
13365  const_cast<ConstantFP*>(TV->getConstantFPValue())
13366  };
13367  Type *FPTy = Elts[0]->getType();
13368  const DataLayout &TD = DAG.getDataLayout();
13369 
13370  // Create a ConstantArray of the two constants.
13371  Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
13372  SDValue CPIdx =
13373  DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
13374  TD.getPrefTypeAlignment(FPTy));
13375  unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
13376 
13377  // Get the offsets to the 0 and 1 element of the array so that we can
13378  // select between them.
13379  SDValue Zero = DAG.getIntPtrConstant(0, DL);
13380  unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
13381  SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
13382 
13383  SDValue Cond = DAG.getSetCC(DL,
13385  N0, N1, CC);
13386  AddToWorklist(Cond.getNode());
13387  SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
13388  Cond, One, Zero);
13389  AddToWorklist(CstOffset.getNode());
13390  CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
13391  CstOffset);
13392  AddToWorklist(CPIdx.getNode());
13393  return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
13395  false, false, Alignment);
13396  }
13397  }
13398 
13399  // Check to see if we can perform the "gzip trick", transforming
13400  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
13401  if (isNullConstant(N3) && CC == ISD::SETLT &&
13402  (isNullConstant(N1) || // (a < 0) ? b : 0
13403  (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
13404  EVT XType = N0.getValueType();
13405  EVT AType = N2.getValueType();
13406  if (XType.bitsGE(AType)) {
13407  // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
13408  // single-bit constant.
13409  if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
13410  unsigned ShCtV = N2C->getAPIntValue().logBase2();
13411  ShCtV = XType.getSizeInBits() - ShCtV - 1;
13412  SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
13414  SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
13415  XType, N0, ShCt);
13416  AddToWorklist(Shift.getNode());
13417 
13418  if (XType.bitsGT(AType)) {
13419  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
13420  AddToWorklist(Shift.getNode());
13421  }
13422 
13423  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
13424  }
13425 
13426  SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
13427  XType, N0,
13428  DAG.getConstant(XType.getSizeInBits() - 1,
13429  SDLoc(N0),
13431  AddToWorklist(Shift.getNode());
13432 
13433  if (XType.bitsGT(AType)) {
13434  Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
13435  AddToWorklist(Shift.getNode());
13436  }
13437 
13438  return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
13439  }
13440  }
13441 
13442  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
13443  // where y is has a single bit set.
13444  // A plaintext description would be, we can turn the SELECT_CC into an AND
13445  // when the condition can be materialized as an all-ones register. Any
13446  // single bit-test can be materialized as an all-ones register with
13447  // shift-left and shift-right-arith.
13448  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
13449  N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
13450  SDValue AndLHS = N0->getOperand(0);
13451  ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
13452  if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
13453  // Shift the tested bit over the sign bit.
13454  APInt AndMask = ConstAndRHS->getAPIntValue();
13455  SDValue ShlAmt =
13456  DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
13457  getShiftAmountTy(AndLHS.getValueType()));
13458  SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
13459 
13460  // Now arithmetic right shift it all the way over, so the result is either
13461  // all-ones, or zero.
13462  SDValue ShrAmt =
13463  DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
13465  SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
13466 
13467  return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
13468  }
13469  }
13470 
13471  // fold select C, 16, 0 -> shl C, 4
13472  if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
13473  TLI.getBooleanContents(N0.getValueType()) ==
13475 
13476  // If the caller doesn't want us to simplify this into a zext of a compare,
13477  // don't do it.
13478  if (NotExtCompare && N2C->isOne())
13479  return SDValue();
13480 
13481  // Get a SetCC of the condition
13482  // NOTE: Don't create a SETCC if it's not legal on this target.
13483  if (!LegalOperations ||
13485  LegalTypes ? getSetCCResultType(N0.getValueType()) : MVT::i1)) {
13486  SDValue Temp, SCC;
13487  // cast from setcc result type to select result type
13488  if (LegalTypes) {
13489  SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
13490  N0, N1, CC);
13491  if (N2.getValueType().bitsLT(SCC.getValueType()))
13492  Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
13493  N2.getValueType());
13494  else
13495  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
13496  N2.getValueType(), SCC);
13497  } else {
13498  SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
13499  Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
13500  N2.getValueType(), SCC);
13501  }
13502 
13503  AddToWorklist(SCC.getNode());
13504  AddToWorklist(Temp.getNode());
13505 
13506  if (N2C->isOne())
13507  return Temp;
13508 
13509  // shl setcc result by log2 n2c
13510  return DAG.getNode(
13511  ISD::SHL, DL, N2.getValueType(), Temp,
13512  DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
13513  getShiftAmountTy(Temp.getValueType())));
13514  }
13515  }
13516 
13517  // Check to see if this is the equivalent of setcc
13518  // FIXME: Turn all of these into setcc if setcc if setcc is legal
13519  // otherwise, go ahead with the folds.
13520  if (0 && isNullConstant(N3) && isOneConstant(N2)) {
13521  EVT XType = N0.getValueType();
13522  if (!LegalOperations ||
13524  SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC);
13525  if (Res.getValueType() != VT)
13526  Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
13527  return Res;
13528  }
13529 
13530  // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
13531  if (isNullConstant(N1) && CC == ISD::SETEQ &&
13532  (!LegalOperations ||
13533  TLI.isOperationLegal(ISD::CTLZ, XType))) {
13534  SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0);
13535  return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
13536  DAG.getConstant(Log2_32(XType.getSizeInBits()),
13537  SDLoc(Ctlz),
13538  getShiftAmountTy(Ctlz.getValueType())));
13539  }
13540  // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
13541  if (isNullConstant(N1) && CC == ISD::SETGT) {
13542  SDLoc DL(N0);
13543  SDValue NegN0 = DAG.getNode(ISD::SUB, DL,
13544  XType, DAG.getConstant(0, DL, XType), N0);
13545  SDValue NotN0 = DAG.getNOT(DL, N0, XType);
13546  return DAG.getNode(ISD::SRL, DL, XType,
13547  DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
13548  DAG.getConstant(XType.getSizeInBits() - 1, DL,
13549  getShiftAmountTy(XType)));
13550  }
13551  // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
13552  if (isAllOnesConstant(N1) && CC == ISD::SETGT) {
13553  SDLoc DL(N0);
13554  SDValue Sign = DAG.getNode(ISD::SRL, DL, XType, N0,
13555  DAG.getConstant(XType.getSizeInBits() - 1, DL,
13557  return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, DL,
13558  XType));
13559  }
13560  }
13561 
13562  // Check to see if this is an integer abs.
13563  // select_cc setg[te] X, 0, X, -X ->
13564  // select_cc setgt X, -1, X, -X ->
13565  // select_cc setl[te] X, 0, -X, X ->
13566  // select_cc setlt X, 1, -X, X ->
13567  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
13568  if (N1C) {
13569  ConstantSDNode *SubC = nullptr;
13570  if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
13571  (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
13572  N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
13573  SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
13574  else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
13575  (N1C->isOne() && CC == ISD::SETLT)) &&
13576  N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
13577  SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
13578 
13579  EVT XType = N0.getValueType();
13580  if (SubC && SubC->isNullValue() && XType.isInteger()) {
13581  SDLoc DL(N0);
13582  SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
13583  N0,
13584  DAG.getConstant(XType.getSizeInBits() - 1, DL,
13585  getShiftAmountTy(N0.getValueType())));
13586  SDValue Add = DAG.getNode(ISD::ADD, DL,
13587  XType, N0, Shift);
13588  AddToWorklist(Shift.getNode());
13589  AddToWorklist(Add.getNode());
13590  return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
13591  }
13592  }
13593 
13594  return SDValue();
13595 }
13596 
13597 /// This is a stub for TargetLowering::SimplifySetCC.
13598 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
13599  SDValue N1, ISD::CondCode Cond,
13600  SDLoc DL, bool foldBooleans) {
13602  DagCombineInfo(DAG, Level, false, this);
13603  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
13604 }
13605 
13606 /// Given an ISD::SDIV node expressing a divide by constant, return
13607 /// a DAG expression to select that will generate the same value by multiplying
13608 /// by a magic number.
13609 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
13610 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
13612  if (!C)
13613  return SDValue();
13614 
13615  // Avoid division by zero.
13616  if (C->isNullValue())
13617  return SDValue();
13618 
13619  std::vector<SDNode*> Built;
13620  SDValue S =
13621  TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
13622 
13623  for (SDNode *N : Built)
13624  AddToWorklist(N);
13625  return S;
13626 }
13627 
13628 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
13629 /// DAG expression that will generate the same value by right shifting.
13630 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
13632  if (!C)
13633  return SDValue();
13634 
13635  // Avoid division by zero.
13636  if (C->isNullValue())
13637  return SDValue();
13638 
13639  std::vector<SDNode *> Built;
13640  SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
13641 
13642  for (SDNode *N : Built)
13643  AddToWorklist(N);
13644  return S;
13645 }
13646 
13647 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
13648 /// expression that will generate the same value by multiplying by a magic
13649 /// number.
13650 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
13651 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
13653  if (!C)
13654  return SDValue();
13655 
13656  // Avoid division by zero.
13657  if (C->isNullValue())
13658  return SDValue();
13659 
13660  std::vector<SDNode*> Built;
13661  SDValue S =
13662  TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
13663 
13664  for (SDNode *N : Built)
13665  AddToWorklist(N);
13666  return S;
13667 }
13668 
13669 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) {
13670  if (Level >= AfterLegalizeDAG)
13671  return SDValue();
13672 
13673  // Expose the DAG combiner to the target combiner implementations.
13674  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
13675 
13676  unsigned Iterations = 0;
13677  if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
13678  if (Iterations) {
13679  // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
13680  // For the reciprocal, we need to find the zero of the function:
13681  // F(X) = A X - 1 [which has a zero at X = 1/A]
13682  // =>
13683  // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
13684  // does not require additional intermediate precision]
13685  EVT VT = Op.getValueType();
13686  SDLoc DL(Op);
13687  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
13688 
13689  AddToWorklist(Est.getNode());
13690 
13691  // Newton iterations: Est = Est + Est (1 - Arg * Est)
13692  for (unsigned i = 0; i < Iterations; ++i) {
13693  SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est);
13694  AddToWorklist(NewEst.getNode());
13695 
13696  NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst);
13697  AddToWorklist(NewEst.getNode());
13698 
13699  NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
13700  AddToWorklist(NewEst.getNode());
13701 
13702  Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst);
13703  AddToWorklist(Est.getNode());
13704  }
13705  }
13706  return Est;
13707  }
13708 
13709  return SDValue();
13710 }
13711 
13712 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
13713 /// For the reciprocal sqrt, we need to find the zero of the function:
13714 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
13715 /// =>
13716 /// X_{i+1} = X_i (1.5 - A X_i^2 / 2)
13717 /// As a result, we precompute A/2 prior to the iteration loop.
13718 SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
13719  unsigned Iterations) {
13720  EVT VT = Arg.getValueType();
13721  SDLoc DL(Arg);
13722  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
13723 
13724  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
13725  // this entire sequence requires only one FP constant.
13726  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg);
13727  AddToWorklist(HalfArg.getNode());
13728 
13729  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg);
13730  AddToWorklist(HalfArg.getNode());
13731 
13732  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
13733  for (unsigned i = 0; i < Iterations; ++i) {
13734  SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
13735  AddToWorklist(NewEst.getNode());
13736 
13737  NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst);
13738  AddToWorklist(NewEst.getNode());
13739 
13740  NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst);
13741  AddToWorklist(NewEst.getNode());
13742 
13743  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst);
13744  AddToWorklist(Est.getNode());
13745  }
13746  return Est;
13747 }
13748 
13749 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
13750 /// For the reciprocal sqrt, we need to find the zero of the function:
13751 /// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
13752 /// =>
13753 /// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
13754 SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
13755  unsigned Iterations) {
13756  EVT VT = Arg.getValueType();
13757  SDLoc DL(Arg);
13758  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
13759  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
13760 
13761  // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
13762  for (unsigned i = 0; i < Iterations; ++i) {
13763  SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf);
13764  AddToWorklist(HalfEst.getNode());
13765 
13766  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est);
13767  AddToWorklist(Est.getNode());
13768 
13769  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg);
13770  AddToWorklist(Est.getNode());
13771 
13772  Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree);
13773  AddToWorklist(Est.getNode());
13774 
13775  Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst);
13776  AddToWorklist(Est.getNode());
13777  }
13778  return Est;
13779 }
13780 
13781 SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) {
13782  if (Level >= AfterLegalizeDAG)
13783  return SDValue();
13784 
13785  // Expose the DAG combiner to the target combiner implementations.
13786  TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
13787  unsigned Iterations = 0;
13788  bool UseOneConstNR = false;
13789  if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
13790  AddToWorklist(Est.getNode());
13791  if (Iterations) {
13792  Est = UseOneConstNR ?
13793  BuildRsqrtNROneConst(Op, Est, Iterations) :
13794  BuildRsqrtNRTwoConst(Op, Est, Iterations);
13795  }
13796  return Est;
13797  }
13798 
13799  return SDValue();
13800 }
13801 
13802 /// Return true if base is a frame index, which is known not to alias with
13803 /// anything but itself. Provides base object and offset as results.
13804 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
13805  const GlobalValue *&GV, const void *&CV) {
13806  // Assume it is a primitive operation.
13807  Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
13808 
13809  // If it's an adding a simple constant then integrate the offset.
13810  if (Base.getOpcode() == ISD::ADD) {
13811  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
13812  Base = Base.getOperand(0);
13813  Offset += C->getZExtValue();
13814  }
13815  }
13816 
13817  // Return the underlying GlobalValue, and update the Offset. Return false
13818  // for GlobalAddressSDNode since the same GlobalAddress may be represented
13819  // by multiple nodes with different offsets.
13820  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
13821  GV = G->getGlobal();
13822  Offset += G->getOffset();
13823  return false;
13824  }
13825 
13826  // Return the underlying Constant value, and update the Offset. Return false
13827  // for ConstantSDNodes since the same constant pool entry may be represented
13828  // by multiple nodes with different offsets.
13829  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
13830  CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
13831  : (const void *)C->getConstVal();
13832  Offset += C->getOffset();
13833  return false;
13834  }
13835  // If it's any of the following then it can't alias with anything but itself.
13836  return isa<FrameIndexSDNode>(Base);
13837 }
13838 
13839 /// Return true if there is any possibility that the two addresses overlap.
13840 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
13841  // If they are the same then they must be aliases.
13842  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
13843 
13844  // If they are both volatile then they cannot be reordered.
13845  if (Op0->isVolatile() && Op1->isVolatile()) return true;
13846 
13847  // If one operation reads from invariant memory, and the other may store, they
13848  // cannot alias. These should really be checking the equivalent of mayWrite,
13849  // but it only matters for memory nodes other than load /store.
13850  if (Op0->isInvariant() && Op1->writeMem())
13851  return false;
13852 
13853  if (Op1->isInvariant() && Op0->writeMem())
13854  return false;
13855 
13856  // Gather base node and offset information.
13857  SDValue Base1, Base2;
13858  int64_t Offset1, Offset2;
13859  const GlobalValue *GV1, *GV2;
13860  const void *CV1, *CV2;
13861  bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
13862  Base1, Offset1, GV1, CV1);
13863  bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
13864  Base2, Offset2, GV2, CV2);
13865 
13866  // If they have a same base address then check to see if they overlap.
13867  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
13868  return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
13869  (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
13870 
13871  // It is possible for different frame indices to alias each other, mostly
13872  // when tail call optimization reuses return address slots for arguments.
13873  // To catch this case, look up the actual index of frame indices to compute
13874  // the real alias relationship.
13875  if (isFrameIndex1 && isFrameIndex2) {
13877  Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
13878  Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
13879  return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
13880  (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
13881  }
13882 
13883  // Otherwise, if we know what the bases are, and they aren't identical, then
13884  // we know they cannot alias.
13885  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
13886  return false;
13887 
13888  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
13889  // compared to the size and offset of the access, we may be able to prove they
13890  // do not alias. This check is conservative for now to catch cases created by
13891  // splitting vector types.
13892  if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
13893  (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
13894  (Op0->getMemoryVT().getSizeInBits() >> 3 ==
13895  Op1->getMemoryVT().getSizeInBits() >> 3) &&
13896  (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) {
13897  int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
13898  int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
13899 
13900  // There is no overlap between these relatively aligned accesses of similar
13901  // size, return no alias.
13902  if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
13903  (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
13904  return false;
13905  }
13906 
13907  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
13908  ? CombinerGlobalAA
13909  : DAG.getSubtarget().useAA();
13910 #ifndef NDEBUG
13911  if (CombinerAAOnlyFunc.getNumOccurrences() &&
13912  CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
13913  UseAA = false;
13914 #endif
13915  if (UseAA &&
13916  Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
13917  // Use alias analysis information.
13918  int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
13919  Op1->getSrcValueOffset());
13920  int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
13921  Op0->getSrcValueOffset() - MinOffset;
13922  int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
13923  Op1->getSrcValueOffset() - MinOffset;
13924  AliasResult AAResult =
13925  AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
13926  UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
13927  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
13928  UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
13929  if (AAResult == NoAlias)
13930  return false;
13931  }
13932 
13933  // Otherwise we have to assume they alias.
13934  return true;
13935 }
13936 
13937 /// Walk up chain skipping non-aliasing memory nodes,
13938 /// looking for aliasing nodes and adding them to the Aliases vector.
13939 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
13940  SmallVectorImpl<SDValue> &Aliases) {
13941  SmallVector<SDValue, 8> Chains; // List of chains to visit.
13942  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
13943 
13944  // Get alias information for node.
13945  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
13946 
13947  // Starting off.
13948  Chains.push_back(OriginalChain);
13949  unsigned Depth = 0;
13950 
13951  // Look at each chain and determine if it is an alias. If so, add it to the
13952  // aliases list. If not, then continue up the chain looking for the next
13953  // candidate.
13954  while (!Chains.empty()) {
13955  SDValue Chain = Chains.pop_back_val();
13956 
13957  // For TokenFactor nodes, look at each operand and only continue up the
13958  // chain until we find two aliases. If we've seen two aliases, assume we'll
13959  // find more and revert to original chain since the xform is unlikely to be
13960  // profitable.
13961  //
13962  // FIXME: The depth check could be made to return the last non-aliasing
13963  // chain we found before we hit a tokenfactor rather than the original
13964  // chain.
13965  if (Depth > 6 || Aliases.size() == 2) {
13966  Aliases.clear();
13967  Aliases.push_back(OriginalChain);
13968  return;
13969  }
13970 
13971  // Don't bother if we've been before.
13972  if (!Visited.insert(Chain.getNode()).second)
13973  continue;
13974 
13975  switch (Chain.getOpcode()) {
13976  case ISD::EntryToken:
13977  // Entry token is ideal chain operand, but handled in FindBetterChain.
13978  break;
13979 
13980  case ISD::LOAD:
13981  case ISD::STORE: {
13982  // Get alias information for Chain.
13983  bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
13984  !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
13985 
13986  // If chain is alias then stop here.
13987  if (!(IsLoad && IsOpLoad) &&
13988  isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
13989  Aliases.push_back(Chain);
13990  } else {
13991  // Look further up the chain.
13992  Chains.push_back(Chain.getOperand(0));
13993  ++Depth;
13994  }
13995  break;
13996  }
13997 
13998  case ISD::TokenFactor:
13999  // We have to check each of the operands of the token factor for "small"
14000  // token factors, so we queue them up. Adding the operands to the queue
14001  // (stack) in reverse order maintains the original order and increases the
14002  // likelihood that getNode will find a matching token factor (CSE.)
14003  if (Chain.getNumOperands() > 16) {
14004  Aliases.push_back(Chain);
14005  break;
14006  }
14007  for (unsigned n = Chain.getNumOperands(); n;)
14008  Chains.push_back(Chain.getOperand(--n));
14009  ++Depth;
14010  break;
14011 
14012  default:
14013  // For all other instructions we will just have to take what we can get.
14014  Aliases.push_back(Chain);
14015  break;
14016  }
14017  }
14018 
14019  // We need to be careful here to also search for aliases through the
14020  // value operand of a store, etc. Consider the following situation:
14021  // Token1 = ...
14022  // L1 = load Token1, %52
14023  // S1 = store Token1, L1, %51
14024  // L2 = load Token1, %52+8
14025  // S2 = store Token1, L2, %51+8
14026  // Token2 = Token(S1, S2)
14027  // L3 = load Token2, %53
14028  // S3 = store Token2, L3, %52
14029  // L4 = load Token2, %53+8
14030  // S4 = store Token2, L4, %52+8
14031  // If we search for aliases of S3 (which loads address %52), and we look
14032  // only through the chain, then we'll miss the trivial dependence on L1
14033  // (which also loads from %52). We then might change all loads and
14034  // stores to use Token1 as their chain operand, which could result in
14035  // copying %53 into %52 before copying %52 into %51 (which should
14036  // happen first).
14037  //
14038  // The problem is, however, that searching for such data dependencies
14039  // can become expensive, and the cost is not directly related to the
14040  // chain depth. Instead, we'll rule out such configurations here by
14041  // insisting that we've visited all chain users (except for users
14042  // of the original chain, which is not necessary). When doing this,
14043  // we need to look through nodes we don't care about (otherwise, things
14044  // like register copies will interfere with trivial cases).
14045 
14047  for (const SDNode *N : Visited)
14048  if (N != OriginalChain.getNode())
14049  Worklist.push_back(N);
14050 
14051  while (!Worklist.empty()) {
14052  const SDNode *M = Worklist.pop_back_val();
14053 
14054  // We have already visited M, and want to make sure we've visited any uses
14055  // of M that we care about. For uses that we've not visisted, and don't
14056  // care about, queue them to the worklist.
14057 
14058  for (SDNode::use_iterator UI = M->use_begin(),
14059  UIE = M->use_end(); UI != UIE; ++UI)
14060  if (UI.getUse().getValueType() == MVT::Other &&
14061  Visited.insert(*UI).second) {
14062  if (isa<MemSDNode>(*UI)) {
14063  // We've not visited this use, and we care about it (it could have an
14064  // ordering dependency with the original node).
14065  Aliases.clear();
14066  Aliases.push_back(OriginalChain);
14067  return;
14068  }
14069 
14070  // We've not visited this use, but we don't care about it. Mark it as
14071  // visited and enqueue it to the worklist.
14072  Worklist.push_back(*UI);
14073  }
14074  }
14075 }
14076 
14077 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
14078 /// (aliasing node.)
14079 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
14080  SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
14081 
14082  // Accumulate all the aliases to this node.
14083  GatherAllAliases(N, OldChain, Aliases);
14084 
14085  // If no operands then chain to entry token.
14086  if (Aliases.size() == 0)
14087  return DAG.getEntryNode();
14088 
14089  // If a single operand then chain to it. We don't need to revisit it.
14090  if (Aliases.size() == 1)
14091  return Aliases[0];
14092 
14093  // Construct a custom tailored token factor.
14094  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
14095 }
14096 
14097 /// This is the entry point for the file.
14099  CodeGenOpt::Level OptLevel) {
14100  /// This is the main entry point to this class.
14101  DAGCombiner(*this, AA, OptLevel).Run(Level);
14102 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, bool isNonTemporal, bool isVolatile, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:477
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:450
unsigned getValueSizeInBits(unsigned ResNo) const
Returns MVT::getSizeInBits(getValueType(ResNo)).
bool use_empty() const
Return true if there are no uses of this node.
opStatus divide(const APFloat &, roundingMode)
Definition: APFloat.cpp:1709
static MVT getIntegerVT(unsigned BitWidth)
std::string & operator+=(std::string &buffer, StringRef string)
Definition: StringRef.h:557
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:724
The memory access reads data.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:104
SDValue getValue(unsigned R) const
const SDValue & getValue() const
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG)
static APInt getSignBit(unsigned BitWidth)
Get the SignBit for a specific bit width.
Definition: APInt.h:446
The memory access writes data.
bool isTruncatingStore() const
Return true if the op does a truncation before store.
bool isExactlyValue(double V) const
We don't rely on operator== working on double values, as it returns true for things that are clearly ...
static bool areSlicesNextToEachOther(const LoadedSlice &First, const LoadedSlice &Second)
Check whether or not First and Second are next to each other in memory.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:453
LLVMContext * getContext() const
Definition: SelectionDAG.h:289
bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef...
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1327
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
SmallBitVector - This is a 'bitvector' (really, a variable-sized bit array), optimized for the case w...
void dump() const
Dump this node, for debugging.
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:42
STATISTIC(NumFunctions,"Total number of functions")
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR (a vector value) starting with the ...
Definition: ISDOpcodes.h:292
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:554
bool isNON_TRUNCStore(const SDNode *N)
Returns true if the specified node is a non-truncating store.
bool none() const
none - Returns true if none of the bits are set.
Definition: BitVector.h:150
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize)
bool hasOneUse() const
Return true if there is exactly one use of this node.
virtual bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, unsigned AddrSpace) const
Return true if it is expected to be cheaper to do a store of a non-zero vector constant with the give...
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:301
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDVTList getVTList() const
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask)
Match "(X shl/srl V1) & V2" where V2 may not be present.
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:210
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:285
bool isExtended() const
isExtended - Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:100
unsigned getPrefTypeAlignment(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:684
Clients of various APIs that cause global effects on the DAG can optionally implement this interface...
Definition: SelectionDAG.h:225
iterator end() const
Definition: ArrayRef.h:123
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:324
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:333
static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, SDValue V, SelectionDAG &DAG)
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:286
unsigned InferPtrAlignment(SDValue Ptr) const
Infer alignment of a load / store address.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in the KnownZero/KnownO...
std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
CondCode getSetCCAndOperation(CondCode Op1, CondCode Op2, bool isInteger)
getSetCCAndOperation - Return the result of a logical AND between different comparisons of identical ...
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:531
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:228
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
const SDValue & getSrc0() const
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
unsigned getSizeInBits() const
opStatus
IEEE-754R 7: Default exception handling.
Definition: APFloat.h:166
SDValue getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
SDValue getZeroExtendInReg(SDValue Op, SDLoc DL, EVT SrcTy)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
static const fltSemantics & EVTToAPFloatSemantics(EVT VT)
Returns an APFloat semantics tag appropriate for the given type.
bool isMask(unsigned numBits, const APInt &APIVal)
Definition: APInt.h:1767
unsigned getNumOperands() const
Return the number of values used by this operation.
void setAllBits()
Set every bit to 1.
Definition: APInt.h:1221
const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, const TargetRegisterClass *B) const
getCommonSubClass - find the largest common subclass of A and B.
unsigned getNumOperands() const
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
virtual bool isZExtFree(Type *, Type *) const
Return true if any actual instruction that defines a value of type Ty1 implicitly zero-extends the va...
The two locations do not alias at all.
Definition: AliasAnalysis.h:78
const SDValue & getOperand(unsigned Num) const
F(f)
const Function * getFunction() const
getFunction - Return the LLVM function that this machine code represents
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:423
static MachinePointerInfo getConstantPool()
getConstantPool - Return a MachinePointerInfo record that refers to the constant pool.
static ConstantSDNode * isConstOrConstSplat(SDValue N)
static bool isCommutativeBinOp(unsigned Opcode)
Returns true if the opcode is a commutative binary operation.
iv Induction Variable Users
Definition: IVUsers.cpp:43
void reserve(size_type N)
Definition: SmallVector.h:401
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV)
Return true if base is a frame index, which is known not to alias with anything but itself...
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
const SDValue & getBasePtr() const
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1 at the ...
Definition: ISDOpcodes.h:287
SDValue getConstantPool(const Constant *C, EVT VT, unsigned Align=0, int Offs=0, bool isT=false, unsigned char TargetFlags=0)
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1015
SDValue BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector< SDNode * > *Created) const
Given an ISD::UDIV node expressing a divide by constant, return a DAG expression to select that will ...
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:178
static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair< unsigned, unsigned > &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC)
Check to see if IVal is something that provides a value as specified by MaskInfo. ...
virtual bool isFPImmLegal(const APFloat &, EVT) const
Returns true if the target can instruction select the specified FP immediate natively.
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:64
EntryToken - This is the marker used to indicate the start of a region.
Definition: ISDOpcodes.h:45
static bool isOneConstant(SDValue V)
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:357
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:39
unsigned getResNo() const
get the index which selects a specific result in the SDNode
void ReplaceAllUsesOfValuesWith(const SDValue *From, const SDValue *To, unsigned Num)
Like ReplaceAllUsesOfValueWith, but for multiple values at once.
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:189
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, unsigned Depth=0) const
Look at Op.
int64_t getSrcValueOffset() const
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
bool isAllOnesValue() const
static bool isAllOnesConstant(SDValue V)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool operator<=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:330
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:200
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:319
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:115
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:41
SDValue getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
bool isNegative() const
Return true if the value is negative.
MachineMemOperand - A description of a memory reference used in the backend.
bool isRound() const
isRound - Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:165
bool operator>=(int64_t V1, const APSInt &V2)
Definition: APSInt.h:331
Shift and rotation operations.
Definition: ISDOpcodes.h:332
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:407
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
Definition: SmallVector.h:406
static cl::opt< bool > Aggressive("aggressive-ext-opt", cl::Hidden, cl::desc("Aggressive extension optimization"))
Base class for LoadSDNode and StoreSDNode.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:98
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef...
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:69
bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist) const
Return true if N is a predecessor of this node.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:283
bool HonorSignDependentRoundingFPMath() const
HonorSignDependentRoundingFPMath - Return true if the codegen must assume that the rounding mode of t...
APInt LLVM_ATTRIBUTE_UNUSED_RESULT lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.cpp:1142
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APInt.h:33
CopyToReg - This node has three operands: a chain, a register number to set to this value...
Definition: ISDOpcodes.h:161
unsigned logBase2(const APInt &APIVal)
Returns the floor log base 2 of the specified APInt value.
Definition: APInt.h:1782
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
MachinePointerInfo getWithOffset(int64_t O) const
SimpleValueType SimpleTy
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:210
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
Number of individual test Apply this number of consecutive mutations to each input exit after the first new interesting input is found the minimized corpus is saved into the first input directory Number of jobs to run If min(jobs, NumberOfCpuCores()/2)\" is used.") FUZZER_FLAG_INT(reload
bool bitsGE(EVT VT) const
bitsGE - Return true if this has no less bits than VT.
Definition: ValueTypes.h:183
int getMaskElt(unsigned Idx) const
This class is used to represent EVT's, which are used to parameterize some operations.
#define false
Definition: ConvertUTF.c:65
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, SDLoc dl) const
Try to simplify a setcc built with the specified operands and cc.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
ELFYAML::ELF_STO Other
Definition: ELFYAML.cpp:591
#define G(x, y, z)
Definition: MD5.cpp:52
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:110
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:400
This class is used to represent an MSTORE node.
APInt lshr(const APInt &LHS, unsigned shiftAmt)
Logical right-shift function.
Definition: APInt.h:1840
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:216
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector...
load Combine Adjacent Loads
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:393
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:102
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:57
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:351
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:191
SDValue getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *MaskElts)
Return an ISD::VECTOR_SHUFFLE node.
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:59
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: ArrayRef.h:31
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1297
static ConstantFPSDNode * isConstOrConstSplatFP(SDValue N)
static unsigned int semanticsPrecision(const fltSemantics &)
Definition: APFloat.cpp:846
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:217
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:659
static bool isConstantSplatVector(SDNode *N, APInt &SplatValue)
Returns true if N is a BUILD_VECTOR node whose elements are all the same constant or undefined...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:804
const APInt & getAPIntValue() const
APInt LLVM_ATTRIBUTE_UNUSED_RESULT shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:868
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification, or lowering of the constant.
Definition: ISDOpcodes.h:116
virtual bool isDesirableToTransformToIntegerOp(unsigned, EVT) const
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
EVT getMemoryVT() const
Return the type of the in-memory value.
const ConstantInt * getConstantIntValue() const
bool isSignedIntSetCC(CondCode Code)
isSignedIntSetCC - Return true if this is a setcc instruction that performs a signed comparison when ...
Definition: ISDOpcodes.h:837
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:134
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:284
bool bitsLE(EVT VT) const
bitsLE - Return true if this has no more bits than VT.
Definition: ValueTypes.h:195
bool isPow2VectorType() const
isPow2VectorType - Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:291
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, APInt &KnownZero)
UNDEF - An undefined node.
Definition: ISDOpcodes.h:169
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:436
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:262
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Get a value with high bits set.
Definition: APInt.h:513
SDNode * getNode() const
get the SDNode which holds the desired result
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
A self-contained host- and target-independent arbitrary-precision floating-point software implementat...
Definition: APFloat.h:122
bool hasPredecessor(const SDNode *N) const
Return true if N is a predecessor of this node.
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:239
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:245
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl< SDNode * > &ExtendNodes, const TargetLowering &TLI)
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:325
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned getStoreSizeInBits() const
getStoreSizeInBits - Return the number of bits overwritten by a store of the specified value type...
Definition: ValueTypes.h:251
FPOpFusion::FPOpFusionMode AllowFPOpFusion
AllowFPOpFusion - This flag is set by the -fuse-fp-ops=xxx option.
virtual bool isShuffleMaskLegal(const SmallVectorImpl< int > &, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations, those with specific masks.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
const SDValue & getBasePtr() const
bool isZero() const
Return true if the value is positive or negative zero.
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:109
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:221
APInt LLVM_ATTRIBUTE_UNUSED_RESULT trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:932
MVT - Machine Value Type.
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG)
const SDValue & getOperand(unsigned i) const
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:41
static volatile int One
Definition: InfiniteTest.cpp:9
Simple binary floating point operators.
Definition: ISDOpcodes.h:237
bool isNonTemporal() const
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel)
This iterates over the nodes in the SelectionDAG, folding certain types of nodes together, or eliminating superfluous nodes.
This is an important base class in LLVM.
Definition: Constant.h:41
static SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N)
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:780
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1339
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:267
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
APInt Or(const APInt &LHS, const APInt &RHS)
Bitwise OR function for APInt.
Definition: APInt.h:1895
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:873
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:233
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:264
unsigned getScalarValueSizeInBits() const
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
CombineLevel
Definition: DAGCombine.h:16
APInt LLVM_ATTRIBUTE_UNUSED_RESULT sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:955
AliasResult
The possible results of an alias query.
Definition: AliasAnalysis.h:72
APInt getLoBits(unsigned numBits) const
Compute an APInt containing numBits lowbits from this APInt.
Definition: APInt.cpp:689
virtual bool isTruncateFree(Type *, Type *) const
Return true if it's free to truncate a value of type Ty1 to type Ty2.
This class provides iterator support for SDUse operands that use a specific SDNode.
unsigned getOriginalAlignment() const
Returns alignment and volatility of the memory access.
SDValue getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang","erlang-compatible garbage collector")
static EVT getFloatingPointVT(unsigned BitWidth)
getFloatingPointVT - Returns the EVT that represents a floating point type with the given number of b...
Definition: ValueTypes.h:55
EVT getVT() const
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1273
iterator begin() const
Definition: ArrayRef.h:122
static bool allowableAlignment(const SelectionDAG &DAG, const TargetLowering &TLI, EVT EVTTy, unsigned AS, unsigned Align)
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1137
unsigned getOpcode() const
const SDValue & getBasePtr() const
virtual bool isNarrowingProfitable(EVT, EVT) const
Return true if it's profitable to narrow operations of type VT1 to VT2.
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:57
void changeSign()
Definition: APFloat.cpp:1623
virtual SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps) const
Return a reciprocal estimate value for the input operand.
CondCode getSetCCSwappedOperands(CondCode Operation)
getSetCCSwappedOperands - Return the operation corresponding to (Y op X) when given the operation for...
unsigned countPopulation() const
Count the number of bits set.
Definition: APInt.h:1415
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isVolatile() const
const SDValue & getValue() const
SDValue getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const AAMDNodes &AAInfo=AAMDNodes())
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:338
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector< SDNode * > *Created) const
Given an ISD::SDIV node expressing a divide by constant, return a DAG expression to select that will ...
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:468
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:416
EVT - Extended Value Type.
Definition: ValueTypes.h:31
uint64_t NextPowerOf2(uint64_t A)
NextPowerOf2 - Returns the next power of two (in 64-bits) that is strictly greater than A...
Definition: MathExtras.h:582
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, SelectionDAG &DAG, bool LegalOperations, bool LegalTypes)
const SDValue & getMask() const
const APFloat & getValueAPF() const
bool bitsEq(EVT VT) const
bitsEq - Return true if this has the same number of bits as VT.
Definition: ValueTypes.h:171
const ConstantFP * getConstantFPValue() const
static bool areUsedBitsDense(const APInt &UsedBits)
Check that all bits set in UsedBits form a dense region, i.e., UsedBits looks like 0...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
static void adjustCostForPairing(SmallVectorImpl< LoadedSlice > &LoadedSlices, LoadedSlice::Cost &GlobalLSCost)
Adjust the GlobalLSCost according to the target paring capabilities and the layout of the slices...
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
MachinePointerInfo - This class contains a discriminated union of information about pointers in memor...
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:386
HANDLENODE node - Used as a handle for various purposes.
Definition: ISDOpcodes.h:627
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:670
static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, unsigned Depth=0)
Return 1 if we can compute the negated form of the specified expression for the same cost as the expr...
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
const MachinePointerInfo & getPointerInfo() const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
const SDValue & getOffset() const
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:177
SmallBitVector & reset()
ArrayRef< int > getMask() const
static void commuteMask(SmallVectorImpl< int > &Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position...
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.cpp:749
Representation for a specific memory location.
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:674
bool isBuildVectorAllOnes(const SDNode *N)
Node predicates.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI)
Return true if 'Use' is a load or a store that uses N as its base pointer and that N may be folded in...
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:217
SDValue getNOT(SDLoc DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
virtual bool hasPairedLoad(Type *, unsigned &) const
Return true if the target supplies and combines to a paired load two loaded values of type LoadedType...
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:299
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector< SDNode * > *Created) const
bool isNegative() const
IEEE-754R isSignMinus: Returns true if and only if the current value is negative. ...
Definition: APFloat.h:399
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:273
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:388
bool isInvariant() const
virtual bool isVectorClearMaskLegal(const SmallVectorImpl< int > &, EVT) const
Similar to isShuffleMaskLegal.
static bool isNullFPConstant(SDValue V)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:179
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:465
unsigned logBase2() const
Definition: APInt.h:1521
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:222
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
bool allOperandsUndef(const SDNode *N)
Return true if the node has at least one operand and all operands of the specified node are ISD::UNDE...
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:548
const SDValue & getChain() const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:548
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:335
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:500
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
unsigned getAddrSpace() const
getAddrSpace - Return the LLVM IR address space number that this pointer points into.
MachineFrameInfo * getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Represents one node in the SelectionDAG.
static SDNode * tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SelectionDAG &DAG, bool LegalTypes, bool LegalOperations)
Try to fold a sext/zext/aext dag node into a ConstantSDNode or a build_vector of constants.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
getSetCCInverse - Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operat...
static cl::opt< AlignMode > Align(cl::desc("Load/store alignment support"), cl::Hidden, cl::init(NoStrictAlign), cl::values(clEnumValN(StrictAlign,"aarch64-strict-align","Disallow all unaligned memory accesses"), clEnumValN(NoStrictAlign,"aarch64-no-strict-align","Allow unaligned memory accesses"), clEnumValEnd))
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
static std::pair< SDValue, SDValue > SplitVSETCC(const SDNode *N, SelectionDAG &DAG)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:123
unsigned Log2_32(uint32_t Value)
Log2_32 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:468
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:576
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.h:545
const SDValue & getValue() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Class for arbitrary precision integers.
Definition: APInt.h:73
const Value * getValue() const
getValue - Return the base address of the memory access.
bool isBuildVectorOfConstantFPSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantFPSDNode or undef...
iterator_range< use_iterator > uses()
static SDNode * getBuildPairElt(SDNode *N, unsigned i)
A "pseudo-class" with methods for operating on BUILD_VECTORs.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:342
int64_t getSExtValue() const
op_iterator op_begin() const
static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, SDValue N1, SelectionDAG &DAG)
virtual const TargetRegisterClass * getRegClassFor(MVT VT) const
Return the register class that should be used for the specified value type.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth=0)
If isNegatibleForFree returns true, return the newly negated expression.
static use_iterator use_end()
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:383
AddrMode
ARM Addressing Modes.
Definition: ARMBaseInfo.h:235
bool isPowerOf2_64(uint64_t Value)
isPowerOf2_64 - This function returns true if the argument is a power of two 0 (64 bit edition...
Definition: MathExtras.h:360
APInt bitcastToAPInt() const
Definition: APFloat.cpp:3084
LLVM_ATTRIBUTE_UNUSED_RESULT std::enable_if< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:285
SDValue getNode(unsigned Opcode, SDLoc DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:386
static std::pair< unsigned, unsigned > CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain)
Check to see if V is (and load (ptr), imm), where the load is having specific bytes cleared out...
CondCode getSetCCOrOperation(CondCode Op1, CondCode Op2, bool isInteger)
getSetCCOrOperation - Return the result of a logical OR between different comparisons of identical va...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:250
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG)
iterator_range< value_op_iterator > op_values() const
APInt And(const APInt &LHS, const APInt &RHS)
Bitwise AND function for APInt.
Definition: APInt.h:1890
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:498
unsigned getAddressSpace() const
Return the address space for the associated pointer.
const SDValue & getMask() const
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:1736
bool isAllOnesValue() const
Determine if all bits are set.
Definition: APInt.h:337
uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:552
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:697
Represents a use of a SDNode.
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:321
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:217
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:196
static bool isNullConstant(SDValue V)
static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, SelectionDAG &DAG)
Generate Min/Max node.
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:401
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
load combine
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:518
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:233
void ReplaceAllUsesWith(SDValue From, SDValue Op)
Modify anything using 'From' to use 'To' instead.
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
ArrayRef< SDUse > ops() const
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
op_iterator op_end() const
static ArrayType * get(Type *ElementType, uint64_t NumElements)
ArrayType::get - This static method is the primary way to construct an ArrayType. ...
Definition: Type.cpp:686
static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG)
Same for multiplication.
Definition: ISDOpcodes.h:234
static SDValue getInputChainForNode(SDNode *N)
Given a node, return its input chain if it has one, otherwise return a null sd operand.
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
virtual SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, unsigned &RefinementSteps, bool &UseOneConstNR) const
Hooks for building estimates in place of slower divisions and square roots.
virtual bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
This class is used to represent an MSCATTER node.
const SDValue & getIndex() const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool all_of(R &&Range, UnaryPredicate &&P)
Provide wrappers to std::all_of which take ranges instead of having to pass being/end explicitly...
Definition: STLExtras.h:334
SDValue getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isTarget=false, bool isOpaque=false)
bool isByteSized() const
isByteSized - Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:160
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:279
SDValue getSelect(SDLoc DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
Definition: SelectionDAG.h:739
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:105
bool writeMem() const
This class is used to form a handle around another node that is persistent and is updated across invo...
This class is used to represent an MLOAD node.
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:94
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:332
static ConstantSDNode * getAsNonOpaqueConstant(SDValue N)
If N is a ContantSDNode with isOpaque() == false return it casted to a ContantSDNode pointer else nul...
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
SDValue getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
LLVM Value Representation.
Definition: Value.h:69
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:240
static bool isSlicingProfitable(SmallVectorImpl< LoadedSlice > &LoadedSlices, const APInt &UsedBits, bool ForCodeSize)
Check the profitability of all involved LoadedSlice.
const SDValue & getBasePtr() const
A vector that has set insertion semantics.
Definition: SetVector.h:37
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MGATHER node.
Disable implicit floating point insts.
Definition: Attributes.h:87
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:287
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
#define DEBUG(X)
Definition: Debug.h:92
bool isInt(int64_t x)
isInt - Checks if an integer fits into the given bit width.
Definition: MathExtras.h:263
unsigned countLeadingZeros() const
The APInt version of the countLeadingZeros functions in MathExtras.h.
Definition: APInt.h:1361
bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:354
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:244
APInt LLVM_ATTRIBUTE_UNUSED_RESULT zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:996
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:365
static SDNode * isConstantFPBuildVectorOrConstantFP(SDValue N)
SDValue getConstantFP(double Val, SDLoc DL, EVT VT, bool isTarget=false)
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml","ocaml 3.10-compatible collector")
SDValue getSetCC(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
Definition: SelectionDAG.h:726
bool operator==(uint64_t V1, const APInt &V2)
Definition: APInt.h:1734
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:188
static bool isVolatile(Instruction *Inst)
Conversion operators.
Definition: ISDOpcodes.h:380
static APInt getNullValue(unsigned numBits)
Get the '0' value.
Definition: APInt.h:460
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:338
SDNode * getUser()
This returns the SDNode that contains this Use.
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:389
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
bool isUIntN(unsigned N, uint64_t x)
isUIntN - Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:315
unsigned getAlignment() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
bool isBigEndian() const
Definition: DataLayout.h:218
MVT getSimpleValueType(unsigned ResNo) const
Return the type of a specified result as a simple type.
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:506
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
getIntegerVT - Returns the EVT that represents an integer with the given number of bits...
Definition: ValueTypes.h:61
const fltSemantics & getSemantics() const
Definition: APFloat.h:435
unsigned Log2_64(uint64_t Value)
Log2_64 - This function returns the floor log base 2 of the specified value, -1 if the value is zero...
Definition: MathExtras.h:474
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:309
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
unsigned getResNo() const
Convenience function for get().getResNo().
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:659
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:203
CARRY_FALSE - This node is used when folding other nodes, like ADDC/SUBC, which indicate the carry re...
Definition: ISDOpcodes.h:204
SDValue getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget=false)
static bool isBSwapHWordElement(SDValue N, MutableArrayRef< SDNode * > Parts)
Return true if the specified node is an element that makes up a 32-bit packed halfword byteswap...
This file describes how to lower LLVM code to machine code.
void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO)
uint64_t getBaseAlignment() const
getBaseAlignment - Return the minimum known alignment in bytes of the base address, without the offset.
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
ISD::CondCode get() const
uint64_t getZExtValue() const
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:761
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:314
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:225
This class is used to represent ISD::LOAD nodes.
Function must be optimized for size first.
Definition: Attributes.h:80