LLVM  4.0.0
LegalizeVectorOps.cpp
Go to the documentation of this file.
1 //===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the SelectionDAG::LegalizeVectors method.
11 //
12 // The vector legalizer looks for vector operations which might need to be
13 // scalarized and legalizes them. This is a separate step from Legalize because
14 // scalarizing can introduce illegal types. For example, suppose we have an
15 // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
16 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
17 // operation, which introduces nodes with the illegal type i64 which must be
18 // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
19 // the operation must be unrolled, which introduces nodes with the illegal
20 // type i8 which must be promoted.
21 //
22 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
23 // or operations that happen to take a vector which are custom-lowered;
24 // the legalization for such operations never produces nodes
25 // with illegal types, so it's okay to put off legalizing them until
26 // SelectionDAG::Legalize runs.
27 //
28 //===----------------------------------------------------------------------===//
29 
32 using namespace llvm;
33 
34 namespace {
35 class VectorLegalizer {
36  SelectionDAG& DAG;
37  const TargetLowering &TLI;
38  bool Changed; // Keep track of whether anything changed
39 
40  /// For nodes that are of legal width, and that have more than one use, this
41  /// map indicates what regularized operand to use. This allows us to avoid
42  /// legalizing the same thing more than once.
44 
45  /// \brief Adds a node to the translation cache.
46  void AddLegalizedOperand(SDValue From, SDValue To) {
47  LegalizedNodes.insert(std::make_pair(From, To));
48  // If someone requests legalization of the new node, return itself.
49  if (From != To)
50  LegalizedNodes.insert(std::make_pair(To, To));
51  }
52 
53  /// \brief Legalizes the given node.
54  SDValue LegalizeOp(SDValue Op);
55 
56  /// \brief Assuming the node is legal, "legalize" the results.
57  SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
58 
59  /// \brief Implements unrolling a VSETCC.
60  SDValue UnrollVSETCC(SDValue Op);
61 
62  /// \brief Implement expand-based legalization of vector operations.
63  ///
64  /// This is just a high-level routine to dispatch to specific code paths for
65  /// operations to legalize them.
66  SDValue Expand(SDValue Op);
67 
68  /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if
69  /// FSUB isn't legal.
70  ///
71  /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
72  /// SINT_TO_FLOAT and SHR on vectors isn't legal.
73  SDValue ExpandUINT_TO_FLOAT(SDValue Op);
74 
75  /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
76  SDValue ExpandSEXTINREG(SDValue Op);
77 
78  /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
79  ///
80  /// Shuffles the low lanes of the operand into place and bitcasts to the proper
81  /// type. The contents of the bits in the extended part of each element are
82  /// undef.
83  SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
84 
85  /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
86  ///
87  /// Shuffles the low lanes of the operand into place, bitcasts to the proper
88  /// type, then shifts left and arithmetic shifts right to introduce a sign
89  /// extension.
90  SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
91 
92  /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
93  ///
94  /// Shuffles the low lanes of the operand into place and blends zeros into
95  /// the remaining lanes, finally bitcasting to the proper type.
96  SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
97 
98  /// \brief Expand bswap of vectors into a shuffle if legal.
99  SDValue ExpandBSWAP(SDValue Op);
100 
101  /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
102  /// supported by the target.
103  SDValue ExpandVSELECT(SDValue Op);
104  SDValue ExpandSELECT(SDValue Op);
105  SDValue ExpandLoad(SDValue Op);
106  SDValue ExpandStore(SDValue Op);
107  SDValue ExpandFNEG(SDValue Op);
108  SDValue ExpandBITREVERSE(SDValue Op);
109  SDValue ExpandCTLZ(SDValue Op);
110  SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
111 
112  /// \brief Implements vector promotion.
113  ///
114  /// This is essentially just bitcasting the operands to a different type and
115  /// bitcasting the result back to the original type.
116  SDValue Promote(SDValue Op);
117 
118  /// \brief Implements [SU]INT_TO_FP vector promotion.
119  ///
120  /// This is a [zs]ext of the input operand to the next size up.
121  SDValue PromoteINT_TO_FP(SDValue Op);
122 
123  /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
124  ///
125  /// It is promoted to the next size up integer type. The result is then
126  /// truncated back to the original type.
127  SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
128 
129 public:
130  /// \brief Begin legalizer the vector operations in the DAG.
131  bool Run();
132  VectorLegalizer(SelectionDAG& dag) :
133  DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
134 };
135 
136 bool VectorLegalizer::Run() {
137  // Before we start legalizing vector nodes, check if there are any vectors.
138  bool HasVectors = false;
139  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
140  E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
141  // Check if the values of the nodes contain vectors. We don't need to check
142  // the operands because we are going to check their values at some point.
143  for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
144  J != E; ++J)
145  HasVectors |= J->isVector();
146 
147  // If we found a vector node we can start the legalization.
148  if (HasVectors)
149  break;
150  }
151 
152  // If this basic block has no vectors then no need to legalize vectors.
153  if (!HasVectors)
154  return false;
155 
156  // The legalize process is inherently a bottom-up recursive process (users
157  // legalize their uses before themselves). Given infinite stack space, we
158  // could just start legalizing on the root and traverse the whole graph. In
159  // practice however, this causes us to run out of stack space on large basic
160  // blocks. To avoid this problem, compute an ordering of the nodes where each
161  // node is only legalized after all of its operands are legalized.
162  DAG.AssignTopologicalOrder();
163  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
164  E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
165  LegalizeOp(SDValue(&*I, 0));
166 
167  // Finally, it's possible the root changed. Get the new root.
168  SDValue OldRoot = DAG.getRoot();
169  assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
170  DAG.setRoot(LegalizedNodes[OldRoot]);
171 
172  LegalizedNodes.clear();
173 
174  // Remove dead nodes now.
175  DAG.RemoveDeadNodes();
176 
177  return Changed;
178 }
179 
180 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
181  // Generic legalization: just pass the operand through.
182  for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
183  AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
184  return Result.getValue(Op.getResNo());
185 }
186 
187 SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
188  // Note that LegalizeOp may be reentered even from single-use nodes, which
189  // means that we always must cache transformed nodes.
190  DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
191  if (I != LegalizedNodes.end()) return I->second;
192 
193  SDNode* Node = Op.getNode();
194 
195  // Legalize the operands
197  for (const SDValue &Op : Node->op_values())
198  Ops.push_back(LegalizeOp(Op));
199 
200  SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
201 
202  bool HasVectorValue = false;
203  if (Op.getOpcode() == ISD::LOAD) {
204  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
205  ISD::LoadExtType ExtType = LD->getExtensionType();
206  if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD)
207  switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
208  LD->getMemoryVT())) {
209  default: llvm_unreachable("This action is not supported yet!");
211  return TranslateLegalizeResults(Op, Result);
213  if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
214  if (Lowered == Result)
215  return TranslateLegalizeResults(Op, Lowered);
216  Changed = true;
217  if (Lowered->getNumValues() != Op->getNumValues()) {
218  // This expanded to something other than the load. Assume the
219  // lowering code took care of any chain values, and just handle the
220  // returned value.
221  assert(Result.getValue(1).use_empty() &&
222  "There are still live users of the old chain!");
223  return LegalizeOp(Lowered);
224  }
225  return TranslateLegalizeResults(Op, Lowered);
226  }
228  Changed = true;
229  return LegalizeOp(ExpandLoad(Op));
230  }
231  } else if (Op.getOpcode() == ISD::STORE) {
232  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
233  EVT StVT = ST->getMemoryVT();
234  MVT ValVT = ST->getValue().getSimpleValueType();
235  if (StVT.isVector() && ST->isTruncatingStore())
236  switch (TLI.getTruncStoreAction(ValVT, StVT)) {
237  default: llvm_unreachable("This action is not supported yet!");
239  return TranslateLegalizeResults(Op, Result);
240  case TargetLowering::Custom: {
241  SDValue Lowered = TLI.LowerOperation(Result, DAG);
242  Changed = Lowered != Result;
243  return TranslateLegalizeResults(Op, Lowered);
244  }
246  Changed = true;
247  return LegalizeOp(ExpandStore(Op));
248  }
249  } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
250  HasVectorValue = true;
251 
252  for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
253  J != E;
254  ++J)
255  HasVectorValue |= J->isVector();
256  if (!HasVectorValue)
257  return TranslateLegalizeResults(Op, Result);
258 
259  EVT QueryType;
260  switch (Op.getOpcode()) {
261  default:
262  return TranslateLegalizeResults(Op, Result);
263  case ISD::ADD:
264  case ISD::SUB:
265  case ISD::MUL:
266  case ISD::SDIV:
267  case ISD::UDIV:
268  case ISD::SREM:
269  case ISD::UREM:
270  case ISD::SDIVREM:
271  case ISD::UDIVREM:
272  case ISD::FADD:
273  case ISD::FSUB:
274  case ISD::FMUL:
275  case ISD::FDIV:
276  case ISD::FREM:
277  case ISD::AND:
278  case ISD::OR:
279  case ISD::XOR:
280  case ISD::SHL:
281  case ISD::SRA:
282  case ISD::SRL:
283  case ISD::ROTL:
284  case ISD::ROTR:
285  case ISD::BSWAP:
286  case ISD::BITREVERSE:
287  case ISD::CTLZ:
288  case ISD::CTTZ:
291  case ISD::CTPOP:
292  case ISD::SELECT:
293  case ISD::VSELECT:
294  case ISD::SELECT_CC:
295  case ISD::SETCC:
296  case ISD::ZERO_EXTEND:
297  case ISD::ANY_EXTEND:
298  case ISD::TRUNCATE:
299  case ISD::SIGN_EXTEND:
300  case ISD::FP_TO_SINT:
301  case ISD::FP_TO_UINT:
302  case ISD::FNEG:
303  case ISD::FABS:
304  case ISD::FMINNUM:
305  case ISD::FMAXNUM:
306  case ISD::FMINNAN:
307  case ISD::FMAXNAN:
308  case ISD::FCOPYSIGN:
309  case ISD::FSQRT:
310  case ISD::FSIN:
311  case ISD::FCOS:
312  case ISD::FPOWI:
313  case ISD::FPOW:
314  case ISD::FLOG:
315  case ISD::FLOG2:
316  case ISD::FLOG10:
317  case ISD::FEXP:
318  case ISD::FEXP2:
319  case ISD::FCEIL:
320  case ISD::FTRUNC:
321  case ISD::FRINT:
322  case ISD::FNEARBYINT:
323  case ISD::FROUND:
324  case ISD::FFLOOR:
325  case ISD::FP_ROUND:
326  case ISD::FP_EXTEND:
327  case ISD::FMA:
332  case ISD::SMIN:
333  case ISD::SMAX:
334  case ISD::UMIN:
335  case ISD::UMAX:
336  case ISD::SMUL_LOHI:
337  case ISD::UMUL_LOHI:
338  QueryType = Node->getValueType(0);
339  break;
340  case ISD::FP_ROUND_INREG:
341  QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
342  break;
343  case ISD::SINT_TO_FP:
344  case ISD::UINT_TO_FP:
345  QueryType = Node->getOperand(0).getValueType();
346  break;
347  case ISD::MSCATTER:
348  QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
349  break;
350  case ISD::MSTORE:
351  QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
352  break;
353  }
354 
355  switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
356  default: llvm_unreachable("This action is not supported yet!");
358  Result = Promote(Op);
359  Changed = true;
360  break;
362  break;
363  case TargetLowering::Custom: {
364  if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
365  Result = Tmp1;
366  break;
367  }
369  }
371  Result = Expand(Op);
372  }
373 
374  // Make sure that the generated code is itself legal.
375  if (Result != Op) {
376  Result = LegalizeOp(Result);
377  Changed = true;
378  }
379 
380  // Note that LegalizeOp may be reentered even from single-use nodes, which
381  // means that we always must cache transformed nodes.
382  AddLegalizedOperand(Op, Result);
383  return Result;
384 }
385 
386 SDValue VectorLegalizer::Promote(SDValue Op) {
387  // For a few operations there is a specific concept for promotion based on
388  // the operand's type.
389  switch (Op.getOpcode()) {
390  case ISD::SINT_TO_FP:
391  case ISD::UINT_TO_FP:
392  // "Promote" the operation by extending the operand.
393  return PromoteINT_TO_FP(Op);
394  case ISD::FP_TO_UINT:
395  case ISD::FP_TO_SINT:
396  // Promote the operation by extending the operand.
397  return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT);
398  }
399 
400  // There are currently two cases of vector promotion:
401  // 1) Bitcasting a vector of integers to a different type to a vector of the
402  // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
403  // 2) Extending a vector of floats to a vector of the same number of larger
404  // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
405  MVT VT = Op.getSimpleValueType();
406  assert(Op.getNode()->getNumValues() == 1 &&
407  "Can't promote a vector with multiple results!");
408  MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
409  SDLoc dl(Op);
411 
412  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
413  if (Op.getOperand(j).getValueType().isVector())
414  if (Op.getOperand(j)
415  .getValueType()
417  .isFloatingPoint() &&
418  NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
419  Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
420  else
421  Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
422  else
423  Operands[j] = Op.getOperand(j);
424  }
425 
426  Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
427  if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
429  NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
430  return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
431  else
432  return DAG.getNode(ISD::BITCAST, dl, VT, Op);
433 }
434 
435 SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
436  // INT_TO_FP operations may require the input operand be promoted even
437  // when the type is otherwise legal.
438  EVT VT = Op.getOperand(0).getValueType();
439  assert(Op.getNode()->getNumValues() == 1 &&
440  "Can't promote a vector with multiple results!");
441 
442  // Normal getTypeToPromoteTo() doesn't work here, as that will promote
443  // by widening the vector w/ the same element width and twice the number
444  // of elements. We want the other way around, the same number of elements,
445  // each twice the width.
446  //
447  // Increase the bitwidth of the element to the next pow-of-two
448  // (which is greater than 8 bits).
449 
450  EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext());
451  assert(NVT.isSimple() && "Promoting to a non-simple vector type!");
452  SDLoc dl(Op);
454 
455  unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
457  for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
458  if (Op.getOperand(j).getValueType().isVector())
459  Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
460  else
461  Operands[j] = Op.getOperand(j);
462  }
463 
464  return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
465 }
466 
467 // For FP_TO_INT we promote the result type to a vector type with wider
468 // elements and then truncate the result. This is different from the default
469 // PromoteVector which uses bitcast to promote thus assumning that the
470 // promoted vector type has the same overall size.
471 SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
472  assert(Op.getNode()->getNumValues() == 1 &&
473  "Can't promote a vector with multiple results!");
474  EVT VT = Op.getValueType();
475 
476  EVT NewVT;
477  unsigned NewOpc;
478  while (1) {
479  NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
480  assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
481  if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
482  NewOpc = ISD::FP_TO_SINT;
483  break;
484  }
485  if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) {
486  NewOpc = ISD::FP_TO_UINT;
487  break;
488  }
489  }
490 
491  SDLoc loc(Op);
492  SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
493  return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
494 }
495 
496 
497 SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
498  LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
499 
500  EVT SrcVT = LD->getMemoryVT();
501  EVT SrcEltVT = SrcVT.getScalarType();
502  unsigned NumElem = SrcVT.getVectorNumElements();
503 
504 
505  SDValue NewChain;
506  SDValue Value;
507  if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
508  SDLoc dl(Op);
509 
511  SmallVector<SDValue, 8> LoadChains;
512 
513  EVT DstEltVT = LD->getValueType(0).getScalarType();
514  SDValue Chain = LD->getChain();
515  SDValue BasePTR = LD->getBasePtr();
516  ISD::LoadExtType ExtType = LD->getExtensionType();
517 
518  // When elements in a vector is not byte-addressable, we cannot directly
519  // load each element by advancing pointer, which could only address bytes.
520  // Instead, we load all significant words, mask bits off, and concatenate
521  // them to form each element. Finally, they are extended to destination
522  // scalar type to build the destination vector.
523  EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
524 
525  assert(WideVT.isRound() &&
526  "Could not handle the sophisticated case when the widest integer is"
527  " not power of 2.");
528  assert(WideVT.bitsGE(SrcEltVT) &&
529  "Type is not legalized?");
530 
531  unsigned WideBytes = WideVT.getStoreSize();
532  unsigned Offset = 0;
533  unsigned RemainingBytes = SrcVT.getStoreSize();
534  SmallVector<SDValue, 8> LoadVals;
535 
536  while (RemainingBytes > 0) {
537  SDValue ScalarLoad;
538  unsigned LoadBytes = WideBytes;
539 
540  if (RemainingBytes >= LoadBytes) {
541  ScalarLoad =
542  DAG.getLoad(WideVT, dl, Chain, BasePTR,
543  LD->getPointerInfo().getWithOffset(Offset),
544  MinAlign(LD->getAlignment(), Offset),
545  LD->getMemOperand()->getFlags(), LD->getAAInfo());
546  } else {
547  EVT LoadVT = WideVT;
548  while (RemainingBytes < LoadBytes) {
549  LoadBytes >>= 1; // Reduce the load size by half.
550  LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
551  }
552  ScalarLoad =
553  DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
554  LD->getPointerInfo().getWithOffset(Offset), LoadVT,
555  MinAlign(LD->getAlignment(), Offset),
556  LD->getMemOperand()->getFlags(), LD->getAAInfo());
557  }
558 
559  RemainingBytes -= LoadBytes;
560  Offset += LoadBytes;
561  BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
562  DAG.getConstant(LoadBytes, dl,
563  BasePTR.getValueType()));
564 
565  LoadVals.push_back(ScalarLoad.getValue(0));
566  LoadChains.push_back(ScalarLoad.getValue(1));
567  }
568 
569  // Extract bits, pack and extend/trunc them into destination type.
570  unsigned SrcEltBits = SrcEltVT.getSizeInBits();
571  SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
572 
573  unsigned BitOffset = 0;
574  unsigned WideIdx = 0;
575  unsigned WideBits = WideVT.getSizeInBits();
576 
577  for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
578  SDValue Lo, Hi, ShAmt;
579 
580  if (BitOffset < WideBits) {
581  ShAmt = DAG.getConstant(
582  BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
583  Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
584  Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
585  }
586 
587  BitOffset += SrcEltBits;
588  if (BitOffset >= WideBits) {
589  WideIdx++;
590  BitOffset -= WideBits;
591  if (BitOffset > 0) {
592  ShAmt = DAG.getConstant(
593  SrcEltBits - BitOffset, dl,
594  TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
595  Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
596  Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
597  }
598  }
599 
600  if (Hi.getNode())
601  Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
602 
603  switch (ExtType) {
604  default: llvm_unreachable("Unknown extended-load op!");
605  case ISD::EXTLOAD:
606  Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
607  break;
608  case ISD::ZEXTLOAD:
609  Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
610  break;
611  case ISD::SEXTLOAD:
612  ShAmt =
613  DAG.getConstant(WideBits - SrcEltBits, dl,
614  TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
615  Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
616  Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
617  Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
618  break;
619  }
620  Vals.push_back(Lo);
621  }
622 
623  NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
624  Value = DAG.getNode(ISD::BUILD_VECTOR, dl,
625  Op.getNode()->getValueType(0), Vals);
626  } else {
627  SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
628 
629  NewChain = Scalarized.getValue(1);
630  Value = Scalarized.getValue(0);
631  }
632 
633  AddLegalizedOperand(Op.getValue(0), Value);
634  AddLegalizedOperand(Op.getValue(1), NewChain);
635 
636  return (Op.getResNo() ? NewChain : Value);
637 }
638 
639 SDValue VectorLegalizer::ExpandStore(SDValue Op) {
640  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
641 
642  EVT StVT = ST->getMemoryVT();
643  EVT MemSclVT = StVT.getScalarType();
644  unsigned ScalarSize = MemSclVT.getSizeInBits();
645 
646  // Round odd types to the next pow of two.
647  if (!isPowerOf2_32(ScalarSize)) {
648  // FIXME: This is completely broken and inconsistent with ExpandLoad
649  // handling.
650 
651  // For sub-byte element sizes, this ends up with 0 stride between elements,
652  // so the same element just gets re-written to the same location. There seem
653  // to be tests explicitly testing for this broken behavior though. tests
654  // for this broken behavior.
655 
656  LLVMContext &Ctx = *DAG.getContext();
657 
658  EVT NewMemVT
659  = EVT::getVectorVT(Ctx,
660  MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)),
661  StVT.getVectorNumElements());
662 
663  SDValue NewVectorStore = DAG.getTruncStore(
664  ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(),
665  ST->getPointerInfo(), NewMemVT, ST->getAlignment(),
666  ST->getMemOperand()->getFlags(), ST->getAAInfo());
667  ST = cast<StoreSDNode>(NewVectorStore.getNode());
668  }
669 
670  SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
671  AddLegalizedOperand(Op, TF);
672  return TF;
673 }
674 
675 SDValue VectorLegalizer::Expand(SDValue Op) {
676  switch (Op->getOpcode()) {
678  return ExpandSEXTINREG(Op);
680  return ExpandANY_EXTEND_VECTOR_INREG(Op);
682  return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
684  return ExpandZERO_EXTEND_VECTOR_INREG(Op);
685  case ISD::BSWAP:
686  return ExpandBSWAP(Op);
687  case ISD::VSELECT:
688  return ExpandVSELECT(Op);
689  case ISD::SELECT:
690  return ExpandSELECT(Op);
691  case ISD::UINT_TO_FP:
692  return ExpandUINT_TO_FLOAT(Op);
693  case ISD::FNEG:
694  return ExpandFNEG(Op);
695  case ISD::SETCC:
696  return UnrollVSETCC(Op);
697  case ISD::BITREVERSE:
698  return ExpandBITREVERSE(Op);
699  case ISD::CTLZ:
701  return ExpandCTLZ(Op);
703  return ExpandCTTZ_ZERO_UNDEF(Op);
704  default:
705  return DAG.UnrollVectorOp(Op.getNode());
706  }
707 }
708 
709 SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
710  // Lower a select instruction where the condition is a scalar and the
711  // operands are vectors. Lower this select to VSELECT and implement it
712  // using XOR AND OR. The selector bit is broadcasted.
713  EVT VT = Op.getValueType();
714  SDLoc DL(Op);
715 
716  SDValue Mask = Op.getOperand(0);
717  SDValue Op1 = Op.getOperand(1);
718  SDValue Op2 = Op.getOperand(2);
719 
720  assert(VT.isVector() && !Mask.getValueType().isVector()
721  && Op1.getValueType() == Op2.getValueType() && "Invalid type");
722 
723  unsigned NumElem = VT.getVectorNumElements();
724 
725  // If we can't even use the basic vector operations of
726  // AND,OR,XOR, we will have to scalarize the op.
727  // Notice that the operation may be 'promoted' which means that it is
728  // 'bitcasted' to another type which is handled.
729  // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
730  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
731  TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
732  TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
733  TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand)
734  return DAG.UnrollVectorOp(Op.getNode());
735 
736  // Generate a mask operand.
738 
739  // What is the size of each element in the vector mask.
740  EVT BitTy = MaskTy.getScalarType();
741 
742  Mask = DAG.getSelect(DL, BitTy, Mask,
743  DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
744  BitTy),
745  DAG.getConstant(0, DL, BitTy));
746 
747  // Broadcast the mask so that the entire vector is all-one or all zero.
748  SmallVector<SDValue, 8> Ops(NumElem, Mask);
749  Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops);
750 
751  // Bitcast the operands to be the same type as the mask.
752  // This is needed when we select between FP types because
753  // the mask is a vector of integers.
754  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
755  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
756 
757  SDValue AllOnes = DAG.getConstant(
758  APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
759  SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
760 
761  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
762  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
763  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
764  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
765 }
766 
767 SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
768  EVT VT = Op.getValueType();
769 
770  // Make sure that the SRA and SHL instructions are available.
771  if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
772  TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
773  return DAG.UnrollVectorOp(Op.getNode());
774 
775  SDLoc DL(Op);
776  EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
777 
778  unsigned BW = VT.getScalarSizeInBits();
779  unsigned OrigBW = OrigTy.getScalarSizeInBits();
780  SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
781 
782  Op = Op.getOperand(0);
783  Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
784  return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
785 }
786 
787 // Generically expand a vector anyext in register to a shuffle of the relevant
788 // lanes into the appropriate locations, with other lanes left undef.
789 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
790  SDLoc DL(Op);
791  EVT VT = Op.getValueType();
792  int NumElements = VT.getVectorNumElements();
793  SDValue Src = Op.getOperand(0);
794  EVT SrcVT = Src.getValueType();
795  int NumSrcElements = SrcVT.getVectorNumElements();
796 
797  // Build a base mask of undef shuffles.
798  SmallVector<int, 16> ShuffleMask;
799  ShuffleMask.resize(NumSrcElements, -1);
800 
801  // Place the extended lanes into the correct locations.
802  int ExtLaneScale = NumSrcElements / NumElements;
803  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
804  for (int i = 0; i < NumElements; ++i)
805  ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
806 
807  return DAG.getNode(
808  ISD::BITCAST, DL, VT,
809  DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
810 }
811 
812 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
813  SDLoc DL(Op);
814  EVT VT = Op.getValueType();
815  SDValue Src = Op.getOperand(0);
816  EVT SrcVT = Src.getValueType();
817 
818  // First build an any-extend node which can be legalized above when we
819  // recurse through it.
820  Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
821 
822  // Now we need sign extend. Do this by shifting the elements. Even if these
823  // aren't legal operations, they have a better chance of being legalized
824  // without full scalarization than the sign extension does.
825  unsigned EltWidth = VT.getScalarSizeInBits();
826  unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
827  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
828  return DAG.getNode(ISD::SRA, DL, VT,
829  DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
830  ShiftAmount);
831 }
832 
833 // Generically expand a vector zext in register to a shuffle of the relevant
834 // lanes into the appropriate locations, a blend of zero into the high bits,
835 // and a bitcast to the wider element type.
836 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
837  SDLoc DL(Op);
838  EVT VT = Op.getValueType();
839  int NumElements = VT.getVectorNumElements();
840  SDValue Src = Op.getOperand(0);
841  EVT SrcVT = Src.getValueType();
842  int NumSrcElements = SrcVT.getVectorNumElements();
843 
844  // Build up a zero vector to blend into this one.
845  SDValue Zero = DAG.getConstant(0, DL, SrcVT);
846 
847  // Shuffle the incoming lanes into the correct position, and pull all other
848  // lanes from the zero vector.
849  SmallVector<int, 16> ShuffleMask;
850  ShuffleMask.reserve(NumSrcElements);
851  for (int i = 0; i < NumSrcElements; ++i)
852  ShuffleMask.push_back(i);
853 
854  int ExtLaneScale = NumSrcElements / NumElements;
855  int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
856  for (int i = 0; i < NumElements; ++i)
857  ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
858 
859  return DAG.getNode(ISD::BITCAST, DL, VT,
860  DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
861 }
862 
863 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
864  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
865  for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
866  for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
867  ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
868 }
869 
870 SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
871  EVT VT = Op.getValueType();
872 
873  // Generate a byte wise shuffle mask for the BSWAP.
874  SmallVector<int, 16> ShuffleMask;
875  createBSWAPShuffleMask(VT, ShuffleMask);
876  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
877 
878  // Only emit a shuffle if the mask is legal.
879  if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
880  return DAG.UnrollVectorOp(Op.getNode());
881 
882  SDLoc DL(Op);
883  Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
884  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
885  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
886 }
887 
888 SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
889  EVT VT = Op.getValueType();
890 
891  // If we have the scalar operation, it's probably cheaper to unroll it.
892  if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
893  return DAG.UnrollVectorOp(Op.getNode());
894 
895  // If the vector element width is a whole number of bytes, test if its legal
896  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
897  // vector. This greatly reduces the number of bit shifts necessary.
898  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
899  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
900  SmallVector<int, 16> BSWAPMask;
901  createBSWAPShuffleMask(VT, BSWAPMask);
902 
903  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
904  if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
905  (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
906  (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
907  TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
908  TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
909  TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
910  SDLoc DL(Op);
911  Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
912  Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
913  BSWAPMask);
914  Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
915  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
916  }
917  }
918 
919  // If we have the appropriate vector bit operations, it is better to use them
920  // than unrolling and expanding each component.
921  if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
922  !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
923  !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
924  !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
925  return DAG.UnrollVectorOp(Op.getNode());
926 
927  // Let LegalizeDAG handle this later.
928  return Op;
929 }
930 
931 SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
932  // Implement VSELECT in terms of XOR, AND, OR
933  // on platforms which do not support blend natively.
934  SDLoc DL(Op);
935 
936  SDValue Mask = Op.getOperand(0);
937  SDValue Op1 = Op.getOperand(1);
938  SDValue Op2 = Op.getOperand(2);
939 
940  EVT VT = Mask.getValueType();
941 
942  // If we can't even use the basic vector operations of
943  // AND,OR,XOR, we will have to scalarize the op.
944  // Notice that the operation may be 'promoted' which means that it is
945  // 'bitcasted' to another type which is handled.
946  // This operation also isn't safe with AND, OR, XOR when the boolean
947  // type is 0/1 as we need an all ones vector constant to mask with.
948  // FIXME: Sign extend 1 to all ones if thats legal on the target.
949  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
950  TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
951  TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
952  TLI.getBooleanContents(Op1.getValueType()) !=
954  return DAG.UnrollVectorOp(Op.getNode());
955 
956  // If the mask and the type are different sizes, unroll the vector op. This
957  // can occur when getSetCCResultType returns something that is different in
958  // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
959  if (VT.getSizeInBits() != Op1.getValueSizeInBits())
960  return DAG.UnrollVectorOp(Op.getNode());
961 
962  // Bitcast the operands to be the same type as the mask.
963  // This is needed when we select between FP types because
964  // the mask is a vector of integers.
965  Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
966  Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
967 
968  SDValue AllOnes = DAG.getConstant(
970  SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
971 
972  Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
973  Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
974  SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
975  return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
976 }
977 
978 SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
979  EVT VT = Op.getOperand(0).getValueType();
980  SDLoc DL(Op);
981 
982  // Make sure that the SINT_TO_FP and SRL instructions are available.
983  if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
984  TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
985  return DAG.UnrollVectorOp(Op.getNode());
986 
987  unsigned BW = VT.getScalarSizeInBits();
988  assert((BW == 64 || BW == 32) &&
989  "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
990 
991  SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
992 
993  // Constants to clear the upper part of the word.
994  // Notice that we can also use SHL+SHR, but using a constant is slightly
995  // faster on x86.
996  uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
997  SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
998 
999  // Two to the power of half-word-size.
1000  SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType());
1001 
1002  // Clear upper part of LO, lower HI
1003  SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
1004  SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
1005 
1006  // Convert hi and lo to floats
1007  // Convert the hi part back to the upper values
1008  // TODO: Can any fast-math-flags be set on these nodes?
1009  SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
1010  fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
1011  SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
1012 
1013  // Add the two halves
1014  return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
1015 }
1016 
1017 SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
1018  if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
1019  SDLoc DL(Op);
1020  SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
1021  // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
1022  return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
1023  Zero, Op.getOperand(0));
1024  }
1025  return DAG.UnrollVectorOp(Op.getNode());
1026 }
1027 
1028 SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
1029  EVT VT = Op.getValueType();
1030  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
1031 
1032  // If the non-ZERO_UNDEF version is supported we can use that instead.
1033  if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
1034  TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
1035  SDLoc DL(Op);
1036  return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
1037  }
1038 
1039  // If CTPOP is available we can lower with a CTPOP based method:
1040  // u16 ctlz(u16 x) {
1041  // x |= (x >> 1);
1042  // x |= (x >> 2);
1043  // x |= (x >> 4);
1044  // x |= (x >> 8);
1045  // return ctpop(~x);
1046  // }
1047  // Ref: "Hacker's Delight" by Henry Warren
1048  if (isPowerOf2_32(NumBitsPerElt) &&
1049  TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
1050  TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1051  TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
1052  TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
1053  SDLoc DL(Op);
1054  SDValue Res = Op.getOperand(0);
1055  EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
1056 
1057  for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
1058  Res = DAG.getNode(
1059  ISD::OR, DL, VT, Res,
1060  DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
1061 
1062  Res = DAG.getNOT(DL, Res, VT);
1063  return DAG.getNode(ISD::CTPOP, DL, VT, Res);
1064  }
1065 
1066  // Otherwise go ahead and unroll.
1067  return DAG.UnrollVectorOp(Op.getNode());
1068 }
1069 
1070 SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
1071  // If the non-ZERO_UNDEF version is supported we can use that instead.
1072  if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
1073  SDLoc DL(Op);
1074  return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
1075  }
1076 
1077  // Otherwise go ahead and unroll.
1078  return DAG.UnrollVectorOp(Op.getNode());
1079 }
1080 
1081 SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
1082  EVT VT = Op.getValueType();
1083  unsigned NumElems = VT.getVectorNumElements();
1084  EVT EltVT = VT.getVectorElementType();
1085  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
1086  EVT TmpEltVT = LHS.getValueType().getVectorElementType();
1087  SDLoc dl(Op);
1088  SmallVector<SDValue, 8> Ops(NumElems);
1089  for (unsigned i = 0; i < NumElems; ++i) {
1090  SDValue LHSElem = DAG.getNode(
1091  ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
1092  DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1093  SDValue RHSElem = DAG.getNode(
1094  ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
1095  DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
1096  Ops[i] = DAG.getNode(ISD::SETCC, dl,
1097  TLI.getSetCCResultType(DAG.getDataLayout(),
1098  *DAG.getContext(), TmpEltVT),
1099  LHSElem, RHSElem, CC);
1100  Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
1101  DAG.getConstant(APInt::getAllOnesValue
1102  (EltVT.getSizeInBits()), dl, EltVT),
1103  DAG.getConstant(0, dl, EltVT));
1104  }
1105  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
1106 }
1107 
1108 }
1109 
1111  return VectorLegalizer(*this).Run();
1112 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:500
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:524
SDValue getValue(unsigned R) const
Flags getFlags() const
Return the raw flags of the source value.
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:458
size_t i
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:449
bool LegalizeVectors()
This transforms the SelectionDAG into a SelectionDAG that only uses vector math operations supported ...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
unsigned getNumOperands() const
unsigned getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned Num) const
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:440
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:330
void reserve(size_type N)
Definition: SmallVector.h:377
const SDValue & getBasePtr() const
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
unsigned getResNo() const
get the index which selects a specific result in the SDNode
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:172
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
bool isRound() const
isRound - Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:188
Shift and rotation operations.
Definition: ISDOpcodes.h:344
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachinePointerInfo getWithOffset(int64_t O) const
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
bool bitsGE(EVT VT) const
bitsGE - Return true if this has no less bits than VT.
Definition: ValueTypes.h:206
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:410
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:363
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
Function Alias Analysis false
const SDValue & getBasePtr() const
EVT getMemoryVT() const
Return the type of the in-memory value.
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:427
This class is used to represent ISD::STORE nodes.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:453
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
SDNode * getNode() const
get the SDNode which holds the desired result
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:262
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
constexpr bool isPowerOf2_32(uint32_t Value)
isPowerOf2_32 - This function returns true if the argument is a power of two > 0. ...
Definition: MathExtras.h:399
MVT - Machine Value Type.
const SDValue & getOperand(unsigned i) const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
bool isVector() const
isVector - Return true if this is a vector value type.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:818
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
uint32_t Offset
unsigned getOpcode() const
const SDValue & getValue() const
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:350
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:485
EVT - Extended Value Type.
Definition: ValueTypes.h:31
uint64_t NextPowerOf2(uint64_t A)
NextPowerOf2 - Returns the next power of two (in 64-bits) that is strictly greater than A...
Definition: MathExtras.h:619
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements)
getVectorVT - Returns the EVT that represents a vector NumElements in length, where each element is o...
Definition: ValueTypes.h:70
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
const MachinePointerInfo & getPointerInfo() const
TokenFactor - This node takes multiple tokens as input and produces a single token result...
Definition: ISDOpcodes.h:50
uint64_t * Vals
Iterator for intrusive lists based on ilist_node.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
X = FP_ROUND_INREG(Y, VT) - This operator takes an FP register, and rounds it to a floating point val...
Definition: ISDOpcodes.h:482
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
const SDValue & getChain() const
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:347
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool use_empty() const
Return true if there are no nodes using value ResNo of Node.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:400
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:259
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition: ValueTypes.h:307
FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that when a single input is NaN...
Definition: ISDOpcodes.h:527
iterator_range< value_op_iterator > op_values() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:205
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
static volatile int Zero
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isByteSized() const
isByteSized - Return true if the bit size is a multiple of 8.
Definition: ValueTypes.h:183
bool isFloatingPoint() const
isFloatingPoint - Return true if this is a FP, or a vector FP type.
Definition: ValueTypes.h:118
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
LLVM Value Representation.
Definition: Value.h:71
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:249
bool isTruncatingStore() const
Return true if the op does a truncation before store.
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
MVT getVectorElementType() const
Conversion operators.
Definition: ISDOpcodes.h:397
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
unsigned getAlignment() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
getIntegerVT - Returns the EVT that represents an integer with the given number of bits...
Definition: ValueTypes.h:61
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
const SDNodeFlags * getFlags() const
This could be defined as a virtual function and implemented more simply and directly, but it is not to avoid creating a vtable for this class.
This file describes how to lower LLVM code to machine code.
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
void resize(size_type N)
Definition: SmallVector.h:352
This class is used to represent ISD::LOAD nodes.