LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - LegalizeVectorOps.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 436 454 96.0 %
Date: 2018-02-23 15:42:53 Functions: 26 26 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the SelectionDAG::LegalizeVectors method.
      11             : //
      12             : // The vector legalizer looks for vector operations which might need to be
      13             : // scalarized and legalizes them. This is a separate step from Legalize because
      14             : // scalarizing can introduce illegal types.  For example, suppose we have an
      15             : // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
      16             : // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
      17             : // operation, which introduces nodes with the illegal type i64 which must be
      18             : // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
      19             : // the operation must be unrolled, which introduces nodes with the illegal
      20             : // type i8 which must be promoted.
      21             : //
      22             : // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
      23             : // or operations that happen to take a vector which are custom-lowered;
      24             : // the legalization for such operations never produces nodes
      25             : // with illegal types, so it's okay to put off legalizing them until
      26             : // SelectionDAG::Legalize runs.
      27             : //
      28             : //===----------------------------------------------------------------------===//
      29             : 
      30             : #include "llvm/ADT/APInt.h"
      31             : #include "llvm/ADT/DenseMap.h"
      32             : #include "llvm/ADT/SmallVector.h"
      33             : #include "llvm/CodeGen/ISDOpcodes.h"
      34             : #include "llvm/CodeGen/MachineMemOperand.h"
      35             : #include "llvm/CodeGen/MachineValueType.h"
      36             : #include "llvm/CodeGen/SelectionDAG.h"
      37             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      38             : #include "llvm/CodeGen/TargetLowering.h"
      39             : #include "llvm/CodeGen/ValueTypes.h"
      40             : #include "llvm/IR/DataLayout.h"
      41             : #include "llvm/Support/Casting.h"
      42             : #include "llvm/Support/Compiler.h"
      43             : #include "llvm/Support/ErrorHandling.h"
      44             : #include "llvm/Support/MathExtras.h"
      45             : #include <cassert>
      46             : #include <cstdint>
      47             : #include <iterator>
      48             : #include <utility>
      49             : 
      50             : using namespace llvm;
      51             : 
      52             : #define DEBUG_TYPE "legalizevectorops"
      53             : 
      54             : namespace {
      55             : 
      56             : class VectorLegalizer {
      57             :   SelectionDAG& DAG;
      58             :   const TargetLowering &TLI;
      59             :   bool Changed = false; // Keep track of whether anything changed
      60             : 
      61             :   /// For nodes that are of legal width, and that have more than one use, this
      62             :   /// map indicates what regularized operand to use.  This allows us to avoid
      63             :   /// legalizing the same thing more than once.
      64             :   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
      65             : 
      66             :   /// \brief Adds a node to the translation cache.
      67     4460473 :   void AddLegalizedOperand(SDValue From, SDValue To) {
      68     8920946 :     LegalizedNodes.insert(std::make_pair(From, To));
      69             :     // If someone requests legalization of the new node, return itself.
      70             :     if (From != To)
      71       58401 :       LegalizedNodes.insert(std::make_pair(To, To));
      72     4460473 :   }
      73             : 
      74             :   /// \brief Legalizes the given node.
      75             :   SDValue LegalizeOp(SDValue Op);
      76             : 
      77             :   /// \brief Assuming the node is legal, "legalize" the results.
      78             :   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
      79             : 
      80             :   /// \brief Implements unrolling a VSETCC.
      81             :   SDValue UnrollVSETCC(SDValue Op);
      82             : 
      83             :   /// \brief Implement expand-based legalization of vector operations.
      84             :   ///
      85             :   /// This is just a high-level routine to dispatch to specific code paths for
      86             :   /// operations to legalize them.
      87             :   SDValue Expand(SDValue Op);
      88             : 
      89             :   /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if
      90             :   /// FSUB isn't legal.
      91             :   ///
      92             :   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
      93             :   /// SINT_TO_FLOAT and SHR on vectors aren't legal.
      94             :   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
      95             : 
      96             :   /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
      97             :   SDValue ExpandSEXTINREG(SDValue Op);
      98             : 
      99             :   /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG.
     100             :   ///
     101             :   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
     102             :   /// type. The contents of the bits in the extended part of each element are
     103             :   /// undef.
     104             :   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
     105             : 
     106             :   /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG.
     107             :   ///
     108             :   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
     109             :   /// type, then shifts left and arithmetic shifts right to introduce a sign
     110             :   /// extension.
     111             :   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
     112             : 
     113             :   /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG.
     114             :   ///
     115             :   /// Shuffles the low lanes of the operand into place and blends zeros into
     116             :   /// the remaining lanes, finally bitcasting to the proper type.
     117             :   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
     118             : 
     119             :   /// \brief Expand bswap of vectors into a shuffle if legal.
     120             :   SDValue ExpandBSWAP(SDValue Op);
     121             : 
     122             :   /// \brief Implement vselect in terms of XOR, AND, OR when blend is not
     123             :   /// supported by the target.
     124             :   SDValue ExpandVSELECT(SDValue Op);
     125             :   SDValue ExpandSELECT(SDValue Op);
     126             :   SDValue ExpandLoad(SDValue Op);
     127             :   SDValue ExpandStore(SDValue Op);
     128             :   SDValue ExpandFNEG(SDValue Op);
     129             :   SDValue ExpandFSUB(SDValue Op);
     130             :   SDValue ExpandBITREVERSE(SDValue Op);
     131             :   SDValue ExpandCTLZ(SDValue Op);
     132             :   SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
     133             : 
     134             :   /// \brief Implements vector promotion.
     135             :   ///
     136             :   /// This is essentially just bitcasting the operands to a different type and
     137             :   /// bitcasting the result back to the original type.
     138             :   SDValue Promote(SDValue Op);
     139             : 
     140             :   /// \brief Implements [SU]INT_TO_FP vector promotion.
     141             :   ///
     142             :   /// This is a [zs]ext of the input operand to a larger integer type.
     143             :   SDValue PromoteINT_TO_FP(SDValue Op);
     144             : 
     145             :   /// \brief Implements FP_TO_[SU]INT vector promotion of the result type.
     146             :   ///
     147             :   /// It is promoted to a larger integer type.  The result is then
     148             :   /// truncated back to the original type.
     149             :   SDValue PromoteFP_TO_INT(SDValue Op);
     150             : 
     151             : public:
     152      310255 :   VectorLegalizer(SelectionDAG& dag) :
     153      310255 :       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
     154             : 
     155             :   /// \brief Begin legalizing the vector operations in the DAG.
     156             :   bool Run();
     157             : };
     158             : 
     159             : } // end anonymous namespace
     160             : 
     161      310245 : bool VectorLegalizer::Run() {
     162             :   // Before we start legalizing vector nodes, check if there are any vectors.
     163             :   bool HasVectors = false;
     164      310245 :   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
     165     6328267 :        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
     166             :     // Check if the values of the nodes contain vectors. We don't need to check
     167             :     // the operands because we are going to check their values at some point.
     168    13930282 :     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
     169    13930282 :          J != E; ++J)
     170     7795478 :       HasVectors |= J->isVector();
     171             : 
     172             :     // If we found a vector node we can start the legalization.
     173     6134804 :     if (HasVectors)
     174             :       break;
     175             :   }
     176             : 
     177             :   // If this basic block has no vectors then no need to legalize vectors.
     178      310245 :   if (!HasVectors)
     179             :     return false;
     180             : 
     181             :   // The legalize process is inherently a bottom-up recursive process (users
     182             :   // legalize their uses before themselves).  Given infinite stack space, we
     183             :   // could just start legalizing on the root and traverse the whole graph.  In
     184             :   // practice however, this causes us to run out of stack space on large basic
     185             :   // blocks.  To avoid this problem, compute an ordering of the nodes where each
     186             :   // node is only legalized after all of its operands are legalized.
     187      116782 :   DAG.AssignTopologicalOrder();
     188      116782 :   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
     189     3588435 :        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
     190     3471653 :     LegalizeOp(SDValue(&*I, 0));
     191             : 
     192             :   // Finally, it's possible the root changed.  Get the new root.
     193      116782 :   SDValue OldRoot = DAG.getRoot();
     194             :   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
     195      233564 :   DAG.setRoot(LegalizedNodes[OldRoot]);
     196             : 
     197      116782 :   LegalizedNodes.clear();
     198             : 
     199             :   // Remove dead nodes now.
     200      116782 :   DAG.RemoveDeadNodes();
     201             : 
     202      116782 :   return Changed;
     203             : }
     204             : 
     205     3500314 : SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
     206             :   // Generic legalization: just pass the operand through.
     207    15540670 :   for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
     208     4270021 :     AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
     209     3500314 :   return Result.getValue(Op.getResNo());
     210             : }
     211             : 
     212     9997513 : SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     213             :   // Note that LegalizeOp may be reentered even from single-use nodes, which
     214             :   // means that we always must cache transformed nodes.
     215     9997513 :   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
     216     9997513 :   if (I != LegalizedNodes.end()) return I->second;
     217             : 
     218     3690631 :   SDNode* Node = Op.getNode();
     219             : 
     220             :   // Legalize the operands
     221             :   SmallVector<SDValue, 8> Ops;
     222    10158530 :   for (const SDValue &Op : Node->op_values())
     223     6467899 :     Ops.push_back(LegalizeOp(Op));
     224             : 
     225     7381262 :   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
     226             : 
     227             :   bool HasVectorValue = false;
     228     7381262 :   if (Op.getOpcode() == ISD::LOAD) {
     229             :     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     230             :     ISD::LoadExtType ExtType = LD->getExtensionType();
     231      691263 :     if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
     232             :       DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG));
     233        8356 :       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
     234             :                                    LD->getMemoryVT())) {
     235           0 :       default: llvm_unreachable("This action is not supported yet!");
     236        1192 :       case TargetLowering::Legal:
     237        1192 :         return TranslateLegalizeResults(Op, Result);
     238        1429 :       case TargetLowering::Custom:
     239        1429 :         if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
     240             :           if (Lowered == Result)
     241           0 :             return TranslateLegalizeResults(Op, Lowered);
     242        1429 :           Changed = true;
     243        1429 :           if (Lowered->getNumValues() != Op->getNumValues()) {
     244             :             // This expanded to something other than the load. Assume the
     245             :             // lowering code took care of any chain values, and just handle the
     246             :             // returned value.
     247             :             assert(Result.getValue(1).use_empty() &&
     248             :                    "There are still live users of the old chain!");
     249        1429 :             return LegalizeOp(Lowered);
     250             :           }
     251           0 :           return TranslateLegalizeResults(Op, Lowered);
     252           0 :         }
     253             :         LLVM_FALLTHROUGH;
     254             :       case TargetLowering::Expand:
     255        1564 :         Changed = true;
     256        1564 :         return LegalizeOp(ExpandLoad(Op));
     257             :       }
     258             :     }
     259     3458815 :   } else if (Op.getOpcode() == ISD::STORE) {
     260             :     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     261      270331 :     EVT StVT = ST->getMemoryVT();
     262             :     MVT ValVT = ST->getValue().getSimpleValueType();
     263      398216 :     if (StVT.isVector() && ST->isTruncatingStore()) {
     264             :       DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
     265             :             Node->dump(&DAG));
     266        1397 :       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
     267           0 :       default: llvm_unreachable("This action is not supported yet!");
     268         255 :       case TargetLowering::Legal:
     269         962 :         return TranslateLegalizeResults(Op, Result);
     270         235 :       case TargetLowering::Custom: {
     271         235 :         SDValue Lowered = TLI.LowerOperation(Result, DAG);
     272         235 :         Changed = Lowered != Result;
     273         235 :         return TranslateLegalizeResults(Op, Lowered);
     274             :       }
     275         217 :       case TargetLowering::Expand:
     276         217 :         Changed = true;
     277         217 :         return LegalizeOp(ExpandStore(Op));
     278             :       }
     279             :     }
     280     3188484 :   } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
     281             :     HasVectorValue = true;
     282             : 
     283     8139993 :   for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
     284     8139993 :        J != E;
     285             :        ++J)
     286     4454254 :     HasVectorValue |= J->isVector();
     287     3685739 :   if (!HasVectorValue)
     288     2788482 :     return TranslateLegalizeResults(Op, Result);
     289             : 
     290             :   EVT QueryType;
     291     1794514 :   switch (Op.getOpcode()) {
     292      710150 :   default:
     293      710150 :     return TranslateLegalizeResults(Op, Result);
     294      185076 :   case ISD::ADD:
     295             :   case ISD::SUB:
     296             :   case ISD::MUL:
     297             :   case ISD::SDIV:
     298             :   case ISD::UDIV:
     299             :   case ISD::SREM:
     300             :   case ISD::UREM:
     301             :   case ISD::SDIVREM:
     302             :   case ISD::UDIVREM:
     303             :   case ISD::FADD:
     304             :   case ISD::FSUB:
     305             :   case ISD::FMUL:
     306             :   case ISD::FDIV:
     307             :   case ISD::FREM:
     308             :   case ISD::AND:
     309             :   case ISD::OR:
     310             :   case ISD::XOR:
     311             :   case ISD::SHL:
     312             :   case ISD::SRA:
     313             :   case ISD::SRL:
     314             :   case ISD::ROTL:
     315             :   case ISD::ROTR:
     316             :   case ISD::BSWAP:
     317             :   case ISD::BITREVERSE:
     318             :   case ISD::CTLZ:
     319             :   case ISD::CTTZ:
     320             :   case ISD::CTLZ_ZERO_UNDEF:
     321             :   case ISD::CTTZ_ZERO_UNDEF:
     322             :   case ISD::CTPOP:
     323             :   case ISD::SELECT:
     324             :   case ISD::VSELECT:
     325             :   case ISD::SELECT_CC:
     326             :   case ISD::SETCC:
     327             :   case ISD::ZERO_EXTEND:
     328             :   case ISD::ANY_EXTEND:
     329             :   case ISD::TRUNCATE:
     330             :   case ISD::SIGN_EXTEND:
     331             :   case ISD::FP_TO_SINT:
     332             :   case ISD::FP_TO_UINT:
     333             :   case ISD::FNEG:
     334             :   case ISD::FABS:
     335             :   case ISD::FMINNUM:
     336             :   case ISD::FMAXNUM:
     337             :   case ISD::FMINNAN:
     338             :   case ISD::FMAXNAN:
     339             :   case ISD::FCOPYSIGN:
     340             :   case ISD::FSQRT:
     341             :   case ISD::FSIN:
     342             :   case ISD::FCOS:
     343             :   case ISD::FPOWI:
     344             :   case ISD::FPOW:
     345             :   case ISD::FLOG:
     346             :   case ISD::FLOG2:
     347             :   case ISD::FLOG10:
     348             :   case ISD::FEXP:
     349             :   case ISD::FEXP2:
     350             :   case ISD::FCEIL:
     351             :   case ISD::FTRUNC:
     352             :   case ISD::FRINT:
     353             :   case ISD::FNEARBYINT:
     354             :   case ISD::FROUND:
     355             :   case ISD::FFLOOR:
     356             :   case ISD::FP_ROUND:
     357             :   case ISD::FP_EXTEND:
     358             :   case ISD::FMA:
     359             :   case ISD::SIGN_EXTEND_INREG:
     360             :   case ISD::ANY_EXTEND_VECTOR_INREG:
     361             :   case ISD::SIGN_EXTEND_VECTOR_INREG:
     362             :   case ISD::ZERO_EXTEND_VECTOR_INREG:
     363             :   case ISD::SMIN:
     364             :   case ISD::SMAX:
     365             :   case ISD::UMIN:
     366             :   case ISD::UMAX:
     367             :   case ISD::SMUL_LOHI:
     368             :   case ISD::UMUL_LOHI:
     369             :     QueryType = Node->getValueType(0);
     370      185076 :     break;
     371           0 :   case ISD::FP_ROUND_INREG:
     372           0 :     QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
     373           0 :     break;
     374        1803 :   case ISD::SINT_TO_FP:
     375             :   case ISD::UINT_TO_FP:
     376        1803 :     QueryType = Node->getOperand(0).getValueType();
     377        1803 :     break;
     378             :   case ISD::MSCATTER:
     379         103 :     QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType();
     380         103 :     break;
     381             :   case ISD::MSTORE:
     382         125 :     QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType();
     383         125 :     break;
     384             :   }
     385             : 
     386             :   DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
     387             : 
     388      374214 :   switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) {
     389           0 :   default: llvm_unreachable("This action is not supported yet!");
     390       13677 :   case TargetLowering::Promote:
     391       13677 :     Result = Promote(Op);
     392       13677 :     Changed = true;
     393       13677 :     break;
     394             :   case TargetLowering::Legal:
     395             :     DEBUG(dbgs() << "Legal node: nothing to do\n");
     396             :     break;
     397       45642 :   case TargetLowering::Custom: {
     398             :     DEBUG(dbgs() << "Trying custom legalization\n");
     399       45642 :     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
     400             :       DEBUG(dbgs() << "Successfully custom legalized node\n");
     401       44319 :       Result = Tmp1;
     402       44319 :       break;
     403             :     }
     404             :     DEBUG(dbgs() << "Could not custom legalize node\n");
     405             :     LLVM_FALLTHROUGH;
     406             :   }
     407             :   case TargetLowering::Expand:
     408        7077 :     Result = Expand(Op);
     409             :   }
     410             : 
     411             :   // Make sure that the generated code is itself legal.
     412             :   if (Result != Op) {
     413       54751 :     Result = LegalizeOp(Result);
     414       54751 :     Changed = true;
     415             :   }
     416             : 
     417             :   // Note that LegalizeOp may be reentered even from single-use nodes, which
     418             :   // means that we always must cache transformed nodes.
     419      187107 :   AddLegalizedOperand(Op, Result);
     420      187107 :   return Result;
     421             : }
     422             : 
     423       13677 : SDValue VectorLegalizer::Promote(SDValue Op) {
     424             :   // For a few operations there is a specific concept for promotion based on
     425             :   // the operand's type.
     426       13677 :   switch (Op.getOpcode()) {
     427          22 :   case ISD::SINT_TO_FP:
     428             :   case ISD::UINT_TO_FP:
     429             :     // "Promote" the operation by extending the operand.
     430          22 :     return PromoteINT_TO_FP(Op);
     431         145 :   case ISD::FP_TO_UINT:
     432             :   case ISD::FP_TO_SINT:
     433             :     // Promote the operation by widening the result type.
     434         145 :     return PromoteFP_TO_INT(Op);
     435             :   }
     436             : 
     437             :   // There are currently two cases of vector promotion:
     438             :   // 1) Bitcasting a vector of integers to a different vector type of the
     439             :   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
     440             :   // 2) Extending a vector of floats to a vector of the same number of larger
     441             :   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
     442       13510 :   MVT VT = Op.getSimpleValueType();
     443             :   assert(Op.getNode()->getNumValues() == 1 &&
     444             :          "Can't promote a vector with multiple results!");
     445       13510 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     446             :   SDLoc dl(Op);
     447       40530 :   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
     448             : 
     449       67698 :   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
     450       81282 :     if (Op.getOperand(j).getValueType().isVector())
     451             :       if (Op.getOperand(j)
     452       53874 :               .getValueType()
     453       53874 :               .getVectorElementType()
     454       27194 :               .isFloatingPoint() &&
     455       54243 :           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
     456         423 :         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
     457             :       else
     458       80388 :         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
     459             :     else
     460         157 :       Operands[j] = Op.getOperand(j);
     461             :   }
     462             : 
     463       27020 :   Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
     464       27078 :   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
     465       40364 :       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
     466       13512 :        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
     467         336 :     return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
     468             :   else
     469       26796 :     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
     470             : }
     471             : 
     472          22 : SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
     473             :   // INT_TO_FP operations may require the input operand be promoted even
     474             :   // when the type is otherwise legal.
     475          22 :   MVT VT = Op.getOperand(0).getSimpleValueType();
     476          44 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     477             :   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
     478             :          "Vectors have different number of elements!");
     479             : 
     480             :   SDLoc dl(Op);
     481          66 :   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
     482             : 
     483          22 :   unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
     484             :     ISD::SIGN_EXTEND;
     485          66 :   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
     486          66 :     if (Op.getOperand(j).getValueType().isVector())
     487          66 :       Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
     488             :     else
     489           0 :       Operands[j] = Op.getOperand(j);
     490             :   }
     491             : 
     492          66 :   return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
     493             : }
     494             : 
     495             : // For FP_TO_INT we promote the result type to a vector type with wider
     496             : // elements and then truncate the result.  This is different from the default
     497             : // PromoteVector which uses bitcast to promote thus assuming that the
     498             : // promoted vector type has the same overall size.
     499         145 : SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
     500         145 :   MVT VT = Op.getSimpleValueType();
     501         290 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     502             :   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
     503             :          "Vectors have different number of elements!");
     504             : 
     505         145 :   unsigned NewOpc = Op->getOpcode();
     506             :   // Change FP_TO_UINT to FP_TO_SINT if possible.
     507             :   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
     508         145 :   if (NewOpc == ISD::FP_TO_UINT &&
     509          83 :       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
     510             :     NewOpc = ISD::FP_TO_SINT;
     511             : 
     512             :   SDLoc dl(Op);
     513         290 :   SDValue Promoted  = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
     514             : 
     515             :   // Assert that the converted value fits in the original type.  If it doesn't
     516             :   // (eg: because the value being converted is too big), then the result of the
     517             :   // original operation was undefined anyway, so the assert is still correct.
     518         435 :   Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
     519             :                                                             : ISD::AssertSext,
     520             :                          dl, NVT, Promoted,
     521         435 :                          DAG.getValueType(VT.getScalarType()));
     522         435 :   return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
     523             : }
     524             : 
     525        1564 : SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
                         // Expands a vector load into scalar pieces and registers the results.
                         //
                         // When the memory type has more than one element and its element type is
                         // not byte-sized, the vector is read as pointer-width integers (plus
                         // narrower extending loads for the tail), and every element is recovered
                         // with shifts and masks, then extended per the load's extension type.
                         // All other loads are handed to TLI.scalarizeVectorLoad.
                         //
                         // Both results of the original node (value and chain) are registered via
                         // AddLegalizedOperand; the one selected by Op.getResNo() is returned.
     526             :   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     527             : 
     528        1564 :   EVT SrcVT = LD->getMemoryVT();
     529        1564 :   EVT SrcEltVT = SrcVT.getScalarType();
     530        1564 :   unsigned NumElem = SrcVT.getVectorNumElements();
     531             : 
     532        1564 :   SDValue NewChain;
     533        1564 :   SDValue Value;
     534        3128 :   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
     535             :     SDLoc dl(Op);
     536             : 
     537             :     SmallVector<SDValue, 8> Vals;
     538             :     SmallVector<SDValue, 8> LoadChains;
     539             : 
     540         942 :     EVT DstEltVT = LD->getValueType(0).getScalarType();
     541         471 :     SDValue Chain = LD->getChain();
     542         471 :     SDValue BasePTR = LD->getBasePtr();
     543             :     ISD::LoadExtType ExtType = LD->getExtensionType();
     544             : 
     545             :     // When elements in a vector are not byte-addressable, we cannot directly
     546             :     // load each element by advancing pointer, which could only address bytes.
     547             :     // Instead, we load all significant words, mask bits off, and concatenate
     548             :     // them to form each element. Finally, they are extended to destination
     549             :     // scalar type to build the destination vector.
     550         471 :     EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
     551             : 
     552             :     assert(WideVT.isRound() &&
     553             :            "Could not handle the sophisticated case when the widest integer is"
     554             :            " not power of 2.");
     555             :     assert(WideVT.bitsGE(SrcEltVT) &&
     556             :            "Type is not legalized?");
     557             : 
     558             :     unsigned WideBytes = WideVT.getStoreSize();
     559             :     unsigned Offset = 0;
     560             :     unsigned RemainingBytes = SrcVT.getStoreSize();
     561             :     SmallVector<SDValue, 8> LoadVals;
                           // Read the whole memory footprint: full WideVT loads while enough bytes
                           // remain, then progressively halved extending loads for the tail.
     562         946 :     while (RemainingBytes > 0) {
     563             :       SDValue ScalarLoad;
     564             :       unsigned LoadBytes = WideBytes;
     565             : 
     566         475 :       if (RemainingBytes >= LoadBytes) {
     567          17 :         ScalarLoad =
     568          51 :             DAG.getLoad(WideVT, dl, Chain, BasePTR,
     569          17 :                         LD->getPointerInfo().getWithOffset(Offset),
     570          17 :                         MinAlign(LD->getAlignment(), Offset),
     571          85 :                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
     572             :       } else {
     573         458 :         EVT LoadVT = WideVT;
     574        1406 :         while (RemainingBytes < LoadBytes) {
     575         948 :           LoadBytes >>= 1; // Reduce the load size by half.
     576         948 :           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
     577             :         }
     578         458 :         ScalarLoad =
     579        1374 :             DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
     580         458 :                            LD->getPointerInfo().getWithOffset(Offset), LoadVT,
     581         458 :                            MinAlign(LD->getAlignment(), Offset),
     582        2290 :                            LD->getMemOperand()->getFlags(), LD->getAAInfo());
     583             :       }
     584             : 
     585         475 :       RemainingBytes -= LoadBytes;
     586         475 :       Offset += LoadBytes;
     587             : 
     588         475 :       BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
     589             : 
     590         475 :       LoadVals.push_back(ScalarLoad.getValue(0));
     591         475 :       LoadChains.push_back(ScalarLoad.getValue(1));
     592             :     }
     593             : 
     594             :     // Extract bits, pack and extend/trunc them into destination type.
     595         471 :     unsigned SrcEltBits = SrcEltVT.getSizeInBits();
                           // Build the mask as an APInt rather than with `1U << SrcEltBits`: the
                           // asserts above only bound SrcEltBits by the width of WideVT, so it may
                           // exceed 31, where a 32-bit shift would be undefined.
     596         471 :     SDValue SrcEltBitMask = DAG.getConstant(APInt::getLowBitsSet(WideVT.getSizeInBits(), SrcEltBits), dl, WideVT);
     597             : 
     598             :     unsigned BitOffset = 0;
     599             :     unsigned WideIdx = 0;
     600         471 :     unsigned WideBits = WideVT.getSizeInBits();
     601             : 
     602        5363 :     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
     603        2446 :       SDValue Lo, Hi, ShAmt;
     604             : 
                             // Low part: the bits of this element held in the current wide word.
     605        2446 :       if (BitOffset < WideBits) {
     606        4892 :         ShAmt = DAG.getConstant(
     607        4892 :             BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     608        7338 :         Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
     609        4892 :         Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
     610             :       }
     611             : 
     612        2446 :       BitOffset += SrcEltBits;
     613        2446 :       if (BitOffset >= WideBits) {
     614          17 :         WideIdx++;
     615          17 :         BitOffset -= WideBits;
                               // A non-zero remainder means the element straddles two wide words;
                               // fetch its upper bits from the next word.
     616          17 :         if (BitOffset > 0) {
     617           0 :           ShAmt = DAG.getConstant(
     618           0 :               SrcEltBits - BitOffset, dl,
     619           0 :               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     620           0 :           Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
     621           0 :           Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
     622             :         }
     623             :       }
     624             : 
     625        2446 :       if (Hi.getNode())
     626           0 :         Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
     627             : 
     628        2446 :       switch (ExtType) {
     629           0 :       default: llvm_unreachable("Unknown extended-load op!");
     630        1198 :       case ISD::EXTLOAD:
     631        1198 :         Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
     632             :         break;
     633         408 :       case ISD::ZEXTLOAD:
     634         408 :         Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
     635             :         break;
     636         840 :       case ISD::SEXTLOAD:
                               // Sign-extend within WideVT by shifting the element to the top of
                               // the word and arithmetically shifting it back down.
     637         840 :         ShAmt =
     638        3360 :             DAG.getConstant(WideBits - SrcEltBits, dl,
     639        2520 :                             TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     640        1680 :         Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
     641        1680 :         Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
     642         840 :         Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
     643             :         break;
     644             :       }
     645        2446 :       Vals.push_back(Lo);
     646             :     }
     647             : 
     648         942 :     NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
     649        1413 :     Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
     650             :   } else {
     651        1093 :     SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
     652             : 
     653        1093 :     NewChain = Scalarized.getValue(1);
     654        1093 :     Value = Scalarized.getValue(0);
     655             :   }
     656             : 
     657        1564 :   AddLegalizedOperand(Op.getValue(0), Value);
     658        1564 :   AddLegalizedOperand(Op.getValue(1), NewChain);
     659             : 
     660        1564 :   return (Op.getResNo() ? NewChain : Value);
     661             : }
     662             : 
     663         217 : SDValue VectorLegalizer::ExpandStore(SDValue Op) {
                         // Expands a vector store by delegating to TLI.scalarizeVectorStore, then
                         // registers the returned value for Op via AddLegalizedOperand and
                         // returns it.
     664         217 :   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     665         217 :   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
     666         217 :   AddLegalizedOperand(Op, TF);
     667         217 :   return TF;
     668             : }
     669             : 
     670        7077 : SDValue VectorLegalizer::Expand(SDValue Op) {
                         // Dispatches on the node's opcode to the dedicated expansion routine.
                         // Opcodes without one fall through to DAG.UnrollVectorOp.
     671       14154 :   switch (Op->getOpcode()) {
     672         528 :   case ISD::SIGN_EXTEND_INREG:
     673         528 :     return ExpandSEXTINREG(Op);
     674         257 :   case ISD::ANY_EXTEND_VECTOR_INREG:
     675         257 :     return ExpandANY_EXTEND_VECTOR_INREG(Op);
     676          12 :   case ISD::SIGN_EXTEND_VECTOR_INREG:
     677          12 :     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
     678         657 :   case ISD::ZERO_EXTEND_VECTOR_INREG:
     679         657 :     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
     680          49 :   case ISD::BSWAP:
     681          49 :     return ExpandBSWAP(Op);
     682        1336 :   case ISD::VSELECT:
     683        1336 :     return ExpandVSELECT(Op);
     684          90 :   case ISD::SELECT:
     685          90 :     return ExpandSELECT(Op);
     686         128 :   case ISD::UINT_TO_FP:
     687         128 :     return ExpandUINT_TO_FLOAT(Op);
     688          36 :   case ISD::FNEG:
     689          36 :     return ExpandFNEG(Op);
     690          37 :   case ISD::FSUB:
     691          37 :     return ExpandFSUB(Op);
     692          94 :   case ISD::SETCC:
     693          94 :     return UnrollVSETCC(Op);
     694         132 :   case ISD::BITREVERSE:
     695         132 :     return ExpandBITREVERSE(Op);
     696         136 :   case ISD::CTLZ:
     697             :   case ISD::CTLZ_ZERO_UNDEF:
     698         136 :     return ExpandCTLZ(Op);
     699         111 :   case ISD::CTTZ_ZERO_UNDEF:
     700         111 :     return ExpandCTTZ_ZERO_UNDEF(Op);
     701        3474 :   default:
     702        3474 :     return DAG.UnrollVectorOp(Op.getNode());
     703             :   }
     704             : }
     705             : 
     706          90 : SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
     707             :   // Lower a select instruction where the condition is a scalar and the
     708             :   // operands are vectors. Lower this select to VSELECT and implement it
     709             :   // using XOR AND OR. The selector bit is broadcast.
     710          90 :   EVT VT = Op.getValueType();
     711             :   SDLoc DL(Op);
     712             : 
     713          90 :   SDValue Mask = Op.getOperand(0);
     714          90 :   SDValue Op1 = Op.getOperand(1);
     715          90 :   SDValue Op2 = Op.getOperand(2);
     716             : 
     717             :   assert(VT.isVector() && !Mask.getValueType().isVector()
     718             :          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
     719             : 
     720             :   // If we can't even use the basic vector operations of
     721             :   // AND,OR,XOR, we will have to scalarize the op.
     722             :   // Notice that the operation may be 'promoted' which means that it is
     723             :   // 'bitcasted' to another type which is handled.
     724             :   // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
     725          97 :   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
     726           7 :       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
     727          97 :       TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
     728             :       TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
     729          83 :     return DAG.UnrollVectorOp(Op.getNode());
     730             : 
     731             :   // Generate a mask operand.
     732           7 :   EVT MaskTy = VT.changeVectorElementTypeToInteger();
     733             : 
     734             :   // What is the size of each element in the vector mask.
     735           7 :   EVT BitTy = MaskTy.getScalarType();
     736             : 
                         // Turn the scalar condition into an all-ones or all-zero scalar of the
                         // mask element type.
     737          14 :   Mask = DAG.getSelect(DL, BitTy, Mask,
     738          14 :           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
     739             :                           BitTy),
     740          14 :           DAG.getConstant(0, DL, BitTy));
     741             : 
     742             :   // Broadcast the mask so that the entire vector is all-one or all zero.
     743           7 :   Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
     744             : 
     745             :   // Bitcast the operands to be the same type as the mask.
     746             :   // This is needed when we select between FP types because
     747             :   // the mask is a vector of integers.
     748          14 :   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
     749          14 :   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
     750             : 
                         // NotMask = Mask ^ all-ones; the result is (Op1 & Mask) | (Op2 & NotMask),
                         // bitcast back to the original value type.
     751           7 :   SDValue AllOnes = DAG.getConstant(
     752          14 :             APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
     753          14 :   SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
     754             : 
     755          14 :   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
     756          14 :   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
     757          14 :   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
     758          14 :   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
     759             : }
     760             : 
     761         528 : SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
                         // Expands SIGN_EXTEND_INREG as SHL followed by SRA, both by the
                         // difference between the result element width and the width of the type
                         // carried in operand 1. Unrolls instead if either shift is marked Expand.
     762         528 :   EVT VT = Op.getValueType();
     763             : 
     764             :   // Make sure that the SRA and SHL instructions are available.
     765        1561 :   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
     766             :       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
     767          23 :     return DAG.UnrollVectorOp(Op.getNode());
     768             : 
     769             :   SDLoc DL(Op);
     770        1010 :   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
     771             : 
     772             :   unsigned BW = VT.getScalarSizeInBits();
     773             :   unsigned OrigBW = OrigTy.getScalarSizeInBits();
     774         505 :   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
     775             : 
                         // Move the narrow value to the top of each element, then shift it back
                         // arithmetically so the sign bit fills the upper bits.
     776         505 :   Op = Op.getOperand(0);
     777        1010 :   Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
     778        1010 :   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
     779             : }
     780             : 
     781             : // Generically expand a vector anyext in register to a shuffle of the relevant
     782             : // lanes into the appropriate locations, with other lanes left undef.
     783         257 : SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
     784             :   SDLoc DL(Op);
     785         257 :   EVT VT = Op.getValueType();
     786         257 :   int NumElements = VT.getVectorNumElements();
     787         257 :   SDValue Src = Op.getOperand(0);
     788         257 :   EVT SrcVT = Src.getValueType();
     789         257 :   int NumSrcElements = SrcVT.getVectorNumElements();
     790             : 
     791             :   // Build a base mask of undef shuffles.
     792             :   SmallVector<int, 16> ShuffleMask;
     793         257 :   ShuffleMask.resize(NumSrcElements, -1);
     794             : 
     795             :   // Place the extended lanes into the correct locations.
                         // Source lane i goes to the first of each group of ExtLaneScale source
                         // lanes, or to the last of the group on big-endian targets.
     796         257 :   int ExtLaneScale = NumSrcElements / NumElements;
     797         514 :   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
     798        1861 :   for (int i = 0; i < NumElements; ++i)
     799        1604 :     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
     800             : 
                         // Shuffle within the source type, then reinterpret as the result type.
     801         257 :   return DAG.getNode(
     802             :       ISD::BITCAST, DL, VT,
     803         514 :       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
     804             : }
     805             : 
     806          12 : SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
                         // Expands a vector sign-extend-in-register as an any-extend-in-register
                         // followed by SHL and SRA by the difference between the result and source
                         // element widths.
     807             :   SDLoc DL(Op);
     808          12 :   EVT VT = Op.getValueType();
     809          12 :   SDValue Src = Op.getOperand(0);
     810          12 :   EVT SrcVT = Src.getValueType();
     811             : 
     812             :   // First build an any-extend node which can be legalized above when we
     813             :   // recurse through it.
     814          12 :   Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
     815             : 
     816             :   // Now we need sign extend. Do this by shifting the elements. Even if these
     817             :   // aren't legal operations, they have a better chance of being legalized
     818             :   // without full scalarization than the sign extension does.
     819             :   unsigned EltWidth = VT.getScalarSizeInBits();
     820             :   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
     821          12 :   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
     822          12 :   return DAG.getNode(ISD::SRA, DL, VT,
     823             :                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
     824          24 :                      ShiftAmount);
     825             : }
     826             : 
     827             : // Generically expand a vector zext in register to a shuffle of the relevant
     828             : // lanes into the appropriate locations, a blend of zero into the high bits,
     829             : // and a bitcast to the wider element type.
     830         657 : SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
     831             :   SDLoc DL(Op);
     832         657 :   EVT VT = Op.getValueType();
     833         657 :   int NumElements = VT.getVectorNumElements();
     834         657 :   SDValue Src = Op.getOperand(0);
     835         657 :   EVT SrcVT = Src.getValueType();
     836         657 :   int NumSrcElements = SrcVT.getVectorNumElements();
     837             : 
     838             :   // Build up a zero vector to blend into this one.
     839         657 :   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
     840             : 
     841             :   // Shuffle the incoming lanes into the correct position, and pull all other
     842             :   // lanes from the zero vector.
                         // The mask starts as the identity 0..NumSrcElements-1; Zero is the first
                         // shuffle operand below.
     843             :   SmallVector<int, 16> ShuffleMask;
     844         657 :   ShuffleMask.reserve(NumSrcElements);
     845       11457 :   for (int i = 0; i < NumSrcElements; ++i)
     846       10800 :     ShuffleMask.push_back(i);
     847             : 
                         // Overwrite one slot per result element with NumSrcElements + i, i.e.
                         // lane i of Src, the second shuffle operand. The slot is the first of
                         // each group of ExtLaneScale lanes, or the last on big-endian targets.
     848         657 :   int ExtLaneScale = NumSrcElements / NumElements;
     849        1314 :   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
     850        6921 :   for (int i = 0; i < NumElements; ++i)
     851        6264 :     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
     852             : 
     853         657 :   return DAG.getNode(ISD::BITCAST, DL, VT,
     854        1314 :                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
     855             : }
     856             : 
     857         141 : static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
                         // Appends a byte-granular shuffle mask to ShuffleMask: for each element
                         // of VT in order, that element's byte indices from highest to lowest,
                         // which reverses the bytes within every element.
     858         141 :   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
     859         983 :   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
     860        3866 :     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
     861        3024 :       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
     862         141 : }
     863             : 
     864          49 : SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
                         // Expands a vector BSWAP as a byte shuffle: bitcast the operand to a
                         // vector of i8, shuffle with the byte-reversing mask, and bitcast back.
                         // Unrolls instead when the target reports the mask as not legal.
     865          49 :   EVT VT = Op.getValueType();
     866             : 
     867             :   // Generate a byte wise shuffle mask for the BSWAP.
     868             :   SmallVector<int, 16> ShuffleMask;
     869          49 :   createBSWAPShuffleMask(VT, ShuffleMask);
     870          98 :   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
     871             : 
     872             :   // Only emit a shuffle if the mask is legal.
     873          98 :   if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
     874          12 :     return DAG.UnrollVectorOp(Op.getNode());
     875             : 
     876             :   SDLoc DL(Op);
     877          74 :   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
     878          74 :   Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
     879          74 :   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
     880             : }
     881             : 
     882         132 : SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
                         // Chooses a strategy for a vector BITREVERSE: unroll when the scalar
                         // operation is available; otherwise try a byte shuffle followed by a
                         // BITREVERSE on the byte vector; otherwise return Op unchanged when the
                         // needed vector bit operations exist, and unroll as a last resort.
     883         132 :   EVT VT = Op.getValueType();
     884             : 
     885             :   // If we have the scalar operation, it's probably cheaper to unroll it.
     886         132 :   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
     887          10 :     return DAG.UnrollVectorOp(Op.getNode());
     888             : 
     889             :   // If the vector element width is a whole number of bytes, test if it's legal
     890             :   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
     891             :   // vector. This greatly reduces the number of bit shifts necessary.
     892             :   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
     893         122 :   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
     894             :     SmallVector<int, 16> BSWAPMask;
     895          92 :     createBSWAPShuffleMask(VT, BSWAPMask);
     896             : 
     897         184 :     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
     898         276 :     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
     899          84 :         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
     900             :          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
     901          22 :           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
     902          44 :           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
     903          22 :           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
     904             :       SDLoc DL(Op);
     905         168 :       Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
     906         168 :       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
     907         168 :                                 BSWAPMask);
     908         168 :       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
     909         168 :       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
     910             :     }
     911             :   }
     912             : 
     913             :   // If we have the appropriate vector bit operations, it is better to use them
     914             :   // than unrolling and expanding each component.
     915          38 :   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
     916          32 :       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
     917          64 :       !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
     918          32 :       !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
     919           6 :     return DAG.UnrollVectorOp(Op.getNode());
     920             : 
     921             :   // Let LegalizeDAG handle this later.
     922          32 :   return Op;
     923             : }
     924             : 
     925        1336 : SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
     926             :   // Implement VSELECT in terms of XOR, AND, OR
     927             :   // on platforms which do not support blend natively.
     928             :   SDLoc DL(Op);
     929             : 
     930        1336 :   SDValue Mask = Op.getOperand(0);
     931        1336 :   SDValue Op1 = Op.getOperand(1);
     932        1336 :   SDValue Op2 = Op.getOperand(2);
     933             : 
                         // Note: VT is the type of the mask, not of the selected values.
     934        1336 :   EVT VT = Mask.getValueType();
     935             : 
     936             :   // If we can't even use the basic vector operations of
     937             :   // AND,OR,XOR, we will have to scalarize the op.
     938             :   // Notice that the operation may be 'promoted' which means that it is
     939             :   // 'bitcasted' to another type which is handled.
     940             :   // This operation also isn't safe with AND, OR, XOR when the boolean
     941             :   // type is 0/1 as we need an all ones vector constant to mask with.
     942             :   // FIXME: Sign extend 1 to all ones if that's legal on the target.
     943        2655 :   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
     944        1319 :       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
     945        2655 :       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
     946        2638 :       TLI.getBooleanContents(Op1.getValueType()) !=
     947             :           TargetLowering::ZeroOrNegativeOneBooleanContent)
     948          17 :     return DAG.UnrollVectorOp(Op.getNode());
     949             : 
     950             :   // If the mask and the type are different sizes, unroll the vector op. This
     951             :   // can occur when getSetCCResultType returns something that is different in
     952             :   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
     953        1319 :   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
     954           0 :     return DAG.UnrollVectorOp(Op.getNode());
     955             : 
     956             :   // Bitcast the operands to be the same type as the mask.
     957             :   // This is needed when we select between FP types because
     958             :   // the mask is a vector of integers.
     959        2638 :   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
     960        2638 :   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
     961             : 
                         // NotMask = Mask ^ all-ones; the result is (Op1 & Mask) | (Op2 & NotMask),
                         // bitcast back to the original value type.
     962        1319 :   SDValue AllOnes = DAG.getConstant(
     963        2638 :     APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
     964        2638 :   SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
     965             : 
     966        2638 :   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
     967        2638 :   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
     968        2638 :   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
     969        2638 :   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
     970             : }
     971             : 
     972         128 : SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
                         // Expands a vector UINT_TO_FP by splitting each integer element into two
                         // half-width parts, converting each with SINT_TO_FP, and recombining as
                         // fHI * 2^(BW/2) + fLO. Note: VT is the integer operand type; the
                         // floating-point result type is Op.getValueType().
     973         256 :   EVT VT = Op.getOperand(0).getValueType();
     974             :   SDLoc DL(Op);
     975             : 
     976             :   // Make sure that the SINT_TO_FP and SRL instructions are available.
     977         277 :   if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
     978             :       TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
     979         107 :     return DAG.UnrollVectorOp(Op.getNode());
     980             : 
     981             :   unsigned BW = VT.getScalarSizeInBits();
     982             :   assert((BW == 64 || BW == 32) &&
     983             :          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
     984             : 
     985          21 :   SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
     986             : 
     987             :   // Constants to clear the upper part of the word.
     988             :   // Notice that we can also use SHL+SHR, but using a constant is slightly
     989             :   // faster on x86.
     990          21 :   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
     991          21 :   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
     992             : 
     993             :   // Two to the power of half-word-size.
     994          42 :   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
     995             : 
     996             :   // HI is the upper half shifted down; LO is the lower half with the upper bits cleared.
     997          42 :   SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
     998          42 :   SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
     999             : 
    1000             :   // Convert both halves to floating point, and scale the converted high
    1001             :   // half back up by 2^(BW/2).
    1002             :   // TODO: Can any fast-math-flags be set on these nodes?
    1003          42 :   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
    1004          42 :           fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
    1005          42 :   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
    1006             : 
    1007             :   // Add the two halves
    1008          42 :   return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
    1009             : }
    1010             : 
    1011          36 : SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
                         // Expands a vector FNEG as FSUB(-0.0, x) when FSUB is legal or custom for
                         // the type; otherwise unrolls the operation.
    1012          36 :   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
    1013             :     SDLoc DL(Op);
    1014           8 :     SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
    1015             :     // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
    1016           4 :     return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
    1017           4 :                        Zero, Op.getOperand(0));
    1018             :   }
    1019          32 :   return DAG.UnrollVectorOp(Op.getNode());
    1020             : }
    1021             : 
    1022          37 : SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
    1023             :   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
    1024             :   // we can defer this to operation legalization where it will be lowered as
    1025             :   // a+(-b).
                         // The check below also requires FADD to be legal or custom before
                         // returning Op unchanged; otherwise the operation is unrolled.
    1026             :   EVT VT = Op.getValueType();
    1027          37 :   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
    1028             :       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
    1029           3 :     return Op; // Defer to LegalizeDAG
    1030             : 
    1031          34 :   return DAG.UnrollVectorOp(Op.getNode());
    1032             : }
    1033             : 
    1034         136 : SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
                         // Expands vector CTLZ / CTLZ_ZERO_UNDEF. In order of preference:
                         // rewrite CTLZ_ZERO_UNDEF to CTLZ when that is legal or custom, use the
                         // CTPOP-based sequence below, or unroll.
    1035         136 :   EVT VT = Op.getValueType();
    1036             :   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
    1037             : 
    1038             :   // If the non-ZERO_UNDEF version is supported we can use that instead.
    1039         136 :   if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
    1040         111 :       TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
    1041             :     SDLoc DL(Op);
    1042         180 :     return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
    1043             :   }
    1044             : 
    1045             :   // If CTPOP is available we can lower with a CTPOP based method:
    1046             :   // u16 ctlz(u16 x) {
    1047             :   //   x |= (x >> 1);
    1048             :   //   x |= (x >> 2);
    1049             :   //   x |= (x >> 4);
    1050             :   //   x |= (x >> 8);
    1051             :   //   return ctpop(~x);
    1052             :   // }
    1053             :   // Ref: "Hacker's Delight" by Henry Warren
    1054             :   if (isPowerOf2_32(NumBitsPerElt) &&
    1055          46 :       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
    1056          26 :       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
    1057          52 :       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
    1058          26 :       TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
    1059             :     SDLoc DL(Op);
    1060          26 :     SDValue Res = Op.getOperand(0);
    1061          52 :     EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
    1062             : 
                           // Shift amounts double each step (1, 2, 4, ...) up to half the element
                           // width, mirroring the x |= (x >> k) lines of the pseudo-code above.
    1063         282 :     for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
    1064         256 :       Res = DAG.getNode(
    1065             :           ISD::OR, DL, VT, Res,
    1066         256 :           DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
    1067             : 
    1068          26 :     Res = DAG.getNOT(DL, Res, VT);
    1069          52 :     return DAG.getNode(ISD::CTPOP, DL, VT, Res);
    1070             :   }
    1071             : 
    1072             :   // Otherwise go ahead and unroll.
    1073          20 :   return DAG.UnrollVectorOp(Op.getNode());
    1074             : }
    1075             : 
    1076         111 : SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { // Expand a vector CTTZ_ZERO_UNDEF node.
    1077             :   // If plain CTTZ is legal or custom for this type, rebuild the node as CTTZ on the same operand.
    1078         111 :   if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
    1079             :     SDLoc DL(Op);
    1080         210 :     return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
    1081             :   }
    1082             : 
    1083             :   // CTTZ is not available either, so go ahead and unroll.
    1084           6 :   return DAG.UnrollVectorOp(Op.getNode());
    1085             : }
    1086             : 
    1087          94 : SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { // Rebuild a vector compare from per-element SETCCs.
    1088          94 :   EVT VT = Op.getValueType();
    1089          94 :   unsigned NumElems = VT.getVectorNumElements();
    1090          94 :   EVT EltVT = VT.getVectorElementType(); // Element type of the result vector.
    1091          94 :   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
    1092          94 :   EVT TmpEltVT = LHS.getValueType().getVectorElementType(); // Element type of the compared operands.
    1093             :   SDLoc dl(Op);
    1094         188 :   SmallVector<SDValue, 8> Ops(NumElems); // One slot per result element.
    1095         934 :   for (unsigned i = 0; i < NumElems; ++i) {
    1096         420 :     SDValue LHSElem = DAG.getNode( // Element i of LHS.
    1097             :         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
    1098        1260 :         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
    1099         420 :     SDValue RHSElem = DAG.getNode( // Element i of RHS.
    1100             :         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
    1101        1260 :         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
    1102        1260 :     Ops[i] = DAG.getNode(ISD::SETCC, dl, // Scalar compare, typed by getSetCCResultType for TmpEltVT.
    1103         420 :                          TLI.getSetCCResultType(DAG.getDataLayout(),
    1104         420 :                                                 *DAG.getContext(), TmpEltVT),
    1105        1260 :                          LHSElem, RHSElem, CC);
    1106        1260 :     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], // Select all-ones or 0 of EltVT on the compare result.
    1107         420 :                            DAG.getConstant(APInt::getAllOnesValue
    1108         840 :                                            (EltVT.getSizeInBits()), dl, EltVT),
    1109         840 :                            DAG.getConstant(0, dl, EltVT));
    1110             :   }
    1111         282 :   return DAG.getBuildVector(VT, dl, Ops); // Reassemble the per-element results into a VT vector.
    1112             : }
    1113             : 
    1114      310255 : bool SelectionDAG::LegalizeVectors() { // Entry point of the vector legalizer for this DAG.
    1115      620510 :   return VectorLegalizer(*this).Run(); // Build a temporary legalizer over *this and forward Run()'s result.
    1116             : }

Generated by: LCOV version 1.13