LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - LegalizeVectorOps.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 151 500 30.2 %
Date: 2018-10-20 13:21:21 Functions: 6 27 22.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the SelectionDAG::LegalizeVectors method.
      11             : //
      12             : // The vector legalizer looks for vector operations which might need to be
      13             : // scalarized and legalizes them. This is a separate step from Legalize because
      14             : // scalarizing can introduce illegal types.  For example, suppose we have an
      15             : // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
      16             : // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
      17             : // operation, which introduces nodes with the illegal type i64 which must be
      18             : // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
      19             : // the operation must be unrolled, which introduces nodes with the illegal
      20             : // type i8 which must be promoted.
      21             : //
      22             : // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
      23             : // or operations that happen to take a vector which are custom-lowered;
      24             : // the legalization for such operations never produces nodes
      25             : // with illegal types, so it's okay to put off legalizing them until
      26             : // SelectionDAG::Legalize runs.
      27             : //
      28             : //===----------------------------------------------------------------------===//
      29             : 
      30             : #include "llvm/ADT/APInt.h"
      31             : #include "llvm/ADT/DenseMap.h"
      32             : #include "llvm/ADT/SmallVector.h"
      33             : #include "llvm/CodeGen/ISDOpcodes.h"
      34             : #include "llvm/CodeGen/MachineMemOperand.h"
      35             : #include "llvm/CodeGen/SelectionDAG.h"
      36             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      37             : #include "llvm/CodeGen/TargetLowering.h"
      38             : #include "llvm/CodeGen/ValueTypes.h"
      39             : #include "llvm/IR/DataLayout.h"
      40             : #include "llvm/Support/Casting.h"
      41             : #include "llvm/Support/Compiler.h"
      42             : #include "llvm/Support/ErrorHandling.h"
      43             : #include "llvm/Support/MachineValueType.h"
      44             : #include "llvm/Support/MathExtras.h"
      45             : #include <cassert>
      46             : #include <cstdint>
      47             : #include <iterator>
      48             : #include <utility>
      49             : 
      50             : using namespace llvm;
      51             : 
      52             : #define DEBUG_TYPE "legalizevectorops"
      53             : 
      54             : namespace {
      55             : 
      56             : class VectorLegalizer {
      57             :   SelectionDAG& DAG; // The DAG being legalized (bound in the constructor)
      58             :   const TargetLowering &TLI; // DAG.getTargetLoweringInfo(), cached
      59             :   bool Changed = false; // Set once anything changed; returned by Run()
      60             : 
      61             :   /// Translation cache: maps every value already visited by LegalizeOp to the
      62             :   /// value that replaces it.  This allows us to avoid legalizing the same
      63             :   /// thing more than once.
      64             :   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
      65             : 
      66             :   /// Records the mapping From -> To in the translation cache.
      67           0 :   void AddLegalizedOperand(SDValue From, SDValue To) {
      68           0 :     LegalizedNodes.insert(std::make_pair(From, To));
      69             :     // If someone requests legalization of the new node, return itself.
      70             :     if (From != To)
      71           0 :       LegalizedNodes.insert(std::make_pair(To, To));
      72           0 :   }
      73             : 
      74             :   /// Legalizes the given node.
      75             :   SDValue LegalizeOp(SDValue Op);
      76             : 
      77             :   /// Assuming the node is legal, "legalize" the results.
      78             :   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
      79             : 
      80             :   /// Implements unrolling a VSETCC.
      81             :   SDValue UnrollVSETCC(SDValue Op);
      82             : 
      83             :   /// Implement expand-based legalization of vector operations.
      84             :   ///
      85             :   /// This is just a high-level routine to dispatch to specific code paths for
      86             :   /// operations to legalize them.
      87             :   SDValue Expand(SDValue Op);
      88             : 
      89             :   /// (The expansion for FNEG, which falls back to UnrollVectorOp if
      90             :   /// FSUB isn't legal, is declared further down as ExpandFNEG.)
      91             :   ///
      92             :   /// Implements expansion for UINT_TO_FP; falls back to UnrollVectorOp if
      93             :   /// SINT_TO_FP and SHR on vectors aren't legal.
      94             :   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
      95             : 
      96             :   /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
      97             :   SDValue ExpandSEXTINREG(SDValue Op);
      98             : 
      99             :   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
     100             :   ///
     101             :   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
     102             :   /// type. The contents of the bits in the extended part of each element are
     103             :   /// undef.
     104             :   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
     105             : 
     106             :   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
     107             :   ///
     108             :   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
     109             :   /// type, then shifts left and arithmetic shifts right to introduce a sign
     110             :   /// extension.
     111             :   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
     112             : 
     113             :   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
     114             :   ///
     115             :   /// Shuffles the low lanes of the operand into place and blends zeros into
     116             :   /// the remaining lanes, finally bitcasting to the proper type.
     117             :   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
     118             : 
     119             :   /// Expand bswap of vectors into a shuffle if legal.
     120             :   SDValue ExpandBSWAP(SDValue Op);
     121             : 
     122             :   /// Implement vselect in terms of XOR, AND, OR when blend is not
     123             :   /// supported by the target.
     124             :   SDValue ExpandVSELECT(SDValue Op);
     125             :   SDValue ExpandSELECT(SDValue Op);
     126             :   SDValue ExpandLoad(SDValue Op);
     127             :   SDValue ExpandStore(SDValue Op);
     128             :   SDValue ExpandFNEG(SDValue Op);
     129             :   SDValue ExpandFSUB(SDValue Op);
     130             :   SDValue ExpandBITREVERSE(SDValue Op);
     131             :   SDValue ExpandCTLZ(SDValue Op);
     132             :   SDValue ExpandCTTZ(SDValue Op);
     133             :   SDValue ExpandStrictFPOp(SDValue Op);
     134             : 
     135             :   /// Implements vector promotion.
     136             :   ///
     137             :   /// This is essentially just bitcasting the operands to a different type and
     138             :   /// bitcasting the result back to the original type.
     139             :   SDValue Promote(SDValue Op);
     140             : 
     141             :   /// Implements [SU]INT_TO_FP vector promotion.
     142             :   ///
     143             :   /// This is a [zs]ext of the input operand to a larger integer type.
     144             :   SDValue PromoteINT_TO_FP(SDValue Op);
     145             : 
     146             :   /// Implements FP_TO_[SU]INT vector promotion of the result type.
     147             :   ///
     148             :   /// It is promoted to a larger integer type.  The result is then
     149             :   /// truncated back to the original type.
     150             :   SDValue PromoteFP_TO_INT(SDValue Op);
     151             : 
     152             : public:
     153     1269116 :   VectorLegalizer(SelectionDAG& dag) :
     154     1269116 :       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
     155             : 
     156             :   /// Legalize the vector operations in the DAG; returns true on any change.
     157             :   bool Run();
     158             : };
     159             : 
     160             : } // end anonymous namespace
     161             : 
     162     1269116 : bool VectorLegalizer::Run() {
     163             :   // Before we start legalizing vector nodes, check if there are any vectors.
     164             :   bool HasVectors = false;
     165     1269116 :   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
     166    28668143 :        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
     167             :     // Check whether any value produced by this node is a vector.  Operands
     168             :     // need no separate check: each operand is a node visited by this loop too.
     169    27657639 :     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
     170    62957864 :          J != E; ++J)
     171    35300225 :       HasVectors |= J->isVector();
     172             : 
     173             :     // If we found a vector node we can start the legalization.
     174    27657639 :     if (HasVectors)
     175             :       break;
     176             :   }
     177             : 
     178             :   // If this basic block has no vectors then no need to legalize vectors.
     179     1269116 :   if (!HasVectors)
     180             :     return false;
     181             : 
     182             :   // The legalize process is inherently a bottom-up recursive process (nodes
     183             :   // legalize their operands before themselves).  Given infinite stack space,
     184             :   // we could just start legalizing on the root and traverse the whole graph.
     185             :   // In practice however, this causes us to run out of stack space on large
     186             :   // basic blocks.  To avoid this problem, compute an ordering of the nodes
     187             :   // where each node is only legalized after all of its operands are legalized.
     188      258612 :   DAG.AssignTopologicalOrder();
     189      258612 :   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
     190    11967797 :        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
     191    11709185 :     LegalizeOp(SDValue(&*I, 0));
     192             : 
     193             :   // Finally, it's possible the root changed.  Get the new root.
     194      258612 :   SDValue OldRoot = DAG.getRoot();
     195             :   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
     196      258612 :   DAG.setRoot(LegalizedNodes[OldRoot]);
     197             : 
     198      258612 :   LegalizedNodes.clear(); // The translation cache is only needed per run.
     199             : 
     200             :   // Remove dead nodes now.
     201      258612 :   DAG.RemoveDeadNodes();
     202             : 
     203      258612 :   return Changed;
     204             : }
     205             : 
     206           0 : SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
     207             :   // Generic legalization: cache value i of Op as mapping to value i of Result.
     208    27070050 :   for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
     209    15405287 :     AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
     210           0 :   return Result.getValue(Op.getResNo()); // Same result number as Op.
     211             : }
     212             : 
     213    36489037 : SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     214             :   // Note that LegalizeOp may be reentered even from single-use nodes, which
     215             :   // means that we always must cache transformed nodes.
     216    36489037 :   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
     217    36489037 :   if (I != LegalizedNodes.end()) return I->second;
     218             : 
     219    11894555 :   SDNode* Node = Op.getNode();
     220             : 
     221             :   // Legalize the operands first, then update the node to use them.
     222             :   SmallVector<SDValue, 8> Ops;
     223    36629371 :   for (const SDValue &Op : Node->op_values())
     224    24734816 :     Ops.push_back(LegalizeOp(Op));
     225             : 
     226    11894555 :   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops),
     227    23789110 :                            Op.getResNo());
     228             : 
     229    23789110 :   if (Op.getOpcode() == ISD::LOAD) {
     230             :     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     231             :     ISD::LoadExtType ExtType = LD->getExtensionType();
     232     2087428 :     if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
     233             :       LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
     234             :                  Node->dump(&DAG));
     235        8138 :       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
     236             :                                    LD->getMemoryVT())) {
     237           0 :       default: llvm_unreachable("This action is not supported yet!");
     238        1501 :       case TargetLowering::Legal:
     239        1501 :         return TranslateLegalizeResults(Op, Result);
     240         932 :       case TargetLowering::Custom:
     241         932 :         if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
     242             :           assert(Lowered->getNumValues() == Op->getNumValues() &&
     243             :                  "Unexpected number of results");
     244         932 :           Changed |= Lowered != Result; // Sticky: never clear an earlier change.
     245         932 :           return TranslateLegalizeResults(Op, Lowered);
     246         932 :         }
     247             :         LLVM_FALLTHROUGH;
     248             :       case TargetLowering::Expand:
     249        1712 :         Changed = true;
     250        1712 :         return LegalizeOp(ExpandLoad(Op));
     251             :       }
     252             :     }
     253    10850841 :   } else if (Op.getOpcode() == ISD::STORE) {
     254             :     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     255     1391914 :     EVT StVT = ST->getMemoryVT();
     256             :     MVT ValVT = ST->getValue().getSimpleValueType();
     257     1391914 :     if (StVT.isVector() && ST->isTruncatingStore()) {
     258             :       LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
     259             :                  Node->dump(&DAG));
     260        1410 :       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
     261           0 :       default: llvm_unreachable("This action is not supported yet!");
     262         256 :       case TargetLowering::Legal:
     263         973 :         return TranslateLegalizeResults(Op, Result);
     264         244 :       case TargetLowering::Custom: {
     265         244 :         SDValue Lowered = TLI.LowerOperation(Result, DAG);
     266         244 :         Changed |= Lowered != Result; // Sticky: never clear an earlier change.
     267         244 :         return TranslateLegalizeResults(Op, Lowered);
     268             :       }
     269         217 :       case TargetLowering::Expand:
     270         217 :         Changed = true;
     271         217 :         return LegalizeOp(ExpandStore(Op));
     272             :       }
     273             :     }
     274             :   }
     275             : 
     276             :   bool HasVectorValue = false; // Does this node produce any vector value?
     277    11889693 :   for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
     278    27517654 :        J != E;
     279             :        ++J)
     280    15627961 :     HasVectorValue |= J->isVector();
     281    11889693 :   if (!HasVectorValue)
     282    10654719 :     return TranslateLegalizeResults(Op, Result);
     283             : 
     284             :   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
     285     1234974 :   switch (Op.getOpcode()) {
     286     1007111 :   default:
     287     1007111 :     return TranslateLegalizeResults(Op, Result);
     288         177 :   case ISD::STRICT_FADD:
     289             :   case ISD::STRICT_FSUB:
     290             :   case ISD::STRICT_FMUL:
     291             :   case ISD::STRICT_FDIV:
     292             :   case ISD::STRICT_FREM:
     293             :   case ISD::STRICT_FSQRT:
     294             :   case ISD::STRICT_FMA:
     295             :   case ISD::STRICT_FPOW:
     296             :   case ISD::STRICT_FPOWI:
     297             :   case ISD::STRICT_FSIN:
     298             :   case ISD::STRICT_FCOS:
     299             :   case ISD::STRICT_FEXP:
     300             :   case ISD::STRICT_FEXP2:
     301             :   case ISD::STRICT_FLOG:
     302             :   case ISD::STRICT_FLOG10:
     303             :   case ISD::STRICT_FLOG2:
     304             :   case ISD::STRICT_FRINT:
     305             :   case ISD::STRICT_FNEARBYINT:
     306             :     // These pseudo-ops get legalized as if they were their non-strict
     307             :     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
     308             :     // is also legal, but if ISD::FSQRT requires expansion then so does
     309             :     // ISD::STRICT_FSQRT.
     310         354 :     Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
     311             :                                             Node->getValueType(0));
     312             :     break;
     313      225480 :   case ISD::ADD:
     314             :   case ISD::SUB:
     315             :   case ISD::MUL:
     316             :   case ISD::SDIV:
     317             :   case ISD::UDIV:
     318             :   case ISD::SREM:
     319             :   case ISD::UREM:
     320             :   case ISD::SDIVREM:
     321             :   case ISD::UDIVREM:
     322             :   case ISD::FADD:
     323             :   case ISD::FSUB:
     324             :   case ISD::FMUL:
     325             :   case ISD::FDIV:
     326             :   case ISD::FREM:
     327             :   case ISD::AND:
     328             :   case ISD::OR:
     329             :   case ISD::XOR:
     330             :   case ISD::SHL:
     331             :   case ISD::SRA:
     332             :   case ISD::SRL:
     333             :   case ISD::ROTL:
     334             :   case ISD::ROTR:
     335             :   case ISD::BSWAP:
     336             :   case ISD::BITREVERSE:
     337             :   case ISD::CTLZ:
     338             :   case ISD::CTTZ:
     339             :   case ISD::CTLZ_ZERO_UNDEF:
     340             :   case ISD::CTTZ_ZERO_UNDEF:
     341             :   case ISD::CTPOP:
     342             :   case ISD::SELECT:
     343             :   case ISD::VSELECT:
     344             :   case ISD::SELECT_CC:
     345             :   case ISD::SETCC:
     346             :   case ISD::ZERO_EXTEND:
     347             :   case ISD::ANY_EXTEND:
     348             :   case ISD::TRUNCATE:
     349             :   case ISD::SIGN_EXTEND:
     350             :   case ISD::FP_TO_SINT:
     351             :   case ISD::FP_TO_UINT:
     352             :   case ISD::FNEG:
     353             :   case ISD::FABS:
     354             :   case ISD::FMINNUM:
     355             :   case ISD::FMAXNUM:
     356             :   case ISD::FMINNAN:
     357             :   case ISD::FMAXNAN:
     358             :   case ISD::FCOPYSIGN:
     359             :   case ISD::FSQRT:
     360             :   case ISD::FSIN:
     361             :   case ISD::FCOS:
     362             :   case ISD::FPOWI:
     363             :   case ISD::FPOW:
     364             :   case ISD::FLOG:
     365             :   case ISD::FLOG2:
     366             :   case ISD::FLOG10:
     367             :   case ISD::FEXP:
     368             :   case ISD::FEXP2:
     369             :   case ISD::FCEIL:
     370             :   case ISD::FTRUNC:
     371             :   case ISD::FRINT:
     372             :   case ISD::FNEARBYINT:
     373             :   case ISD::FROUND:
     374             :   case ISD::FFLOOR:
     375             :   case ISD::FP_ROUND:
     376             :   case ISD::FP_EXTEND:
     377             :   case ISD::FMA:
     378             :   case ISD::SIGN_EXTEND_INREG:
     379             :   case ISD::ANY_EXTEND_VECTOR_INREG:
     380             :   case ISD::SIGN_EXTEND_VECTOR_INREG:
     381             :   case ISD::ZERO_EXTEND_VECTOR_INREG:
     382             :   case ISD::SMIN:
     383             :   case ISD::SMAX:
     384             :   case ISD::UMIN:
     385             :   case ISD::UMAX:
     386             :   case ISD::SMUL_LOHI:
     387             :   case ISD::UMUL_LOHI:
     388             :   case ISD::FCANONICALIZE:
     389             :   case ISD::SADDSAT:
     390      450960 :     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
     391      225480 :     break;
     392           0 :   case ISD::FP_ROUND_INREG:
     393           0 :     Action = TLI.getOperationAction(Node->getOpcode(),
     394             :                cast<VTSDNode>(Node->getOperand(1))->getVT());
     395           0 :     break;
     396        2206 :   case ISD::SINT_TO_FP:
     397             :   case ISD::UINT_TO_FP:
     398        2206 :     Action = TLI.getOperationAction(Node->getOpcode(),
     399        2206 :                                     Node->getOperand(0).getValueType());
     400        2206 :     break;
     401             :   }
     402             : 
     403             :   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
     404             : 
     405      227778 :   switch (Action) {
     406           0 :   default: llvm_unreachable("This action is not supported yet!");
     407         658 :   case TargetLowering::Promote:
     408         658 :     Result = Promote(Op);
     409         658 :     Changed = true;
     410         658 :     break;
     411             :   case TargetLowering::Legal:
     412             :     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
     413             :     break;
     414       57325 :   case TargetLowering::Custom: {
     415             :     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
     416       57325 :     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
     417             :       LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
     418       55935 :       Result = Tmp1;
     419       55935 :       break;
     420             :     }
     421             :     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
     422             :     LLVM_FALLTHROUGH;
     423             :   }
     424             :   case TargetLowering::Expand:
     425        9205 :     Result = Expand(Op);
     426             :   }
     427             : 
     428             :   // Make sure that the generated code is itself legal.
     429             :   if (Result != Op) {
     430       43107 :     Result = LegalizeOp(Result);
     431       43107 :     Changed = true;
     432             :   }
     433             : 
     434             :   // Note that LegalizeOp may be reentered even from single-use nodes, which
     435             :   // means that we always must cache transformed nodes.
     436      227863 :   AddLegalizedOperand(Op, Result);
     437      227863 :   return Result;
     438             : }
     439             : 
     440         658 : SDValue VectorLegalizer::Promote(SDValue Op) {
     441             :   // For a few operations there is a specific concept for promotion based on
     442             :   // the operand's or the result's type.
     443         658 :   switch (Op.getOpcode()) {
     444          22 :   case ISD::SINT_TO_FP:
     445             :   case ISD::UINT_TO_FP:
     446             :     // "Promote" the operation by extending the operand.
     447          22 :     return PromoteINT_TO_FP(Op);
     448         171 :   case ISD::FP_TO_UINT:
     449             :   case ISD::FP_TO_SINT:
     450             :     // Promote the operation by widening the result type, then truncating.
     451         171 :     return PromoteFP_TO_INT(Op);
     452             :   }
     453             : 
     454             :   // There are currently two cases of vector promotion:
     455             :   // 1) Bitcasting a vector of integers to a different vector type of the
     456             :   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
     457             :   // 2) Extending a vector of floats to a vector of the same number of larger
     458             :   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
     459         465 :   MVT VT = Op.getSimpleValueType();
     460             :   assert(Op.getNode()->getNumValues() == 1 &&
     461             :          "Can't promote a vector with multiple results!");
     462         465 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     463             :   SDLoc dl(Op);
     464         930 :   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
     465             : 
     466        1461 :   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
     467        2988 :     if (Op.getOperand(j).getValueType().isVector())
     468             :       if (Op.getOperand(j)
     469        1694 :               .getValueType()
     470         847 :               .getVectorElementType()
     471         263 :               .isFloatingPoint() &&
     472         847 :           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
     473         282 :         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
     474             :       else
     475        1412 :         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
     476             :     else
     477         149 :       Operands[j] = Op.getOperand(j); // Non-vector operands pass through.
     478             :   }
     479             : 
     480        1395 :   Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
     481         930 :   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
     482         353 :       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
     483          61 :        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
     484         224 :     return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
     485             :   else
     486         706 :     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
     487             : }
     488             : 
     489           0 : SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
     490             :   // INT_TO_FP operations may require the input operand be promoted even
     491             :   // when the type is otherwise legal.
     492           0 :   MVT VT = Op.getOperand(0).getSimpleValueType();
     493           0 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     494             :   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
     495             :          "Vectors have different number of elements!");
     496             : 
     497           0 :   SDLoc dl(Op);
     498           0 :   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
     499             : 
     500           0 :   unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
     501             :     ISD::SIGN_EXTEND; // Zero-extend unsigned sources, sign-extend signed ones.
     502           0 :   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
     503           0 :     if (Op.getOperand(j).getValueType().isVector())
     504           0 :       Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
     505             :     else
     506           0 :       Operands[j] = Op.getOperand(j); // Non-vector operands pass through.
     507             :   }
     508             : 
     509           0 :   return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
     510             : }
     511             : 
     512             : // For FP_TO_INT we promote the result type to a vector type with wider
     513             : // elements and then truncate the result.  This is different from the default
     514             : // Promote(), which uses a bitcast and thus assumes that the promoted vector
     515             : // type has the same overall size.
     516           0 : SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
     517           0 :   MVT VT = Op.getSimpleValueType();
     518           0 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     519             :   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
     520             :          "Vectors have different number of elements!");
     521             : 
     522           0 :   unsigned NewOpc = Op->getOpcode();
     523             :   // Change FP_TO_UINT to FP_TO_SINT if possible.
     524             :   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
     525           0 :   if (NewOpc == ISD::FP_TO_UINT &&
     526           0 :       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
     527             :     NewOpc = ISD::FP_TO_SINT;
     528             : 
     529           0 :   SDLoc dl(Op);
     530           0 :   SDValue Promoted  = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
     531             : 
     532             :   // Assert that the converted value fits in the original type.  If it doesn't
     533             :   // (eg: because the value being converted is too big), then the result of the
     534             :   // original operation was undefined anyway, so the assert is still correct.
     535           0 :   Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
     536             :                                                             : ISD::AssertSext,
     537             :                          dl, NVT, Promoted,
     538           0 :                          DAG.getValueType(VT.getScalarType()));
     539           0 :   return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
     540             : }
     541             : 
     542           0 : SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
                         // Expands a vector load.  When the in-memory vector has more than one
                         // element and its element type is not byte sized, the data is fetched as
                         // pointer-width integer words and each element is extracted with shifts
                         // and masks.  Every other load is handed to TLI.scalarizeVectorLoad.
                         // Both results (value and chain) are recorded with AddLegalizedOperand;
                         // the one selected by Op.getResNo() is returned.
     543             :   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     544             : 
     545           0 :   EVT SrcVT = LD->getMemoryVT();
     546           0 :   EVT SrcEltVT = SrcVT.getScalarType();
     547             :   unsigned NumElem = SrcVT.getVectorNumElements();
     548             : 
     549           0 :   SDValue NewChain;
     550           0 :   SDValue Value;
     551           0 :   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
     552           0 :     SDLoc dl(Op);
     553             : 
     554             :     SmallVector<SDValue, 8> Vals;
     555             :     SmallVector<SDValue, 8> LoadChains;
     556             : 
     557           0 :     EVT DstEltVT = LD->getValueType(0).getScalarType();
     558           0 :     SDValue Chain = LD->getChain();
     559           0 :     SDValue BasePTR = LD->getBasePtr();
     560             :     ISD::LoadExtType ExtType = LD->getExtensionType();
     561             : 
     562             :     // When elements in a vector are not byte-addressable, we cannot directly
     563             :     // load each element by advancing pointer, which could only address bytes.
     564             :     // Instead, we load all significant words, mask bits off, and concatenate
     565             :     // them to form each element. Finally, they are extended to destination
     566             :     // scalar type to build the destination vector.
     567           0 :     EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
     568             : 
     569             :     assert(WideVT.isRound() &&
     570             :            "Could not handle the sophisticated case when the widest integer is"
     571             :            " not power of 2.");
     572             :     assert(WideVT.bitsGE(SrcEltVT) &&
     573             :            "Type is not legalized?");
     574             : 
                           // Load the whole stored vector as a sequence of integers: full WideVT
                           // words while enough bytes remain, then extending loads of
                           // successively halved sizes for the tail.
                           // NOTE(review): the extraction loop further down indexes LoadVals as if
                           // every entry held WideBits valid bits.  A tail that needs more than one
                           // narrower load would not satisfy that -- confirm whether such memory
                           // types can reach this path.
     575             :     unsigned WideBytes = WideVT.getStoreSize();
     576             :     unsigned Offset = 0;
     577             :     unsigned RemainingBytes = SrcVT.getStoreSize();
     578             :     SmallVector<SDValue, 8> LoadVals;
     579           0 :     while (RemainingBytes > 0) {
     580             :       SDValue ScalarLoad;
     581             :       unsigned LoadBytes = WideBytes;
     582             : 
     583           0 :       if (RemainingBytes >= LoadBytes) {
     584           0 :         ScalarLoad =
     585           0 :             DAG.getLoad(WideVT, dl, Chain, BasePTR,
     586           0 :                         LD->getPointerInfo().getWithOffset(Offset),
     587           0 :                         MinAlign(LD->getAlignment(), Offset),
     588           0 :                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
     589             :       } else {
     590           0 :         EVT LoadVT = WideVT;
     591           0 :         while (RemainingBytes < LoadBytes) {
     592           0 :           LoadBytes >>= 1; // Reduce the load size by half.
     593           0 :           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
     594             :         }
     595           0 :         ScalarLoad =
     596           0 :             DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
     597           0 :                            LD->getPointerInfo().getWithOffset(Offset), LoadVT,
     598           0 :                            MinAlign(LD->getAlignment(), Offset),
     599           0 :                            LD->getMemOperand()->getFlags(), LD->getAAInfo());
     600             :       }
     601             : 
     602           0 :       RemainingBytes -= LoadBytes;
     603           0 :       Offset += LoadBytes;
     604             : 
     605           0 :       BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
     606             : 
     607           0 :       LoadVals.push_back(ScalarLoad.getValue(0));
     608           0 :       LoadChains.push_back(ScalarLoad.getValue(1));
     609             :     }
     610             : 
     611             :     // Extract bits, pack and extend/trunc them into destination type.
     612           0 :     unsigned SrcEltBits = SrcEltVT.getSizeInBits();
                           // Build the mask as an APInt of WideVT's width.  SrcEltBits is only
                           // bounded by the pointer width (see the bitsGE assert above), so it can
                           // be 32 or more, where a 32-bit `1U << SrcEltBits` would be undefined.
     613           0 :     SDValue SrcEltBitMask = DAG.getConstant(
                               APInt::getLowBitsSet(WideVT.getSizeInBits(), SrcEltBits), dl, WideVT);
     614             : 
     615             :     unsigned BitOffset = 0;
     616             :     unsigned WideIdx = 0;
     617           0 :     unsigned WideBits = WideVT.getSizeInBits();
     618             : 
                           // BitOffset is the bit position of the current element inside
                           // LoadVals[WideIdx]; both advance as elements are consumed.
     619           0 :     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
     620           0 :       SDValue Lo, Hi, ShAmt;
     621             : 
     622           0 :       if (BitOffset < WideBits) {
     623           0 :         ShAmt = DAG.getConstant(
     624           0 :             BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     625           0 :         Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
     626           0 :         Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
     627             :       }
     628             : 
     629           0 :       BitOffset += SrcEltBits;
     630           0 :       if (BitOffset >= WideBits) {
     631           0 :         WideIdx++;
     632           0 :         BitOffset -= WideBits;
                               // A non-zero remainder means the element straddles two words: its
                               // upper BitOffset bits come from the start of the next word.
     633           0 :         if (BitOffset > 0) {
     634           0 :           ShAmt = DAG.getConstant(
     635           0 :               SrcEltBits - BitOffset, dl,
     636           0 :               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     637           0 :           Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
     638           0 :           Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
     639             :         }
     640             :       }
     641             : 
     642           0 :       if (Hi.getNode())
     643           0 :         Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
     644             : 
                             // Convert the extracted WideVT value to the destination element type
                             // according to the extension kind of the original load.
     645           0 :       switch (ExtType) {
     646           0 :       default: llvm_unreachable("Unknown extended-load op!");
     647           0 :       case ISD::EXTLOAD:
     648           0 :         Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
     649           0 :         break;
     650           0 :       case ISD::ZEXTLOAD:
     651           0 :         Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
     652           0 :         break;
     653           0 :       case ISD::SEXTLOAD:
                               // Sign extend inside WideVT first (shift left, then arithmetic
                               // shift right by the same amount) before converting to DstEltVT.
     654           0 :         ShAmt =
     655           0 :             DAG.getConstant(WideBits - SrcEltBits, dl,
     656           0 :                             TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     657           0 :         Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
     658           0 :         Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
     659           0 :         Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
     660           0 :         break;
     661             :       }
     662           0 :       Vals.push_back(Lo);
     663             :     }
     664             : 
     665           0 :     NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
     666           0 :     Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
     667             :   } else {
     668           0 :     SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
     669             :     // Skip past MERGE_VALUE node if known.
     670           0 :     if (Scalarized->getOpcode() == ISD::MERGE_VALUES) {
     671           0 :       NewChain = Scalarized.getOperand(1);
     672           0 :       Value = Scalarized.getOperand(0);
     673             :     } else {
     674           0 :       NewChain = Scalarized.getValue(1);
     675           0 :       Value = Scalarized.getValue(0);
     676             :     }
     677             :   }
     678             : 
     679           0 :   AddLegalizedOperand(Op.getValue(0), Value);
     680           0 :   AddLegalizedOperand(Op.getValue(1), NewChain);
     681             : 
     682           0 :   return (Op.getResNo() ? NewChain : Value);
     683             : }
     684             : 
     685           0 : SDValue VectorLegalizer::ExpandStore(SDValue Op) {
                         // Replaces a vector store with the result of TLI.scalarizeVectorStore,
                         // records that replacement for Op, and returns it.
     686             :   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     687           0 :   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
     688           0 :   AddLegalizedOperand(Op, TF);
     689           0 :   return TF;
     690             : }
     691             : 
     692        9205 : SDValue VectorLegalizer::Expand(SDValue Op) {
                         // Dispatches on the node's opcode to the dedicated expansion helper.
                         // Opcodes without a helper fall through to DAG.UnrollVectorOp.
     693       18410 :   switch (Op->getOpcode()) {
     694         717 :   case ISD::SIGN_EXTEND_INREG:
     695         717 :     return ExpandSEXTINREG(Op);
     696         292 :   case ISD::ANY_EXTEND_VECTOR_INREG:
     697         292 :     return ExpandANY_EXTEND_VECTOR_INREG(Op);
     698          12 :   case ISD::SIGN_EXTEND_VECTOR_INREG:
     699          12 :     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
     700         489 :   case ISD::ZERO_EXTEND_VECTOR_INREG:
     701         489 :     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
     702          49 :   case ISD::BSWAP:
     703          49 :     return ExpandBSWAP(Op);
     704        1425 :   case ISD::VSELECT:
     705        1425 :     return ExpandVSELECT(Op);
     706          82 :   case ISD::SELECT:
     707          82 :     return ExpandSELECT(Op);
     708         155 :   case ISD::UINT_TO_FP:
     709         155 :     return ExpandUINT_TO_FLOAT(Op);
     710          40 :   case ISD::FNEG:
     711          40 :     return ExpandFNEG(Op);
     712          48 :   case ISD::FSUB:
     713          48 :     return ExpandFSUB(Op);
     714          93 :   case ISD::SETCC:
     715          93 :     return UnrollVSETCC(Op);
     716         132 :   case ISD::BITREVERSE:
     717         132 :     return ExpandBITREVERSE(Op);
                         // The *_ZERO_UNDEF variants share the helper of their plain counterpart.
     718         136 :   case ISD::CTLZ:
     719             :   case ISD::CTLZ_ZERO_UNDEF:
     720         136 :     return ExpandCTLZ(Op);
     721         248 :   case ISD::CTTZ:
     722             :   case ISD::CTTZ_ZERO_UNDEF:
     723         248 :     return ExpandCTTZ(Op);
                         // All strict floating-point opcodes are handled by one helper.
     724          85 :   case ISD::STRICT_FADD:
     725             :   case ISD::STRICT_FSUB:
     726             :   case ISD::STRICT_FMUL:
     727             :   case ISD::STRICT_FDIV:
     728             :   case ISD::STRICT_FREM:
     729             :   case ISD::STRICT_FSQRT:
     730             :   case ISD::STRICT_FMA:
     731             :   case ISD::STRICT_FPOW:
     732             :   case ISD::STRICT_FPOWI:
     733             :   case ISD::STRICT_FSIN:
     734             :   case ISD::STRICT_FCOS:
     735             :   case ISD::STRICT_FEXP:
     736             :   case ISD::STRICT_FEXP2:
     737             :   case ISD::STRICT_FLOG:
     738             :   case ISD::STRICT_FLOG10:
     739             :   case ISD::STRICT_FLOG2:
     740             :   case ISD::STRICT_FRINT:
     741             :   case ISD::STRICT_FNEARBYINT:
     742          85 :     return ExpandStrictFPOp(Op);
     743        5202 :   default:
     744        5202 :     return DAG.UnrollVectorOp(Op.getNode());
     745             :   }
     746             : }
     747             : 
     748           0 : SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
     749             :   // Lower a select instruction where the condition is a scalar and the
     750             :   // operands are vectors. Lower this select to VSELECT and implement it
     751             :   // using XOR AND OR. The selector bit is broadcasted.
     752           0 :   EVT VT = Op.getValueType();
     753           0 :   SDLoc DL(Op);
     754             : 
                         // Operand 0 is the scalar condition; operands 1 and 2 are the vectors
                         // selected when it is true and false respectively.
     755           0 :   SDValue Mask = Op.getOperand(0);
     756           0 :   SDValue Op1 = Op.getOperand(1);
     757           0 :   SDValue Op2 = Op.getOperand(2);
     758             : 
     759             :   assert(VT.isVector() && !Mask.getValueType().isVector()
     760             :          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
     761             : 
     762             :   // If we can't even use the basic vector operations of
     763             :   // AND,OR,XOR, we will have to scalarize the op.
     764             :   // Notice that the operation may be 'promoted' which means that it is
     765             :   // 'bitcasted' to another type which is handled.
     766             :   // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
     767           0 :   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
     768           0 :       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
     769           0 :       TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
     770             :       TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
     771           0 :     return DAG.UnrollVectorOp(Op.getNode());
     772             : 
     773             :   // Generate a mask operand.
     774           0 :   EVT MaskTy = VT.changeVectorElementTypeToInteger();
     775             : 
     776             :   // What is the size of each element in the vector mask.
     777           0 :   EVT BitTy = MaskTy.getScalarType();
     778             : 
                         // Turn the scalar condition into a scalar of the mask element type that
                         // is all ones when the condition holds and zero otherwise.
     779           0 :   Mask = DAG.getSelect(DL, BitTy, Mask,
     780           0 :           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
     781             :                           BitTy),
     782           0 :           DAG.getConstant(0, DL, BitTy));
     783             : 
     784             :   // Broadcast the mask so that the entire vector is all-one or all zero.
     785           0 :   Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
     786             : 
     787             :   // Bitcast the operands to be the same type as the mask.
     788             :   // This is needed when we select between FP types because
     789             :   // the mask is a vector of integers.
     790           0 :   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
     791           0 :   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
     792             : 
                         // NotMask = Mask ^ all-ones.
     793           0 :   SDValue AllOnes = DAG.getConstant(
     794           0 :             APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
     795           0 :   SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
     796             : 
                         // Result = (Op1 & Mask) | (Op2 & ~Mask), bitcast back to the result type.
     797           0 :   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
     798           0 :   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
     799           0 :   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
     800           0 :   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
     801             : }
     802             : 
     803           0 : SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
                         // Expands SIGN_EXTEND_INREG as a left shift followed by an arithmetic
                         // right shift, or unrolls the node if either vector shift must itself be
                         // expanded.
     804           0 :   EVT VT = Op.getValueType();
     805             : 
     806             :   // Make sure that the SRA and SHL instructions are available.
     807           0 :   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
     808             :       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
     809           0 :     return DAG.UnrollVectorOp(Op.getNode());
     810             : 
     811           0 :   SDLoc DL(Op);
                         // Operand 1 carries the type the value is to be sign extended from.
     812           0 :   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
     813             : 
                         // Shift by the number of bits that separate the two element widths.
     814             :   unsigned BW = VT.getScalarSizeInBits();
     815             :   unsigned OrigBW = OrigTy.getScalarSizeInBits();
     816           0 :   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
     817             : 
     818           0 :   Op = Op.getOperand(0);
     819           0 :   Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
     820           0 :   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
     821             : }
     822             : 
     823             : // Generically expand a vector anyext in register to a shuffle of the relevant
     824             : // lanes into the appropriate locations, with other lanes left undef.
     825           0 : SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
     826           0 :   SDLoc DL(Op);
     827           0 :   EVT VT = Op.getValueType();
     828           0 :   int NumElements = VT.getVectorNumElements();
     829           0 :   SDValue Src = Op.getOperand(0);
     830           0 :   EVT SrcVT = Src.getValueType();
     831           0 :   int NumSrcElements = SrcVT.getVectorNumElements();
     832             : 
     833             :   // Build a base mask of undef shuffles.
     834             :   SmallVector<int, 16> ShuffleMask;
     835           0 :   ShuffleMask.resize(NumSrcElements, -1);
     836             : 
     837             :   // Place the extended lanes into the correct locations.
                         // Result element i spans ExtLaneScale consecutive source lanes; source
                         // lane i is placed in the first of them, or in the last one when the
                         // data layout is big endian.
     838           0 :   int ExtLaneScale = NumSrcElements / NumElements;
     839           0 :   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
     840           0 :   for (int i = 0; i < NumElements; ++i)
     841           0 :     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
     842             : 
                         // Shuffle within the source type, then reinterpret as the result type.
     843           0 :   return DAG.getNode(
     844             :       ISD::BITCAST, DL, VT,
     845           0 :       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
     846             : }
     847             : 
     848           0 : SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
                         // Expands a vector sign-extend-in-register as an any-extend-in-register
                         // followed by a shift left and an arithmetic shift right.
     849           0 :   SDLoc DL(Op);
     850           0 :   EVT VT = Op.getValueType();
     851           0 :   SDValue Src = Op.getOperand(0);
     852           0 :   EVT SrcVT = Src.getValueType();
     853             : 
     854             :   // First build an any-extend node which can be legalized above when we
     855             :   // recurse through it.
     856           0 :   Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
     857             : 
     858             :   // Now we need sign extend. Do this by shifting the elements. Even if these
     859             :   // aren't legal operations, they have a better chance of being legalized
     860             :   // without full scalarization than the sign extension does.
                         // The shift amount is the difference between the result and source
                         // element widths.
     861             :   unsigned EltWidth = VT.getScalarSizeInBits();
     862             :   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
     863           0 :   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
     864           0 :   return DAG.getNode(ISD::SRA, DL, VT,
     865             :                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
     866           0 :                      ShiftAmount);
     867             : }
     868             : 
     869             : // Generically expand a vector zext in register to a shuffle of the relevant
     870             : // lanes into the appropriate locations, a blend of zero into the high bits,
     871             : // and a bitcast to the wider element type.
     872           0 : SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
     873           0 :   SDLoc DL(Op);
     874           0 :   EVT VT = Op.getValueType();
     875           0 :   int NumElements = VT.getVectorNumElements();
     876           0 :   SDValue Src = Op.getOperand(0);
     877           0 :   EVT SrcVT = Src.getValueType();
     878           0 :   int NumSrcElements = SrcVT.getVectorNumElements();
     879             : 
     880             :   // Build up a zero vector to blend into this one.
     881           0 :   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
     882             : 
     883             :   // Shuffle the incoming lanes into the correct position, and pull all other
     884             :   // lanes from the zero vector.
                         // Zero is the first shuffle operand, so the initial mask 0..N-1 selects
                         // only lanes of the zero vector.
     885             :   SmallVector<int, 16> ShuffleMask;
     886           0 :   ShuffleMask.reserve(NumSrcElements);
     887           0 :   for (int i = 0; i < NumSrcElements; ++i)
     888           0 :     ShuffleMask.push_back(i);
     889             : 
                         // Src is the second shuffle operand, so index NumSrcElements + i refers
                         // to lane i of Src.  It is placed in the first source lane of result
                         // element i, or in the last one when the data layout is big endian.
     890           0 :   int ExtLaneScale = NumSrcElements / NumElements;
     891           0 :   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
     892           0 :   for (int i = 0; i < NumElements; ++i)
     893           0 :     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
     894             : 
     895           0 :   return DAG.getNode(ISD::BITCAST, DL, VT,
     896           0 :                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
     897             : }
     898             : 
     899         141 : static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
                         // Appends to ShuffleMask, for each element of VT in order, the indices of
                         // that element's bytes from the highest index down to the lowest.  The
                         // mask is appended to, not cleared.
     900         141 :   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
     901         983 :   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
     902        3866 :     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
     903        3024 :       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
     904         141 : }
     905             : 
     906           0 : SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
                         // Expands BSWAP as a byte shuffle on the operand viewed as a vector of
                         // i8, or unrolls the node when the target rejects that shuffle mask.
     907           0 :   EVT VT = Op.getValueType();
     908             : 
     909             :   // Generate a byte wise shuffle mask for the BSWAP.
     910             :   SmallVector<int, 16> ShuffleMask;
     911           0 :   createBSWAPShuffleMask(VT, ShuffleMask);
                         // The byte vector has one i8 lane per mask entry.
     912           0 :   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
     913             : 
     914             :   // Only emit a shuffle if the mask is legal.
     915           0 :   if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
     916           0 :     return DAG.UnrollVectorOp(Op.getNode());
     917             : 
                         // Bitcast to bytes, shuffle, and bitcast back to the original type.
     918           0 :   SDLoc DL(Op);
     919           0 :   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
     920           0 :   Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
     921           0 :   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
     922             : }
     923             : 
     924           0 : SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
                         // Chooses how to expand a vector BITREVERSE: unroll it, rewrite it as a
                         // byte swap shuffle plus a per-byte BITREVERSE, or return it unchanged
                         // for LegalizeDAG to handle.
     925           0 :   EVT VT = Op.getValueType();
     926             : 
     927             :   // If we have the scalar operation, it's probably cheaper to unroll it.
     928           0 :   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
     929           0 :     return DAG.UnrollVectorOp(Op.getNode());
     930             : 
     931             :   // If the vector element width is a whole number of bytes, test if it's legal
     932             :   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
     933             :   // vector. This greatly reduces the number of bit shifts necessary.
     934             :   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
     935           0 :   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
     936             :     SmallVector<int, 16> BSWAPMask;
     937           0 :     createBSWAPShuffleMask(VT, BSWAPMask);
     938             : 
     939           0 :     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
                           // Take this path only if the shuffle is legal and the byte vector
                           // either supports BITREVERSE or supports SHL, SRL, AND and OR.
     940           0 :     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
     941           0 :         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
     942           0 :          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
     943           0 :           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
     944           0 :           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
     945           0 :           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
     946           0 :       SDLoc DL(Op);
     947           0 :       Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
     948           0 :       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
     949           0 :                                 BSWAPMask);
     950           0 :       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
     951           0 :       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
     952             :     }
     953             :   }
     954             : 
     955             :   // If we have the appropriate vector bit operations, it is better to use them
     956             :   // than unrolling and expanding each component.
     957           0 :   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
     958           0 :       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
     959           0 :       !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
     960           0 :       !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
     961           0 :     return DAG.UnrollVectorOp(Op.getNode());
     962             : 
     963             :   // Let LegalizeDAG handle this later.
     964           0 :   return Op;
     965             : }
     966             : 
     967           0 : SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
     968             :   // Implement VSELECT in terms of XOR, AND, OR
     969             :   // on platforms which do not support blend natively.
     970           0 :   SDLoc DL(Op);
     971             : 
     972           0 :   SDValue Mask = Op.getOperand(0);
     973           0 :   SDValue Op1 = Op.getOperand(1);
     974           0 :   SDValue Op2 = Op.getOperand(2);
     975             : 
                         // Note that VT is the type of the mask, not of the selected values; all
                         // bitwise operations below are performed in the mask type.
     976           0 :   EVT VT = Mask.getValueType();
     977             : 
     978             :   // If we can't even use the basic vector operations of
     979             :   // AND,OR,XOR, we will have to scalarize the op.
     980             :   // Notice that the operation may be 'promoted' which means that it is
     981             :   // 'bitcasted' to another type which is handled.
     982             :   // This operation also isn't safe with AND, OR, XOR when the boolean
     983             :   // type is 0/1 as we need an all ones vector constant to mask with.
     984             :   // FIXME: Sign extend 1 to all ones if that's legal on the target.
     985           0 :   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
     986           0 :       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
     987           0 :       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
     988           0 :       TLI.getBooleanContents(Op1.getValueType()) !=
     989             :           TargetLowering::ZeroOrNegativeOneBooleanContent)
     990           0 :     return DAG.UnrollVectorOp(Op.getNode());
     991             : 
     992             :   // If the mask and the type are different sizes, unroll the vector op. This
     993             :   // can occur when getSetCCResultType returns something that is different in
     994             :   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
     995           0 :   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
     996           0 :     return DAG.UnrollVectorOp(Op.getNode());
     997             : 
     998             :   // Bitcast the operands to be the same type as the mask.
     999             :   // This is needed when we select between FP types because
    1000             :   // the mask is a vector of integers.
    1001           0 :   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
    1002           0 :   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
    1003             : 
                         // NotMask = Mask ^ all-ones.
    1004           0 :   SDValue AllOnes = DAG.getConstant(
    1005           0 :     APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
    1006           0 :   SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
    1007             : 
                         // Result = (Op1 & Mask) | (Op2 & ~Mask), bitcast back to the result type.
    1008           0 :   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
    1009           0 :   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
    1010           0 :   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
    1011           0 :   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
    1012             : }
    1013             : 
    1014           0 : SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
                         // Expands UINT_TO_FP by splitting every element into an upper and a lower
                         // half, converting each half with SINT_TO_FP, and recombining them as
                         // fHI * 2^(BW/2) + fLO.
                         // Note that VT is the integer operand type; the floating-point result
                         // type is Op.getValueType().
    1015           0 :   EVT VT = Op.getOperand(0).getValueType();
    1016           0 :   SDLoc DL(Op);
    1017             : 
    1018             :   // Make sure that the SINT_TO_FP and SRL instructions are available.
    1019           0 :   if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
    1020             :       TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
    1021           0 :     return DAG.UnrollVectorOp(Op.getNode());
    1022             : 
    1023             :   unsigned BW = VT.getScalarSizeInBits();
    1024             :   assert((BW == 64 || BW == 32) &&
    1025             :          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
    1026             : 
                         // Shift amount that moves the upper half of an element into the lower.
    1027           0 :   SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
    1028             : 
    1029             :   // Constants to clear the upper part of the word.
    1030             :   // Notice that we can also use SHL+SHR, but using a constant is slightly
    1031             :   // faster on x86.
    1032           0 :   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
    1033           0 :   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
    1034             : 
    1035             :   // Two to the power of half-word-size.
    1036           0 :   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
    1037             : 
    1038             :   // Clear upper part of LO, lower HI
    1039           0 :   SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
    1040           0 :   SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
    1041             : 
    1042             :   // Convert hi and lo to floats
    1043             :   // Convert the hi part back to the upper values
    1044             :   // TODO: Can any fast-math-flags be set on these nodes?
    1045           0 :   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
    1046           0 :           fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
    1047           0 :   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
    1048             : 
    1049             :   // Add the two halves
    1050           0 :   return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
    1051             : }
    1052             : 
    1053           0 : SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
                         // Expands FNEG as (-0.0 - x) when FSUB is legal or custom for the vector
                         // type; otherwise the node is unrolled.
    1054           0 :   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
    1055           0 :     SDLoc DL(Op);
    1056           0 :     SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
    1057             :     // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
    1058           0 :     return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
    1059           0 :                        Zero, Op.getOperand(0));
    1060             :   }
    1061           0 :   return DAG.UnrollVectorOp(Op.getNode());
    1062             : }
    1063             : 
    1064           0 : SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
                         // Expand a vector FSUB node. Returns Op unchanged when both FNEG and FADD
                         // are legal or custom for the result type; otherwise returns the result of
                         // DAG.UnrollVectorOp on the node.
    1065             :   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
    1066             :   // we can defer this to operation legalization where it will be lowered as
    1067             :   // a+(-b).
    1068             :   EVT VT = Op.getValueType();
    1069           0 :   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
    1070           0 :       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
    1071           0 :     return Op; // Defer to LegalizeDAG
    1072             : 
                         // Either FNEG or FADD is unavailable for VT, so the a+(-b) form cannot be
                         // used: unroll instead.
    1073           0 :   return DAG.UnrollVectorOp(Op.getNode());
    1074             : }
    1075             : 
    1076           0 : SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
                         // Expand a vector CTLZ / CTLZ_ZERO_UNDEF node. Three outcomes, tried in
                         // order:
                         //   1. CTLZ_ZERO_UNDEF is replaced by a plain CTLZ node when CTLZ is legal
                         //      or custom for the type.
                         //   2. Op is returned unchanged when the element width is a power of two
                         //      and CTPOP, SRL and OR are available on the vector type.
                         //   3. Otherwise the node is unrolled via DAG.UnrollVectorOp.
    1077           0 :   EVT VT = Op.getValueType();
    1078             :   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
    1079             : 
    1080             :   // If the non-ZERO_UNDEF version is supported we can use that instead.
    1081           0 :   if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
    1082           0 :       TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
    1083           0 :     SDLoc DL(Op);
    1084           0 :     return DAG.getNode(ISD::CTLZ, DL, VT, Op.getOperand(0));
    1085             :   }
    1086             : 
    1087             :   // If we have the appropriate vector bit operations, it is better to use them
    1088             :   // than unrolling and expanding each component.
                         // Note that OR additionally accepts the Promote action, while CTPOP and SRL
                         // must be legal or custom. Returning Op unchanged presumably defers the
                         // bit-operation expansion to a later legalization step, as ExpandFSUB does
                         // -- TODO confirm.
    1089             :   if (isPowerOf2_32(NumBitsPerElt) &&
    1090           0 :       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
    1091           0 :       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
    1092           0 :       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
    1093           0 :     return Op;
    1094             : 
    1095             :   // Otherwise go ahead and unroll.
    1096           0 :   return DAG.UnrollVectorOp(Op.getNode());
    1097             : }
    1098             : 
    1099           0 : SDValue VectorLegalizer::ExpandCTTZ(SDValue Op) {
                         // Expand a vector count-trailing-zeros node. Three outcomes, tried in
                         // order:
                         //   1. A plain CTTZ node on operand 0 is returned when CTTZ is legal or
                         //      custom for the type.
                         //   2. Op is returned unchanged when the element width is a power of two,
                         //      at least one of CTPOP / CTLZ is available, and SUB, AND and XOR are
                         //      available on the vector type.
                         //   3. Otherwise the node is unrolled via DAG.UnrollVectorOp.
    1100           0 :   EVT VT = Op.getValueType();
    1101             :   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
    1102             : 
    1103             :   // If the non-ZERO_UNDEF version is supported we can use that instead.
                         // NOTE(review): unlike ExpandCTLZ, this test does not check that
                         // Op.getOpcode() is the ZERO_UNDEF variant. Presumably a plain CTTZ only
                         // reaches this function when CTTZ is neither legal nor custom for VT, which
                         // would make the opcode check redundant -- confirm against the caller.
    1104           0 :   if (TLI.isOperationLegalOrCustom(ISD::CTTZ, VT)) {
    1105           0 :     SDLoc DL(Op);
    1106           0 :     return DAG.getNode(ISD::CTTZ, DL, VT, Op.getOperand(0));
    1107             :   }
    1108             : 
    1109             :   // If we have the appropriate vector bit operations, it is better to use them
    1110             :   // than unrolling and expanding each component.
                         // AND and XOR additionally accept the Promote action; CTPOP/CTLZ and SUB
                         // must be legal or custom. Returning Op unchanged presumably defers the
                         // bit-operation expansion to a later legalization step -- TODO confirm.
    1111             :   if (isPowerOf2_32(NumBitsPerElt) &&
    1112           0 :       (TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) ||
    1113           0 :        TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) &&
    1114           0 :       TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
    1115           0 :       TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
    1116           0 :       TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT))
    1117           0 :     return Op;
    1118             : 
    1119             :   // Otherwise go ahead and unroll.
    1120           0 :   return DAG.UnrollVectorOp(Op.getNode());
    1121             : }
    1122             : 
    1123           0 : SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
                         // Scalarize a chained vector operation element by element. Operand 0 of Op
                         // is treated as the chain; the node is expected to produce two results
                         // (value 0: the vector, value 1: the chain). For each vector element a
                         // scalar node with the same opcode is built, the scalar values are
                         // reassembled with a build-vector and the per-element chains are merged
                         // with a TokenFactor. Both replacements are registered through
                         // AddLegalizedOperand, and the one matching Op's result number is returned.
    1124           0 :   EVT VT = Op.getValueType();
    1125           0 :   EVT EltVT = VT.getVectorElementType();
    1126             :   unsigned NumElems = VT.getVectorNumElements();
    1127             :   unsigned NumOpers = Op.getNumOperands();
                         // NOTE(review): sibling methods use TLI without declaring it, so this local
                         // presumably shadows a class member of the same name -- confirm.
    1128           0 :   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
                         // Result types of each scalar node: the element value plus a chain.
    1129           0 :   EVT ValueVTs[] = {EltVT, MVT::Other};
    1130           0 :   SDValue Chain = Op.getOperand(0);
    1131           0 :   SDLoc dl(Op);
    1132             : 
    1133             :   SmallVector<SDValue, 32> OpValues;
    1134             :   SmallVector<SDValue, 32> OpChains;
    1135           0 :   for (unsigned i = 0; i < NumElems; ++i) {
    1136             :     SmallVector<SDValue, 4> Opers;
    1137           0 :     SDValue Idx = DAG.getConstant(i, dl,
    1138           0 :                                   TLI.getVectorIdxTy(DAG.getDataLayout()));
    1139             : 
    1140             :     // The Chain is the first operand. Every scalar node receives the same
                           // incoming chain (Chain is never reassigned inside the loop); the
                           // resulting chains are only joined after the loop.
    1141           0 :     Opers.push_back(Chain);
    1142             : 
    1143             :     // Now process the remaining operands.
    1144           0 :     for (unsigned j = 1; j < NumOpers; ++j) {
    1145           0 :       SDValue Oper = Op.getOperand(j);
    1146           0 :       EVT OperVT = Oper.getValueType();
    1147             : 
                             // Vector operands contribute their i-th element; non-vector operands
                             // are forwarded to the scalar node unchanged.
                             // NOTE(review): the extract uses the result element type EltVT for
                             // every vector operand, i.e. it assumes operand and result element
                             // types match -- confirm for all opcodes routed here.
    1148           0 :       if (OperVT.isVector())
    1149           0 :         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
    1150           0 :                            EltVT, Oper, Idx);
    1151             : 
    1152           0 :       Opers.push_back(Oper);
    1153             :     }
    1154             : 
    1155           0 :     SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
    1156             : 
                           // Result 0 is the scalar value, result 1 the outgoing chain.
    1157           0 :     OpValues.push_back(ScalarOp.getValue(0));
    1158           0 :     OpChains.push_back(ScalarOp.getValue(1));
    1159             :   }
    1160             : 
    1161           0 :   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
    1162           0 :   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
    1163             : 
                         // Record replacements for both results of the original node.
    1164           0 :   AddLegalizedOperand(Op.getValue(0), Result);
    1165           0 :   AddLegalizedOperand(Op.getValue(1), NewChain);
    1166             : 
                         // A non-zero result number means the caller asked for the chain result.
    1167           0 :   return Op.getResNo() ? NewChain : Result;
    1168             : }
    1169             : 
    1170           0 : SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
                         // Unroll a vector comparison into per-element scalar SETCC nodes. Operands
                         // 0 and 1 of Op are the compared vectors and operand 2 is the condition
                         // code. Each scalar comparison result is converted, via a select, into an
                         // all-ones or zero constant of the result element type, and the elements
                         // are reassembled with a build-vector of type VT.
    1171           0 :   EVT VT = Op.getValueType();
    1172             :   unsigned NumElems = VT.getVectorNumElements();
    1173           0 :   EVT EltVT = VT.getVectorElementType();
    1174           0 :   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
                         // Element type of the compared inputs, taken from LHS and used for both
                         // extracts; it is kept separate from the result element type EltVT.
    1175           0 :   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
    1176           0 :   SDLoc dl(Op);
    1177           0 :   SmallVector<SDValue, 8> Ops(NumElems);
    1178           0 :   for (unsigned i = 0; i < NumElems; ++i) {
    1179           0 :     SDValue LHSElem = DAG.getNode(
    1180             :         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
    1181           0 :         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
    1182           0 :     SDValue RHSElem = DAG.getNode(
    1183             :         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
    1184           0 :         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
                           // Scalar compare; its result type is whatever the target reports for a
                           // SETCC on the input element type.
    1185           0 :     Ops[i] = DAG.getNode(ISD::SETCC, dl,
    1186           0 :                          TLI.getSetCCResultType(DAG.getDataLayout(),
    1187           0 :                                                 *DAG.getContext(), TmpEltVT),
    1188           0 :                          LHSElem, RHSElem, CC);
                           // Map the scalar compare result onto the vector element encoding:
                           // all bits set when the condition holds, zero otherwise.
    1189           0 :     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
    1190           0 :                            DAG.getConstant(APInt::getAllOnesValue
    1191           0 :                                            (EltVT.getSizeInBits()), dl, EltVT),
    1192           0 :                            DAG.getConstant(0, dl, EltVT));
    1193             :   }
    1194           0 :   return DAG.getBuildVector(VT, dl, Ops);
    1195             : }
    1196             : 
    1197     1269116 : bool SelectionDAG::LegalizeVectors() {
                         // Entry point for vector-operation legalization: constructs a temporary
                         // VectorLegalizer over this DAG and returns the result of its Run().
                         // Presumably the returned flag reports whether the DAG was changed --
                         // TODO confirm against VectorLegalizer::Run.
    1198     1269116 :   return VectorLegalizer(*this).Run();
    1199             : }

Generated by: LCOV version 1.13