LCOV - code coverage report
Current view: top level - lib/CodeGen/SelectionDAG - LegalizeVectorOps.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 471 489 96.3 %
Date: 2018-07-13 00:08:38 Functions: 27 27 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the SelectionDAG::LegalizeVectors method.
      11             : //
      12             : // The vector legalizer looks for vector operations which might need to be
      13             : // scalarized and legalizes them. This is a separate step from Legalize because
      14             : // scalarizing can introduce illegal types.  For example, suppose we have an
      15             : // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
      16             : // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
      17             : // operation, which introduces nodes with the illegal type i64 which must be
      18             : // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
      19             : // the operation must be unrolled, which introduces nodes with the illegal
      20             : // type i8 which must be promoted.
      21             : //
      22             : // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
      23             : // or operations that happen to take a vector which are custom-lowered;
      24             : // the legalization for such operations never produces nodes
      25             : // with illegal types, so it's okay to put off legalizing them until
      26             : // SelectionDAG::Legalize runs.
      27             : //
      28             : //===----------------------------------------------------------------------===//
      29             : 
      30             : #include "llvm/ADT/APInt.h"
      31             : #include "llvm/ADT/DenseMap.h"
      32             : #include "llvm/ADT/SmallVector.h"
      33             : #include "llvm/CodeGen/ISDOpcodes.h"
      34             : #include "llvm/CodeGen/MachineMemOperand.h"
      35             : #include "llvm/CodeGen/SelectionDAG.h"
      36             : #include "llvm/CodeGen/SelectionDAGNodes.h"
      37             : #include "llvm/CodeGen/TargetLowering.h"
      38             : #include "llvm/CodeGen/ValueTypes.h"
      39             : #include "llvm/IR/DataLayout.h"
      40             : #include "llvm/Support/Casting.h"
      41             : #include "llvm/Support/Compiler.h"
      42             : #include "llvm/Support/ErrorHandling.h"
      43             : #include "llvm/Support/MachineValueType.h"
      44             : #include "llvm/Support/MathExtras.h"
      45             : #include <cassert>
      46             : #include <cstdint>
      47             : #include <iterator>
      48             : #include <utility>
      49             : 
      50             : using namespace llvm;
      51             : 
      52             : #define DEBUG_TYPE "legalizevectorops"
      53             : 
      54             : namespace {
      55             : 
      56             : class VectorLegalizer {
      57             :   SelectionDAG& DAG;
      58             :   const TargetLowering &TLI;
      59             :   bool Changed = false; // Keep track of whether anything changed
      60             : 
      61             :   /// For nodes that are of legal width, and that have more than one use, this
      62             :   /// map indicates what regularized operand to use.  This allows us to avoid
      63             :   /// legalizing the same thing more than once.
      64             :   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
      65             : 
      66             :   /// Adds a node to the translation cache.
      67     5148259 :   void AddLegalizedOperand(SDValue From, SDValue To) {
      68    10296518 :     LegalizedNodes.insert(std::make_pair(From, To));
      69             :     // If someone requests legalization of the new node, return itself.
      70             :     if (From != To)
      71       63437 :       LegalizedNodes.insert(std::make_pair(To, To));
      72     5148259 :   }
      73             : 
      74             :   /// Legalizes the given node.
      75             :   SDValue LegalizeOp(SDValue Op);
      76             : 
      77             :   /// Assuming the node is legal, "legalize" the results.
      78             :   SDValue TranslateLegalizeResults(SDValue Op, SDValue Result);
      79             : 
      80             :   /// Implements unrolling a VSETCC.
      81             :   SDValue UnrollVSETCC(SDValue Op);
      82             : 
      83             :   /// Implement expand-based legalization of vector operations.
      84             :   ///
      85             :   /// This is just a high-level routine to dispatch to specific code paths for
      86             :   /// operations to legalize them.
      87             :   SDValue Expand(SDValue Op);
      88             : 
      89             :   /// Implements expansion for FNEG; falls back to UnrollVectorOp if
      90             :   /// FSUB isn't legal.
      91             :   ///
      92             :   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
      93             :   /// SINT_TO_FLOAT and SHR on vectors aren't legal.
      94             :   SDValue ExpandUINT_TO_FLOAT(SDValue Op);
      95             : 
      96             :   /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
      97             :   SDValue ExpandSEXTINREG(SDValue Op);
      98             : 
      99             :   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
     100             :   ///
     101             :   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
     102             :   /// type. The contents of the bits in the extended part of each element are
     103             :   /// undef.
     104             :   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op);
     105             : 
     106             :   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
     107             :   ///
     108             :   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
     109             :   /// type, then shifts left and arithmetic shifts right to introduce a sign
     110             :   /// extension.
     111             :   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op);
     112             : 
     113             :   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
     114             :   ///
     115             :   /// Shuffles the low lanes of the operand into place and blends zeros into
     116             :   /// the remaining lanes, finally bitcasting to the proper type.
     117             :   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op);
     118             : 
     119             :   /// Expand bswap of vectors into a shuffle if legal.
     120             :   SDValue ExpandBSWAP(SDValue Op);
     121             : 
     122             :   /// Implement vselect in terms of XOR, AND, OR when blend is not
     123             :   /// supported by the target.
     124             :   SDValue ExpandVSELECT(SDValue Op);
     125             :   SDValue ExpandSELECT(SDValue Op);
     126             :   SDValue ExpandLoad(SDValue Op);
     127             :   SDValue ExpandStore(SDValue Op);
     128             :   SDValue ExpandFNEG(SDValue Op);
     129             :   SDValue ExpandFSUB(SDValue Op);
     130             :   SDValue ExpandBITREVERSE(SDValue Op);
     131             :   SDValue ExpandCTLZ(SDValue Op);
     132             :   SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
     133             :   SDValue ExpandStrictFPOp(SDValue Op);
     134             :   
     135             :   /// Implements vector promotion.
     136             :   ///
     137             :   /// This is essentially just bitcasting the operands to a different type and
     138             :   /// bitcasting the result back to the original type.
     139             :   SDValue Promote(SDValue Op);
     140             : 
     141             :   /// Implements [SU]INT_TO_FP vector promotion.
     142             :   ///
     143             :   /// This is a [zs]ext of the input operand to a larger integer type.
     144             :   SDValue PromoteINT_TO_FP(SDValue Op);
     145             : 
     146             :   /// Implements FP_TO_[SU]INT vector promotion of the result type.
     147             :   ///
     148             :   /// It is promoted to a larger integer type.  The result is then
     149             :   /// truncated back to the original type.
     150             :   SDValue PromoteFP_TO_INT(SDValue Op);
     151             : 
     152             : public:
     153      363367 :   VectorLegalizer(SelectionDAG& dag) :
     154      363367 :       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
     155             : 
     156             :   /// Begin legalizing the vector operations in the DAG.
     157             :   bool Run();
     158             : };
     159             : 
     160             : } // end anonymous namespace
     161             : 
     162      363367 : bool VectorLegalizer::Run() {
     163             :   // Before we start legalizing vector nodes, check if there are any vectors.
     164             :   bool HasVectors = false;
     165      363367 :   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
     166     6584395 :        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
     167             :     // Check if the values of the nodes contain vectors. We don't need to check
     168             :     // the operands because we are going to check their values at some point.
     169    14345997 :     for (SDNode::value_iterator J = I->value_begin(), E = I->value_end();
     170    14345997 :          J != E; ++J)
     171     7982026 :       HasVectors |= J->isVector();
     172             : 
     173             :     // If we found a vector node we can start the legalization.
     174     6363971 :     if (HasVectors)
     175             :       break;
     176             :   }
     177             : 
     178             :   // If this basic block has no vectors then no need to legalize vectors.
     179      363367 :   if (!HasVectors)
     180             :     return false;
     181             : 
     182             :   // The legalize process is inherently a bottom-up recursive process (users
     183             :   // legalize their uses before themselves).  Given infinite stack space, we
     184             :   // could just start legalizing on the root and traverse the whole graph.  In
     185             :   // practice however, this causes us to run out of stack space on large basic
     186             :   // blocks.  To avoid this problem, compute an ordering of the nodes where each
     187             :   // node is only legalized after all of its operands are legalized.
     188      142943 :   DAG.AssignTopologicalOrder();
     189      142943 :   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
     190     4170985 :        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
     191     4028042 :     LegalizeOp(SDValue(&*I, 0));
     192             : 
     193             :   // Finally, it's possible the root changed.  Get the new root.
     194      142943 :   SDValue OldRoot = DAG.getRoot();
     195             :   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
     196      285886 :   DAG.setRoot(LegalizedNodes[OldRoot]);
     197             : 
     198      142943 :   LegalizedNodes.clear();
     199             : 
     200             :   // Remove dead nodes now.
     201      142943 :   DAG.RemoveDeadNodes();
     202             : 
     203      142943 :   return Changed;
     204             : }
     205             : 
     206     4051223 : SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) {
     207             :   // Generic legalization: just pass the operand through.
     208    17951118 :   for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i)
     209     4924336 :     AddLegalizedOperand(Op.getValue(i), Result.getValue(i));
     210     4051223 :   return Result.getValue(Op.getResNo());
     211             : }
     212             : 
     213    11430522 : SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
     214             :   // Note that LegalizeOp may be reentered even from single-use nodes, which
     215             :   // means that we always must cache transformed nodes.
     216    11430522 :   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
     217    11430522 :   if (I != LegalizedNodes.end()) return I->second;
     218             : 
     219     4275045 :   SDNode* Node = Op.getNode();
     220             : 
     221             :   // Legalize the operands
     222             :   SmallVector<SDValue, 8> Ops;
     223    11614500 :   for (const SDValue &Op : Node->op_values())
     224     7339455 :     Ops.push_back(LegalizeOp(Op));
     225             : 
     226     8550090 :   SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0);
     227             : 
     228             :   bool HasVectorValue = false;
     229     8550090 :   if (Op.getOpcode() == ISD::LOAD) {
     230             :     LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     231             :     ISD::LoadExtType ExtType = LD->getExtensionType();
     232      772372 :     if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) {
     233             :       LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: ";
     234             :                  Node->dump(&DAG));
     235        8900 :       switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0),
     236             :                                    LD->getMemoryVT())) {
     237           0 :       default: llvm_unreachable("This action is not supported yet!");
     238        1370 :       case TargetLowering::Legal:
     239        1370 :         return TranslateLegalizeResults(Op, Result);
     240        1515 :       case TargetLowering::Custom:
     241        1515 :         if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) {
     242             :           if (Lowered == Result)
     243           0 :             return TranslateLegalizeResults(Op, Lowered);
     244        1515 :           Changed = true;
     245        1515 :           if (Lowered->getNumValues() != Op->getNumValues()) {
     246             :             // This expanded to something other than the load. Assume the
     247             :             // lowering code took care of any chain values, and just handle the
     248             :             // returned value.
     249             :             assert(Result.getValue(1).use_empty() &&
     250             :                    "There are still live users of the old chain!");
     251        1515 :             return LegalizeOp(Lowered);
     252             :           }
     253           0 :           return TranslateLegalizeResults(Op, Lowered);
     254           0 :         }
     255             :         LLVM_FALLTHROUGH;
     256             :       case TargetLowering::Expand:
     257        1572 :         Changed = true;
     258        1572 :         return LegalizeOp(ExpandLoad(Op));
     259             :       }
     260             :     }
     261     4016102 :   } else if (Op.getOpcode() == ISD::STORE) {
     262             :     StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     263      290450 :     EVT StVT = ST->getMemoryVT();
     264             :     MVT ValVT = ST->getValue().getSimpleValueType();
     265      425961 :     if (StVT.isVector() && ST->isTruncatingStore()) {
     266             :       LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: ";
     267             :                  Node->dump(&DAG));
     268        1373 :       switch (TLI.getTruncStoreAction(ValVT, StVT)) {
     269           0 :       default: llvm_unreachable("This action is not supported yet!");
     270         248 :       case TargetLowering::Legal:
     271         945 :         return TranslateLegalizeResults(Op, Result);
     272         244 :       case TargetLowering::Custom: {
     273         244 :         SDValue Lowered = TLI.LowerOperation(Result, DAG);
     274         244 :         Changed = Lowered != Result;
     275         244 :         return TranslateLegalizeResults(Op, Lowered);
     276             :       }
     277         205 :       case TargetLowering::Expand:
     278         205 :         Changed = true;
     279         205 :         return LegalizeOp(ExpandStore(Op));
     280             :       }
     281             :     }
     282     3725652 :   } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE)
     283             :     HasVectorValue = true;
     284             : 
     285     9411575 :   for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
     286     9411575 :        J != E;
     287             :        ++J)
     288     5141684 :     HasVectorValue |= J->isVector();
     289     4269891 :   if (!HasVectorValue)
     290     3184558 :     return TranslateLegalizeResults(Op, Result);
     291             : 
     292             :   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
     293     2170666 :   switch (Op.getOpcode()) {
     294      864803 :   default:
     295      864803 :     return TranslateLegalizeResults(Op, Result);
     296          50 :   case ISD::STRICT_FADD:
     297             :   case ISD::STRICT_FSUB:
     298             :   case ISD::STRICT_FMUL:
     299             :   case ISD::STRICT_FDIV:
     300             :   case ISD::STRICT_FSQRT:
     301             :   case ISD::STRICT_FMA:
     302             :   case ISD::STRICT_FPOW:
     303             :   case ISD::STRICT_FPOWI:
     304             :   case ISD::STRICT_FSIN:
     305             :   case ISD::STRICT_FCOS:
     306             :   case ISD::STRICT_FEXP:
     307             :   case ISD::STRICT_FEXP2:
     308             :   case ISD::STRICT_FLOG:
     309             :   case ISD::STRICT_FLOG10:
     310             :   case ISD::STRICT_FLOG2:
     311             :   case ISD::STRICT_FRINT:
     312             :   case ISD::STRICT_FNEARBYINT:
     313             :     // These pseudo-ops get legalized as if they were their non-strict
     314             :     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
     315             :     // is also legal, but if ISD::FSQRT requires expansion then so does
     316             :     // ISD::STRICT_FSQRT.
     317         150 :     Action = TLI.getStrictFPOperationAction(Node->getOpcode(), 
     318             :                                             Node->getValueType(0));
     319          50 :     break;
     320      218078 :   case ISD::ADD:
     321             :   case ISD::SUB:
     322             :   case ISD::MUL:
     323             :   case ISD::SDIV:
     324             :   case ISD::UDIV:
     325             :   case ISD::SREM:
     326             :   case ISD::UREM:
     327             :   case ISD::SDIVREM:
     328             :   case ISD::UDIVREM:
     329             :   case ISD::FADD:
     330             :   case ISD::FSUB:
     331             :   case ISD::FMUL:
     332             :   case ISD::FDIV:
     333             :   case ISD::FREM:
     334             :   case ISD::AND:
     335             :   case ISD::OR:
     336             :   case ISD::XOR:
     337             :   case ISD::SHL:
     338             :   case ISD::SRA:
     339             :   case ISD::SRL:
     340             :   case ISD::ROTL:
     341             :   case ISD::ROTR:
     342             :   case ISD::BSWAP:
     343             :   case ISD::BITREVERSE:
     344             :   case ISD::CTLZ:
     345             :   case ISD::CTTZ:
     346             :   case ISD::CTLZ_ZERO_UNDEF:
     347             :   case ISD::CTTZ_ZERO_UNDEF:
     348             :   case ISD::CTPOP:
     349             :   case ISD::SELECT:
     350             :   case ISD::VSELECT:
     351             :   case ISD::SELECT_CC:
     352             :   case ISD::SETCC:
     353             :   case ISD::ZERO_EXTEND:
     354             :   case ISD::ANY_EXTEND:
     355             :   case ISD::TRUNCATE:
     356             :   case ISD::SIGN_EXTEND:
     357             :   case ISD::FP_TO_SINT:
     358             :   case ISD::FP_TO_UINT:
     359             :   case ISD::FNEG:
     360             :   case ISD::FABS:
     361             :   case ISD::FMINNUM:
     362             :   case ISD::FMAXNUM:
     363             :   case ISD::FMINNAN:
     364             :   case ISD::FMAXNAN:
     365             :   case ISD::FCOPYSIGN:
     366             :   case ISD::FSQRT:
     367             :   case ISD::FSIN:
     368             :   case ISD::FCOS:
     369             :   case ISD::FPOWI:
     370             :   case ISD::FPOW:
     371             :   case ISD::FLOG:
     372             :   case ISD::FLOG2:
     373             :   case ISD::FLOG10:
     374             :   case ISD::FEXP:
     375             :   case ISD::FEXP2:
     376             :   case ISD::FCEIL:
     377             :   case ISD::FTRUNC:
     378             :   case ISD::FRINT:
     379             :   case ISD::FNEARBYINT:
     380             :   case ISD::FROUND:
     381             :   case ISD::FFLOOR:
     382             :   case ISD::FP_ROUND:
     383             :   case ISD::FP_EXTEND:
     384             :   case ISD::FMA:
     385             :   case ISD::SIGN_EXTEND_INREG:
     386             :   case ISD::ANY_EXTEND_VECTOR_INREG:
     387             :   case ISD::SIGN_EXTEND_VECTOR_INREG:
     388             :   case ISD::ZERO_EXTEND_VECTOR_INREG:
     389             :   case ISD::SMIN:
     390             :   case ISD::SMAX:
     391             :   case ISD::UMIN:
     392             :   case ISD::UMAX:
     393             :   case ISD::SMUL_LOHI:
     394             :   case ISD::UMUL_LOHI:
     395             :   case ISD::FCANONICALIZE:
     396      436156 :     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
     397      218078 :     break;
     398           0 :   case ISD::FP_ROUND_INREG:
     399           0 :     Action = TLI.getOperationAction(Node->getOpcode(), 
     400             :                cast<VTSDNode>(Node->getOperand(1))->getVT());
     401           0 :     break;
     402        2015 :   case ISD::SINT_TO_FP:
     403             :   case ISD::UINT_TO_FP:
     404        6045 :     Action = TLI.getOperationAction(Node->getOpcode(), 
     405        2015 :                                     Node->getOperand(0).getValueType());
     406        2015 :     break;
     407         103 :   case ISD::MSCATTER:
     408         309 :     Action = TLI.getOperationAction(Node->getOpcode(),
     409             :                cast<MaskedScatterSDNode>(Node)->getValue().getValueType());
     410         103 :     break;
     411         284 :   case ISD::MSTORE:
     412         852 :     Action = TLI.getOperationAction(Node->getOpcode(),
     413             :                cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
     414         284 :     break;
     415             :   }
     416             : 
     417             :   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
     418             : 
     419      220530 :   switch (Action) {
     420           0 :   default: llvm_unreachable("This action is not supported yet!");
     421       17310 :   case TargetLowering::Promote:
     422       17310 :     Result = Promote(Op);
     423       17310 :     Changed = true;
     424       17310 :     break;
     425             :   case TargetLowering::Legal:
     426             :     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
     427             :     break;
     428       56472 :   case TargetLowering::Custom: {
     429             :     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
     430       56472 :     if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) {
     431             :       LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n");
     432       55055 :       Result = Tmp1;
     433       55055 :       break;
     434             :     }
     435             :     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
     436             :     LLVM_FALLTHROUGH;
     437             :   }
     438             :   case TargetLowering::Expand:
     439        8742 :     Result = Expand(Op);
     440             :   }
     441             : 
     442             :   // Make sure that the generated code is itself legal.
     443             :   if (Result != Op) {
     444       59733 :     Result = LegalizeOp(Result);
     445       59733 :     Changed = true;
     446             :   }
     447             : 
     448             :   // Note that LegalizeOp may be reentered even from single-use nodes, which
     449             :   // means that we always must cache transformed nodes.
     450      220530 :   AddLegalizedOperand(Op, Result);
     451      220530 :   return Result;
     452             : }
     453             : 
     454       17310 : SDValue VectorLegalizer::Promote(SDValue Op) {
     455             :   // For a few operations there is a specific concept for promotion based on
     456             :   // the operand's type.
     457       17310 :   switch (Op.getOpcode()) {
     458          22 :   case ISD::SINT_TO_FP:
     459             :   case ISD::UINT_TO_FP:
     460             :     // "Promote" the operation by extending the operand.
     461          22 :     return PromoteINT_TO_FP(Op);
     462         154 :   case ISD::FP_TO_UINT:
     463             :   case ISD::FP_TO_SINT:
     464             :     // Promote the operation by widening the result type.
     465         154 :     return PromoteFP_TO_INT(Op);
     466             :   }
     467             : 
     468             :   // There are currently two cases of vector promotion:
     469             :   // 1) Bitcasting a vector of integers to a different vector type of the same
     470             :   //    overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
     471             :   // 2) Extending a vector of floats to a vector of the same number of larger
     472             :   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
     473       17134 :   MVT VT = Op.getSimpleValueType();
     474             :   assert(Op.getNode()->getNumValues() == 1 &&
     475             :          "Can't promote a vector with multiple results!");
     476       17134 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     477             :   SDLoc dl(Op);
     478       51402 :   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
     479             : 
     480       85834 :   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
     481      103050 :     if (Op.getOperand(j).getValueType().isVector())
     482             :       if (Op.getOperand(j)
     483       68370 :               .getValueType()
     484       68370 :               .getVectorElementType()
     485       34448 :               .isFloatingPoint() &&
     486       68743 :           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
     487         423 :         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j));
     488             :       else
     489      102132 :         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
     490             :     else
     491         165 :       Operands[j] = Op.getOperand(j);
     492             :   }
     493             : 
     494       34268 :   Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags());
     495       34329 :   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
     496       51239 :       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
     497       17138 :        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
     498         336 :     return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl));
     499             :   else
     500       34044 :     return DAG.getNode(ISD::BITCAST, dl, VT, Op);
     501             : }
     502             : 
     503          22 : SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) {
     504             :   // INT_TO_FP operations may require the input operand be promoted even
     505             :   // when the type is otherwise legal.
     506          22 :   MVT VT = Op.getOperand(0).getSimpleValueType();
     507          44 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     508             :   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
     509             :          "Vectors have different number of elements!");
     510             : 
     511             :   SDLoc dl(Op);
     512          66 :   SmallVector<SDValue, 4> Operands(Op.getNumOperands());
     513             : 
     514          22 :   unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND :
     515             :     ISD::SIGN_EXTEND;
     516          66 :   for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
     517          66 :     if (Op.getOperand(j).getValueType().isVector())
     518          66 :       Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j));
     519             :     else
     520           0 :       Operands[j] = Op.getOperand(j);
     521             :   }
     522             : 
     523          66 :   return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands);
     524             : }
     525             : 
     526             : // For FP_TO_INT we promote the result type to a vector type with wider
     527             : // elements and then truncate the result.  This is different from the default
     528             : // PromoteVector which uses bitcast to promote thus assuming that the
     529             : // promoted vector type has the same overall size.
     530         154 : SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) {
     531         154 :   MVT VT = Op.getSimpleValueType();
     532         308 :   MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT);
     533             :   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
     534             :          "Vectors have different number of elements!");
     535             : 
     536         154 :   unsigned NewOpc = Op->getOpcode();
     537             :   // Change FP_TO_UINT to FP_TO_SINT if possible.
     538             :   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
     539         154 :   if (NewOpc == ISD::FP_TO_UINT &&
     540          88 :       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
     541             :     NewOpc = ISD::FP_TO_SINT;
     542             : 
     543             :   SDLoc dl(Op);
     544         308 :   SDValue Promoted  = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0));
     545             : 
     546             :   // Assert that the converted value fits in the original type.  If it doesn't
     547             :   // (eg: because the value being converted is too big), then the result of the
     548             :   // original operation was undefined anyway, so the assert is still correct.
     549         462 :   Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
     550             :                                                             : ISD::AssertSext,
     551             :                          dl, NVT, Promoted,
     552         462 :                          DAG.getValueType(VT.getScalarType()));
     553         462 :   return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
     554             : }
     555             : 
     556        1572 : SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
                         // Expands a vector load.  When the memory vector has more than one element
                         // and its element type is not byte-sized, the data is loaded as integer
                         // chunks of the pointer type and every element is extracted with shifts and
                         // masks.  All other loads are handed to TLI.scalarizeVectorLoad.
                         //
                         // Both results (loaded value and output chain) are registered through
                         // AddLegalizedOperand; the one selected by Op.getResNo() is returned.
     557             :   LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
     558             : 
     559        1572 :   EVT SrcVT = LD->getMemoryVT();
     560        1572 :   EVT SrcEltVT = SrcVT.getScalarType();
     561        1572 :   unsigned NumElem = SrcVT.getVectorNumElements();
     562             : 
     563        1572 :   SDValue NewChain;
     564        1572 :   SDValue Value;
     565        3144 :   if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
     566             :     SDLoc dl(Op);
     567             : 
     568             :     SmallVector<SDValue, 8> Vals;
     569             :     SmallVector<SDValue, 8> LoadChains;
     570             : 
     571         942 :     EVT DstEltVT = LD->getValueType(0).getScalarType();
     572         471 :     SDValue Chain = LD->getChain();
     573         471 :     SDValue BasePTR = LD->getBasePtr();
     574             :     ISD::LoadExtType ExtType = LD->getExtensionType();
     575             : 
     576             :     // When elements in a vector are not byte-addressable, we cannot directly
     577             :     // load each element by advancing pointer, which could only address bytes.
     578             :     // Instead, we load all significant words, mask bits off, and concatenate
     579             :     // them to form each element. Finally, they are extended to destination
     580             :     // scalar type to build the destination vector.
     581         471 :     EVT WideVT = TLI.getPointerTy(DAG.getDataLayout());
     582             : 
     583             :     assert(WideVT.isRound() &&
     584             :            "Could not handle the sophisticated case when the widest integer is"
     585             :            " not power of 2.");
     586             :     assert(WideVT.bitsGE(SrcEltVT) &&
     587             :            "Type is not legalized?");
     588             : 
                           // Load the whole memory footprint of the vector, WideBytes at a time.  A
                           // tail shorter than WideBytes is read with progressively halved extending
                           // loads.  Every load pushes one entry onto LoadVals and one onto
                           // LoadChains.
                           // NOTE(review): a tail that needs more than one narrow load (e.g. 3 bytes
                           // -> 2 + 1) produces several LoadVals entries, while the extraction loop
                           // below only advances WideIdx once per WideBits bits.  That looks like it
                           // indexes the wrong chunk for such tails - confirm before relying on it.
     589             :     unsigned WideBytes = WideVT.getStoreSize();
     590             :     unsigned Offset = 0;
     591             :     unsigned RemainingBytes = SrcVT.getStoreSize();
     592             :     SmallVector<SDValue, 8> LoadVals;
     593         946 :     while (RemainingBytes > 0) {
     594             :       SDValue ScalarLoad;
     595             :       unsigned LoadBytes = WideBytes;
     596             : 
     597         475 :       if (RemainingBytes >= LoadBytes) {
     598          17 :         ScalarLoad =
     599          51 :             DAG.getLoad(WideVT, dl, Chain, BasePTR,
     600          17 :                         LD->getPointerInfo().getWithOffset(Offset),
     601          17 :                         MinAlign(LD->getAlignment(), Offset),
     602          85 :                         LD->getMemOperand()->getFlags(), LD->getAAInfo());
     603             :       } else {
     604         458 :         EVT LoadVT = WideVT;
     605        1406 :         while (RemainingBytes < LoadBytes) {
     606         948 :           LoadBytes >>= 1; // Reduce the load size by half.
     607         948 :           LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3);
     608             :         }
     609         458 :         ScalarLoad =
     610        1374 :             DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR,
     611         458 :                            LD->getPointerInfo().getWithOffset(Offset), LoadVT,
     612         458 :                            MinAlign(LD->getAlignment(), Offset),
     613        2290 :                            LD->getMemOperand()->getFlags(), LD->getAAInfo());
     614             :       }
     615             : 
     616         475 :       RemainingBytes -= LoadBytes;
     617         475 :       Offset += LoadBytes;
     618             : 
     619         475 :       BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
     620             : 
     621         475 :       LoadVals.push_back(ScalarLoad.getValue(0));
     622         475 :       LoadChains.push_back(ScalarLoad.getValue(1));
     623             :     }
     624             : 
     625             :     // Extract bits, pack and extend/trunc them into destination type.
     626         471 :     unsigned SrcEltBits = SrcEltVT.getSizeInBits();
                           // Build the low-bits mask as an APInt.  The former (1U << SrcEltBits) - 1
                           // is undefined behaviour as soon as SrcEltBits reaches 32, which can
                           // happen for non-byte-sized elements such as i33 when WideVT is 64 bits.
     627         471 :     SDValue SrcEltBitMask = DAG.getConstant(
                               APInt::getLowBitsSet(WideVT.getSizeInBits(), SrcEltBits), dl, WideVT);
     628             : 
     629             :     unsigned BitOffset = 0;
     630             :     unsigned WideIdx = 0;
     631         471 :     unsigned WideBits = WideVT.getSizeInBits();
     632             : 
                           // BitOffset is the bit position of the current element inside
                           // LoadVals[WideIdx]; both are advanced as elements are consumed.
     633        5363 :     for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
     634        2446 :       SDValue Lo, Hi, ShAmt;
     635             : 
                             // Low part: shift the element down to bit 0 and mask it.
     636        2446 :       if (BitOffset < WideBits) {
     637        4892 :         ShAmt = DAG.getConstant(
     638        4892 :             BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     639        7338 :         Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
     640        4892 :         Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
     641             :       }
     642             : 
     643        2446 :       BitOffset += SrcEltBits;
     644        2446 :       if (BitOffset >= WideBits) {
     645          17 :         WideIdx++;
     646          17 :         BitOffset -= WideBits;
                               // A non-zero remainder means the element straddles two chunks: its
                               // high bits sit at the bottom of the next chunk and are shifted up
                               // past the (SrcEltBits - BitOffset) bits already taken into Lo.
     647          17 :         if (BitOffset > 0) {
     648           0 :           ShAmt = DAG.getConstant(
     649           0 :               SrcEltBits - BitOffset, dl,
     650           0 :               TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     651           0 :           Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
     652           0 :           Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
     653             :         }
     654             :       }
     655             : 
     656        2446 :       if (Hi.getNode())
     657           0 :         Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
     658             : 
                             // Convert the extracted WideVT value to the destination element type
                             // according to the extension kind of the original load.
     659        2446 :       switch (ExtType) {
     660           0 :       default: llvm_unreachable("Unknown extended-load op!");
     661        1198 :       case ISD::EXTLOAD:
     662        1198 :         Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT);
     663             :         break;
     664         408 :       case ISD::ZEXTLOAD:
     665         408 :         Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT);
     666             :         break;
     667         840 :       case ISD::SEXTLOAD:
                               // Sign-extend inside WideVT first (SHL then SRA by the unused high
                               // bits), then sign-extend or truncate to the destination type.
     668         840 :         ShAmt =
     669        3360 :             DAG.getConstant(WideBits - SrcEltBits, dl,
     670        2520 :                             TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
     671        1680 :         Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt);
     672        1680 :         Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt);
     673         840 :         Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT);
     674             :         break;
     675             :       }
     676        2446 :       Vals.push_back(Lo);
     677             :     }
     678             : 
                           // Merge the chains of all partial loads and assemble the result vector.
     679         942 :     NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
     680        1413 :     Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals);
     681             :   } else {
     682        1101 :     SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG);
     683             : 
     684        1101 :     NewChain = Scalarized.getValue(1);
     685        1101 :     Value = Scalarized.getValue(0);
     686             :   }
     687             : 
     688        1572 :   AddLegalizedOperand(Op.getValue(0), Value);
     689        1572 :   AddLegalizedOperand(Op.getValue(1), NewChain);
     690             : 
                         // Result number 0 is the loaded value; any other result is the chain.
     691        1572 :   return (Op.getResNo() ? NewChain : Value);
     692             : }
     693             : 
     694         205 : SDValue VectorLegalizer::ExpandStore(SDValue Op) {
                         // Expands a vector store by delegating to TLI.scalarizeVectorStore, records
                         // the replacement for Op through AddLegalizedOperand, and returns it.
     695         205 :   StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
     696         205 :   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
     697         205 :   AddLegalizedOperand(Op, TF);
     698         205 :   return TF;
     699             : }
     700             : 
     701        8742 : SDValue VectorLegalizer::Expand(SDValue Op) {
                         // Dispatches Op to the dedicated expansion routine for its opcode.  Opcodes
                         // without a dedicated routine are unrolled with DAG.UnrollVectorOp.
     702       17484 :   switch (Op->getOpcode()) {
     703         665 :   case ISD::SIGN_EXTEND_INREG:
     704         665 :     return ExpandSEXTINREG(Op);
     705         261 :   case ISD::ANY_EXTEND_VECTOR_INREG:
     706         261 :     return ExpandANY_EXTEND_VECTOR_INREG(Op);
     707          12 :   case ISD::SIGN_EXTEND_VECTOR_INREG:
     708          12 :     return ExpandSIGN_EXTEND_VECTOR_INREG(Op);
     709         652 :   case ISD::ZERO_EXTEND_VECTOR_INREG:
     710         652 :     return ExpandZERO_EXTEND_VECTOR_INREG(Op);
     711          49 :   case ISD::BSWAP:
     712          49 :     return ExpandBSWAP(Op);
     713        1432 :   case ISD::VSELECT:
     714        1432 :     return ExpandVSELECT(Op);
     715          82 :   case ISD::SELECT:
     716          82 :     return ExpandSELECT(Op);
     717         136 :   case ISD::UINT_TO_FP:
     718         136 :     return ExpandUINT_TO_FLOAT(Op);
     719          40 :   case ISD::FNEG:
     720          40 :     return ExpandFNEG(Op);
     721          44 :   case ISD::FSUB:
     722          44 :     return ExpandFSUB(Op);
                         // SETCC has no custom expansion here; it goes to a dedicated unroll helper.
     723          94 :   case ISD::SETCC:
     724          94 :     return UnrollVSETCC(Op);
     725         132 :   case ISD::BITREVERSE:
     726         132 :     return ExpandBITREVERSE(Op);
                         // Both CTLZ flavours share one routine.
     727         136 :   case ISD::CTLZ:
     728             :   case ISD::CTLZ_ZERO_UNDEF:
     729         136 :     return ExpandCTLZ(Op);
     730         111 :   case ISD::CTTZ_ZERO_UNDEF:
     731         111 :     return ExpandCTTZ_ZERO_UNDEF(Op);
                         // All strict FP opcodes listed below share ExpandStrictFPOp.
     732          22 :   case ISD::STRICT_FADD:
     733             :   case ISD::STRICT_FSUB: 
     734             :   case ISD::STRICT_FMUL:
     735             :   case ISD::STRICT_FDIV:
     736             :   case ISD::STRICT_FSQRT:
     737             :   case ISD::STRICT_FMA:
     738             :   case ISD::STRICT_FPOW:
     739             :   case ISD::STRICT_FPOWI:
     740             :   case ISD::STRICT_FSIN:
     741             :   case ISD::STRICT_FCOS:
     742             :   case ISD::STRICT_FEXP:
     743             :   case ISD::STRICT_FEXP2:
     744             :   case ISD::STRICT_FLOG:
     745             :   case ISD::STRICT_FLOG10:
     746             :   case ISD::STRICT_FLOG2:
     747             :   case ISD::STRICT_FRINT:
     748             :   case ISD::STRICT_FNEARBYINT:
     749          22 :     return ExpandStrictFPOp(Op);
     750        4874 :   default:
     751        4874 :     return DAG.UnrollVectorOp(Op.getNode());
     752             :   }
     753             : }
     754             : 
     755          82 : SDValue VectorLegalizer::ExpandSELECT(SDValue Op) {
     756             :   // Lower a select instruction where the condition is a scalar and the
     757             :   // operands are vectors. Lower this select to VSELECT and implement it
     758             :   // using XOR AND OR. The selector bit is broadcasted.
                         //
                         // Operand 0 is the scalar condition, operands 1 and 2 are the vector values.
                         // The result is (Op1 & Mask) | (Op2 & ~Mask), bitcast back to the original
                         // value type, or an unrolled op when the needed vector operations are Expand.
     759          82 :   EVT VT = Op.getValueType();
     760             :   SDLoc DL(Op);
     761             : 
     762          82 :   SDValue Mask = Op.getOperand(0);
     763          82 :   SDValue Op1 = Op.getOperand(1);
     764          82 :   SDValue Op2 = Op.getOperand(2);
     765             : 
     766             :   assert(VT.isVector() && !Mask.getValueType().isVector()
     767             :          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
     768             : 
     769             :   // If we can't even use the basic vector operations of
     770             :   // AND,OR,XOR, we will have to scalarize the op.
     771             :   // Notice that the operation may be 'promoted' which means that it is
     772             :   // 'bitcasted' to another type which is handled.
     773             :   // Also, we need to be able to construct a splat vector using BUILD_VECTOR.
     774          89 :   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
     775           7 :       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
     776          89 :       TLI.getOperationAction(ISD::OR,  VT) == TargetLowering::Expand ||
     777             :       TLI.getOperationAction(ISD::BUILD_VECTOR,  VT) == TargetLowering::Expand)
     778          75 :     return DAG.UnrollVectorOp(Op.getNode());
     779             : 
     780             :   // Generate a mask operand.
     781           7 :   EVT MaskTy = VT.changeVectorElementTypeToInteger();
     782             : 
     783             :   // What is the size of each element in the vector mask.
     784           7 :   EVT BitTy = MaskTy.getScalarType();
     785             : 
                         // Turn the scalar condition into an all-ones or all-zero scalar of the mask
                         // element type.
     786          14 :   Mask = DAG.getSelect(DL, BitTy, Mask,
     787          14 :           DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL,
     788             :                           BitTy),
     789          14 :           DAG.getConstant(0, DL, BitTy));
     790             : 
     791             :   // Broadcast the mask so that the entire vector is all-one or all zero.
     792           7 :   Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask);
     793             : 
     794             :   // Bitcast the operands to be the same type as the mask.
     795             :   // This is needed when we select between FP types because
     796             :   // the mask is a vector of integers.
     797          14 :   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
     798          14 :   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
     799             : 
                         // NotMask = Mask ^ all-ones, i.e. the bitwise complement of the mask.
     800           7 :   SDValue AllOnes = DAG.getConstant(
     801          14 :             APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy);
     802          14 :   SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes);
     803             : 
                         // Keep Op1 where the mask is set and Op2 where it is clear, then merge.
     804          14 :   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
     805          14 :   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
     806          14 :   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
     807          14 :   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
     808             : }
     809             : 
     810         665 : SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
                         // Expands SIGN_EXTEND_INREG as a left shift followed by an arithmetic right
                         // shift by the same amount.  Falls back to unrolling when SRA or SHL is
                         // marked Expand for the value type.
     811         665 :   EVT VT = Op.getValueType();
     812             : 
     813             :   // Make sure that the SRA and SHL instructions are available.
     814        1972 :   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
     815             :       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
     816          23 :     return DAG.UnrollVectorOp(Op.getNode());
     817             : 
     818             :   SDLoc DL(Op);
                         // Operand 1 is a VTSDNode holding the type being extended from.
     819        1284 :   EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
     820             : 
                         // Shift amount = element width minus the width of the type extended from.
     821             :   unsigned BW = VT.getScalarSizeInBits();
     822             :   unsigned OrigBW = OrigTy.getScalarSizeInBits();
     823         642 :   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
     824             : 
     825         642 :   Op = Op.getOperand(0);
     826        1284 :   Op =   DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz);
     827        1284 :   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
     828             : }
     829             : 
     830             : // Generically expand a vector anyext in register to a shuffle of the relevant
     831             : // lanes into the appropriate locations, with other lanes left undef.
                       // The shuffle is done in the source vector type and the result is then
                       // bitcast to the (wider-element) result type of Op.
     832         261 : SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
     833             :   SDLoc DL(Op);
     834         261 :   EVT VT = Op.getValueType();
     835         261 :   int NumElements = VT.getVectorNumElements();
     836         261 :   SDValue Src = Op.getOperand(0);
     837         261 :   EVT SrcVT = Src.getValueType();
     838         261 :   int NumSrcElements = SrcVT.getVectorNumElements();
     839             : 
     840             :   // Build a base mask of undef shuffles.
     841             :   SmallVector<int, 16> ShuffleMask;
     842         261 :   ShuffleMask.resize(NumSrcElements, -1);
     843             : 
     844             :   // Place the extended lanes into the correct locations.
                         // ExtLaneScale is the number of source lanes covered by one result lane.
                         // On big-endian layouts the source lane goes into the last slot of its
                         // group (offset ExtLaneScale - 1), otherwise into the first (offset 0).
     845         261 :   int ExtLaneScale = NumSrcElements / NumElements;
     846         522 :   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
     847        1885 :   for (int i = 0; i < NumElements; ++i)
     848        1624 :     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
     849             : 
     850         261 :   return DAG.getNode(
     851             :       ISD::BITCAST, DL, VT,
     852         522 :       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
     853             : }
     854             : 
     855          12 : SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
                         // Expands a sign-extend-in-register as an any-extend-in-register node
                         // followed by SHL and SRA by (result element width - source element width).
     856             :   SDLoc DL(Op);
     857          12 :   EVT VT = Op.getValueType();
     858          12 :   SDValue Src = Op.getOperand(0);
     859          12 :   EVT SrcVT = Src.getValueType();
     860             : 
     861             :   // First build an any-extend node which can be legalized above when we
     862             :   // recurse through it.
     863          12 :   Op = DAG.getAnyExtendVectorInReg(Src, DL, VT);
     864             : 
     865             :   // Now we need sign extend. Do this by shifting the elements. Even if these
     866             :   // aren't legal operations, they have a better chance of being legalized
     867             :   // without full scalarization than the sign extension does.
     868             :   unsigned EltWidth = VT.getScalarSizeInBits();
     869             :   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
     870          12 :   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
     871          12 :   return DAG.getNode(ISD::SRA, DL, VT,
     872             :                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
     873          24 :                      ShiftAmount);
     874             : }
     875             : 
     876             : // Generically expand a vector zext in register to a shuffle of the relevant
     877             : // lanes into the appropriate locations, a blend of zero into the high bits,
     878             : // and a bitcast to the wider element type.
     879         652 : SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
     880             :   SDLoc DL(Op);
     881         652 :   EVT VT = Op.getValueType();
     882         652 :   int NumElements = VT.getVectorNumElements();
     883         652 :   SDValue Src = Op.getOperand(0);
     884         652 :   EVT SrcVT = Src.getValueType();
     885         652 :   int NumSrcElements = SrcVT.getVectorNumElements();
     886             : 
     887             :   // Build up a zero vector to blend into this one.
     888         652 :   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
     889             : 
     890             :   // Shuffle the incoming lanes into the correct position, and pull all other
     891             :   // lanes from the zero vector.
                         // The mask starts as the identity 0..NumSrcElements-1; Zero is passed as the
                         // first shuffle operand below, so these entries refer to the zero vector.
     892             :   SmallVector<int, 16> ShuffleMask;
     893         652 :   ShuffleMask.reserve(NumSrcElements);
     894       11388 :   for (int i = 0; i < NumSrcElements; ++i)
     895       10736 :     ShuffleMask.push_back(i);
     896             : 
                         // Overwrite one slot per result lane with NumSrcElements + i, which refers
                         // to lane i of the second shuffle operand (Src).  The slot is the last one
                         // of each ExtLaneScale-wide group on big-endian layouts, else the first.
     897         652 :   int ExtLaneScale = NumSrcElements / NumElements;
     898        1304 :   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
     899        6884 :   for (int i = 0; i < NumElements; ++i)
     900        6232 :     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
     901             : 
     902         652 :   return DAG.getNode(ISD::BITCAST, DL, VT,
     903        1304 :                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
     904             : }
     905             : 
     906         141 : static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
                         // Appends to ShuffleMask a byte-level mask for VT: for each vector element
                         // I, the byte indices of that element are pushed from its highest byte
                         // (I * size + size - 1) down to its lowest (I * size), reversing the byte
                         // order inside every element.  Existing mask contents are not cleared.
     907         141 :   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
     908         983 :   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
     909        3866 :     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
     910        3024 :       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
     911         141 : }
     912             : 
     913          49 : SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) {
                         // Expands BSWAP as a byte shuffle: bitcast the operand to a vector of i8,
                         // shuffle with the byte-reversal mask, and bitcast back.  The op is
                         // unrolled instead when the target reports the shuffle mask as not legal.
     914          49 :   EVT VT = Op.getValueType();
     915             : 
     916             :   // Generate a byte wise shuffle mask for the BSWAP.
     917             :   SmallVector<int, 16> ShuffleMask;
     918          49 :   createBSWAPShuffleMask(VT, ShuffleMask);
                         // The byte vector has one i8 lane per mask entry.
     919          98 :   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
     920             : 
     921             :   // Only emit a shuffle if the mask is legal.
     922          98 :   if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT))
     923          12 :     return DAG.UnrollVectorOp(Op.getNode());
     924             : 
     925             :   SDLoc DL(Op);
     926          74 :   Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
     927          74 :   Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
     928          74 :   return DAG.getNode(ISD::BITCAST, DL, VT, Op);
     929             : }
     930             : 
     931         132 : SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) {
                         // Expands a vector BITREVERSE using one of three strategies, tried in order:
                         //   1. unroll, when the scalar BITREVERSE is legal or custom;
                         //   2. byte-swap shuffle plus BITREVERSE on the i8 vector, for elements that
                         //      are a whole number of bytes wider than 8 bits;
                         //   3. return Op unchanged for LegalizeDAG when SHL/SRL/AND/OR are usable on
                         //      VT, otherwise unroll.
     932         132 :   EVT VT = Op.getValueType();
     933             : 
     934             :   // If we have the scalar operation, it's probably cheaper to unroll it.
     935         132 :   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
     936          10 :     return DAG.UnrollVectorOp(Op.getNode());
     937             : 
     938             :   // If the vector element width is a whole number of bytes, test if it's legal
     939             :   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
     940             :   // vector. This greatly reduces the number of bit shifts necessary.
     941             :   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
     942         122 :   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
     943             :     SmallVector<int, 16> BSWAPMask;
     944          92 :     createBSWAPShuffleMask(VT, BSWAPMask);
     945             : 
                           // Use this path only if the shuffle is legal and the byte vector either
                           // supports BITREVERSE directly or supports SHL, SRL, AND and OR.
     946         184 :     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
     947         276 :     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
     948          84 :         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
     949             :          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
     950          22 :           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
     951          44 :           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
     952          22 :           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
     953             :       SDLoc DL(Op);
     954         168 :       Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0));
     955         168 :       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
     956         168 :                                 BSWAPMask);
     957         168 :       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
     958         168 :       return DAG.getNode(ISD::BITCAST, DL, VT, Op);
     959             :     }
     960             :   }
     961             : 
     962             :   // If we have the appropriate vector bit operations, it is better to use them
     963             :   // than unrolling and expanding each component.
     964          38 :   if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) ||
     965          32 :       !TLI.isOperationLegalOrCustom(ISD::SRL, VT) ||
     966          64 :       !TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
     967          32 :       !TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
     968           6 :     return DAG.UnrollVectorOp(Op.getNode());
     969             : 
     970             :   // Let LegalizeDAG handle this later.
     971          32 :   return Op;
     972             : }
     973             : 
     974        1432 : SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
     975             :   // Implement VSELECT in terms of XOR, AND, OR
     976             :   // on platforms which do not support blend natively.
                         //
                         // Operand 0 is the vector mask, operands 1 and 2 are the values.  The result
                         // is (Op1 & Mask) | (Op2 & ~Mask) computed in the mask's type and bitcast to
                         // the result type of Op, or an unrolled op when that is not possible.
     977             :   SDLoc DL(Op);
     978             : 
     979        1432 :   SDValue Mask = Op.getOperand(0);
     980        1432 :   SDValue Op1 = Op.getOperand(1);
     981        1432 :   SDValue Op2 = Op.getOperand(2);
     982             : 
                         // Note: VT is the type of the MASK, not of the selected values.
     983        1432 :   EVT VT = Mask.getValueType();
     984             : 
     985             :   // If we can't even use the basic vector operations of
     986             :   // AND,OR,XOR, we will have to scalarize the op.
     987             :   // Notice that the operation may be 'promoted' which means that it is
     988             :   // 'bitcasted' to another type which is handled.
     989             :   // This operation also isn't safe with AND, OR, XOR when the boolean
     990             :   // type is 0/1 as we need an all ones vector constant to mask with.
     991             :   // FIXME: Sign extend 1 to all ones if that's legal on the target.
     992        2847 :   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
     993        1415 :       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
     994        2847 :       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
     995        2830 :       TLI.getBooleanContents(Op1.getValueType()) !=
     996             :           TargetLowering::ZeroOrNegativeOneBooleanContent)
     997          17 :     return DAG.UnrollVectorOp(Op.getNode());
     998             : 
     999             :   // If the mask and the type are different sizes, unroll the vector op. This
    1000             :   // can occur when getSetCCResultType returns something that is different in
    1001             :   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
    1002        1415 :   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
    1003           0 :     return DAG.UnrollVectorOp(Op.getNode());
    1004             : 
    1005             :   // Bitcast the operands to be the same type as the mask.
    1006             :   // This is needed when we select between FP types because
    1007             :   // the mask is a vector of integers.
    1008        2830 :   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
    1009        2830 :   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
    1010             : 
                         // NotMask = Mask ^ all-ones, i.e. the bitwise complement of the mask.
    1011        1415 :   SDValue AllOnes = DAG.getConstant(
    1012        2830 :     APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
    1013        2830 :   SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
    1014             : 
                         // Keep Op1 where the mask is set and Op2 where it is clear, then merge.
    1015        2830 :   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
    1016        2830 :   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
    1017        2830 :   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
    1018        2830 :   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val);
    1019             : }
    1020             : 
    1021         136 : SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
                         // Expands UINT_TO_FP by splitting each integer element into two half-words
                         // and converting both with SINT_TO_FP:
                         //   result = SINT_TO_FP(x >> BW/2) * 2^(BW/2) + SINT_TO_FP(x & low-half-mask)
                         // Here VT is the INTEGER operand type; Op.getValueType() is the FP result
                         // type.  The op is unrolled when SINT_TO_FP or SRL is Expand for VT.
    1022         272 :   EVT VT = Op.getOperand(0).getValueType();
    1023             :   SDLoc DL(Op);
    1024             : 
    1025             :   // Make sure that the SINT_TO_FP and SRL instructions are available.
    1026         293 :   if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand ||
    1027             :       TLI.getOperationAction(ISD::SRL,        VT) == TargetLowering::Expand)
    1028         115 :     return DAG.UnrollVectorOp(Op.getNode());
    1029             : 
    1030             :   unsigned BW = VT.getScalarSizeInBits();
    1031             :   assert((BW == 64 || BW == 32) &&
    1032             :          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
    1033             : 
                         // Shift amount that moves the upper half-word down to bit 0.
    1034          21 :   SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
    1035             : 
    1036             :   // Constants to clear the upper part of the word.
    1037             :   // Notice that we can also use SHL+SHR, but using a constant is slightly
    1038             :   // faster on x86.
    1039          21 :   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
    1040          21 :   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
    1041             : 
    1042             :   // Two to the power of half-word-size.
    1043          42 :   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType());
    1044             : 
    1045             :   // Clear upper part of LO, lower HI
    1046          42 :   SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
    1047          42 :   SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask);
    1048             : 
    1049             :   // Convert hi and lo to floats
    1050             :   // Convert the hi part back to the upper values
    1051             :   // TODO: Can any fast-math-flags be set on these nodes?
    1052          42 :   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI);
    1053          42 :           fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW);
    1054          42 :   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO);
    1055             : 
    1056             :   // Add the two halves
    1057          42 :   return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
    1058             : }
    1059             : 
    1060          40 : SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
    1061          40 :   if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
    1062             :     SDLoc DL(Op);
    1063           8 :     SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType());
    1064             :     // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB.
    1065           4 :     return DAG.getNode(ISD::FSUB, DL, Op.getValueType(),
    1066           4 :                        Zero, Op.getOperand(0));
    1067             :   }
    1068          36 :   return DAG.UnrollVectorOp(Op.getNode());
    1069             : }
    1070             : 
    1071          44 : SDValue VectorLegalizer::ExpandFSUB(SDValue Op) {
    1072             :   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
    1073             :   // we can defer this to operation legalization where it will be lowered as
    1074             :   // a+(-b).
    1075             :   EVT VT = Op.getValueType();
    1076          44 :   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
    1077             :       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
    1078           3 :     return Op; // Defer to LegalizeDAG
    1079             : 
    1080          41 :   return DAG.UnrollVectorOp(Op.getNode());
    1081             : }
    1082             : 
    1083         136 : SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
    1084         136 :   EVT VT = Op.getValueType();
    1085             :   unsigned NumBitsPerElt = VT.getScalarSizeInBits();
    1086             : 
    1087             :   // If the non-ZERO_UNDEF version is supported we can use that instead.
    1088         136 :   if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
    1089         111 :       TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
    1090             :     SDLoc DL(Op);
    1091         180 :     return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
    1092             :   }
    1093             : 
    1094             :   // If CTPOP is available we can lower with a CTPOP based method:
    1095             :   // u16 ctlz(u16 x) {
    1096             :   //   x |= (x >> 1);
    1097             :   //   x |= (x >> 2);
    1098             :   //   x |= (x >> 4);
    1099             :   //   x |= (x >> 8);
    1100             :   //   return ctpop(~x);
    1101             :   // }
    1102             :   // Ref: "Hacker's Delight" by Henry Warren
    1103             :   if (isPowerOf2_32(NumBitsPerElt) &&
    1104          46 :       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
    1105          26 :       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
    1106          52 :       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
    1107          26 :       TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
    1108             :     SDLoc DL(Op);
    1109          26 :     SDValue Res = Op.getOperand(0);
    1110          52 :     EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
    1111             : 
    1112         282 :     for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
    1113         256 :       Res = DAG.getNode(
    1114             :           ISD::OR, DL, VT, Res,
    1115         256 :           DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
    1116             : 
    1117          26 :     Res = DAG.getNOT(DL, Res, VT);
    1118          52 :     return DAG.getNode(ISD::CTPOP, DL, VT, Res);
    1119             :   }
    1120             : 
    1121             :   // Otherwise go ahead and unroll.
    1122          20 :   return DAG.UnrollVectorOp(Op.getNode());
    1123             : }
    1124             : 
    1125         111 : SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
    1126             :   // If the non-ZERO_UNDEF version is supported we can use that instead.
    1127         111 :   if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
    1128             :     SDLoc DL(Op);
    1129         210 :     return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
    1130             :   }
    1131             : 
    1132             :   // Otherwise go ahead and unroll.
    1133           6 :   return DAG.UnrollVectorOp(Op.getNode());
    1134             : }
    1135             : 
    1136          22 : SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
    1137          22 :   EVT VT = Op.getValueType();
    1138          22 :   EVT EltVT = VT.getVectorElementType();
    1139          22 :   unsigned NumElems = VT.getVectorNumElements();
    1140             :   unsigned NumOpers = Op.getNumOperands();
    1141          22 :   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    1142          22 :   EVT ValueVTs[] = {EltVT, MVT::Other};
    1143          22 :   SDValue Chain = Op.getOperand(0);
    1144             :   SDLoc dl(Op);
    1145             : 
    1146             :   SmallVector<SDValue, 32> OpValues;
    1147             :   SmallVector<SDValue, 32> OpChains;
    1148         114 :   for (unsigned i = 0; i < NumElems; ++i) {
    1149             :     SmallVector<SDValue, 4> Opers;
    1150          46 :     SDValue Idx = DAG.getConstant(i, dl, 
    1151         138 :                                   TLI.getVectorIdxTy(DAG.getDataLayout()));
    1152             : 
    1153             :     // The Chain is the first operand.
    1154          46 :     Opers.push_back(Chain);
    1155             : 
    1156             :     // Now process the remaining operands. 
    1157         178 :     for (unsigned j = 1; j < NumOpers; ++j) {
    1158          66 :       SDValue Oper = Op.getOperand(j);
    1159         132 :       EVT OperVT = Oper.getValueType();
    1160             : 
    1161          66 :       if (OperVT.isVector())
    1162         124 :         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 
    1163          62 :                            EltVT, Oper, Idx);
    1164             : 
    1165          66 :       Opers.push_back(Oper);
    1166             :     }
    1167             :  
    1168         138 :     SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers);
    1169             : 
    1170          46 :     OpValues.push_back(ScalarOp.getValue(0));
    1171          46 :     OpChains.push_back(ScalarOp.getValue(1));
    1172             :   }
    1173             : 
    1174          44 :   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
    1175          44 :   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
    1176             : 
    1177          22 :   AddLegalizedOperand(Op.getValue(0), Result);
    1178          22 :   AddLegalizedOperand(Op.getValue(1), NewChain);
    1179             : 
    1180          44 :   return NewChain;
    1181             : }
    1182             : 
    1183          94 : SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
    1184          94 :   EVT VT = Op.getValueType();
    1185          94 :   unsigned NumElems = VT.getVectorNumElements();
    1186          94 :   EVT EltVT = VT.getVectorElementType();
    1187          94 :   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2);
    1188          94 :   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
    1189             :   SDLoc dl(Op);
    1190         188 :   SmallVector<SDValue, 8> Ops(NumElems);
    1191         934 :   for (unsigned i = 0; i < NumElems; ++i) {
    1192         420 :     SDValue LHSElem = DAG.getNode(
    1193             :         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
    1194        1260 :         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
    1195         420 :     SDValue RHSElem = DAG.getNode(
    1196             :         ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
    1197        1260 :         DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
    1198        1260 :     Ops[i] = DAG.getNode(ISD::SETCC, dl,
    1199         420 :                          TLI.getSetCCResultType(DAG.getDataLayout(),
    1200         420 :                                                 *DAG.getContext(), TmpEltVT),
    1201        1680 :                          LHSElem, RHSElem, CC);
    1202        1260 :     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
    1203         420 :                            DAG.getConstant(APInt::getAllOnesValue
    1204         840 :                                            (EltVT.getSizeInBits()), dl, EltVT),
    1205         840 :                            DAG.getConstant(0, dl, EltVT));
    1206             :   }
    1207         282 :   return DAG.getBuildVector(VT, dl, Ops);
    1208             : }
    1209             : 
    1210      363367 : bool SelectionDAG::LegalizeVectors() {
    1211      726734 :   return VectorLegalizer(*this).Run();
    1212             : }

Generated by: LCOV version 1.13