LCOV - code coverage report
Current view: top level - lib/Target/ARM - ARMSelectionDAGInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 92 96 95.8 %
Date: 2018-10-20 13:21:21 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- ARMSelectionDAGInfo.cpp - ARM SelectionDAG Info -------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the ARMSelectionDAGInfo class.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "ARMTargetMachine.h"
      15             : #include "llvm/CodeGen/SelectionDAG.h"
      16             : #include "llvm/IR/DerivedTypes.h"
      17             : using namespace llvm;
      18             : 
      19             : #define DEBUG_TYPE "arm-selectiondag-info"
      20             : 
      21             : // Emit, if possible, a specialized version of the given Libcall. Typically this
      22             : // means selecting the appropriately aligned version, but we also convert memset
      23             : // of 0 into memclr. Returns an empty SDValue if no specialized call is made.
      24         331 : SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
      25             :     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
      26             :     SDValue Size, unsigned Align, RTLIB::Libcall LC) const {
      27             :   const ARMSubtarget &Subtarget =
      28         331 :       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
      29         331 :   const ARMTargetLowering *TLI = Subtarget.getTargetLowering();
      30             : 
      31             :   // Only use a specialized AEABI function if the default version of this
      32             :   // Libcall is an AEABI function, i.e. its name starts with __aeabi.
      33         331 :   if (std::strncmp(TLI->getLibcallName(LC), "__aeabi", 7) != 0)
      34         197 :     return SDValue();
      35             : 
      36             :   // Translate RTLIB::Libcall to AEABILibcall. We only do this in order to be
      37             :   // able to translate memset to memclr and use the value to index the function
      38             :   // name array.
      39             :   enum {
      40             :     AEABI_MEMCPY = 0,
      41             :     AEABI_MEMMOVE,
      42             :     AEABI_MEMSET,
      43             :     AEABI_MEMCLR
      44             :   } AEABILibcall;
      45         134 :   switch (LC) {
      46             :   case RTLIB::MEMCPY:
      47             :     AEABILibcall = AEABI_MEMCPY;
      48             :     break;
      49          48 :   case RTLIB::MEMMOVE:
      50             :     AEABILibcall = AEABI_MEMMOVE;
      51          48 :     break;
      52          57 :   case RTLIB::MEMSET:
      53             :     AEABILibcall = AEABI_MEMSET;
      54             :     if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
      55         112 :       if (ConstantSrc->getZExtValue() == 0)
      56             :         AEABILibcall = AEABI_MEMCLR; // Fill value is a constant zero.
      57             :     break;
      58           0 :   default: // Any other libcall is not specialized by this function.
      59           0 :     return SDValue();
      60             :   }
      61             : 
      62             :   // Choose the most-aligned libcall variant that Align permits.
      63             :   enum {
      64             :     ALIGN1 = 0,
      65             :     ALIGN4,
      66             :     ALIGN8
      67             :   } AlignVariant;
      68         134 :   if ((Align & 7) == 0)
      69             :     AlignVariant = ALIGN8;
      70         122 :   else if ((Align & 3) == 0)
      71             :     AlignVariant = ALIGN4;
      72             :   else
      73             :     AlignVariant = ALIGN1;
      74             : 
      75             :   TargetLowering::ArgListTy Args;
      76             :   TargetLowering::ArgListEntry Entry;
      77         134 :   Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
      78         134 :   Entry.Node = Dst;
      79         134 :   Args.push_back(Entry);
      80         134 :   if (AEABILibcall == AEABI_MEMCLR) { // memclr is passed only (ptr, size).
      81           9 :     Entry.Node = Size;
      82           9 :     Args.push_back(Entry);
      83         125 :   } else if (AEABILibcall == AEABI_MEMSET) {
      84             :     // Adjust parameters for memset: EABI uses the order (ptr, size, value),
      85             :     // whereas the GNU library uses (ptr, value, size).
      86             :     // See RTABI section 4.3.4.
      87          48 :     Entry.Node = Size;
      88          48 :     Args.push_back(Entry);
      89             : 
      90             :     // Truncate or zero-extend the value argument to i32 for the call.
      91          96 :     if (Src.getValueType().bitsGT(MVT::i32))
      92           0 :       Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
      93          96 :     else if (Src.getValueType().bitsLT(MVT::i32))
      94          48 :       Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src);
      95             : 
      96          48 :     Entry.Node = Src;
      97          48 :     Entry.Ty = Type::getInt32Ty(*DAG.getContext());
      98          48 :     Entry.IsSExt = false;
      99          48 :     Args.push_back(Entry);
     100             :   } else { // memcpy and memmove are passed (dst, src, size).
     101          77 :     Entry.Node = Src;
     102          77 :     Args.push_back(Entry);
     103             : 
     104          77 :     Entry.Node = Size;
     105          77 :     Args.push_back(Entry);
     106             :   }
     107             : 
     108         134 :   char const *FunctionNames[4][3] = { // Indexed [AEABILibcall][AlignVariant].
     109             :     { "__aeabi_memcpy",  "__aeabi_memcpy4",  "__aeabi_memcpy8"  },
     110             :     { "__aeabi_memmove", "__aeabi_memmove4", "__aeabi_memmove8" },
     111             :     { "__aeabi_memset",  "__aeabi_memset4",  "__aeabi_memset8"  },
     112             :     { "__aeabi_memclr",  "__aeabi_memclr4",  "__aeabi_memclr8"  }
     113             :   };
     114         134 :   TargetLowering::CallLoweringInfo CLI(DAG);
     115             :   CLI.setDebugLoc(dl)
     116         134 :       .setChain(Chain)
     117             :       .setLibCallee(
     118         134 :           TLI->getLibcallCallingConv(LC), Type::getVoidTy(*DAG.getContext()),
     119             :           DAG.getExternalSymbol(FunctionNames[AEABILibcall][AlignVariant],
     120             :                                 TLI->getPointerTy(DAG.getDataLayout())),
     121         268 :           std::move(Args))
     122             :       .setDiscardResult();
     123         134 :   std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI);
     124             : 
     125         134 :   return CallResult.second;
     126             : }
     127             : // Lower memcpy: inline ARMISD::MEMCPY nodes, an AEABI libcall, or no lowering.
     128         289 : SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(
     129             :     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
     130             :     SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
     131             :     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
     132             :   const ARMSubtarget &Subtarget =
     133         289 :       DAG.getMachineFunction().getSubtarget<ARMSubtarget>();
     134             :   // Do repeated 4-byte loads and stores. To be improved.
     135             :   // This requires 4-byte alignment.
     136         289 :   if ((Align & 3) != 0)
     137         190 :     return SDValue();
     138             :   // This requires the copy size to be a constant, preferably
     139             :   // within a subtarget-specific limit.
     140             :   ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
     141             :   if (!ConstantSize)
     142             :     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
     143           0 :                                   RTLIB::MEMCPY);
     144          99 :   uint64_t SizeVal = ConstantSize->getZExtValue();
     145          99 :   if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold())
     146             :     return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
     147          63 :                                   RTLIB::MEMCPY);
     148             : 
     149          36 :   unsigned BytesLeft = SizeVal & 3;
     150          36 :   unsigned NumMemOps = SizeVal >> 2;
     151             :   unsigned EmittedNumMemOps = 0;
     152          36 :   EVT VT = MVT::i32;
     153             :   unsigned VTSize = 4;
     154             :   unsigned i = 0;
     155             :   // Emit a maximum of 4 loads in Thumb1 since we have fewer registers.
     156          36 :   const unsigned MaxLoadsInLDM = Subtarget.isThumb1Only() ? 4 : 6;
     157          36 :   SDValue TFOps[6];
     158          36 :   SDValue Loads[6];
     159             :   uint64_t SrcOff = 0, DstOff = 0;
     160             : 
     161             :   // FIXME: We should invent a VMEMCPY pseudo-instruction that lowers to
     162             :   // VLDM/VSTM and make this code emit it when appropriate. This would reduce
     163             :   // pressure on the general purpose registers. However this seems harder to map
     164             :   // onto the register allocator's view of the world.
     165             : 
     166             :   // The number of MEMCPY pseudo-instructions to emit. We use up to
     167             :   // MaxLoadsInLDM registers per MEMCPY, which will get lowered into ldm/stm
     168             :   // later on. This is a lower bound on the number of MEMCPY operations we must
     169             :   // emit.
     170          36 :   unsigned NumMEMCPYs = (NumMemOps + MaxLoadsInLDM - 1) / MaxLoadsInLDM;
     171             : 
     172             :   // Code size optimisation: do not inline memcpy if expansion results in
     173             :   // more instructions than the library call.
     174          36 :   if (NumMEMCPYs > 1 && DAG.getMachineFunction().getFunction().optForMinSize()) {
     175           1 :     return SDValue();
     176             :   }
     177             : 
     178          35 :   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other, MVT::Glue);
     179             : 
     180          91 :   for (unsigned I = 0; I != NumMEMCPYs; ++I) {
     181             :     // Evenly distribute registers among MEMCPY operations to reduce register
     182             :     // pressure.
     183          56 :     unsigned NextEmittedNumMemOps = NumMemOps * (I + 1) / NumMEMCPYs;
     184          56 :     unsigned NumRegs = NextEmittedNumMemOps - EmittedNumMemOps;
     185             : 
     186          56 :     Dst = DAG.getNode(ARMISD::MEMCPY, dl, VTs, Chain, Dst, Src,
     187          56 :                       DAG.getConstant(NumRegs, dl, MVT::i32));
     188          56 :     Src = Dst.getValue(1);
     189          56 :     Chain = Dst.getValue(2);
     190             : 
     191          56 :     DstPtrInfo = DstPtrInfo.getWithOffset(NumRegs * VTSize);
     192          56 :     SrcPtrInfo = SrcPtrInfo.getWithOffset(NumRegs * VTSize);
     193             : 
     194             :     EmittedNumMemOps = NextEmittedNumMemOps;
     195             :   }
     196             : 
     197          35 :   if (BytesLeft == 0)
     198          26 :     return Chain;
     199             : 
     200             :   // Issue loads / stores for the trailing (1 - 3) bytes.
     201             :   auto getRemainingValueType = [](unsigned BytesLeft) {
     202          13 :     return (BytesLeft >= 2) ? MVT::i16 : MVT::i8;
     203             :   };
     204             :   auto getRemainingSize = [](unsigned BytesLeft) {
     205          13 :     return (BytesLeft >= 2) ? 2 : 1;
     206             :   };
     207             :   // First pass: issue every tail load before any of the stores.
     208             :   unsigned BytesLeftSave = BytesLeft;
     209             :   i = 0;
     210          22 :   while (BytesLeft) {
     211          13 :     VT = getRemainingValueType(BytesLeft);
     212          13 :     VTSize = getRemainingSize(BytesLeft);
     213          13 :     Loads[i] = DAG.getLoad(VT, dl, Chain,
     214             :                            DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
     215             :                                        DAG.getConstant(SrcOff, dl, MVT::i32)),
     216          13 :                            SrcPtrInfo.getWithOffset(SrcOff));
     217          13 :     TFOps[i] = Loads[i].getValue(1);
     218          13 :     ++i;
     219          13 :     SrcOff += VTSize;
     220          13 :     BytesLeft -= VTSize;
     221             :   }
     222           9 :   Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
     223          18 :                       makeArrayRef(TFOps, i));
     224             :   // Second pass: store the loaded values, chained after all of the loads.
     225             :   i = 0;
     226             :   BytesLeft = BytesLeftSave;
     227          22 :   while (BytesLeft) {
     228             :     VT = getRemainingValueType(BytesLeft);
     229          13 :     VTSize = getRemainingSize(BytesLeft);
     230          13 :     TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
     231             :                             DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
     232             :                                         DAG.getConstant(DstOff, dl, MVT::i32)),
     233          13 :                             DstPtrInfo.getWithOffset(DstOff));
     234          13 :     ++i;
     235          13 :     DstOff += VTSize;
     236          13 :     BytesLeft -= VTSize;
     237             :   }
     238             :   return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
     239          18 :                      makeArrayRef(TFOps, i));
     240             : }
     241             : // Lower memmove only via a specialized AEABI libcall (no inline expansion).
     242         120 : SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemmove(
     243             :     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
     244             :     SDValue Size, unsigned Align, bool isVolatile,
     245             :     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
     246             :   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
     247         120 :                                 RTLIB::MEMMOVE);
     248             : }
     249             : // Lower memset only via a specialized AEABI libcall (no inline expansion).
     250         148 : SDValue ARMSelectionDAGInfo::EmitTargetCodeForMemset(
     251             :     SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
     252             :     SDValue Size, unsigned Align, bool isVolatile,
     253             :     MachinePointerInfo DstPtrInfo) const {
     254             :   return EmitSpecializedLibcall(DAG, dl, Chain, Dst, Src, Size, Align,
     255         148 :                                 RTLIB::MEMSET);
     256             : }

Generated by: LCOV version 1.13