LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU - AMDGPUCallLowering.cpp
Test: llvm-toolchain.info    Date: 2018-07-13 00:08:38

                Hit    Total    Coverage
Lines:          100      115      87.0 %
Functions:        5        5     100.0 %

//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) {
}

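// Lower an IR return into MIR. Only void returns are handled for now; the
// program simply terminates with S_ENDPGM.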
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val, unsigned VReg) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
  return true;
}

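// Build a pointer to the kernel argument at byte Offset within the kernarg
// segment: a G_GEP of the constant offset off the preloaded kernarg segment
// pointer.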
unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}

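// Load one kernel argument of type ParamTy from byte Offset in the kernarg
// segment into DstReg. The load is marked invariant and non-temporal: kernel
// arguments never change during a dispatch. Illustratively, a 4-byte argument
// at offset 0 becomes generic MIR along these lines (register names invented
// for exposition, not actual compiler output):
//   %off:_(s64) = G_CONSTANT i64 0
//   %ptr:_(p2)  = G_GEP %kernarg_segment_ptr(p2), %off
//   %val:_(s32) = G_LOAD %ptr :: (invariant load 4)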
void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}

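// Lower the incoming formal arguments of F into VRegs. Kernels load their
// arguments from the kernarg segment; shader calling conventions receive
// theirs in registers chosen by the calling-convention assignment below.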
bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const SISubtarget *Subtarget = &MF.getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

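  // Reserve the special SGPR inputs that the hardware or runtime preloads, so
  // the calling-convention assignment below cannot hand those registers to
  // user arguments.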
  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
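  // Worked example (illustrative, assuming BaseOffset == 0 below): for kernel
  // arguments (i32, <2 x i32>), the i32 lands at offset 0 with the full
  // 16-byte kernarg base alignment, and the <2 x i32> at the next 8-byte
  // aligned offset, 8, with alignment MinAlign(16, 8) == 8.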
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }

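  // Shader path: run the normal calling-convention assignment over the
  // arguments, then copy each assigned register into its virtual register.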
  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

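    // Track which pixel-shader inputs are actually read so that unused
    // interpolants can be skipped entirely rather than allocated.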
    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    if (ValEVT.isVector()) {
      EVT ElemVT = ValEVT.getVectorElementType();
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ElemVT.getSimpleVT();
      // CCAssignFn returns true if it failed to allocate the value.
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

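  // Bind each register assigned above to the corresponding argument's virtual
  // register, skipping PS inputs that were marked dead.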
  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }
    return true;
  }

  return false;
}

Generated by: LCOV version 1.13