LCOV - code coverage report
Source file: lib/Target/AMDGPU/AMDGPUCallLowering.cpp
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21
Coverage: 97 of 112 lines hit (86.6 %); 5 of 5 functions hit (100.0 %)
Lines not hit: the dispatch-ptr, queue-ptr, dispatch-ID, and flat-scratch-init
preload branches, and the vector-argument assignment path (15 lines total).

//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements the lowering of LLVM calls to machine code calls for
/// GlobalISel.
///
//===----------------------------------------------------------------------===//

#include "AMDGPUCallLowering.h"
#include "AMDGPU.h"
#include "AMDGPUISelLowering.h"
#include "AMDGPUSubtarget.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
  : CallLowering(&TLI) {
}

bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                     const Value *Val,
                                     ArrayRef<unsigned> VRegs) const {
  // FIXME: Add support for non-void returns.
  if (Val)
    return false;

  MIRBuilder.buildInstr(AMDGPU::S_ENDPGM);
  return true;
}
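
// A minimal sketch of the only case handled here (the IR below is
// illustrative, not from this file): a void kernel such as
//
//   define amdgpu_kernel void @empty() {
//     ret void
//   }
//
// reaches lowerReturn with Val == nullptr and ends the program with a single
// S_ENDPGM. Any value-returning function makes lowerReturn report failure,
// which causes GlobalISel to fall back to SelectionDAG.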

unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder,
                                               Type *ParamTy,
                                               uint64_t Offset) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  LLT PtrType = getLLTForType(*PtrTy, DL);
  unsigned DstReg = MRI.createGenericVirtualRegister(PtrType);
  unsigned KernArgSegmentPtr =
    MFI->getPreloadedReg(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  unsigned KernArgSegmentVReg = MRI.getLiveInVirtReg(KernArgSegmentPtr);

  unsigned OffsetReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
  MIRBuilder.buildConstant(OffsetReg, Offset);

  MIRBuilder.buildGEP(DstReg, KernArgSegmentVReg, OffsetReg);

  return DstReg;
}
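
// The generated MIR is plain byte arithmetic on the preloaded kernarg base
// pointer. A rough sketch of the output for Offset == 8 (register names and
// exact printed syntax are illustrative):
//
//   %offset:_(s64) = G_CONSTANT i64 8
//   %dst:_(p4) = G_GEP %kernarg_segment_ptr(p4), %offset(s64)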

void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder,
                                        Type *ParamTy, uint64_t Offset,
                                        unsigned Align,
                                        unsigned DstReg) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const Function &F = MF.getFunction();
  const DataLayout &DL = F.getParent()->getDataLayout();
  PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS);
  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
  unsigned TypeSize = DL.getTypeStoreSize(ParamTy);
  unsigned PtrReg = lowerParameterPtr(MIRBuilder, ParamTy, Offset);

  MachineMemOperand *MMO =
      MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad |
                                       MachineMemOperand::MONonTemporal |
                                       MachineMemOperand::MOInvariant,
                                       TypeSize, Align);

  MIRBuilder.buildLoad(DstReg, PtrReg, *MMO);
}
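
// The kernarg segment is read-only for the lifetime of the dispatch, so the
// load is tagged MOInvariant (safe to hoist or CSE) in addition to MOLoad and
// MONonTemporal. Note the memory operand uses getTypeStoreSize rather than
// getTypeAllocSize: for a <3 x i32> argument, for example, the load reads the
// 12 bytes actually stored, while the argument slot itself is padded out to
// the 16-byte alloc size.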

bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  // AMDGPU_GS and AMDGPU_HS are not supported yet.
  if (F.getCallingConv() == CallingConv::AMDGPU_GS ||
      F.getCallingConv() == CallingConv::AMDGPU_HS)
    return false;

  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget *Subtarget = &MF.getSubtarget<GCNSubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  const SIRegisterInfo *TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());

  // FIXME: How should these inputs interact with inreg / custom SGPR inputs?
  if (Info->hasPrivateSegmentBuffer()) {
    unsigned PrivateSegmentBufferReg = Info->addPrivateSegmentBuffer(*TRI);
    MF.addLiveIn(PrivateSegmentBufferReg, &AMDGPU::SReg_128RegClass);
    CCInfo.AllocateReg(PrivateSegmentBufferReg);
  }

  if (Info->hasDispatchPtr()) {
    unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchPtrReg);
  }

  if (Info->hasQueuePtr()) {
    unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(QueuePtrReg);
  }

  if (Info->hasKernargSegmentPtr()) {
    unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
    const LLT P2 = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
    unsigned VReg = MRI.createGenericVirtualRegister(P2);
    MRI.addLiveIn(InputPtrReg, VReg);
    MIRBuilder.getMBB().addLiveIn(InputPtrReg);
    MIRBuilder.buildCopy(VReg, InputPtrReg);
    CCInfo.AllocateReg(InputPtrReg);
  }

  if (Info->hasDispatchID()) {
    unsigned DispatchIDReg = Info->addDispatchID(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(DispatchIDReg);
  }

  if (Info->hasFlatScratchInit()) {
    unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
    // FIXME: Need to add reg as live-in
    CCInfo.AllocateReg(FlatScratchInitReg);
  }
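
  // Each preloaded implicit argument occupies fixed SGPRs; allocating them in
  // CCInfo up front keeps the explicit-argument assignment below from handing
  // out the same registers. Per the report's hit counts, only the private
  // segment buffer and kernarg segment pointer branches were exercised by the
  // test suite; the dispatch-ptr, queue-ptr, dispatch-ID, and
  // flat-scratch-init branches (still missing their live-in registration, per
  // the FIXMEs above) were not.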

  // The infrastructure for normal calling convention lowering is essentially
  // useless for kernels. We want to avoid any kind of legalization or argument
  // splitting.
  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
    unsigned i = 0;
    const unsigned KernArgBaseAlign = 16;
    const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
    uint64_t ExplicitArgOffset = 0;

    // TODO: Align down to dword alignment and extract bits for extending loads.
    for (auto &Arg : F.args()) {
      Type *ArgTy = Arg.getType();
      unsigned AllocSize = DL.getTypeAllocSize(ArgTy);
      if (AllocSize == 0)
        continue;

      unsigned ABIAlign = DL.getABITypeAlignment(ArgTy);

      uint64_t ArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + BaseOffset;
      ExplicitArgOffset = alignTo(ExplicitArgOffset, ABIAlign) + AllocSize;

      unsigned Align = MinAlign(KernArgBaseAlign, ArgOffset);
      ArgOffset = alignTo(ArgOffset, DL.getABITypeAlignment(ArgTy));
      lowerParameter(MIRBuilder, ArgTy, ArgOffset, Align, VRegs[i]);
      ++i;
    }

    return true;
  }
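
  // Worked example of the offset math above, assuming a target where
  // getExplicitKernelArgOffset returns 0 (the argument list is illustrative):
  //
  //   kernel args (i32 %a, i64 %b):
  //     %a: ABIAlign 4, ArgOffset = alignTo(0, 4) + 0 = 0,
  //         Align = MinAlign(16, 0) = 16, next ExplicitArgOffset = 4
  //     %b: ABIAlign 8, ArgOffset = alignTo(4, 8) + 0 = 8,
  //         Align = MinAlign(16, 8) = 8,  next ExplicitArgOffset = 16
  //
  // MinAlign conservatively derives each load's alignment from the 16-byte
  // kernarg base alignment and the argument's byte offset into the segment.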

  unsigned NumArgs = F.arg_size();
  Function::const_arg_iterator CurOrigArg = F.arg_begin();
  const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>();
  unsigned PSInputNum = 0;
  BitVector Skipped(NumArgs);
  for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
    EVT ValEVT = TLI.getValueType(DL, CurOrigArg->getType());

    // We can only handle simple value types at the moment.
    ISD::ArgFlagsTy Flags;
    ArgInfo OrigArg{VRegs[i], CurOrigArg->getType()};
    setArgFlags(OrigArg, i + 1, DL, F);
    Flags.setOrigAlign(DL.getABITypeAlignment(CurOrigArg->getType()));

    if (F.getCallingConv() == CallingConv::AMDGPU_PS &&
        !OrigArg.Flags.isInReg() && !OrigArg.Flags.isByVal() &&
        PSInputNum <= 15) {
      if (CurOrigArg->use_empty() && !Info->isPSInputAllocated(PSInputNum)) {
        Skipped.set(i);
        ++PSInputNum;
        continue;
      }

      Info->markPSInputAllocated(PSInputNum);
      if (!CurOrigArg->use_empty())
        Info->markPSInputEnabled(PSInputNum);

      ++PSInputNum;
    }

    CCAssignFn *AssignFn = CCAssignFnForCall(F.getCallingConv(),
                                             /*IsVarArg=*/false);

    // Note a CCAssignFn returns true when it fails to make an assignment.
    if (ValEVT.isVector()) {
      EVT ElemVT = ValEVT.getVectorElementType();
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ElemVT.getSimpleVT();
      bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full,
                          OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    } else {
      if (!ValEVT.isSimple())
        return false;
      MVT ValVT = ValEVT.getSimpleVT();
      bool Res =
          AssignFn(i, ValVT, ValVT, CCValAssign::Full, OrigArg.Flags, CCInfo);

      // Fail if we don't know how to handle this type.
      if (Res)
        return false;
    }
  }

  Function::const_arg_iterator Arg = F.arg_begin();

  if (F.getCallingConv() == CallingConv::AMDGPU_VS ||
      F.getCallingConv() == CallingConv::AMDGPU_PS) {
    for (unsigned i = 0, OrigArgIdx = 0;
         OrigArgIdx != NumArgs && i != ArgLocs.size(); ++Arg, ++OrigArgIdx) {
      if (Skipped.test(OrigArgIdx))
        continue;
      CCValAssign &VA = ArgLocs[i++];
      MRI.addLiveIn(VA.getLocReg(), VRegs[OrigArgIdx]);
      MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
      MIRBuilder.buildCopy(VRegs[OrigArgIdx], VA.getLocReg());
    }
    return true;
  }

  return false;
}
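
// A sketch of the AMDGPU_PS path above (illustrative IR, not from this file):
// in
//
//   define amdgpu_ps void @shade(float %unused, float %coord) { ... }
//
// %unused has no uses, so its index is recorded in Skipped and its PSInput
// slot stays disabled, while %coord is marked allocated and enabled and
// receives a copy from the register chosen by the calling convention. Calling
// conventions other than AMDGPU_KERNEL, AMDGPU_VS, and AMDGPU_PS still return
// false here, triggering the SelectionDAG fallback.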

Generated by: LCOV version 1.13