LCOV - code coverage report
Current view: top level - lib/Target/AMDGPU/MCTargetDesc - AMDGPUTargetStreamer.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 212 216 98.1 %
Date: 2018-07-13 00:08:38 Functions: 28 29 96.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AMDGPUTargetStreamer.cpp - AMDGPU Target Streamer Methods ---------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file provides AMDGPU specific target streamer methods.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AMDGPUTargetStreamer.h"
      15             : #include "AMDGPU.h"
      16             : #include "SIDefines.h"
      17             : #include "Utils/AMDGPUBaseInfo.h"
      18             : #include "Utils/AMDKernelCodeTUtils.h"
      19             : #include "llvm/ADT/Twine.h"
      20             : #include "llvm/BinaryFormat/ELF.h"
      21             : #include "llvm/IR/Constants.h"
      22             : #include "llvm/IR/Function.h"
      23             : #include "llvm/IR/Metadata.h"
      24             : #include "llvm/IR/Module.h"
      25             : #include "llvm/MC/MCContext.h"
      26             : #include "llvm/MC/MCELFStreamer.h"
      27             : #include "llvm/MC/MCObjectFileInfo.h"
      28             : #include "llvm/MC/MCSectionELF.h"
      29             : #include "llvm/Support/FormattedStream.h"
      30             : 
      31             : namespace llvm {
      32             : #include "AMDGPUPTNote.h"
      33             : }
      34             : 
      35             : using namespace llvm;
      36             : using namespace llvm::AMDGPU;
      37             : 
      38             : //===----------------------------------------------------------------------===//
      39             : // AMDGPUTargetStreamer
      40             : //===----------------------------------------------------------------------===//
      41             : 
      // Lookup table pairing each processor name with its ELF::EF_AMDGPU_MACH_*
      // value. Several names (e.g. "gfx600" and "tahiti") share one value. The
      // final entry has a null Name and EF_AMDGPU_MACH_NONE; getMACH() and
      // getMachName() walk the table until they reach that null Name, so it must
      // stay the last entry.
      42             : static const struct {
      43             :   const char *Name;
      44             :   unsigned Mach;
      45             : } MachTable[] = {
      46             :       // Radeon HD 2000/3000 Series (R600).
      47             :       { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
      48             :       { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
      49             :       { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
      50             :       { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
      51             :       // Radeon HD 4000 Series (R700).
      52             :       { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
      53             :       { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
      54             :       { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
      55             :       // Radeon HD 5000 Series (Evergreen).
      56             :       { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
      57             :       { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
      58             :       { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
      59             :       { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
      60             :       { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
      61             :       // Radeon HD 6000 Series (Northern Islands).
      62             :       { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
      63             :       { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
      64             :       { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
      65             :       { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
      66             :       // AMDGCN GFX6.
      67             :       { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
      68             :       { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
      69             :       { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      70             :       { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      71             :       { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      72             :       { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      73             :       { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
      74             :       // AMDGCN GFX7.
      75             :       { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
      76             :       { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
      77             :       { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
      78             :       { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
      79             :       { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
      80             :       { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
      81             :       { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
      82             :       { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
      83             :       { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
      84             :       { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
      85             :       // AMDGCN GFX8.
      86             :       { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
      87             :       { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
      88             :       { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
      89             :       { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
      90             :       { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
      91             :       { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      92             :       { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      93             :       { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      94             :       { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
      95             :       { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
      96             :       { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
      97             :       // AMDGCN GFX9.
      98             :       { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
      99             :       { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
     100             :       { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
     101             :       { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
     102             :       // Not specified processor.
     103             :       { nullptr, ELF::EF_AMDGPU_MACH_NONE }
     104             : };
     105             : 
     // Returns the ELF::EF_AMDGPU_MACH_* value for the processor named GPU.
     // MachTable is scanned linearly from the top; if no name matches, the scan
     // stops on the terminating null-Name entry and EF_AMDGPU_MACH_NONE is
     // returned.
     106         142 : unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
     107             :   auto Entry = MachTable;
     108        8896 :   for (; Entry->Name && GPU != Entry->Name; ++Entry)
     109             :     ;
     110         142 :   return Entry->Mach;
     111             : }
     112             : 
     // Returns the name of the first MachTable entry whose value equals Mach.
     // Because several names share one value, this is the first name listed for
     // it (e.g. "gfx600" rather than "tahiti"). Returns nullptr when no named
     // entry matches, since the scan then ends on the null-Name terminator.
     113           0 : const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
     114             :   auto Entry = MachTable;
     115           0 :   for (; Entry->Name && Mach != Entry->Mach; ++Entry)
     116             :     ;
     117           0 :   return Entry->Name;
     118             : }
     119             : 
     // Parses HSAMetadataString into an HSAMD::Metadata object and forwards it to
     // the EmitHSAMetadata overload that takes the parsed object. Returns false
     // without emitting anything when HSAMD::fromString returns a value that
     // converts to true (presumably an error code -- confirm against its
     // declaration); otherwise returns whatever the overload returns.
     120          22 : bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
     121          22 :   HSAMD::Metadata HSAMetadata;
     122          44 :   if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
     123             :     return false;
     124             : 
     125          16 :   return EmitHSAMetadata(HSAMetadata);
     126             : }
     127             : 
     128             : //===----------------------------------------------------------------------===//
     129             : // AMDGPUTargetAsmStreamer
     130             : //===----------------------------------------------------------------------===//
     131             : 
     // Passes S to the AMDGPUTargetStreamer base and keeps a reference to OS, the
     // stream that the directive-printing methods of this class write to.
     132        2068 : AMDGPUTargetAsmStreamer::AMDGPUTargetAsmStreamer(MCStreamer &S,
     133        2068 :                                                  formatted_raw_ostream &OS)
     134        2068 :     : AMDGPUTargetStreamer(S), OS(OS) { }
     135             : 
     // Writes one line of the form
     //   <tab>.amdgcn_target "<Target>"
     // to OS. Target is printed as given, between double quotes.
     136           1 : void AMDGPUTargetAsmStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {
     137           1 :   OS << "\t.amdgcn_target \"" << Target << "\"\n";
     138           1 : }
     139             : 
     // Writes one line of the form
     //   <tab>.hsa_code_object_version <Major>,<Minor>
     // to OS.
     140         263 : void AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectVersion(
     141             :     uint32_t Major, uint32_t Minor) {
     142         263 :   OS << "\t.hsa_code_object_version " <<
     143         789 :         Twine(Major) << "," << Twine(Minor) << '\n';
     144         263 : }
     145             : 
     // Writes one line of the form
     //   <tab>.hsa_code_object_isa <Major>,<Minor>,<Stepping>,"<VendorName>","<ArchName>"
     // to OS. The two names are printed as given, between double quotes.
     146             : void
     147         308 : AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
     148             :                                                        uint32_t Minor,
     149             :                                                        uint32_t Stepping,
     150             :                                                        StringRef VendorName,
     151             :                                                        StringRef ArchName) {
     152         308 :   OS << "\t.hsa_code_object_isa " <<
     153        1232 :         Twine(Major) << "," << Twine(Minor) << "," << Twine(Stepping) <<
     154         308 :         ",\"" << VendorName << "\",\"" << ArchName << "\"\n";
     155             : 
     156         308 : }
     157             : 
     // Prints Header as text between ".amd_kernel_code_t" and
     // ".end_amd_kernel_code_t" lines. The contents are produced by
     // dumpAmdKernelCode; the "\t\t" argument is presumably the indent applied
     // to each printed field -- confirm in Utils/AMDKernelCodeTUtils.h.
     158             : void
     159        2133 : AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
     160        2133 :   OS << "\t.amd_kernel_code_t\n";
     161        2133 :   dumpAmdKernelCode(&Header, OS, "\t\t");
     162        2133 :   OS << "\t.end_amd_kernel_code_t\n";
     163        2133 : }
     164             : 
     // Prints the directive that assigns an AMDGPU-specific symbol type to
     // SymbolName. ELF::STT_AMDGPU_HSA_KERNEL is the only type handled; it is
     // printed as "<tab>.amdgpu_hsa_kernel <SymbolName>". Any other Type reaches
     // llvm_unreachable.
     165        2135 : void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
     166             :                                                    unsigned Type) {
     167        2135 :   switch (Type) {
     168           0 :     default: llvm_unreachable("Invalid AMDGPU symbol type");
     169        2135 :     case ELF::STT_AMDGPU_HSA_KERNEL:
     170        2135 :       OS << "\t.amdgpu_hsa_kernel " << SymbolName << '\n' ;
     171             :       break;
     172             :   }
     173        2135 : }
     174             : 
     // Writes one line of the form
     //   <tab>.amd_amdgpu_isa "<IsaVersionString>"
     // to OS. Always returns true.
     175        1680 : bool AMDGPUTargetAsmStreamer::EmitISAVersion(StringRef IsaVersionString) {
     176        1680 :   OS << "\t.amd_amdgpu_isa \"" << IsaVersionString << "\"\n";
     177        1680 :   return true;
     178             : }
     179             : 
     // Converts HSAMetadata to a string with HSAMD::toString and prints it on its
     // own line(s) between the HSAMD::AssemblerDirectiveBegin and
     // HSAMD::AssemblerDirectiveEnd lines. Returns false, printing nothing, when
     // toString returns a value that converts to true (presumably an error code
     // -- confirm against its declaration); returns true otherwise.
     180         270 : bool AMDGPUTargetAsmStreamer::EmitHSAMetadata(
     181             :     const AMDGPU::HSAMD::Metadata &HSAMetadata) {
     182             :   std::string HSAMetadataString;
     183         270 :   if (HSAMD::toString(HSAMetadata, HSAMetadataString))
     184             :     return false;
     185             : 
     186         540 :   OS << '\t' << HSAMD::AssemblerDirectiveBegin << '\n';
     187         270 :   OS << HSAMetadataString << '\n';
     188         540 :   OS << '\t' << HSAMD::AssemblerDirectiveEnd << '\n';
     189             :   return true;
     190             : }
     191             : 
     // Converts PALMetadata to a string with PALMD::toString and prints it on a
     // single line directly after PALMD::AssemblerDirective; no separator is
     // inserted between the two here. Returns false, printing nothing, when
     // toString returns a value that converts to true (presumably an error code
     // -- confirm against its declaration); returns true otherwise.
     192          41 : bool AMDGPUTargetAsmStreamer::EmitPALMetadata(
     193             :     const PALMD::Metadata &PALMetadata) {
     194             :   std::string PALMetadataString;
     195          41 :   if (PALMD::toString(PALMetadata, PALMetadataString))
     196             :     return false;
     197             : 
     198          82 :   OS << '\t' << PALMD::AssemblerDirective << PALMetadataString << '\n';
     199             :   return true;
     200             : }
     201             : 
     // Prints the kernel descriptor KD as a ".amdhsa_kernel <KernelName>" ...
     // ".end_amdhsa_kernel" block on OS.
     //
     // Descriptor fields are compared against the descriptor returned by
     // getDefaultAmdhsaKernelDescriptor() and a directive is printed only for
     // fields whose value differs. The exceptions are:
     //   - .amdhsa_next_free_vgpr / .amdhsa_next_free_sgpr are always printed,
     //     from NextVGPR / NextSGPR.
     //   - .amdhsa_reserve_vcc is printed only when ReserveVCC is false.
     //   - .amdhsa_reserve_flat_scratch is printed only when the ISA major
     //     version is >= 7 and ReserveFlatScr is false.
     //   - .amdhsa_reserve_xnack_mask is printed only when the ISA major version
     //     is >= 8 and ReserveXNACK differs from hasXNACK(STI).
     //   - .amdhsa_fp16_overflow is only considered when the ISA major version
     //     is >= 9.
     // The ISA version is derived from STI's feature bits.
     202           5 : void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
     203             :     const MCSubtargetInfo &STI, StringRef KernelName,
     204             :     const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
     205             :     bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
     206           5 :   amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
     207             : 
     208           5 :   IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
     209             : 
     210           5 :   OS << "\t.amdhsa_kernel " << KernelName << '\n';
     211             : 
     // PRINT_IF_NOT_DEFAULT prints "<DIRECTIVE> <value>" for one bit-field of
     // MEMBER_NAME when KERNEL_DESC and DEFAULT_KERNEL_DESC disagree on it.
     // NOTE: it expands to a bare `if` statement (no do/while wrapper), so it
     // must not be followed by an `else` at the use site.
     212             : #define PRINT_IF_NOT_DEFAULT(STREAM, DIRECTIVE, KERNEL_DESC,                   \
     213             :                              DEFAULT_KERNEL_DESC, MEMBER_NAME, FIELD_NAME)     \
     214             :   if (AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) !=                  \
     215             :       AMDHSA_BITS_GET(DEFAULT_KERNEL_DESC.MEMBER_NAME, FIELD_NAME))            \
     216             :     STREAM << "\t\t" << DIRECTIVE << " "                                       \
     217             :            << AMDHSA_BITS_GET(KERNEL_DESC.MEMBER_NAME, FIELD_NAME) << '\n';
     218             : 
     219           5 :   if (KD.group_segment_fixed_size != DefaultKD.group_segment_fixed_size)
     220           1 :     OS << "\t\t.amdhsa_group_segment_fixed_size " << KD.group_segment_fixed_size
     221             :        << '\n';
     222           5 :   if (KD.private_segment_fixed_size != DefaultKD.private_segment_fixed_size)
     223           1 :     OS << "\t\t.amdhsa_private_segment_fixed_size "
     224           1 :        << KD.private_segment_fixed_size << '\n';
     225             : 
     226           5 :   PRINT_IF_NOT_DEFAULT(
     227             :       OS, ".amdhsa_user_sgpr_private_segment_buffer", KD, DefaultKD,
     228             :       kernel_code_properties,
     229             :       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
     230           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_ptr", KD, DefaultKD,
     231             :                        kernel_code_properties,
     232             :                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
     233           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_queue_ptr", KD, DefaultKD,
     234             :                        kernel_code_properties,
     235             :                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
     236           5 :   PRINT_IF_NOT_DEFAULT(
     237             :       OS, ".amdhsa_user_sgpr_kernarg_segment_ptr", KD, DefaultKD,
     238             :       kernel_code_properties,
     239             :       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
     240           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_user_sgpr_dispatch_id", KD, DefaultKD,
     241             :                        kernel_code_properties,
     242             :                        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
     243           5 :   PRINT_IF_NOT_DEFAULT(
     244             :       OS, ".amdhsa_user_sgpr_flat_scratch_init", KD, DefaultKD,
     245             :       kernel_code_properties,
     246             :       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
     247           5 :   PRINT_IF_NOT_DEFAULT(
     248             :       OS, ".amdhsa_user_sgpr_private_segment_size", KD, DefaultKD,
     249             :       kernel_code_properties,
     250             :       amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
     251           5 :   PRINT_IF_NOT_DEFAULT(
     252             :       OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD, DefaultKD,
     253             :       compute_pgm_rsrc2,
     254             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET);
     255           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_x", KD, DefaultKD,
     256             :                        compute_pgm_rsrc2,
     257             :                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X);
     258           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_y", KD, DefaultKD,
     259             :                        compute_pgm_rsrc2,
     260             :                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y);
     261           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_id_z", KD, DefaultKD,
     262             :                        compute_pgm_rsrc2,
     263             :                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z);
     264           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_sgpr_workgroup_info", KD, DefaultKD,
     265             :                        compute_pgm_rsrc2,
     266             :                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO);
     267           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_system_vgpr_workitem_id", KD, DefaultKD,
     268             :                        compute_pgm_rsrc2,
     269             :                        amdhsa::COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID);
     270             : 
     271             :   // These directives are required.
     272           5 :   OS << "\t\t.amdhsa_next_free_vgpr " << NextVGPR << '\n';
     273           5 :   OS << "\t\t.amdhsa_next_free_sgpr " << NextSGPR << '\n';
     274             : 
     275           5 :   if (!ReserveVCC)
     276           4 :     OS << "\t\t.amdhsa_reserve_vcc " << ReserveVCC << '\n';
     277           5 :   if (IVersion.Major >= 7 && !ReserveFlatScr)
     278           3 :     OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
     279           5 :   if (IVersion.Major >= 8 && ReserveXNACK != hasXNACK(STI))
     280           2 :     OS << "\t\t.amdhsa_reserve_xnack_mask " << ReserveXNACK << '\n';
     281             : 
     282           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_32", KD, DefaultKD,
     283             :                        compute_pgm_rsrc1,
     284             :                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32);
     285           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_round_mode_16_64", KD, DefaultKD,
     286             :                        compute_pgm_rsrc1,
     287             :                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64);
     288           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_32", KD, DefaultKD,
     289             :                        compute_pgm_rsrc1,
     290             :                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32);
     291           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_float_denorm_mode_16_64", KD, DefaultKD,
     292             :                        compute_pgm_rsrc1,
     293             :                        amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64);
     294           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_dx10_clamp", KD, DefaultKD,
     295             :                        compute_pgm_rsrc1,
     296             :                        amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP);
     297           5 :   PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_ieee_mode", KD, DefaultKD,
     298             :                        compute_pgm_rsrc1,
     299             :                        amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE);
     300           5 :   if (IVersion.Major >= 9) // guards only the single `if` the macro expands to
     301           3 :     PRINT_IF_NOT_DEFAULT(OS, ".amdhsa_fp16_overflow", KD, DefaultKD,
     302             :                          compute_pgm_rsrc1,
     303             :                          amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
     304           5 :   PRINT_IF_NOT_DEFAULT(
     305             :       OS, ".amdhsa_exception_fp_ieee_invalid_op", KD, DefaultKD,
     306             :       compute_pgm_rsrc2,
     307             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION);
     308           5 :   PRINT_IF_NOT_DEFAULT(
     309             :       OS, ".amdhsa_exception_fp_denorm_src", KD, DefaultKD, compute_pgm_rsrc2,
     310             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE);
     311           5 :   PRINT_IF_NOT_DEFAULT(
     312             :       OS, ".amdhsa_exception_fp_ieee_div_zero", KD, DefaultKD,
     313             :       compute_pgm_rsrc2,
     314             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO);
     315           5 :   PRINT_IF_NOT_DEFAULT(
     316             :       OS, ".amdhsa_exception_fp_ieee_overflow", KD, DefaultKD,
     317             :       compute_pgm_rsrc2,
     318             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW);
     319           5 :   PRINT_IF_NOT_DEFAULT(
     320             :       OS, ".amdhsa_exception_fp_ieee_underflow", KD, DefaultKD,
     321             :       compute_pgm_rsrc2,
     322             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW);
     323           5 :   PRINT_IF_NOT_DEFAULT(
     324             :       OS, ".amdhsa_exception_fp_ieee_inexact", KD, DefaultKD, compute_pgm_rsrc2,
     325             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT);
     326           5 :   PRINT_IF_NOT_DEFAULT(
     327             :       OS, ".amdhsa_exception_int_div_zero", KD, DefaultKD, compute_pgm_rsrc2,
     328             :       amdhsa::COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO);
     329             : #undef PRINT_IF_NOT_DEFAULT
     330             : 
     331           5 :   OS << "\t.end_amdhsa_kernel\n";
     332           5 : }
     333             : 
     334             : //===----------------------------------------------------------------------===//
     335             : // AMDGPUTargetELFStreamer
     336             : //===----------------------------------------------------------------------===//
     337             : 
     // Stores S and updates the ELF header e_flags held by the assembler:
     // the EF_AMDGPU_MACH bits are cleared and replaced with getMACH() of STI's
     // CPU name, and EF_AMDGPU_XNACK is cleared and then set again only if
     // AMDGPU::hasXNACK(STI) is true. All other flag bits are left as they were.
     338         142 : AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(
     339         142 :     MCStreamer &S, const MCSubtargetInfo &STI)
     340         284 :     : AMDGPUTargetStreamer(S), Streamer(S) {
     341         142 :   MCAssembler &MCA = getStreamer().getAssembler();
     342         142 :   unsigned EFlags = MCA.getELFHeaderEFlags();
     343             : 
     344         142 :   EFlags &= ~ELF::EF_AMDGPU_MACH;
     345         142 :   EFlags |= getMACH(STI.getCPU());
     346             : 
     347         142 :   EFlags &= ~ELF::EF_AMDGPU_XNACK;
     348         142 :   if (AMDGPU::hasXNACK(STI))
     349          21 :     EFlags |= ELF::EF_AMDGPU_XNACK;
     350             : 
     351             :   MCA.setELFHeaderEFlags(EFlags);
     352         142 : }
     353             : 
     // Returns the stored streamer viewed as an MCELFStreamer. The static_cast is
     // unchecked, so this relies on the streamer passed to the constructor really
     // being an MCELFStreamer.
     354         959 : MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
     355         959 :   return static_cast<MCELFStreamer &>(Streamer);
     356             : }
     357             : 
     // Emits one note record into the ElfNote::SectionName section (SHT_NOTE,
     // SHF_ALLOC).
     //
     // DescSZ   - expression for the size of the desc field, written as the
     //            4-byte descsz word.
     // NoteType - value written as the 4-byte type word.
     // EmitDesc - callback that writes the desc bytes to the streamer.
     //
     // The name and the desc are each followed by zero padding to a 4-byte
     // boundary. The write is wrapped in PushSection()/PopSection(), so the
     // section that was current on entry is current again on return. NameSZ is
     // sizeof(ElfNote::NoteName); if NoteName is a char array this includes its
     // terminating NUL (declared in AMDGPUPTNote.h, not visible here).
     358         254 : void AMDGPUTargetELFStreamer::EmitAMDGPUNote(
     359             :     const MCExpr *DescSZ, unsigned NoteType,
     360             :     function_ref<void(MCELFStreamer &)> EmitDesc) {
     361         254 :   auto &S = getStreamer();
     362         254 :   auto &Context = S.getContext();
     363             : 
     364             :   auto NameSZ = sizeof(ElfNote::NoteName);
     365             : 
     366         254 :   S.PushSection();
     367         254 :   S.SwitchSection(Context.getELFSection(
     368         254 :     ElfNote::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
     369         254 :   S.EmitIntValue(NameSZ, 4);                                  // namesz
     370         254 :   S.EmitValue(DescSZ, 4);                                     // descsz
     371         254 :   S.EmitIntValue(NoteType, 4);                                // type
     372         508 :   S.EmitBytes(StringRef(ElfNote::NoteName, NameSZ));          // name
     373         254 :   S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
     374             :   EmitDesc(S);                                                // desc
     375         254 :   S.EmitValueToAlignment(4, 0, 1, 0);                         // padding 0
     376         254 :   S.PopSection();
     377         254 : }
     378             : 
     // No-op: the body is empty, so the ELF streamer emits nothing for the
     // .amdgcn_target directive and Target is ignored.
     379           2 : void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget(StringRef Target) {}
     380             : 
     // Emits a note of type ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION whose
     // 8-byte desc is Major followed by Minor, each written as a 4-byte integer.
     381          50 : void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
     382             :     uint32_t Major, uint32_t Minor) {
     383             : 
     384         100 :   EmitAMDGPUNote(
     385         100 :     MCConstantExpr::create(8, getContext()),
     386             :     ElfNote::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
     387          50 :     [&](MCELFStreamer &OS){
     388          50 :       OS.EmitIntValue(Major, 4);
     389          50 :       OS.EmitIntValue(Minor, 4);
     390          50 :     }
     391             :   );
     392          50 : }
     393             : 
     // Emits a note of type ElfNote::NT_AMDGPU_HSA_ISA. The desc is laid out as:
     //   2 bytes  vendor name size (including the NUL terminator)
     //   2 bytes  arch name size (including the NUL terminator)
     //   4 bytes  Major
     //   4 bytes  Minor
     //   4 bytes  Stepping
     //   VendorName bytes followed by a NUL
     //   ArchName bytes followed by a NUL
     // DescSZ is the sum of those field sizes.
     // NOTE(review): the two name sizes are held in uint16_t, so a name whose
     // length + 1 does not fit in 16 bits would be silently truncated in both
     // the size fields and DescSZ while the full name is still emitted.
     394             : void
     395          57 : AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
     396             :                                                        uint32_t Minor,
     397             :                                                        uint32_t Stepping,
     398             :                                                        StringRef VendorName,
     399             :                                                        StringRef ArchName) {
     400          57 :   uint16_t VendorNameSize = VendorName.size() + 1;
     401          57 :   uint16_t ArchNameSize = ArchName.size() + 1;
     402             : 
     403          57 :   unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
     404             :     sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
     405          57 :     VendorNameSize + ArchNameSize;
     406             : 
     407         114 :   EmitAMDGPUNote(
     408         114 :     MCConstantExpr::create(DescSZ, getContext()),
     409             :     ElfNote::NT_AMDGPU_HSA_ISA,
     410          57 :     [&](MCELFStreamer &OS) {
     411          57 :       OS.EmitIntValue(VendorNameSize, 2);
     412          57 :       OS.EmitIntValue(ArchNameSize, 2);
     413          57 :       OS.EmitIntValue(Major, 4);
     414          57 :       OS.EmitIntValue(Minor, 4);
     415          57 :       OS.EmitIntValue(Stepping, 4);
     416          57 :       OS.EmitBytes(VendorName);
     417          57 :       OS.EmitIntValue(0, 1); // NULL terminate VendorName
     418          57 :       OS.EmitBytes(ArchName);
     419          57 :       OS.EmitIntValue(0, 1); // NULL terminate ArchName
     420          57 :     }
     421             :   );
     422          57 : }
     423             : 
     // Emits the in-memory bytes of Header (sizeof(Header) of them) verbatim.
     // PushSection()/PopSection() bracket the write, but no section switch
     // happens between them in this function, so the bytes go to whichever
     // section is current when it is called.
     424             : void
     425         276 : AMDGPUTargetELFStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
     426             : 
     427         276 :   MCStreamer &OS = getStreamer();
     428         276 :   OS.PushSection();
     429         552 :   OS.EmitBytes(StringRef((const char*)&Header, sizeof(Header)));
     430         276 :   OS.PopSection();
     431         276 : }
     432             : 
     // Looks up the symbol named SymbolName in the streamer's context, creating
     // it if it does not exist yet, and sets its ELF symbol type to Type. Unlike
     // the assembly streamer's version, Type is not validated here.
     433         281 : void AMDGPUTargetELFStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
     434             :                                                    unsigned Type) {
     435         281 :   MCSymbolELF *Symbol = cast<MCSymbolELF>(
     436         281 :       getStreamer().getContext().getOrCreateSymbol(SymbolName));
     437         281 :   Symbol->setType(Type);
     438         281 : }
     439             : 
     // Emits a note of type ELF::NT_AMD_AMDGPU_ISA whose desc is the bytes of
     // IsaVersionString (no terminator is added here). The desc size is not
     // computed up front: it is expressed as the difference between two temporary
     // labels placed around the desc bytes. Always returns true.
     440          98 : bool AMDGPUTargetELFStreamer::EmitISAVersion(StringRef IsaVersionString) {
     441             :   // Create two labels to mark the beginning and end of the desc field
     442             :   // and a MCExpr to calculate the size of the desc field.
     443          98 :   auto &Context = getContext();
     444          98 :   auto *DescBegin = Context.createTempSymbol();
     445          98 :   auto *DescEnd = Context.createTempSymbol();
     446             :   auto *DescSZ = MCBinaryExpr::createSub(
     447          98 :     MCSymbolRefExpr::create(DescEnd, Context),
     448          98 :     MCSymbolRefExpr::create(DescBegin, Context), Context);
     449             : 
     450         196 :   EmitAMDGPUNote(
     451             :     DescSZ,
     452             :     ELF::NT_AMD_AMDGPU_ISA,
     453          98 :     [&](MCELFStreamer &OS) {
     454          98 :       OS.EmitLabel(DescBegin);
     455          98 :       OS.EmitBytes(IsaVersionString);
     456          98 :       OS.EmitLabel(DescEnd);
     457          98 :     }
     458             :   );
     459          98 :   return true;
     460             : }
     461             : 
     // Converts HSAMetadata to a string with HSAMD::toString and emits it as the
     // desc of a note of type ELF::NT_AMD_AMDGPU_HSA_METADATA. Returns false,
     // emitting nothing, when toString returns a value that converts to true
     // (presumably an error code -- confirm against its declaration); returns
     // true otherwise. The desc size is expressed as the difference between two
     // temporary labels placed around the desc bytes.
     462          44 : bool AMDGPUTargetELFStreamer::EmitHSAMetadata(
     463             :     const AMDGPU::HSAMD::Metadata &HSAMetadata) {
     464             :   std::string HSAMetadataString;
     465          44 :   if (HSAMD::toString(HSAMetadata, HSAMetadataString))
     466             :     return false;
     467             : 
     468             :   // Create two labels to mark the beginning and end of the desc field
     469             :   // and a MCExpr to calculate the size of the desc field.
     470          44 :   auto &Context = getContext();
     471          44 :   auto *DescBegin = Context.createTempSymbol();
     472          44 :   auto *DescEnd = Context.createTempSymbol();
     473             :   auto *DescSZ = MCBinaryExpr::createSub(
     474          44 :     MCSymbolRefExpr::create(DescEnd, Context),
     475          44 :     MCSymbolRefExpr::create(DescBegin, Context), Context);
     476             : 
     477          88 :   EmitAMDGPUNote(
     478             :     DescSZ,
     479             :     ELF::NT_AMD_AMDGPU_HSA_METADATA,
     480          44 :     [&](MCELFStreamer &OS) {
     481          44 :       OS.EmitLabel(DescBegin);
     482          88 :       OS.EmitBytes(HSAMetadataString);
     483          44 :       OS.EmitLabel(DescEnd);
     484          44 :     }
     485             :   );
     486          44 :   return true;
     487             : }
     488             : 
     // Emits a note of type ELF::NT_AMD_AMDGPU_PAL_METADATA. Every element of
     // PALMetadata is written as a sizeof(uint32_t)-byte integer, in iteration
     // order, and the desc size is PALMetadata.size() * sizeof(uint32_t).
     // Always returns true.
     489           5 : bool AMDGPUTargetELFStreamer::EmitPALMetadata(
     490             :     const PALMD::Metadata &PALMetadata) {
     491          10 :   EmitAMDGPUNote(
     492          15 :     MCConstantExpr::create(PALMetadata.size() * sizeof(uint32_t), getContext()),
     493             :     ELF::NT_AMD_AMDGPU_PAL_METADATA,
     494             :     [&](MCELFStreamer &OS){
     495           5 :       for (auto I : PALMetadata)
     496          44 :         OS.EmitIntValue(I, sizeof(uint32_t));
     497             :     }
     498             :   );
     499           5 :   return true;
     500             : }
     501             : 
     // Emits KernelDescriptor as binary data at the current position, labelled
     // with the symbol "<KernelName>.kd".
     //
     // The descriptor symbol is given global binding, type STT_OBJECT and size
     // sizeof(KernelDescriptor); the symbol named KernelName is given local
     // binding. The descriptor is written in three pieces:
     //   1. the bytes before kernel_code_entry_byte_offset, copied verbatim;
     //   2. kernel_code_entry_byte_offset itself, emitted as the expression
     //      (kernel code symbol) - (kernel descriptor symbol) rather than the
     //      value stored in KernelDescriptor;
     //   3. the bytes after that field, copied verbatim.
     //
     // STI, NextVGPR, NextSGPR, ReserveVCC, ReserveFlatScr and ReserveXNACK are
     // not referenced in this body. The local reference named Streamer hides the
     // class's Streamer member inside this function.
     502           6 : void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
     503             :     const MCSubtargetInfo &STI, StringRef KernelName,
     504             :     const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
     505             :     uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
     506             :     bool ReserveXNACK) {
     507           6 :   auto &Streamer = getStreamer();
     508           6 :   auto &Context = Streamer.getContext();
     509             : 
     510           6 :   MCSymbolELF *KernelDescriptorSymbol = cast<MCSymbolELF>(
     511           6 :       Context.getOrCreateSymbol(Twine(KernelName) + Twine(".kd")));
     512           6 :   KernelDescriptorSymbol->setBinding(ELF::STB_GLOBAL);
     513           6 :   KernelDescriptorSymbol->setType(ELF::STT_OBJECT);
     514             :   KernelDescriptorSymbol->setSize(
     515           6 :       MCConstantExpr::create(sizeof(KernelDescriptor), Context));
     516             : 
     517           6 :   MCSymbolELF *KernelCodeSymbol = cast<MCSymbolELF>(
     518           6 :       Context.getOrCreateSymbol(Twine(KernelName)));
     519           6 :   KernelCodeSymbol->setBinding(ELF::STB_LOCAL);
     520             : 
     521           6 :   Streamer.EmitLabel(KernelDescriptorSymbol);
     522          12 :   Streamer.EmitBytes(StringRef(
     523             :       (const char*)&(KernelDescriptor),
     524           6 :       offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset)));
     525             :   // FIXME: Remove the use of VK_AMDGPU_REL64 in the expression below. The
     526             :   // expression being created is:
     527             :   //   (start of kernel code) - (start of kernel descriptor)
     528             :   // It implies R_AMDGPU_REL64, but ends up being R_AMDGPU_ABS64.
     529          18 :   Streamer.EmitValue(MCBinaryExpr::createSub(
     530          12 :       MCSymbolRefExpr::create(
     531           6 :           KernelCodeSymbol, MCSymbolRefExpr::VK_AMDGPU_REL64, Context),
     532          12 :       MCSymbolRefExpr::create(
     533           6 :           KernelDescriptorSymbol, MCSymbolRefExpr::VK_None, Context),
     534             :       Context),
     535           6 :       sizeof(KernelDescriptor.kernel_code_entry_byte_offset));
     536           6 :   Streamer.EmitBytes(StringRef(
     537             :       (const char*)&(KernelDescriptor) +
     538             :           offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) +
     539             :           sizeof(KernelDescriptor.kernel_code_entry_byte_offset),
     540             :       sizeof(KernelDescriptor) -
     541             :           offsetof(amdhsa::kernel_descriptor_t, kernel_code_entry_byte_offset) -
     542           6 :           sizeof(KernelDescriptor.kernel_code_entry_byte_offset)));
     543           6 : }

Generated by: LCOV version 1.13