doxygen/NVPTXSubtarget_8cpp_source.html

//===- NVPTXSubtarget.cpp - NVPTX Subtarget Information -------------------===//

//

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

// See https://llvm.org/LICENSE.txt for license information.

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//

//===----------------------------------------------------------------------===//

//

// This file implements the NVPTX specific subclass of TargetSubtarget.

//

//===----------------------------------------------------------------------===//


#include "NVPTXSubtarget.h"

#include "NVPTXSelectionDAGInfo.h"

#include "NVPTXTargetMachine.h"

#include "llvm/Support/ErrorHandling.h"

#include "llvm/Support/FormatVariadic.h"


using namespace llvm;


#define DEBUG_TYPE "nvptx-subtarget"


#define GET_SUBTARGETINFO_ENUM

#define GET_SUBTARGETINFO_TARGET_DESC

#define GET_SUBTARGETINFO_CTOR

#include "NVPTXGenSubtargetInfo.inc"


// Hidden boolean command-line option "nvptx-no-f16-math" (initially false).
// allowFP16Math() in this file consults it: when the option is set, f16 math
// is reported as not allowed even if the subtarget has it.
static cl::opt<bool> NoF16Math(
    "nvptx-no-f16-math",
    cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
    cl::init(false), cl::Hidden);

// Pin the vtable to this file.
// This intentionally empty out-of-line definition gives the class a member
// that is defined in exactly one translation unit.
// NOTE(review): presumably anchor() is declared virtual in NVPTXSubtarget.h,
// which is what makes this definition the vtable's home -- confirm there.
void NVPTXSubtarget::anchor() {}


// Records the requested CPU name, parses the subtarget feature string for it,
// refreshes the cached SM version and applies the default PTX version.
// Returns *this so the call can be used inside a member initializer.
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
                                                                StringRef FS) {
  TargetName = CPU.str();

  // The same name is used both as the CPU and as the tuning CPU.
  const std::string Name = getTargetName();
  ParseSubtargetFeatures(Name, /*TuneCPU=*/Name, FS);

  // Re-map SM version numbers: SmVersion carries the regular SMs, which do
  // have a relative order, while FullSmVersion allows distinguishing sm_90
  // from sm_90a, which would *not* be a subset of sm_91.
  SmVersion = getSmVersion();

  // A PTX version of 0 means none was selected; default to PTX 6.0 (CUDA 9.0).
  if (PTXVersion == 0)
    PTXVersion = 60;

  return *this;
}


// Constructs the subtarget for triple TT, processor CPU and feature string FS.
//
// The initializer list starts PTXVersion at 0 and FullSmVersion at 200, then
// builds TLInfo from the result of initializeSubtargetDependencies(CPU, FS).
// That call parses the features, re-derives SmVersion and replaces a
// still-zero PTXVersion with 60.
// NOTE(review): this relies on TLInfo being declared after PTXVersion,
// FullSmVersion and SmVersion in NVPTXSubtarget.h, so those members are
// initialized before the call runs -- confirm in the header.
NVPTXSubtarget::NVPTXSubtarget(const Triple &TT, const std::string &CPU,
                               const std::string &FS,
                               const NVPTXTargetMachine &TM)
    // CPU is passed for both the CPU and the TuneCPU argument.
    : NVPTXGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), PTXVersion(0),
      FullSmVersion(200), SmVersion(getSmVersion()),
      TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {
  // TSInfo holds the object later handed out by getSelectionDAGInfo().
  TSInfo = std::make_unique<NVPTXSelectionDAGInfo>();
}


// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably it lives here so that NVPTXSelectionDAGInfo
// (included above, held through TSInfo) is a complete type where the
// destructor is generated -- confirm against NVPTXSubtarget.h.
NVPTXSubtarget::~NVPTXSubtarget() = default;


// Returns a non-owning pointer to the SelectionDAG target info held in
// TSInfo; TSInfo is assigned in the constructor.
const SelectionDAGTargetInfo *NVPTXSubtarget::getSelectionDAGInfo() const {
  const SelectionDAGTargetInfo *Info = TSInfo.get();
  return Info;
}


// f16 math is allowed only when the subtarget has it and the
// "nvptx-no-f16-math" option has not been set.
bool NVPTXSubtarget::allowFP16Math() const {
  if (!hasFP16Math())
    return false;

  // The command-line override has the final say.
  return !NoF16Math;
}


bool NVPTXSubtarget::hasNativeBF16Support(int Opcode) const {

  if (!hasBF16Math())

    return false;


  switch (Opcode) {

  // Several BF16 instructions are available on sm_90 only.

  case ISD::FADD:

  case ISD::FMUL:

  case ISD::FSUB:

  case ISD::SELECT:

  case ISD::SELECT_CC:

  case ISD::SETCC:

  case ISD::FEXP2:

  case ISD::FCEIL:

  case ISD::FFLOOR:

  case ISD::FNEARBYINT:

  case ISD::FRINT:

  case ISD::FROUNDEVEN:

  case ISD::FTRUNC:

    return getSmVersion() >= 90 && getPTXVersion() >= 78;

  // Several BF16 instructions are available on sm_80 only.

  case ISD::FMINNUM:

  case ISD::FMAXNUM:

  case ISD::FMAXNUM_IEEE:

  case ISD::FMINNUM_IEEE:

  case ISD::FMAXIMUM:

  case ISD::FMINIMUM:

    return getSmVersion() >= 80 && getPTXVersion() >= 70;

  }

  return true;

}


// Reports a fatal error unless hasClusters() is true. FailureMessage names
// the feature that was requested and is spliced into the diagnostic together
// with the full SM version and the PTX version.
void NVPTXSubtarget::failIfClustersUnsupported(
    std::string const &FailureMessage) const {
  if (!hasClusters()) {
    report_fatal_error(formatv(
        "NVPTX SM architecture \"{}\" and PTX version \"{}\" do not support {}. "
        "Requires SM >= 90 and PTX >= 78.",
        getFullSmVersion(), PTXVersion, FailureMessage));
  }
}

FormatVariadic.h

NVPTXSelectionDAGInfo.h

NoF16Math
static cl::opt< bool > NoF16Math("nvptx-no-f16-math", cl::Hidden, cl::desc("NVPTX Specific: Disable generation of f16 math ops."), cl::init(false))

NVPTXSubtarget.h

NVPTXTargetMachine.h

SmVersion
unsigned SmVersion
Definition: NVVMReflect.cpp:81

NVPTXGenSubtargetInfo

llvm::NVPTXSubtarget
Definition: NVPTXSubtarget.h:30

llvm::NVPTXSubtarget::failIfClustersUnsupported
void failIfClustersUnsupported(std::string const &FailureMessage) const
Definition: NVPTXSubtarget.cpp:105

llvm::NVPTXSubtarget::getTargetName
std::string getTargetName() const
Definition: NVPTXSubtarget.h:116

llvm::NVPTXSubtarget::hasClusters
bool hasClusters() const
Definition: NVPTXSubtarget.h:80

llvm::NVPTXSubtarget::ParseSubtargetFeatures
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)

llvm::NVPTXSubtarget::getPTXVersion
unsigned getPTXVersion() const
Definition: NVPTXSubtarget.h:132

llvm::NVPTXSubtarget::~NVPTXSubtarget
~NVPTXSubtarget() override

llvm::NVPTXSubtarget::hasNativeBF16Support
bool hasNativeBF16Support(int Opcode) const
Definition: NVPTXSubtarget.cpp:73

llvm::NVPTXSubtarget::getFullSmVersion
unsigned int getFullSmVersion() const
Definition: NVPTXSubtarget.h:105

llvm::NVPTXSubtarget::getSmVersion
unsigned int getSmVersion() const
Definition: NVPTXSubtarget.h:106

llvm::NVPTXSubtarget::hasBF16Math
bool hasBF16Math() const
Definition: NVPTXSubtarget.h:84

llvm::NVPTXSubtarget::allowFP16Math
bool allowFP16Math() const
Definition: NVPTXSubtarget.cpp:69

llvm::NVPTXSubtarget::NVPTXSubtarget
NVPTXSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const NVPTXTargetMachine &TM)
This constructor initializes the data members to match that of the specified module.
Definition: NVPTXSubtarget.cpp:54

llvm::NVPTXSubtarget::initializeSubtargetDependencies
NVPTXSubtarget & initializeSubtargetDependencies(StringRef CPU, StringRef FS)
Definition: NVPTXSubtarget.cpp:35

llvm::NVPTXSubtarget::getSelectionDAGInfo
const SelectionDAGTargetInfo * getSelectionDAGInfo() const override
Definition: NVPTXSubtarget.cpp:65

llvm::NVPTXSubtarget::hasFP16Math
bool hasFP16Math() const
Definition: NVPTXSubtarget.h:83

llvm::NVPTXTargetMachine
NVPTXTargetMachine.
Definition: NVPTXTargetMachine.h:25

llvm::SelectionDAGTargetInfo
Targets can subclass this to parameterize the SelectionDAG lowering and instruction selection process...
Definition: SelectionDAGTargetInfo.h:31

llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51

llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44

llvm::cl::opt
Definition: CommandLine.h:1423

ErrorHandling.h

llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:780

llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:1013

llvm::ISD::FMAXNUM_IEEE
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:1045

llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:398

llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:1015

llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397

llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:1051

llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:1018

llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:1032

llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:1014

llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:757

llvm::ISD::FMINNUM_IEEE
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
Definition: ISDOpcodes.h:1044

llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:772

llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:399

llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:1031

llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition: ISDOpcodes.h:1017

llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:1050

llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:1012

llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:1010

llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:137

llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443

llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18

llvm::formatv
auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
Definition: FormatVariadic.h:252

llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167

llvm::cl::desc
Definition: CommandLine.h:409