15 #include "clang/Config/config.h" 21 #include "llvm/Option/ArgList.h" 22 #include "llvm/Support/FileSystem.h" 23 #include "llvm/Support/Path.h" 24 #include "llvm/Support/Process.h" 25 #include "llvm/Support/Program.h" 26 #include <system_error> 31 using namespace clang;
37 if (!V.startswith(
"CUDA Version "))
39 V = V.substr(strlen(
"CUDA Version "));
40 int Major = -1, Minor = -1;
41 auto First = V.split(
'.');
42 auto Second =
First.second.split(
'.');
43 if (
First.first.getAsInteger(10, Major) ||
44 Second.first.getAsInteger(10, Minor))
47 if (Major == 7 && Minor == 0) {
52 if (Major == 7 && Minor == 5)
54 if (Major == 8 && Minor == 0)
56 if (Major == 9 && Minor == 0)
58 if (Major == 9 && Minor == 1)
60 if (Major == 9 && Minor == 2)
66 const Driver &D,
const llvm::Triple &HostTriple,
67 const llvm::opt::ArgList &Args)
73 Candidate(std::string Path,
bool StrictChecking =
false)
74 : Path(Path), StrictChecking(StrictChecking) {}
79 std::initializer_list<const char *> Versions = {
"8.0",
"7.5",
"7.0"};
81 if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
82 Candidates.emplace_back(
83 Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
84 }
else if (HostTriple.isOSWindows()) {
85 for (
const char *Ver : Versions)
86 Candidates.emplace_back(
87 D.
SysRoot +
"/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
90 if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
99 if (llvm::ErrorOr<std::string> ptxas =
100 llvm::sys::findProgramByName(
"ptxas")) {
104 StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
105 if (llvm::sys::path::filename(ptxasDir) ==
"bin")
106 Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
111 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda");
112 for (
const char *Ver : Versions)
113 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda-" + Ver);
118 Candidates.emplace_back(D.
SysRoot +
"/usr/lib/cuda");
121 bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
123 for (
const auto &Candidate : Candidates) {
124 InstallPath = Candidate.Path;
125 if (InstallPath.empty() || !D.
getVFS().
exists(InstallPath))
128 BinPath = InstallPath +
"/bin";
129 IncludePath = InstallPath +
"/include";
130 LibDevicePath = InstallPath +
"/nvvm/libdevice";
133 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
135 bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
136 if (CheckLibDevice && !FS.exists(LibDevicePath))
145 if (HostTriple.isArch64Bit() && FS.exists(InstallPath +
"/lib64"))
146 LibPath = InstallPath +
"/lib64";
147 else if (FS.exists(InstallPath +
"/lib"))
148 LibPath = InstallPath +
"/lib";
152 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
164 std::string FilePath = LibDevicePath +
"/libdevice.10.bc";
165 if (FS.exists(FilePath)) {
166 for (
const char *GpuArchName :
167 {
"sm_30",
"sm_32",
"sm_35",
"sm_37",
"sm_50",
"sm_52",
"sm_53",
168 "sm_60",
"sm_61",
"sm_62",
"sm_70",
"sm_72"}) {
172 LibDeviceMap[GpuArchName] = FilePath;
177 for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
178 !EC && LI != LE; LI = LI.increment(EC)) {
179 StringRef FilePath = LI->path();
180 StringRef FileName = llvm::sys::path::filename(FilePath);
183 const StringRef LibDeviceName =
"libdevice.";
184 if (!(FileName.startswith(LibDeviceName) && FileName.endswith(
".bc")))
186 StringRef GpuArch = FileName.slice(
187 LibDeviceName.size(), FileName.find(
'.', LibDeviceName.size()));
188 LibDeviceMap[GpuArch] = FilePath.str();
192 if (GpuArch ==
"compute_20") {
193 LibDeviceMap[
"sm_20"] = FilePath;
194 LibDeviceMap[
"sm_21"] = FilePath;
195 LibDeviceMap[
"sm_32"] = FilePath;
196 }
else if (GpuArch ==
"compute_30") {
197 LibDeviceMap[
"sm_30"] = FilePath;
199 LibDeviceMap[
"sm_50"] = FilePath;
200 LibDeviceMap[
"sm_52"] = FilePath;
201 LibDeviceMap[
"sm_53"] = FilePath;
203 LibDeviceMap[
"sm_60"] = FilePath;
204 LibDeviceMap[
"sm_61"] = FilePath;
205 LibDeviceMap[
"sm_62"] = FilePath;
206 }
else if (GpuArch ==
"compute_35") {
207 LibDeviceMap[
"sm_35"] = FilePath;
208 LibDeviceMap[
"sm_37"] = FilePath;
209 }
else if (GpuArch ==
"compute_50") {
211 LibDeviceMap[
"sm_50"] = FilePath;
212 LibDeviceMap[
"sm_52"] = FilePath;
213 LibDeviceMap[
"sm_53"] = FilePath;
221 if (LibDeviceMap.empty() && !NoCudaLib)
230 const ArgList &DriverArgs, ArgStringList &CC1Args)
const {
231 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
235 llvm::sys::path::append(P,
"include");
236 llvm::sys::path::append(P,
"cuda_wrappers");
237 CC1Args.push_back(
"-internal-isystem");
238 CC1Args.push_back(DriverArgs.MakeArgString(P));
241 if (DriverArgs.hasArg(options::OPT_nocudainc))
245 D.Diag(diag::err_drv_no_cuda_installation);
249 CC1Args.push_back(
"-internal-isystem");
251 CC1Args.push_back(
"-include");
252 CC1Args.push_back(
"__clang_cuda_runtime_wrapper.h");
258 ArchsWithBadVersion.count(Arch) > 0)
263 if (Version < MinVersion || Version > MaxVersion) {
264 ArchsWithBadVersion.insert(Arch);
265 D.Diag(diag::err_drv_cuda_version_unsupported)
274 OS <<
"Found CUDA installation: " << InstallPath <<
", version " 288 Arg *A = Args.getLastArg(options::OPT_O_Group);
289 if (Args.hasFlag(options::OPT_cuda_noopt_device_debug,
290 options::OPT_no_cuda_noopt_device_debug,
291 !A || A->getOption().matches(options::OPT_O0))) {
292 if (
const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
293 const Option &Opt = A->getOption();
294 if (Opt.matches(options::OPT_gN_Group)) {
295 if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
297 if (Opt.matches(options::OPT_gline_tables_only) ||
298 Opt.matches(options::OPT_ggdb1))
299 return LineTableOnly;
311 const char *LinkingOutput)
const {
314 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
316 StringRef GPUArchName;
321 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
322 assert(!GPUArchName.empty() &&
"Must have an architecture passed in.");
329 "Device action expected to have an architecture.");
332 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
333 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
336 ArgStringList CmdArgs;
337 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-m64" :
"-m32");
339 if (DIKind == FullDebug) {
342 CmdArgs.push_back(
"-g");
343 CmdArgs.push_back(
"--dont-merge-basicblocks");
344 CmdArgs.push_back(
"--return-at-end");
345 }
else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
353 StringRef OOpt =
"3";
354 if (A->getOption().matches(options::OPT_O4) ||
355 A->getOption().matches(options::OPT_Ofast))
357 else if (A->getOption().matches(options::OPT_O0))
359 else if (A->getOption().matches(options::OPT_O)) {
361 OOpt = llvm::StringSwitch<const char *>(A->getValue())
369 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"-O") + OOpt));
373 CmdArgs.push_back(
"-O0");
375 if (DIKind == LineTableOnly)
376 CmdArgs.push_back(
"-lineinfo");
379 if (Args.hasArg(options::OPT_v))
380 CmdArgs.push_back(
"-v");
382 CmdArgs.push_back(
"--gpu-name");
384 CmdArgs.push_back(
"--output-file");
385 CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
386 for (
const auto& II : Inputs)
387 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
389 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
390 CmdArgs.push_back(Args.MakeArgString(A));
392 bool Relocatable =
false;
395 Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
396 options::OPT_fnoopenmp_relocatable_target,
399 Relocatable = Args.hasFlag(options::OPT_fcuda_rdc,
400 options::OPT_fno_cuda_rdc,
false);
403 CmdArgs.push_back(
"-c");
406 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
407 Exec = A->getValue();
409 Exec = Args.MakeArgString(TC.GetProgramPath(
"ptxas"));
410 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
414 bool includePTX =
true;
415 for (Arg *A : Args) {
416 if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
417 A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
420 const StringRef ArchStr = A->getValue();
421 if (ArchStr ==
"all" || ArchStr == gpu_arch) {
422 includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
436 const char *LinkingOutput)
const {
439 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
441 ArgStringList CmdArgs;
442 CmdArgs.push_back(
"--cuda");
443 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-64" :
"-32");
444 CmdArgs.push_back(Args.MakeArgString(
"--create"));
445 CmdArgs.push_back(Args.MakeArgString(Output.
getFilename()));
447 CmdArgs.push_back(
"-g");
449 for (
const auto& II : Inputs) {
450 auto *A = II.getAction();
451 assert(A->getInputs().size() == 1 &&
452 "Device offload action is expected to have a single input");
453 const char *gpu_arch_str = A->getOffloadingArch();
454 assert(gpu_arch_str &&
455 "Device action expected to have associated a GPU architecture!");
458 if (II.getType() == types::TY_PP_Asm &&
464 (II.getType() == types::TY_PP_Asm)
467 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"--image=profile=") +
468 Arch +
",file=" + II.getFilename()));
471 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
472 CmdArgs.push_back(Args.MakeArgString(A));
474 const char *Exec = Args.MakeArgString(TC.GetProgramPath(
"fatbinary"));
475 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
482 const char *LinkingOutput)
const {
485 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
487 ArgStringList CmdArgs;
492 "CUDA toolchain not expected for an OpenMP host device.");
495 CmdArgs.push_back(
"-o");
498 assert(Output.
isNothing() &&
"Invalid output.");
500 CmdArgs.push_back(
"-g");
502 if (Args.hasArg(options::OPT_v))
503 CmdArgs.push_back(
"-v");
506 Args.getLastArgValue(options::OPT_march_EQ);
507 assert(!GPUArch.empty() &&
"At least one GPU Arch required for ptxas.");
509 CmdArgs.push_back(
"-arch");
510 CmdArgs.push_back(Args.MakeArgString(GPUArch));
517 llvm::sys::path::parent_path(TC.getDriver().Dir);
518 llvm::sys::path::append(DefaultLibPath,
"lib" CLANG_LIBDIR_SUFFIX);
519 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + DefaultLibPath));
522 CmdArgs.push_back(
"-lomptarget-nvptx");
524 for (
const auto &II : Inputs) {
525 if (II.getType() == types::TY_LLVM_IR ||
526 II.getType() == types::TY_LTO_IR ||
527 II.getType() == types::TY_LTO_BC ||
528 II.getType() == types::TY_LLVM_BC) {
530 << getToolChain().getTripleString();
536 if (!II.isFilename())
540 C.
getArgs().MakeArgString(getToolChain().getInputFilename(II)));
542 CmdArgs.push_back(CubinF);
548 Args.MakeArgString(getToolChain().GetProgramPath(
"nvlink"));
549 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
556 CudaToolChain::CudaToolChain(
const Driver &D,
const llvm::Triple &Triple,
557 const ToolChain &HostTC,
const ArgList &Args,
559 :
ToolChain(D, Triple, Args), HostTC(HostTC),
560 CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
578 llvm::sys::path::replace_extension(Filename,
"cubin");
579 return Filename.str();
583 const llvm::opt::ArgList &DriverArgs,
584 llvm::opt::ArgStringList &CC1Args,
588 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
589 assert(!GpuArch.empty() &&
"Must have an explicit GPU arch.");
592 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
595 CC1Args.push_back(
"-fcuda-is-device");
597 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
598 options::OPT_fno_cuda_flush_denormals_to_zero,
false))
599 CC1Args.push_back(
"-fcuda-flush-denormals-to-zero");
601 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
602 options::OPT_fno_cuda_approx_transcendentals,
false))
603 CC1Args.push_back(
"-fcuda-approx-transcendentals");
605 if (DriverArgs.hasFlag(options::OPT_fcuda_rdc, options::OPT_fno_cuda_rdc,
607 CC1Args.push_back(
"-fcuda-rdc");
610 if (DriverArgs.hasArg(options::OPT_nocudalib))
615 if (LibDeviceFile.empty()) {
617 DriverArgs.hasArg(options::OPT_S))
620 getDriver().
Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
624 CC1Args.push_back(
"-mlink-cuda-bitcode");
625 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
630 const char *PtxFeature =
"+ptx42";
633 PtxFeature =
"+ptx61";
636 PtxFeature =
"+ptx60";
638 CC1Args.append({
"-target-feature", PtxFeature});
639 if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
640 options::OPT_fno_cuda_short_ptr,
false))
641 CC1Args.append({
"-mllvm",
"--nvptx-short-ptr"});
647 llvm::sys::path::parent_path(
getDriver().Dir);
648 llvm::sys::path::append(DefaultLibPath,
649 Twine(
"lib") + CLANG_LIBDIR_SUFFIX);
650 LibraryPaths.emplace_back(DefaultLibPath.c_str());
654 llvm::sys::Process::GetEnv(
"LIBRARY_PATH");
657 const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,
'\0'};
658 llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
659 for (StringRef Path : Frags)
660 LibraryPaths.emplace_back(Path.trim());
663 std::string LibOmpTargetName =
664 "libomptarget-nvptx-" + GpuArch.str() +
".bc";
665 bool FoundBCLibrary =
false;
666 for (StringRef LibraryPath : LibraryPaths) {
668 llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
669 if (llvm::sys::fs::exists(LibOmpTargetFile)) {
670 CC1Args.push_back(
"-mlink-cuda-bitcode");
671 CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
672 FoundBCLibrary =
true;
677 getDriver().
Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
683 const Option &O = A->getOption();
684 return (O.matches(options::OPT_gN_Group) &&
685 !O.matches(options::OPT_gmodules)) ||
686 O.matches(options::OPT_g_Flag) ||
687 O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
688 O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
689 O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
690 O.matches(options::OPT_gdwarf_5) ||
691 O.matches(options::OPT_gcolumn_info);
695 ArgStringList &CC1Args)
const {
697 if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
698 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
699 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
700 assert(!Arch.empty() &&
"Must have an explicit GPU arch.");
706 llvm::opt::DerivedArgList *
710 DerivedArgList *DAL =
713 DAL =
new DerivedArgList(Args.getBaseArgs());
721 for (Arg *A : Args) {
722 bool IsDuplicate =
false;
723 for (Arg *DALArg : *DAL) {
733 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
735 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ),
736 CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
741 for (Arg *A : Args) {
742 if (A->getOption().matches(options::OPT_Xarch__)) {
744 if (BoundArch.empty() || A->getValue(0) != BoundArch)
747 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
748 unsigned Prev = Index;
749 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
759 if (!XarchArg || Index > Prev + 1) {
760 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_with_args)
761 << A->getAsString(Args);
764 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
765 << A->getAsString(Args);
768 XarchArg->setBaseArg(A);
769 A = XarchArg.release();
770 DAL->AddSynthesizedArg(A);
775 if (!BoundArch.empty()) {
776 DAL->eraseArg(options::OPT_march_EQ);
777 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
802 ArgStringList &CC1Args)
const {
807 ArgStringList &CC1Args)
const {
812 ArgStringList &CC1Args)
const {
830 const ArgList &Args)
const {
const char * CudaArchToString(CudaArch A)
bool isHostOffloading(OffloadKind OKind) const
Check if this action have any offload kinds.
DiagnosticBuilder Diag(unsigned DiagID) const
CudaArch StringToCudaArch(llvm::StringRef S)
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
void print(raw_ostream &OS) const
Print information about the detected CUDA installation.
Distro - Helper class for detecting and classifying Linux distributions.
bool isOffloading(OffloadKind OKind) const
void CheckCudaVersionSupportsArch(CudaArch Arch) const
Emit an error if Version does not support the given Arch.
bool isDeviceOffloading(OffloadKind OKind) const
std::string getLibDeviceFile(StringRef Gpu) const
Get libdevice file for given architecture.
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like comma...
const char * CudaVersionToString(CudaVersion V)
void addCommand(std::unique_ptr< Command > C)
const char * CudaVirtualArchToString(CudaVirtualArch A)
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
CudaVersion version() const
Get the detected Cuda install's version.
const llvm::opt::DerivedArgList & getArgs() const
CudaVersion MaxVersionForCudaArch(CudaArch A)
Get the latest CudaVersion that supports the given CudaArch.
vfs::FileSystem & getVFS() const
bool isValid() const
Check whether we detected a valid Cuda install.
StringRef getIncludePath() const
Get the detected Cuda Include path.
CudaVersion MinVersionForCudaArch(CudaArch A)
Get the earliest CudaVersion that supports the given CudaArch.
Dataflow Directional Tag Classes.
std::string SysRoot
sysroot, if present
Defines the virtual file system interface vfs::FileSystem.
CudaVirtualArch VirtualArchForCudaArch(CudaArch A)
Get the compute_xx corresponding to an sm_yy.
bool exists(const Twine &Path)
Check whether a file exists. Provided for convenience.
Compilation - A set of tasks to perform for a single driver invocation.
const Driver & getDriver() const
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false)
This is a convenience method that opens a file, gets its content and then closes the file...
const llvm::opt::OptTable & getOpts() const
const char * addTempFile(const char *Name)
addTempFile - Add a file to remove on exit, and returns its argument.
StringRef getBinPath() const
Get the detected path to Cuda's bin directory.
const char * getOffloadingArch() const
static bool real_path(StringRef SrcPath, SmallVectorImpl< char > &RealPath)