14 #include "clang/Config/config.h" 20 #include "llvm/Option/ArgList.h" 21 #include "llvm/Support/FileSystem.h" 22 #include "llvm/Support/Path.h" 23 #include "llvm/Support/Process.h" 24 #include "llvm/Support/Program.h" 25 #include "llvm/Support/VirtualFileSystem.h" 26 #include <system_error> 31 using namespace clang;
37 if (!V.startswith(
"CUDA Version "))
39 V = V.substr(strlen(
"CUDA Version "));
40 int Major = -1, Minor = -1;
41 auto First = V.split(
'.');
42 auto Second =
First.second.split(
'.');
43 if (
First.first.getAsInteger(10, Major) ||
44 Second.first.getAsInteger(10, Minor))
47 if (Major == 7 && Minor == 0) {
52 if (Major == 7 && Minor == 5)
54 if (Major == 8 && Minor == 0)
56 if (Major == 9 && Minor == 0)
58 if (Major == 9 && Minor == 1)
60 if (Major == 9 && Minor == 2)
62 if (Major == 10 && Minor == 0)
68 const Driver &D,
const llvm::Triple &HostTriple,
69 const llvm::opt::ArgList &Args)
75 Candidate(std::string Path,
bool StrictChecking =
false)
76 : Path(Path), StrictChecking(StrictChecking) {}
81 std::initializer_list<const char *> Versions = {
"8.0",
"7.5",
"7.0"};
83 if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
84 Candidates.emplace_back(
85 Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
86 }
else if (HostTriple.isOSWindows()) {
87 for (
const char *Ver : Versions)
88 Candidates.emplace_back(
89 D.
SysRoot +
"/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
92 if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
101 if (llvm::ErrorOr<std::string> ptxas =
102 llvm::sys::findProgramByName(
"ptxas")) {
106 StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
107 if (llvm::sys::path::filename(ptxasDir) ==
"bin")
108 Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
113 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda");
114 for (
const char *Ver : Versions)
115 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda-" + Ver);
120 Candidates.emplace_back(D.
SysRoot +
"/usr/lib/cuda");
123 bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
125 for (
const auto &Candidate : Candidates) {
126 InstallPath = Candidate.Path;
127 if (InstallPath.empty() || !D.
getVFS().exists(InstallPath))
130 BinPath = InstallPath +
"/bin";
131 IncludePath = InstallPath +
"/include";
132 LibDevicePath = InstallPath +
"/nvvm/libdevice";
135 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
137 bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
138 if (CheckLibDevice && !FS.exists(LibDevicePath))
147 if (HostTriple.isArch64Bit() && FS.exists(InstallPath +
"/lib64"))
148 LibPath = InstallPath +
"/lib64";
149 else if (FS.exists(InstallPath +
"/lib"))
150 LibPath = InstallPath +
"/lib";
154 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
155 FS.getBufferForFile(InstallPath +
"/version.txt");
166 std::string FilePath = LibDevicePath +
"/libdevice.10.bc";
167 if (FS.exists(FilePath)) {
168 for (
const char *GpuArchName :
169 {
"sm_30",
"sm_32",
"sm_35",
"sm_37",
"sm_50",
"sm_52",
"sm_53",
170 "sm_60",
"sm_61",
"sm_62",
"sm_70",
"sm_72",
"sm_75"}) {
174 LibDeviceMap[GpuArchName] = FilePath;
179 for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
180 !EC && LI != LE; LI = LI.increment(EC)) {
181 StringRef FilePath = LI->path();
182 StringRef FileName = llvm::sys::path::filename(FilePath);
185 const StringRef LibDeviceName =
"libdevice.";
186 if (!(FileName.startswith(LibDeviceName) && FileName.endswith(
".bc")))
188 StringRef GpuArch = FileName.slice(
189 LibDeviceName.size(), FileName.find(
'.', LibDeviceName.size()));
190 LibDeviceMap[GpuArch] = FilePath.str();
194 if (GpuArch ==
"compute_20") {
195 LibDeviceMap[
"sm_20"] = FilePath;
196 LibDeviceMap[
"sm_21"] = FilePath;
197 LibDeviceMap[
"sm_32"] = FilePath;
198 }
else if (GpuArch ==
"compute_30") {
199 LibDeviceMap[
"sm_30"] = FilePath;
201 LibDeviceMap[
"sm_50"] = FilePath;
202 LibDeviceMap[
"sm_52"] = FilePath;
203 LibDeviceMap[
"sm_53"] = FilePath;
205 LibDeviceMap[
"sm_60"] = FilePath;
206 LibDeviceMap[
"sm_61"] = FilePath;
207 LibDeviceMap[
"sm_62"] = FilePath;
208 }
else if (GpuArch ==
"compute_35") {
209 LibDeviceMap[
"sm_35"] = FilePath;
210 LibDeviceMap[
"sm_37"] = FilePath;
211 }
else if (GpuArch ==
"compute_50") {
213 LibDeviceMap[
"sm_50"] = FilePath;
214 LibDeviceMap[
"sm_52"] = FilePath;
215 LibDeviceMap[
"sm_53"] = FilePath;
223 if (LibDeviceMap.empty() && !NoCudaLib)
232 const ArgList &DriverArgs, ArgStringList &CC1Args)
const {
233 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
237 llvm::sys::path::append(P,
"include");
238 llvm::sys::path::append(P,
"cuda_wrappers");
239 CC1Args.push_back(
"-internal-isystem");
240 CC1Args.push_back(DriverArgs.MakeArgString(P));
243 if (DriverArgs.hasArg(options::OPT_nocudainc))
247 D.Diag(diag::err_drv_no_cuda_installation);
251 CC1Args.push_back(
"-internal-isystem");
253 CC1Args.push_back(
"-include");
254 CC1Args.push_back(
"__clang_cuda_runtime_wrapper.h");
260 ArchsWithBadVersion.count(Arch) > 0)
265 if (Version < MinVersion || Version > MaxVersion) {
266 ArchsWithBadVersion.insert(Arch);
267 D.Diag(diag::err_drv_cuda_version_unsupported)
276 OS <<
"Found CUDA installation: " << InstallPath <<
", version " 289 EmitSameDebugInfoAsHost,
303 const Arg *A = Args.getLastArg(options::OPT_O_Group);
304 bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
305 Args.hasFlag(options::OPT_cuda_noopt_device_debug,
306 options::OPT_no_cuda_noopt_device_debug,
308 if (
const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
309 const Option &Opt = A->getOption();
310 if (Opt.matches(options::OPT_gN_Group)) {
311 if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
312 return DisableDebugInfo;
313 if (Opt.matches(options::OPT_gline_directives_only))
318 return DisableDebugInfo;
325 const char *LinkingOutput)
const {
328 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
330 StringRef GPUArchName;
335 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
336 assert(!GPUArchName.empty() &&
"Must have an architecture passed in.");
343 "Device action expected to have an architecture.");
346 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
347 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
350 ArgStringList CmdArgs;
351 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-m64" :
"-m32");
353 if (DIKind == EmitSameDebugInfoAsHost) {
356 CmdArgs.push_back(
"-g");
357 CmdArgs.push_back(
"--dont-merge-basicblocks");
358 CmdArgs.push_back(
"--return-at-end");
359 }
else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
367 StringRef OOpt =
"3";
368 if (A->getOption().matches(options::OPT_O4) ||
369 A->getOption().matches(options::OPT_Ofast))
371 else if (A->getOption().matches(options::OPT_O0))
373 else if (A->getOption().matches(options::OPT_O)) {
375 OOpt = llvm::StringSwitch<const char *>(A->getValue())
383 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"-O") + OOpt));
387 CmdArgs.push_back(
"-O0");
390 CmdArgs.push_back(
"-lineinfo");
393 if (Args.hasArg(options::OPT_v))
394 CmdArgs.push_back(
"-v");
396 CmdArgs.push_back(
"--gpu-name");
398 CmdArgs.push_back(
"--output-file");
399 CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
400 for (
const auto& II : Inputs)
401 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
403 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
404 CmdArgs.push_back(Args.MakeArgString(A));
406 bool Relocatable =
false;
409 Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
410 options::OPT_fnoopenmp_relocatable_target,
413 Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
414 options::OPT_fno_gpu_rdc,
false);
417 CmdArgs.push_back(
"-c");
420 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
421 Exec = A->getValue();
423 Exec = Args.MakeArgString(TC.GetProgramPath(
"ptxas"));
424 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
428 bool includePTX =
true;
429 for (Arg *A : Args) {
430 if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
431 A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
434 const StringRef ArchStr = A->getValue();
435 if (ArchStr ==
"all" || ArchStr == gpu_arch) {
436 includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
450 const char *LinkingOutput)
const {
453 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
455 ArgStringList CmdArgs;
456 CmdArgs.push_back(
"--cuda");
457 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-64" :
"-32");
458 CmdArgs.push_back(Args.MakeArgString(
"--create"));
459 CmdArgs.push_back(Args.MakeArgString(Output.
getFilename()));
461 CmdArgs.push_back(
"-g");
463 for (
const auto& II : Inputs) {
464 auto *A = II.getAction();
465 assert(A->getInputs().size() == 1 &&
466 "Device offload action is expected to have a single input");
467 const char *gpu_arch_str = A->getOffloadingArch();
468 assert(gpu_arch_str &&
469 "Device action expected to have associated a GPU architecture!");
472 if (II.getType() == types::TY_PP_Asm &&
478 (II.getType() == types::TY_PP_Asm)
481 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"--image=profile=") +
482 Arch +
",file=" + II.getFilename()));
485 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
486 CmdArgs.push_back(Args.MakeArgString(A));
488 const char *Exec = Args.MakeArgString(TC.GetProgramPath(
"fatbinary"));
489 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
496 const char *LinkingOutput)
const {
499 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
501 ArgStringList CmdArgs;
506 "CUDA toolchain not expected for an OpenMP host device.");
509 CmdArgs.push_back(
"-o");
512 assert(Output.
isNothing() &&
"Invalid output.");
514 CmdArgs.push_back(
"-g");
516 if (Args.hasArg(options::OPT_v))
517 CmdArgs.push_back(
"-v");
520 Args.getLastArgValue(options::OPT_march_EQ);
521 assert(!GPUArch.empty() &&
"At least one GPU Arch required for ptxas.");
523 CmdArgs.push_back(
"-arch");
524 CmdArgs.push_back(Args.MakeArgString(GPUArch));
528 if (
const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
529 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + A->getValue()));
536 llvm::sys::path::parent_path(TC.getDriver().Dir);
537 llvm::sys::path::append(DefaultLibPath,
"lib" CLANG_LIBDIR_SUFFIX);
538 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + DefaultLibPath));
541 CmdArgs.push_back(
"-lomptarget-nvptx");
543 for (
const auto &II : Inputs) {
544 if (II.getType() == types::TY_LLVM_IR ||
545 II.getType() == types::TY_LTO_IR ||
546 II.getType() == types::TY_LTO_BC ||
547 II.getType() == types::TY_LLVM_BC) {
549 << getToolChain().getTripleString();
555 if (!II.isFilename())
559 C.
getArgs().MakeArgString(getToolChain().getInputFilename(II)));
561 CmdArgs.push_back(CubinF);
567 Args.MakeArgString(getToolChain().GetProgramPath(
"nvlink"));
568 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
575 CudaToolChain::CudaToolChain(
const Driver &D,
const llvm::Triple &Triple,
576 const ToolChain &HostTC,
const ArgList &Args,
578 :
ToolChain(D, Triple, Args), HostTC(HostTC),
579 CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
597 llvm::sys::path::replace_extension(Filename,
"cubin");
598 return Filename.str();
602 const llvm::opt::ArgList &DriverArgs,
603 llvm::opt::ArgStringList &CC1Args,
607 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
608 assert(!GpuArch.empty() &&
"Must have an explicit GPU arch.");
611 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
614 CC1Args.push_back(
"-fcuda-is-device");
616 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
617 options::OPT_fno_cuda_flush_denormals_to_zero,
false))
618 CC1Args.push_back(
"-fcuda-flush-denormals-to-zero");
620 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
621 options::OPT_fno_cuda_approx_transcendentals,
false))
622 CC1Args.push_back(
"-fcuda-approx-transcendentals");
624 if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
626 CC1Args.push_back(
"-fgpu-rdc");
629 if (DriverArgs.hasArg(options::OPT_nocudalib))
634 if (LibDeviceFile.empty()) {
636 DriverArgs.hasArg(options::OPT_S))
639 getDriver().
Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
643 CC1Args.push_back(
"-mlink-builtin-bitcode");
644 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
649 const char *PtxFeature =
"+ptx42";
655 PtxFeature =
"+ptx61";
658 PtxFeature =
"+ptx60";
660 CC1Args.append({
"-target-feature", PtxFeature});
661 if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
662 options::OPT_fno_cuda_short_ptr,
false))
663 CC1Args.append({
"-mllvm",
"--nvptx-short-ptr"});
668 if (
const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
669 LibraryPaths.push_back(A->getValue());
673 llvm::sys::Process::GetEnv(
"LIBRARY_PATH");
676 const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,
'\0'};
677 llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
678 for (StringRef Path : Frags)
679 LibraryPaths.emplace_back(Path.trim());
684 llvm::sys::path::parent_path(
getDriver().Dir);
685 llvm::sys::path::append(DefaultLibPath, Twine(
"lib") + CLANG_LIBDIR_SUFFIX);
686 LibraryPaths.emplace_back(DefaultLibPath.c_str());
688 std::string LibOmpTargetName =
689 "libomptarget-nvptx-" + GpuArch.str() +
".bc";
690 bool FoundBCLibrary =
false;
691 for (StringRef LibraryPath : LibraryPaths) {
693 llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
694 if (llvm::sys::fs::exists(LibOmpTargetFile)) {
695 CC1Args.push_back(
"-mlink-builtin-bitcode");
696 CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
697 FoundBCLibrary =
true;
702 getDriver().
Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
708 const Option &O = A->getOption();
709 return (O.matches(options::OPT_gN_Group) &&
710 !O.matches(options::OPT_gmodules)) ||
711 O.matches(options::OPT_g_Flag) ||
712 O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
713 O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
714 O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
715 O.matches(options::OPT_gdwarf_5) ||
716 O.matches(options::OPT_gcolumn_info);
722 case DisableDebugInfo:
728 case EmitSameDebugInfoAsHost:
735 ArgStringList &CC1Args)
const {
737 if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
738 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
739 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
740 assert(!Arch.empty() &&
"Must have an explicit GPU arch.");
746 llvm::opt::DerivedArgList *
750 DerivedArgList *DAL =
753 DAL =
new DerivedArgList(Args.getBaseArgs());
761 for (Arg *A : Args) {
762 bool IsDuplicate =
false;
763 for (Arg *DALArg : *DAL) {
773 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
775 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ),
776 CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
781 for (Arg *A : Args) {
782 if (A->getOption().matches(options::OPT_Xarch__)) {
784 if (BoundArch.empty() || A->getValue(0) != BoundArch)
787 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
788 unsigned Prev = Index;
789 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
799 if (!XarchArg || Index > Prev + 1) {
800 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_with_args)
801 << A->getAsString(Args);
804 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
805 << A->getAsString(Args);
808 XarchArg->setBaseArg(A);
809 A = XarchArg.release();
810 DAL->AddSynthesizedArg(A);
815 if (!BoundArch.empty()) {
816 DAL->eraseArg(options::OPT_march_EQ);
817 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
842 ArgStringList &CC1Args)
const {
847 ArgStringList &CC1Args)
const {
852 ArgStringList &CC1Args)
const {
870 const ArgList &Args)
const {
const char * CudaArchToString(CudaArch A)
bool isHostOffloading(OffloadKind OKind) const
Check if this action have any offload kinds.
DiagnosticBuilder Diag(unsigned DiagID) const
CudaArch StringToCudaArch(llvm::StringRef S)
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
void print(raw_ostream &OS) const
Print information about the detected CUDA installation.
Distro - Helper class for detecting and classifying Linux distributions.
bool isOffloading(OffloadKind OKind) const
void CheckCudaVersionSupportsArch(CudaArch Arch) const
Emit an error if Version does not support the given Arch.
Emit location information but do not generate debug info in the output.
bool isDeviceOffloading(OffloadKind OKind) const
std::string getLibDeviceFile(StringRef Gpu) const
Get libdevice file for given architecture.
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like comma...
llvm::vfs::FileSystem & getVFS() const
const char * CudaVersionToString(CudaVersion V)
void addCommand(std::unique_ptr< Command > C)
const char * CudaVirtualArchToString(CudaVirtualArch A)
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
CudaVersion version() const
Get the detected Cuda install's version.
const llvm::opt::DerivedArgList & getArgs() const
CudaVersion MaxVersionForCudaArch(CudaArch A)
Get the latest CudaVersion that supports the given CudaArch.
bool isValid() const
Check whether we detected a valid Cuda install.
StringRef getIncludePath() const
Get the detected Cuda Include path.
CudaVersion MinVersionForCudaArch(CudaArch A)
Get the earliest CudaVersion that supports the given CudaArch.
Dataflow Directional Tag Classes.
std::string SysRoot
sysroot, if present
CudaVirtualArch VirtualArchForCudaArch(CudaArch A)
Get the compute_xx corresponding to an sm_yy.
Compilation - A set of tasks to perform for a single driver invocation.
const Driver & getDriver() const
const llvm::opt::OptTable & getOpts() const
const char * addTempFile(const char *Name)
addTempFile - Add a file to remove on exit, and returns its argument.
StringRef getBinPath() const
Get the detected path to Cuda's bin directory.
const char * getOffloadingArch() const
static bool real_path(StringRef SrcPath, SmallVectorImpl< char > &RealPath)