13 #include "clang/Config/config.h" 19 #include "llvm/Option/ArgList.h" 20 #include "llvm/Support/FileSystem.h" 21 #include "llvm/Support/Path.h" 22 #include "llvm/Support/Process.h" 23 #include "llvm/Support/Program.h" 24 #include "llvm/Support/VirtualFileSystem.h" 25 #include <system_error> 30 using namespace clang;
36 if (!V.startswith(
"CUDA Version "))
38 V = V.substr(strlen(
"CUDA Version "));
39 int Major = -1, Minor = -1;
40 auto First = V.split(
'.');
41 auto Second =
First.second.split(
'.');
42 if (
First.first.getAsInteger(10, Major) ||
43 Second.first.getAsInteger(10, Minor))
46 if (Major == 7 && Minor == 0) {
51 if (Major == 7 && Minor == 5)
53 if (Major == 8 && Minor == 0)
55 if (Major == 9 && Minor == 0)
57 if (Major == 9 && Minor == 1)
59 if (Major == 9 && Minor == 2)
61 if (Major == 10 && Minor == 0)
63 if (Major == 10 && Minor == 1)
69 const Driver &D,
const llvm::Triple &HostTriple,
70 const llvm::opt::ArgList &Args)
76 Candidate(std::string Path,
bool StrictChecking =
false)
77 : Path(Path), StrictChecking(StrictChecking) {}
82 std::initializer_list<const char *> Versions = {
"8.0",
"7.5",
"7.0"};
84 if (Args.hasArg(clang::driver::options::OPT_cuda_path_EQ)) {
85 Candidates.emplace_back(
86 Args.getLastArgValue(clang::driver::options::OPT_cuda_path_EQ).str());
87 }
else if (HostTriple.isOSWindows()) {
88 for (
const char *Ver : Versions)
89 Candidates.emplace_back(
90 D.
SysRoot +
"/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v" +
93 if (!Args.hasArg(clang::driver::options::OPT_cuda_path_ignore_env)) {
102 if (llvm::ErrorOr<std::string> ptxas =
103 llvm::sys::findProgramByName(
"ptxas")) {
105 llvm::sys::fs::real_path(*ptxas, ptxasAbsolutePath);
107 StringRef ptxasDir = llvm::sys::path::parent_path(ptxasAbsolutePath);
108 if (llvm::sys::path::filename(ptxasDir) ==
"bin")
109 Candidates.emplace_back(llvm::sys::path::parent_path(ptxasDir),
114 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda");
115 for (
const char *Ver : Versions)
116 Candidates.emplace_back(D.
SysRoot +
"/usr/local/cuda-" + Ver);
121 Candidates.emplace_back(D.
SysRoot +
"/usr/lib/cuda");
124 bool NoCudaLib = Args.hasArg(options::OPT_nocudalib);
126 for (
const auto &Candidate : Candidates) {
127 InstallPath = Candidate.Path;
128 if (InstallPath.empty() || !D.
getVFS().exists(InstallPath))
131 BinPath = InstallPath +
"/bin";
132 IncludePath = InstallPath +
"/include";
133 LibDevicePath = InstallPath +
"/nvvm/libdevice";
136 if (!(FS.exists(IncludePath) && FS.exists(BinPath)))
138 bool CheckLibDevice = (!NoCudaLib || Candidate.StrictChecking);
139 if (CheckLibDevice && !FS.exists(LibDevicePath))
148 if (HostTriple.isArch64Bit() && FS.exists(InstallPath +
"/lib64"))
149 LibPath = InstallPath +
"/lib64";
150 else if (FS.exists(InstallPath +
"/lib"))
151 LibPath = InstallPath +
"/lib";
155 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
156 FS.getBufferForFile(InstallPath +
"/version.txt");
167 std::string FilePath = LibDevicePath +
"/libdevice.10.bc";
168 if (FS.exists(FilePath)) {
169 for (
const char *GpuArchName :
170 {
"sm_30",
"sm_32",
"sm_35",
"sm_37",
"sm_50",
"sm_52",
"sm_53",
171 "sm_60",
"sm_61",
"sm_62",
"sm_70",
"sm_72",
"sm_75"}) {
175 LibDeviceMap[GpuArchName] = FilePath;
180 for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
181 !EC && LI != LE; LI = LI.increment(EC)) {
182 StringRef FilePath = LI->path();
183 StringRef FileName = llvm::sys::path::filename(FilePath);
186 const StringRef LibDeviceName =
"libdevice.";
187 if (!(FileName.startswith(LibDeviceName) && FileName.endswith(
".bc")))
189 StringRef GpuArch = FileName.slice(
190 LibDeviceName.size(), FileName.find(
'.', LibDeviceName.size()));
191 LibDeviceMap[GpuArch] = FilePath.str();
195 if (GpuArch ==
"compute_20") {
196 LibDeviceMap[
"sm_20"] = FilePath;
197 LibDeviceMap[
"sm_21"] = FilePath;
198 LibDeviceMap[
"sm_32"] = FilePath;
199 }
else if (GpuArch ==
"compute_30") {
200 LibDeviceMap[
"sm_30"] = FilePath;
202 LibDeviceMap[
"sm_50"] = FilePath;
203 LibDeviceMap[
"sm_52"] = FilePath;
204 LibDeviceMap[
"sm_53"] = FilePath;
206 LibDeviceMap[
"sm_60"] = FilePath;
207 LibDeviceMap[
"sm_61"] = FilePath;
208 LibDeviceMap[
"sm_62"] = FilePath;
209 }
else if (GpuArch ==
"compute_35") {
210 LibDeviceMap[
"sm_35"] = FilePath;
211 LibDeviceMap[
"sm_37"] = FilePath;
212 }
else if (GpuArch ==
"compute_50") {
214 LibDeviceMap[
"sm_50"] = FilePath;
215 LibDeviceMap[
"sm_52"] = FilePath;
216 LibDeviceMap[
"sm_53"] = FilePath;
224 if (LibDeviceMap.empty() && !NoCudaLib)
233 const ArgList &DriverArgs, ArgStringList &CC1Args)
const {
234 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
238 llvm::sys::path::append(P,
"include");
239 llvm::sys::path::append(P,
"cuda_wrappers");
240 CC1Args.push_back(
"-internal-isystem");
241 CC1Args.push_back(DriverArgs.MakeArgString(P));
244 if (DriverArgs.hasArg(options::OPT_nocudainc))
248 D.Diag(diag::err_drv_no_cuda_installation);
252 CC1Args.push_back(
"-internal-isystem");
254 CC1Args.push_back(
"-include");
255 CC1Args.push_back(
"__clang_cuda_runtime_wrapper.h");
261 ArchsWithBadVersion.count(Arch) > 0)
266 if (Version < MinVersion || Version > MaxVersion) {
267 ArchsWithBadVersion.insert(Arch);
268 D.Diag(diag::err_drv_cuda_version_unsupported)
277 OS <<
"Found CUDA installation: " << InstallPath <<
", version " 290 EmitSameDebugInfoAsHost,
304 const Arg *A = Args.getLastArg(options::OPT_O_Group);
305 bool IsDebugEnabled = !A || A->getOption().matches(options::OPT_O0) ||
306 Args.hasFlag(options::OPT_cuda_noopt_device_debug,
307 options::OPT_no_cuda_noopt_device_debug,
309 if (
const Arg *A = Args.getLastArg(options::OPT_g_Group)) {
310 const Option &Opt = A->getOption();
311 if (Opt.matches(options::OPT_gN_Group)) {
312 if (Opt.matches(options::OPT_g0) || Opt.matches(options::OPT_ggdb0))
313 return DisableDebugInfo;
314 if (Opt.matches(options::OPT_gline_directives_only))
319 return DisableDebugInfo;
326 const char *LinkingOutput)
const {
329 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
331 StringRef GPUArchName;
336 GPUArchName = Args.getLastArgValue(options::OPT_march_EQ);
337 assert(!GPUArchName.empty() &&
"Must have an architecture passed in.");
344 "Device action expected to have an architecture.");
347 if (!Args.hasArg(options::OPT_no_cuda_version_check)) {
348 TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch);
351 ArgStringList CmdArgs;
352 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-m64" :
"-m32");
354 if (DIKind == EmitSameDebugInfoAsHost) {
357 CmdArgs.push_back(
"-g");
358 CmdArgs.push_back(
"--dont-merge-basicblocks");
359 CmdArgs.push_back(
"--return-at-end");
360 }
else if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
368 StringRef OOpt =
"3";
369 if (A->getOption().matches(options::OPT_O4) ||
370 A->getOption().matches(options::OPT_Ofast))
372 else if (A->getOption().matches(options::OPT_O0))
374 else if (A->getOption().matches(options::OPT_O)) {
376 OOpt = llvm::StringSwitch<const char *>(A->getValue())
384 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"-O") + OOpt));
388 CmdArgs.push_back(
"-O0");
391 CmdArgs.push_back(
"-lineinfo");
394 if (Args.hasArg(options::OPT_v))
395 CmdArgs.push_back(
"-v");
397 CmdArgs.push_back(
"--gpu-name");
399 CmdArgs.push_back(
"--output-file");
400 CmdArgs.push_back(Args.MakeArgString(TC.getInputFilename(Output)));
401 for (
const auto& II : Inputs)
402 CmdArgs.push_back(Args.MakeArgString(II.getFilename()));
404 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_ptxas))
405 CmdArgs.push_back(Args.MakeArgString(A));
407 bool Relocatable =
false;
410 Relocatable = Args.hasFlag(options::OPT_fopenmp_relocatable_target,
411 options::OPT_fnoopenmp_relocatable_target,
414 Relocatable = Args.hasFlag(options::OPT_fgpu_rdc,
415 options::OPT_fno_gpu_rdc,
false);
418 CmdArgs.push_back(
"-c");
421 if (Arg *A = Args.getLastArg(options::OPT_ptxas_path_EQ))
422 Exec = A->getValue();
424 Exec = Args.MakeArgString(TC.GetProgramPath(
"ptxas"));
425 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
429 bool includePTX =
true;
430 for (Arg *A : Args) {
431 if (!(A->getOption().matches(options::OPT_cuda_include_ptx_EQ) ||
432 A->getOption().matches(options::OPT_no_cuda_include_ptx_EQ)))
435 const StringRef ArchStr = A->getValue();
436 if (ArchStr ==
"all" || ArchStr == gpu_arch) {
437 includePTX = A->getOption().matches(options::OPT_cuda_include_ptx_EQ);
451 const char *LinkingOutput)
const {
454 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
456 ArgStringList CmdArgs;
458 CmdArgs.push_back(
"--cuda");
459 CmdArgs.push_back(TC.getTriple().isArch64Bit() ?
"-64" :
"-32");
460 CmdArgs.push_back(Args.MakeArgString(
"--create"));
461 CmdArgs.push_back(Args.MakeArgString(Output.
getFilename()));
463 CmdArgs.push_back(
"-g");
465 for (
const auto& II : Inputs) {
466 auto *A = II.getAction();
467 assert(A->getInputs().size() == 1 &&
468 "Device offload action is expected to have a single input");
469 const char *gpu_arch_str = A->getOffloadingArch();
470 assert(gpu_arch_str &&
471 "Device action expected to have associated a GPU architecture!");
474 if (II.getType() == types::TY_PP_Asm &&
480 (II.getType() == types::TY_PP_Asm)
483 CmdArgs.push_back(Args.MakeArgString(llvm::Twine(
"--image=profile=") +
484 Arch +
",file=" + II.getFilename()));
487 for (
const auto& A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
488 CmdArgs.push_back(Args.MakeArgString(A));
490 const char *Exec = Args.MakeArgString(TC.GetProgramPath(
"fatbinary"));
491 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
498 const char *LinkingOutput)
const {
501 assert(TC.getTriple().isNVPTX() &&
"Wrong platform");
503 ArgStringList CmdArgs;
508 "CUDA toolchain not expected for an OpenMP host device.");
511 CmdArgs.push_back(
"-o");
514 assert(Output.
isNothing() &&
"Invalid output.");
516 CmdArgs.push_back(
"-g");
518 if (Args.hasArg(options::OPT_v))
519 CmdArgs.push_back(
"-v");
522 Args.getLastArgValue(options::OPT_march_EQ);
523 assert(!GPUArch.empty() &&
"At least one GPU Arch required for ptxas.");
525 CmdArgs.push_back(
"-arch");
526 CmdArgs.push_back(Args.MakeArgString(GPUArch));
530 if (
const Arg *A = Args.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
531 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + A->getValue()));
538 llvm::sys::path::parent_path(TC.getDriver().Dir);
539 llvm::sys::path::append(DefaultLibPath,
"lib" CLANG_LIBDIR_SUFFIX);
540 CmdArgs.push_back(Args.MakeArgString(Twine(
"-L") + DefaultLibPath));
543 CmdArgs.push_back(
"-lomptarget-nvptx");
545 for (
const auto &II : Inputs) {
546 if (II.getType() == types::TY_LLVM_IR ||
547 II.getType() == types::TY_LTO_IR ||
548 II.getType() == types::TY_LTO_BC ||
549 II.getType() == types::TY_LLVM_BC) {
551 << getToolChain().getTripleString();
557 if (!II.isFilename())
561 C.
getArgs().MakeArgString(getToolChain().getInputFilename(II)));
563 CmdArgs.push_back(CubinF);
569 Args.MakeArgString(getToolChain().GetProgramPath(
"nvlink"));
570 C.
addCommand(llvm::make_unique<Command>(JA, *
this, Exec, CmdArgs, Inputs));
577 CudaToolChain::CudaToolChain(
const Driver &D,
const llvm::Triple &Triple,
578 const ToolChain &HostTC,
const ArgList &Args,
580 :
ToolChain(D, Triple, Args), HostTC(HostTC),
581 CudaInstallation(D, HostTC.getTriple(), Args), OK(OK) {
599 llvm::sys::path::replace_extension(Filename,
"cubin");
600 return Filename.str();
604 const llvm::opt::ArgList &DriverArgs,
605 llvm::opt::ArgStringList &CC1Args,
609 StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
610 assert(!GpuArch.empty() &&
"Must have an explicit GPU arch.");
613 "Only OpenMP or CUDA offloading kinds are supported for NVIDIA GPUs.");
616 CC1Args.push_back(
"-fcuda-is-device");
618 if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
619 options::OPT_fno_cuda_flush_denormals_to_zero,
false))
620 CC1Args.push_back(
"-fcuda-flush-denormals-to-zero");
622 if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
623 options::OPT_fno_cuda_approx_transcendentals,
false))
624 CC1Args.push_back(
"-fcuda-approx-transcendentals");
626 if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
628 CC1Args.push_back(
"-fgpu-rdc");
631 if (DriverArgs.hasArg(options::OPT_nocudalib))
636 if (LibDeviceFile.empty()) {
638 DriverArgs.hasArg(options::OPT_S))
641 getDriver().
Diag(diag::err_drv_no_cuda_libdevice) << GpuArch;
645 CC1Args.push_back(
"-mlink-builtin-bitcode");
646 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
651 const char *PtxFeature =
nullptr;
654 PtxFeature =
"+ptx64";
657 PtxFeature =
"+ptx63";
660 PtxFeature =
"+ptx61";
663 PtxFeature =
"+ptx61";
666 PtxFeature =
"+ptx60";
669 PtxFeature =
"+ptx42";
671 CC1Args.append({
"-target-feature", PtxFeature});
672 if (DriverArgs.hasFlag(options::OPT_fcuda_short_ptr,
673 options::OPT_fno_cuda_short_ptr,
false))
674 CC1Args.append({
"-mllvm",
"--nvptx-short-ptr"});
677 CC1Args.push_back(DriverArgs.MakeArgString(
678 Twine(
"-target-sdk-version=") +
683 if (
const Arg *A = DriverArgs.getLastArg(options::OPT_libomptarget_nvptx_path_EQ))
684 LibraryPaths.push_back(A->getValue());
688 llvm::sys::Process::GetEnv(
"LIBRARY_PATH");
691 const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator,
'\0'};
692 llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
693 for (StringRef Path : Frags)
694 LibraryPaths.emplace_back(Path.trim());
699 llvm::sys::path::parent_path(
getDriver().Dir);
700 llvm::sys::path::append(DefaultLibPath, Twine(
"lib") + CLANG_LIBDIR_SUFFIX);
701 LibraryPaths.emplace_back(DefaultLibPath.c_str());
703 std::string LibOmpTargetName =
704 "libomptarget-nvptx-" + GpuArch.str() +
".bc";
705 bool FoundBCLibrary =
false;
706 for (StringRef LibraryPath : LibraryPaths) {
708 llvm::sys::path::append(LibOmpTargetFile, LibOmpTargetName);
709 if (llvm::sys::fs::exists(LibOmpTargetFile)) {
710 CC1Args.push_back(
"-mlink-builtin-bitcode");
711 CC1Args.push_back(DriverArgs.MakeArgString(LibOmpTargetFile));
712 FoundBCLibrary =
true;
717 getDriver().
Diag(diag::warn_drv_omp_offload_target_missingbcruntime)
723 const Option &O = A->getOption();
724 return (O.matches(options::OPT_gN_Group) &&
725 !O.matches(options::OPT_gmodules)) ||
726 O.matches(options::OPT_g_Flag) ||
727 O.matches(options::OPT_ggdbN_Group) || O.matches(options::OPT_ggdb) ||
728 O.matches(options::OPT_gdwarf) || O.matches(options::OPT_gdwarf_2) ||
729 O.matches(options::OPT_gdwarf_3) || O.matches(options::OPT_gdwarf_4) ||
730 O.matches(options::OPT_gdwarf_5) ||
731 O.matches(options::OPT_gcolumn_info);
737 case DisableDebugInfo:
743 case EmitSameDebugInfoAsHost:
750 ArgStringList &CC1Args)
const {
752 if (!DriverArgs.hasArg(options::OPT_nocudainc) &&
753 !DriverArgs.hasArg(options::OPT_no_cuda_version_check)) {
754 StringRef Arch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
755 assert(!Arch.empty() &&
"Must have an explicit GPU arch.");
761 llvm::opt::DerivedArgList *
765 DerivedArgList *DAL =
768 DAL =
new DerivedArgList(Args.getBaseArgs());
776 for (Arg *A : Args) {
777 bool IsDuplicate =
false;
778 for (Arg *DALArg : *DAL) {
788 StringRef Arch = DAL->getLastArgValue(options::OPT_march_EQ);
790 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ),
791 CLANG_OPENMP_NVPTX_DEFAULT_ARCH);
796 for (Arg *A : Args) {
797 if (A->getOption().matches(options::OPT_Xarch__)) {
799 if (BoundArch.empty() || A->getValue(0) != BoundArch)
802 unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
803 unsigned Prev = Index;
804 std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
814 if (!XarchArg || Index > Prev + 1) {
815 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_with_args)
816 << A->getAsString(Args);
819 getDriver().
Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
820 << A->getAsString(Args);
823 XarchArg->setBaseArg(A);
824 A = XarchArg.release();
825 DAL->AddSynthesizedArg(A);
830 if (!BoundArch.empty()) {
831 DAL->eraseArg(options::OPT_march_EQ);
832 DAL->AddJoinedArg(
nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
857 ArgStringList &CC1Args)
const {
862 ArgStringList &CC1Args)
const {
867 ArgStringList &CC1Args)
const {
885 const ArgList &Args)
const {
const char * CudaArchToString(CudaArch A)
bool isHostOffloading(OffloadKind OKind) const
Check if this action has any offload kinds.
DiagnosticBuilder Diag(unsigned DiagID) const
CudaArch StringToCudaArch(llvm::StringRef S)
CudaInstallationDetector(const Driver &D, const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args)
void print(raw_ostream &OS) const
Print information about the detected CUDA installation.
Distro - Helper class for detecting and classifying Linux distributions.
bool isOffloading(OffloadKind OKind) const
void CheckCudaVersionSupportsArch(CudaArch Arch) const
Emit an error if Version does not support the given Arch.
Emit location information but do not generate debug info in the output.
bool isDeviceOffloading(OffloadKind OKind) const
std::string getLibDeviceFile(StringRef Gpu) const
Get libdevice file for given architecture.
Driver - Encapsulate logic for constructing compilation processes from a set of gcc-driver-like command line arguments.
llvm::vfs::FileSystem & getVFS() const
const char * CudaVersionToString(CudaVersion V)
void addCommand(std::unique_ptr< Command > C)
const char * CudaVirtualArchToString(CudaVirtualArch A)
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const
CudaVersion version() const
Get the detected Cuda install's version.
const llvm::opt::DerivedArgList & getArgs() const
CudaVersion MaxVersionForCudaArch(CudaArch A)
Get the latest CudaVersion that supports the given CudaArch.
bool isValid() const
Check whether we detected a valid Cuda install.
StringRef getIncludePath() const
Get the detected Cuda Include path.
CudaVersion MinVersionForCudaArch(CudaArch A)
Get the earliest CudaVersion that supports the given CudaArch.
Dataflow Directional Tag Classes.
std::string SysRoot
sysroot, if present
CudaVirtualArch VirtualArchForCudaArch(CudaArch A)
Get the compute_xx corresponding to an sm_yy.
Compilation - A set of tasks to perform for a single driver invocation.
const Driver & getDriver() const
const llvm::opt::OptTable & getOpts() const
const char * addTempFile(const char *Name)
addTempFile - Add a file to remove on exit, and return its argument.
StringRef getBinPath() const
Get the detected path to Cuda's bin directory.
const char * getOffloadingArch() const