Bug Summary

File: clang/lib/Driver/ToolChains/AMDGPU.cpp
Warning: line 105, column 23
1st function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AMDGPU.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D CLANG_VENDOR="Debian " -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/tools/clang/lib/Driver -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/clang/lib/Driver -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/clang/include -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/tools/clang/include -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem 
/usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/build-llvm/tools/clang/lib/Driver -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-09-26-161721-17566-1 -x c++ /build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/clang/lib/Driver/ToolChains/AMDGPU.cpp
1//===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AMDGPU.h"
10#include "CommonArgs.h"
11#include "InputInfo.h"
12#include "clang/Basic/TargetID.h"
13#include "clang/Driver/Compilation.h"
14#include "clang/Driver/DriverDiagnostic.h"
15#include "llvm/Option/ArgList.h"
16#include "llvm/Support/Path.h"
17#include "llvm/Support/VirtualFileSystem.h"
18
19using namespace clang::driver;
20using namespace clang::driver::tools;
21using namespace clang::driver::toolchains;
22using namespace clang;
23using namespace llvm::opt;
24
25void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) {
26 assert(!Path.empty())((!Path.empty()) ? static_cast<void> (0) : __assert_fail
("!Path.empty()", "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/clang/lib/Driver/ToolChains/AMDGPU.cpp"
, 26, __PRETTY_FUNCTION__))
;
27
28 const StringRef Suffix(".bc");
29 const StringRef Suffix2(".amdgcn.bc");
30
31 std::error_code EC;
32 for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE;
33 !EC && LI != LE; LI = LI.increment(EC)) {
34 StringRef FilePath = LI->path();
35 StringRef FileName = llvm::sys::path::filename(FilePath);
36 if (!FileName.endswith(Suffix))
37 continue;
38
39 StringRef BaseName;
40 if (FileName.endswith(Suffix2))
41 BaseName = FileName.drop_back(Suffix2.size());
42 else if (FileName.endswith(Suffix))
43 BaseName = FileName.drop_back(Suffix.size());
44
45 if (BaseName == "ocml") {
46 OCML = FilePath;
47 } else if (BaseName == "ockl") {
48 OCKL = FilePath;
49 } else if (BaseName == "opencl") {
50 OpenCL = FilePath;
51 } else if (BaseName == "hip") {
52 HIP = FilePath;
53 } else if (BaseName == "oclc_finite_only_off") {
54 FiniteOnly.Off = FilePath;
55 } else if (BaseName == "oclc_finite_only_on") {
56 FiniteOnly.On = FilePath;
57 } else if (BaseName == "oclc_daz_opt_on") {
58 DenormalsAreZero.On = FilePath;
59 } else if (BaseName == "oclc_daz_opt_off") {
60 DenormalsAreZero.Off = FilePath;
61 } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
62 CorrectlyRoundedSqrt.On = FilePath;
63 } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
64 CorrectlyRoundedSqrt.Off = FilePath;
65 } else if (BaseName == "oclc_unsafe_math_on") {
66 UnsafeMath.On = FilePath;
67 } else if (BaseName == "oclc_unsafe_math_off") {
68 UnsafeMath.Off = FilePath;
69 } else if (BaseName == "oclc_wavefrontsize64_on") {
70 WavefrontSize64.On = FilePath;
71 } else if (BaseName == "oclc_wavefrontsize64_off") {
72 WavefrontSize64.Off = FilePath;
73 } else {
74 // Process all bitcode filenames that look like
75 // ocl_isa_version_XXX.amdgcn.bc
76 const StringRef DeviceLibPrefix = "oclc_isa_version_";
77 if (!BaseName.startswith(DeviceLibPrefix))
78 continue;
79
80 StringRef IsaVersionNumber =
81 BaseName.drop_front(DeviceLibPrefix.size());
82
83 llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber;
84 SmallString<8> Tmp;
85 LibDeviceMap.insert(
86 std::make_pair(GfxName.toStringRef(Tmp), FilePath.str()));
87 }
88 }
89}
90
91void RocmInstallationDetector::ParseHIPVersionFile(llvm::StringRef V) {
92 SmallVector<StringRef, 4> VersionParts;
93 V.split(VersionParts, '\n');
94 unsigned Major;
12
'Major' declared without an initial value
95 unsigned Minor;
96 for (auto Part : VersionParts) {
13
Assuming '__begin1' is equal to '__end1'
97 auto Splits = Part.split('=');
98 if (Splits.first == "HIP_VERSION_MAJOR")
99 Splits.second.getAsInteger(0, Major);
100 else if (Splits.first == "HIP_VERSION_MINOR")
101 Splits.second.getAsInteger(0, Minor);
102 else if (Splits.first == "HIP_VERSION_PATCH")
103 VersionPatch = Splits.second.str();
104 }
105 VersionMajorMinor = llvm::VersionTuple(Major, Minor);
14
1st function call argument is an uninitialized value
106 DetectedVersion =
107 (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
108}
109
110// For candidate specified by --rocm-path we do not do strict check.
111SmallVector<RocmInstallationDetector::Candidate, 4>
112RocmInstallationDetector::getInstallationPathCandidates() {
113 SmallVector<Candidate, 4> Candidates;
114 if (!RocmPathArg.empty()) {
115 Candidates.emplace_back(RocmPathArg.str());
116 return Candidates;
117 }
118
119 // Try to find relative to the compiler binary.
120 const char *InstallDir = D.getInstalledDir();
121
122 // Check both a normal Unix prefix position of the clang binary, as well as
123 // the Windows-esque layout the ROCm packages use with the host architecture
124 // subdirectory of bin.
125
126 // Strip off directory (usually bin)
127 StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
128 StringRef ParentName = llvm::sys::path::filename(ParentDir);
129
130 // Some builds use bin/{host arch}, so go up again.
131 if (ParentName == "bin") {
132 ParentDir = llvm::sys::path::parent_path(ParentDir);
133 ParentName = llvm::sys::path::filename(ParentDir);
134 }
135
136 // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin
137 if (ParentName == "llvm")
138 ParentDir = llvm::sys::path::parent_path(ParentDir);
139
140 Candidates.emplace_back(ParentDir.str(), /*StrictChecking=*/true);
141
142 // Device library may be installed in clang resource directory.
143 Candidates.emplace_back(D.ResourceDir, /*StrictChecking=*/true);
144
145 Candidates.emplace_back(D.SysRoot + "/opt/rocm", /*StrictChecking=*/true);
146 return Candidates;
147}
148
// Construct the detector from the driver arguments.
//
// Records the values of OPT_rocm_path_EQ and OPT_rocm_device_lib_path_EQ,
// establishes the HIP version (from OPT_hip_version_EQ when present, otherwise
// from the DefaultVersion* constants), and then runs HIP runtime detection
// and/or device library detection depending on DetectHIPRuntime and
// DetectDeviceLib. HostTriple is not used in this body.
149RocmInstallationDetector::RocmInstallationDetector(
150 const Driver &D, const llvm::Triple &HostTriple,
151 const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib)
152 : D(D) {
153 RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ);
154 RocmDeviceLibPathArg =
155 Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
// An explicit version argument is split on '.' into major, minor and patch.
156 if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
1
Assuming 'A' is null
2
Taking false branch
157 HIPVersionArg = A->getValue();
158 unsigned Major = 0;
159 unsigned Minor = 0;
160 SmallVector<StringRef, 3> Parts;
161 HIPVersionArg.split(Parts, '.');
162 if (Parts.size())
163 Parts[0].getAsInteger(0, Major);
164 if (Parts.size() > 1)
165 Parts[1].getAsInteger(0, Minor);
166 if (Parts.size() > 2)
167 VersionPatch = Parts[2].str();
// A missing patch component is reported as "0".
168 if (VersionPatch.empty())
169 VersionPatch = "0";
// NOTE(review): the getAsInteger results above are not checked; presumably a
// component that fails to parse keeps its initial 0 and is caught here --
// confirm. As written, this check also diagnoses versions whose major or
// minor component is literally 0.
170 if (Major == 0 || Minor == 0)
171 D.Diag(diag::err_drv_invalid_value)
172 << A->getAsString(Args) << HIPVersionArg;
173
174 VersionMajorMinor = llvm::VersionTuple(Major, Minor);
175 DetectedVersion =
176 (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str();
177 } else {
// No version argument: fall back to the built-in default version.
178 VersionPatch = DefaultVersionPatch;
179 VersionMajorMinor =
180 llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor);
181 DetectedVersion = (Twine(DefaultVersionMajor) + "." +
182 Twine(DefaultVersionMinor) + "." + VersionPatch)
183 .str();
184 }
185
186 if (DetectHIPRuntime)
3
Assuming 'DetectHIPRuntime' is true
4
Taking true branch
187 detectHIPRuntime();
5
Calling 'RocmInstallationDetector::detectHIPRuntime'
188 if (DetectDeviceLib)
189 detectDeviceLibrary();
190}
191
192void RocmInstallationDetector::detectDeviceLibrary() {
193 assert(LibDevicePath.empty())((LibDevicePath.empty()) ? static_cast<void> (0) : __assert_fail
("LibDevicePath.empty()", "/build/llvm-toolchain-snapshot-12~++20200926111128+c6c5629f2fb/clang/lib/Driver/ToolChains/AMDGPU.cpp"
, 193, __PRETTY_FUNCTION__))
;
194
195 if (!RocmDeviceLibPathArg.empty())
196 LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1];
197 else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH"))
198 LibDevicePath = LibPathEnv;
199
200 auto &FS = D.getVFS();
201 if (!LibDevicePath.empty()) {
202 // Maintain compatability with HIP flag/envvar pointing directly at the
203 // bitcode library directory. This points directly at the library path instead
204 // of the rocm root installation.
205 if (!FS.exists(LibDevicePath))
206 return;
207
208 scanLibDevicePath(LibDevicePath);
209 HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty();
210 return;
211 }
212
213 // The install path situation in old versions of ROCm is a real mess, and
214 // use a different install layout. Multiple copies of the device libraries
215 // exist for each frontend project, and differ depending on which build
216 // system produced the packages. Standalone OpenCL builds also have a
217 // different directory structure from the ROCm OpenCL package.
218 auto Candidates = getInstallationPathCandidates();
219 for (const auto &Candidate : Candidates) {
220 auto CandidatePath = Candidate.Path;
221
222 // Check device library exists at the given path.
223 auto CheckDeviceLib = [&](StringRef Path) {
224 bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
225 if (CheckLibDevice && !FS.exists(Path))
226 return false;
227
228 scanLibDevicePath(Path);
229
230 if (!NoBuiltinLibs) {
231 // Check that the required non-target libraries are all available.
232 if (!allGenericLibsValid())
233 return false;
234
235 // Check that we have found at least one libdevice that we can link in
236 // if -nobuiltinlib hasn't been specified.
237 if (LibDeviceMap.empty())
238 return false;
239 }
240 return true;
241 };
242
243 // The possible structures are:
244 // - ${ROCM_ROOT}/amdgcn/bitcode/*
245 // - ${ROCM_ROOT}/lib/*
246 // - ${ROCM_ROOT}/lib/bitcode/*
247 // so try to detect these layouts.
248 static constexpr std::array<const char *, 2> SubDirsList[] = {
249 {"amdgcn", "bitcode"},
250 {"lib", ""},
251 {"lib", "bitcode"},
252 };
253
254 // Make a path by appending sub-directories to InstallPath.
255 auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) {
256 auto Path = CandidatePath;
257 for (auto SubDir : SubDirs)
258 llvm::sys::path::append(Path, SubDir);
259 return Path;
260 };
261
262 for (auto SubDirs : SubDirsList) {
263 LibDevicePath = MakePath(SubDirs);
264 HasDeviceLibrary = CheckDeviceLib(LibDevicePath);
265 if (HasDeviceLibrary)
266 return;
267 }
268 }
269}
270
// Probe the installation candidates for a HIP runtime.
//
// For each candidate whose path is non-empty and exists, InstallPath,
// BinPath, IncludePath and LibPath are set and "<BinPath>/.hipVersion" is
// read. The first candidate that is accepted sets HasHIPRuntime to true and
// ends the search; if none is accepted HasHIPRuntime is set to false.
271void RocmInstallationDetector::detectHIPRuntime() {
272 auto Candidates = getInstallationPathCandidates();
273 auto &FS = D.getVFS();
274
275 for (const auto &Candidate : Candidates) {
6
Assuming '__begin1' is not equal to '__end1'
276 InstallPath = Candidate.Path;
277 if (InstallPath.empty() || !FS.exists(InstallPath))
7
Assuming the condition is false
8
Taking false branch
278 continue;
279
280 BinPath = InstallPath;
281 llvm::sys::path::append(BinPath, "bin");
282 IncludePath = InstallPath;
283 llvm::sys::path::append(IncludePath, "include");
284 LibPath = InstallPath;
285 llvm::sys::path::append(LibPath, "lib");
286
287 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile =
288 FS.getBufferForFile(BinPath + "/.hipVersion");
// Strictly checked candidates are rejected when the version file is missing.
289 if (!VersionFile && Candidate.StrictChecking)
290 continue;
291
// The version file is only parsed when no version was set via HIPVersionArg.
292 if (HIPVersionArg.empty() && VersionFile)
9
Assuming the condition is true
10
Taking true branch
293 ParseHIPVersionFile((*VersionFile)->getBuffer());
11
Calling 'RocmInstallationDetector::ParseHIPVersionFile'
294
295 HasHIPRuntime = true;
296 return;
297 }
298 HasHIPRuntime = false;
299}
300
301void RocmInstallationDetector::print(raw_ostream &OS) const {
302 if (hasHIPRuntime())
303 OS << "Found HIP installation: " << InstallPath << ", version "
304 << DetectedVersion << '\n';
305}
306
307void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
308 ArgStringList &CC1Args) const {
309 bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5);
310
311 if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
312 // HIP header includes standard library wrapper headers under clang
313 // cuda_wrappers directory. Since these wrapper headers include_next
314 // standard C++ headers, whereas libc++ headers include_next other clang
315 // headers. The include paths have to follow this order:
316 // - wrapper include path
317 // - standard C++ include path
318 // - other clang include path
319 // Since standard C++ and other clang include paths are added in other
320 // places after this function, here we only need to make sure wrapper
321 // include path is added.
322 //
323 // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs
324 // a workaround.
325 SmallString<128> P(D.ResourceDir);
326 if (UsesRuntimeWrapper)
327 llvm::sys::path::append(P, "include", "cuda_wrappers");
328 CC1Args.push_back("-internal-isystem");
329 CC1Args.push_back(DriverArgs.MakeArgString(P));
330 }
331
332 if (DriverArgs.hasArg(options::OPT_nogpuinc))
333 return;
334
335 if (!hasHIPRuntime()) {
336 D.Diag(diag::err_drv_no_hip_runtime);
337 return;
338 }
339
340 CC1Args.push_back("-internal-isystem");
341 CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
342 if (UsesRuntimeWrapper)
343 CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
344}
345
346void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
347 const InputInfo &Output,
348 const InputInfoList &Inputs,
349 const ArgList &Args,
350 const char *LinkingOutput) const {
351
352 std::string Linker = getToolChain().GetProgramPath(getShortName());
353 ArgStringList CmdArgs;
354 addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs);
355 AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA);
356 CmdArgs.push_back("-shared");
357 CmdArgs.push_back("-o");
358 CmdArgs.push_back(Output.getFilename());
359 C.addCommand(
360 std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileCurCP(),
361 Args.MakeArgString(Linker), CmdArgs, Inputs));
362}
363
364void amdgpu::getAMDGPUTargetFeatures(const Driver &D,
365 const llvm::Triple &Triple,
366 const llvm::opt::ArgList &Args,
367 std::vector<StringRef> &Features) {
368 if (const Arg *dAbi = Args.getLastArg(options::OPT_mamdgpu_debugger_abi))
369 D.Diag(diag::err_drv_clang_unsupported) << dAbi->getAsString(Args);
370
371 // Add target ID features to -target-feature options. No diagnostics should
372 // be emitted here since invalid target ID is diagnosed at other places.
373 StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ);
374 if (!TargetID.empty()) {
375 llvm::StringMap<bool> FeatureMap;
376 auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap);
377 if (OptionalGpuArch) {
378 StringRef GpuArch = OptionalGpuArch.getValue();
379 // Iterate through all possible target ID features for the given GPU.
380 // If it is mapped to true, add +feature.
381 // If it is mapped to false, add -feature.
382 // If it is not in the map (default), do not add it
383 for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) {
384 auto Pos = FeatureMap.find(Feature);
385 if (Pos == FeatureMap.end())
386 continue;
387 Features.push_back(Args.MakeArgStringRef(
388 (Twine(Pos->second ? "+" : "-") + Feature).str()));
389 }
390 }
391 }
392
393 if (Args.getLastArg(options::OPT_mwavefrontsize64)) {
394 Features.push_back("-wavefrontsize16");
395 Features.push_back("-wavefrontsize32");
396 Features.push_back("+wavefrontsize64");
397 }
398 if (Args.getLastArg(options::OPT_mno_wavefrontsize64)) {
399 Features.push_back("-wavefrontsize16");
400 Features.push_back("+wavefrontsize32");
401 Features.push_back("-wavefrontsize64");
402 }
403
404 handleTargetFeaturesGroup(
405 Args, Features, options::OPT_m_amdgpu_Features_Group);
406}
407
408/// AMDGPU Toolchain
409AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple,
410 const ArgList &Args)
411 : Generic_ELF(D, Triple, Args),
412 OptionsDefault({{options::OPT_O, "3"},
413 {options::OPT_cl_std_EQ, "CL1.2"}}) {}
414
415Tool *AMDGPUToolChain::buildLinker() const {
416 return new tools::amdgpu::Linker(*this);
417}
418
419DerivedArgList *
420AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch,
421 Action::OffloadKind DeviceOffloadKind) const {
422
423 DerivedArgList *DAL =
424 Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind);
425
426 const OptTable &Opts = getDriver().getOpts();
427
428 if (!DAL)
429 DAL = new DerivedArgList(Args.getBaseArgs());
430 for (auto *A : Args)
431 DAL->append(A);
432
433 if (!Args.getLastArgValue(options::OPT_x).equals("cl"))
434 return DAL;
435
436 // Phase 1 (.cl -> .bc)
437 if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) {
438 DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit()
439 ? options::OPT_m64
440 : options::OPT_m32));
441
442 // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately
443 // as they defined that way in Options.td
444 if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
445 options::OPT_Ofast))
446 DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
447 getOptionDefault(options::OPT_O));
448 }
449
450 return DAL;
451}
452
453bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget(
454 llvm::AMDGPU::GPUKind Kind) {
455
456 // Assume nothing without a specific target.
457 if (Kind == llvm::AMDGPU::GK_NONE)
458 return false;
459
460 const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
461
462 // Default to enabling f32 denormals by default on subtargets where fma is
463 // fast with denormals
464 const bool BothDenormAndFMAFast =
465 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) &&
466 (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32);
467 return !BothDenormAndFMAFast;
468}
469
470llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
471 const llvm::opt::ArgList &DriverArgs, const JobAction &JA,
472 const llvm::fltSemantics *FPType) const {
473 // Denormals should always be enabled for f16 and f64.
474 if (!FPType || FPType != &llvm::APFloat::IEEEsingle())
475 return llvm::DenormalMode::getIEEE();
476
477 if (JA.getOffloadingDeviceKind() == Action::OFK_HIP ||
478 JA.getOffloadingDeviceKind() == Action::OFK_Cuda) {
479 auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch());
480 auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch);
481 if (FPType && FPType == &llvm::APFloat::IEEEsingle() &&
482 DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
483 options::OPT_fno_cuda_flush_denormals_to_zero,
484 getDefaultDenormsAreZeroForTarget(Kind)))
485 return llvm::DenormalMode::getPreserveSign();
486
487 return llvm::DenormalMode::getIEEE();
488 }
489
490 const StringRef GpuArch = getGPUArch(DriverArgs);
491 auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
492
493 // TODO: There are way too many flags that change this. Do we need to check
494 // them all?
495 bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
496 getDefaultDenormsAreZeroForTarget(Kind);
497
498 // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are
499 // also implicit treated as zero (DAZ).
500 return DAZ ? llvm::DenormalMode::getPreserveSign() :
501 llvm::DenormalMode::getIEEE();
502}
503
504bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs,
505 llvm::AMDGPU::GPUKind Kind) {
506 const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
507 static bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
508
509 return !HasWave32 || DriverArgs.hasFlag(
510 options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
511}
512
513
514/// ROCM Toolchain
515ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
516 const ArgList &Args)
517 : AMDGPUToolChain(D, Triple, Args) {
518 RocmInstallation.detectDeviceLibrary();
519}
520
521void AMDGPUToolChain::addClangTargetOptions(
522 const llvm::opt::ArgList &DriverArgs,
523 llvm::opt::ArgStringList &CC1Args,
524 Action::OffloadKind DeviceOffloadingKind) const {
525 // Allow using target ID in -mcpu.
526 translateTargetID(DriverArgs, CC1Args);
527 // Default to "hidden" visibility, as object level linking will not be
528 // supported for the foreseeable future.
529 if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
530 options::OPT_fvisibility_ms_compat)) {
531 CC1Args.push_back("-fvisibility");
532 CC1Args.push_back("hidden");
533 CC1Args.push_back("-fapply-global-visibility-to-externs");
534 }
535}
536
537StringRef
538AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const {
539 return getProcessorFromTargetID(
540 getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ));
541}
542
543StringRef
544AMDGPUToolChain::translateTargetID(const llvm::opt::ArgList &DriverArgs,
545 llvm::opt::ArgStringList &CC1Args) const {
546 StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
547 if (TargetID.empty())
548 return StringRef();
549
550 llvm::StringMap<bool> FeatureMap;
551 auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap);
552 if (!OptionalGpuArch) {
553 getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID;
554 return StringRef();
555 }
556
557 return OptionalGpuArch.getValue();
558}
559
560void ROCMToolChain::addClangTargetOptions(
561 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
562 Action::OffloadKind DeviceOffloadingKind) const {
563 AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
564 DeviceOffloadingKind);
565
566 // For the OpenCL case where there is no offload target, accept -nostdlib to
567 // disable bitcode linking.
568 if (DeviceOffloadingKind == Action::OFK_None &&
569 DriverArgs.hasArg(options::OPT_nostdlib))
570 return;
571
572 if (DriverArgs.hasArg(options::OPT_nogpulib))
573 return;
574
575 if (!RocmInstallation.hasDeviceLibrary()) {
576 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0;
577 return;
578 }
579
580 // Get the device name and canonicalize it
581 const StringRef GpuArch = getGPUArch(DriverArgs);
582 auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
583 const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
584 std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
585 if (LibDeviceFile.empty()) {
586 getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch;
587 return;
588 }
589
590 bool Wave64 = isWave64(DriverArgs, Kind);
591
592 // TODO: There are way too many flags that change this. Do we need to check
593 // them all?
594 bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
595 getDefaultDenormsAreZeroForTarget(Kind);
596 bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
597
598 bool UnsafeMathOpt =
599 DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
600 bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
601 bool CorrectSqrt =
602 DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
603
604 // Add the OpenCL specific bitcode library.
605 CC1Args.push_back("-mlink-builtin-bitcode");
606 CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
607
608 // Add the generic set of libraries.
609 RocmInstallation.addCommonBitcodeLibCC1Args(
610 DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
611 UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
612}
613
614void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
615 const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
616 StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
617 bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
618 static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
619
620 CC1Args.push_back(LinkBitcodeFlag);
621 CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
622
623 CC1Args.push_back(LinkBitcodeFlag);
624 CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
625
626 CC1Args.push_back(LinkBitcodeFlag);
627 CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
628
629 CC1Args.push_back(LinkBitcodeFlag);
630 CC1Args.push_back(DriverArgs.MakeArgString(
631 getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
632
633 CC1Args.push_back(LinkBitcodeFlag);
634 CC1Args.push_back(DriverArgs.MakeArgString(
635 getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
636
637 CC1Args.push_back(LinkBitcodeFlag);
638 CC1Args.push_back(
639 DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
640
641 CC1Args.push_back(LinkBitcodeFlag);
642 CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
643
644 CC1Args.push_back(LinkBitcodeFlag);
645 CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
646}