File: | clang/lib/Driver/ToolChains/AMDGPU.cpp |
Warning: | line 105, column 23 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===--- AMDGPU.cpp - AMDGPU ToolChain Implementations ----------*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | ||||
9 | #include "AMDGPU.h" | |||
10 | #include "CommonArgs.h" | |||
11 | #include "InputInfo.h" | |||
12 | #include "clang/Basic/TargetID.h" | |||
13 | #include "clang/Driver/Compilation.h" | |||
14 | #include "clang/Driver/DriverDiagnostic.h" | |||
15 | #include "llvm/Option/ArgList.h" | |||
16 | #include "llvm/Support/Path.h" | |||
17 | #include "llvm/Support/VirtualFileSystem.h" | |||
18 | ||||
19 | using namespace clang::driver; | |||
20 | using namespace clang::driver::tools; | |||
21 | using namespace clang::driver::toolchains; | |||
22 | using namespace clang; | |||
23 | using namespace llvm::opt; | |||
24 | ||||
25 | void RocmInstallationDetector::scanLibDevicePath(llvm::StringRef Path) { | |||
26 | assert(!Path.empty())((!Path.empty()) ? static_cast<void> (0) : __assert_fail ("!Path.empty()", "/build/llvm-toolchain-snapshot-12~++20210105111114+53a341a61d1f/clang/lib/Driver/ToolChains/AMDGPU.cpp" , 26, __PRETTY_FUNCTION__)); | |||
27 | ||||
28 | const StringRef Suffix(".bc"); | |||
29 | const StringRef Suffix2(".amdgcn.bc"); | |||
30 | ||||
31 | std::error_code EC; | |||
32 | for (llvm::vfs::directory_iterator LI = D.getVFS().dir_begin(Path, EC), LE; | |||
33 | !EC && LI != LE; LI = LI.increment(EC)) { | |||
34 | StringRef FilePath = LI->path(); | |||
35 | StringRef FileName = llvm::sys::path::filename(FilePath); | |||
36 | if (!FileName.endswith(Suffix)) | |||
37 | continue; | |||
38 | ||||
39 | StringRef BaseName; | |||
40 | if (FileName.endswith(Suffix2)) | |||
41 | BaseName = FileName.drop_back(Suffix2.size()); | |||
42 | else if (FileName.endswith(Suffix)) | |||
43 | BaseName = FileName.drop_back(Suffix.size()); | |||
44 | ||||
45 | if (BaseName == "ocml") { | |||
46 | OCML = FilePath; | |||
47 | } else if (BaseName == "ockl") { | |||
48 | OCKL = FilePath; | |||
49 | } else if (BaseName == "opencl") { | |||
50 | OpenCL = FilePath; | |||
51 | } else if (BaseName == "hip") { | |||
52 | HIP = FilePath; | |||
53 | } else if (BaseName == "oclc_finite_only_off") { | |||
54 | FiniteOnly.Off = FilePath; | |||
55 | } else if (BaseName == "oclc_finite_only_on") { | |||
56 | FiniteOnly.On = FilePath; | |||
57 | } else if (BaseName == "oclc_daz_opt_on") { | |||
58 | DenormalsAreZero.On = FilePath; | |||
59 | } else if (BaseName == "oclc_daz_opt_off") { | |||
60 | DenormalsAreZero.Off = FilePath; | |||
61 | } else if (BaseName == "oclc_correctly_rounded_sqrt_on") { | |||
62 | CorrectlyRoundedSqrt.On = FilePath; | |||
63 | } else if (BaseName == "oclc_correctly_rounded_sqrt_off") { | |||
64 | CorrectlyRoundedSqrt.Off = FilePath; | |||
65 | } else if (BaseName == "oclc_unsafe_math_on") { | |||
66 | UnsafeMath.On = FilePath; | |||
67 | } else if (BaseName == "oclc_unsafe_math_off") { | |||
68 | UnsafeMath.Off = FilePath; | |||
69 | } else if (BaseName == "oclc_wavefrontsize64_on") { | |||
70 | WavefrontSize64.On = FilePath; | |||
71 | } else if (BaseName == "oclc_wavefrontsize64_off") { | |||
72 | WavefrontSize64.Off = FilePath; | |||
73 | } else { | |||
74 | // Process all bitcode filenames that look like | |||
75 | // ocl_isa_version_XXX.amdgcn.bc | |||
76 | const StringRef DeviceLibPrefix = "oclc_isa_version_"; | |||
77 | if (!BaseName.startswith(DeviceLibPrefix)) | |||
78 | continue; | |||
79 | ||||
80 | StringRef IsaVersionNumber = | |||
81 | BaseName.drop_front(DeviceLibPrefix.size()); | |||
82 | ||||
83 | llvm::Twine GfxName = Twine("gfx") + IsaVersionNumber; | |||
84 | SmallString<8> Tmp; | |||
85 | LibDeviceMap.insert( | |||
86 | std::make_pair(GfxName.toStringRef(Tmp), FilePath.str())); | |||
87 | } | |||
88 | } | |||
89 | } | |||
90 | ||||
91 | void RocmInstallationDetector::ParseHIPVersionFile(llvm::StringRef V) { | |||
92 | SmallVector<StringRef, 4> VersionParts; | |||
93 | V.split(VersionParts, '\n'); | |||
94 | unsigned Major; | |||
95 | unsigned Minor; | |||
96 | for (auto Part : VersionParts) { | |||
97 | auto Splits = Part.split('='); | |||
98 | if (Splits.first == "HIP_VERSION_MAJOR") | |||
99 | Splits.second.getAsInteger(0, Major); | |||
100 | else if (Splits.first == "HIP_VERSION_MINOR") | |||
101 | Splits.second.getAsInteger(0, Minor); | |||
102 | else if (Splits.first == "HIP_VERSION_PATCH") | |||
103 | VersionPatch = Splits.second.str(); | |||
104 | } | |||
105 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); | |||
| ||||
106 | DetectedVersion = | |||
107 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); | |||
108 | } | |||
109 | ||||
110 | // For candidate specified by --rocm-path we do not do strict check. | |||
111 | SmallVector<RocmInstallationDetector::Candidate, 4> | |||
112 | RocmInstallationDetector::getInstallationPathCandidates() { | |||
113 | SmallVector<Candidate, 4> Candidates; | |||
114 | if (!RocmPathArg.empty()) { | |||
115 | Candidates.emplace_back(RocmPathArg.str()); | |||
116 | return Candidates; | |||
117 | } | |||
118 | ||||
119 | // Try to find relative to the compiler binary. | |||
120 | const char *InstallDir = D.getInstalledDir(); | |||
121 | ||||
122 | // Check both a normal Unix prefix position of the clang binary, as well as | |||
123 | // the Windows-esque layout the ROCm packages use with the host architecture | |||
124 | // subdirectory of bin. | |||
125 | ||||
126 | // Strip off directory (usually bin) | |||
127 | StringRef ParentDir = llvm::sys::path::parent_path(InstallDir); | |||
128 | StringRef ParentName = llvm::sys::path::filename(ParentDir); | |||
129 | ||||
130 | // Some builds use bin/{host arch}, so go up again. | |||
131 | if (ParentName == "bin") { | |||
132 | ParentDir = llvm::sys::path::parent_path(ParentDir); | |||
133 | ParentName = llvm::sys::path::filename(ParentDir); | |||
134 | } | |||
135 | ||||
136 | // Some versions of the rocm llvm package install to /opt/rocm/llvm/bin | |||
137 | if (ParentName == "llvm") | |||
138 | ParentDir = llvm::sys::path::parent_path(ParentDir); | |||
139 | ||||
140 | Candidates.emplace_back(ParentDir.str(), /*StrictChecking=*/true); | |||
141 | ||||
142 | // Device library may be installed in clang resource directory. | |||
143 | Candidates.emplace_back(D.ResourceDir, /*StrictChecking=*/true); | |||
144 | ||||
145 | Candidates.emplace_back(D.SysRoot + "/opt/rocm", /*StrictChecking=*/true); | |||
146 | return Candidates; | |||
147 | } | |||
148 | ||||
149 | RocmInstallationDetector::RocmInstallationDetector( | |||
150 | const Driver &D, const llvm::Triple &HostTriple, | |||
151 | const llvm::opt::ArgList &Args, bool DetectHIPRuntime, bool DetectDeviceLib) | |||
152 | : D(D) { | |||
153 | RocmPathArg = Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ); | |||
154 | RocmDeviceLibPathArg = | |||
155 | Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); | |||
156 | if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { | |||
| ||||
157 | HIPVersionArg = A->getValue(); | |||
158 | unsigned Major = 0; | |||
159 | unsigned Minor = 0; | |||
160 | SmallVector<StringRef, 3> Parts; | |||
161 | HIPVersionArg.split(Parts, '.'); | |||
162 | if (Parts.size()) | |||
163 | Parts[0].getAsInteger(0, Major); | |||
164 | if (Parts.size() > 1) | |||
165 | Parts[1].getAsInteger(0, Minor); | |||
166 | if (Parts.size() > 2) | |||
167 | VersionPatch = Parts[2].str(); | |||
168 | if (VersionPatch.empty()) | |||
169 | VersionPatch = "0"; | |||
170 | if (Major == 0 || Minor == 0) | |||
171 | D.Diag(diag::err_drv_invalid_value) | |||
172 | << A->getAsString(Args) << HIPVersionArg; | |||
173 | ||||
174 | VersionMajorMinor = llvm::VersionTuple(Major, Minor); | |||
175 | DetectedVersion = | |||
176 | (Twine(Major) + "." + Twine(Minor) + "." + VersionPatch).str(); | |||
177 | } else { | |||
178 | VersionPatch = DefaultVersionPatch; | |||
179 | VersionMajorMinor = | |||
180 | llvm::VersionTuple(DefaultVersionMajor, DefaultVersionMinor); | |||
181 | DetectedVersion = (Twine(DefaultVersionMajor) + "." + | |||
182 | Twine(DefaultVersionMinor) + "." + VersionPatch) | |||
183 | .str(); | |||
184 | } | |||
185 | ||||
186 | if (DetectHIPRuntime) | |||
187 | detectHIPRuntime(); | |||
188 | if (DetectDeviceLib) | |||
189 | detectDeviceLibrary(); | |||
190 | } | |||
191 | ||||
192 | void RocmInstallationDetector::detectDeviceLibrary() { | |||
193 | assert(LibDevicePath.empty())((LibDevicePath.empty()) ? static_cast<void> (0) : __assert_fail ("LibDevicePath.empty()", "/build/llvm-toolchain-snapshot-12~++20210105111114+53a341a61d1f/clang/lib/Driver/ToolChains/AMDGPU.cpp" , 193, __PRETTY_FUNCTION__)); | |||
194 | ||||
195 | if (!RocmDeviceLibPathArg.empty()) | |||
196 | LibDevicePath = RocmDeviceLibPathArg[RocmDeviceLibPathArg.size() - 1]; | |||
197 | else if (const char *LibPathEnv = ::getenv("HIP_DEVICE_LIB_PATH")) | |||
198 | LibDevicePath = LibPathEnv; | |||
199 | ||||
200 | auto &FS = D.getVFS(); | |||
201 | if (!LibDevicePath.empty()) { | |||
202 | // Maintain compatability with HIP flag/envvar pointing directly at the | |||
203 | // bitcode library directory. This points directly at the library path instead | |||
204 | // of the rocm root installation. | |||
205 | if (!FS.exists(LibDevicePath)) | |||
206 | return; | |||
207 | ||||
208 | scanLibDevicePath(LibDevicePath); | |||
209 | HasDeviceLibrary = allGenericLibsValid() && !LibDeviceMap.empty(); | |||
210 | return; | |||
211 | } | |||
212 | ||||
213 | // The install path situation in old versions of ROCm is a real mess, and | |||
214 | // use a different install layout. Multiple copies of the device libraries | |||
215 | // exist for each frontend project, and differ depending on which build | |||
216 | // system produced the packages. Standalone OpenCL builds also have a | |||
217 | // different directory structure from the ROCm OpenCL package. | |||
218 | auto Candidates = getInstallationPathCandidates(); | |||
219 | for (const auto &Candidate : Candidates) { | |||
220 | auto CandidatePath = Candidate.Path; | |||
221 | ||||
222 | // Check device library exists at the given path. | |||
223 | auto CheckDeviceLib = [&](StringRef Path) { | |||
224 | bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking); | |||
225 | if (CheckLibDevice && !FS.exists(Path)) | |||
226 | return false; | |||
227 | ||||
228 | scanLibDevicePath(Path); | |||
229 | ||||
230 | if (!NoBuiltinLibs) { | |||
231 | // Check that the required non-target libraries are all available. | |||
232 | if (!allGenericLibsValid()) | |||
233 | return false; | |||
234 | ||||
235 | // Check that we have found at least one libdevice that we can link in | |||
236 | // if -nobuiltinlib hasn't been specified. | |||
237 | if (LibDeviceMap.empty()) | |||
238 | return false; | |||
239 | } | |||
240 | return true; | |||
241 | }; | |||
242 | ||||
243 | // The possible structures are: | |||
244 | // - ${ROCM_ROOT}/amdgcn/bitcode/* | |||
245 | // - ${ROCM_ROOT}/lib/* | |||
246 | // - ${ROCM_ROOT}/lib/bitcode/* | |||
247 | // so try to detect these layouts. | |||
248 | static constexpr std::array<const char *, 2> SubDirsList[] = { | |||
249 | {"amdgcn", "bitcode"}, | |||
250 | {"lib", ""}, | |||
251 | {"lib", "bitcode"}, | |||
252 | }; | |||
253 | ||||
254 | // Make a path by appending sub-directories to InstallPath. | |||
255 | auto MakePath = [&](const llvm::ArrayRef<const char *> &SubDirs) { | |||
256 | auto Path = CandidatePath; | |||
257 | for (auto SubDir : SubDirs) | |||
258 | llvm::sys::path::append(Path, SubDir); | |||
259 | return Path; | |||
260 | }; | |||
261 | ||||
262 | for (auto SubDirs : SubDirsList) { | |||
263 | LibDevicePath = MakePath(SubDirs); | |||
264 | HasDeviceLibrary = CheckDeviceLib(LibDevicePath); | |||
265 | if (HasDeviceLibrary) | |||
266 | return; | |||
267 | } | |||
268 | } | |||
269 | } | |||
270 | ||||
271 | void RocmInstallationDetector::detectHIPRuntime() { | |||
272 | auto Candidates = getInstallationPathCandidates(); | |||
273 | auto &FS = D.getVFS(); | |||
274 | ||||
275 | for (const auto &Candidate : Candidates) { | |||
276 | InstallPath = Candidate.Path; | |||
277 | if (InstallPath.empty() || !FS.exists(InstallPath)) | |||
278 | continue; | |||
279 | ||||
280 | BinPath = InstallPath; | |||
281 | llvm::sys::path::append(BinPath, "bin"); | |||
282 | IncludePath = InstallPath; | |||
283 | llvm::sys::path::append(IncludePath, "include"); | |||
284 | LibPath = InstallPath; | |||
285 | llvm::sys::path::append(LibPath, "lib"); | |||
286 | ||||
287 | llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> VersionFile = | |||
288 | FS.getBufferForFile(BinPath + "/.hipVersion"); | |||
289 | if (!VersionFile && Candidate.StrictChecking) | |||
290 | continue; | |||
291 | ||||
292 | if (HIPVersionArg.empty() && VersionFile) | |||
293 | ParseHIPVersionFile((*VersionFile)->getBuffer()); | |||
294 | ||||
295 | HasHIPRuntime = true; | |||
296 | return; | |||
297 | } | |||
298 | HasHIPRuntime = false; | |||
299 | } | |||
300 | ||||
301 | void RocmInstallationDetector::print(raw_ostream &OS) const { | |||
302 | if (hasHIPRuntime()) | |||
303 | OS << "Found HIP installation: " << InstallPath << ", version " | |||
304 | << DetectedVersion << '\n'; | |||
305 | } | |||
306 | ||||
307 | void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, | |||
308 | ArgStringList &CC1Args) const { | |||
309 | bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5); | |||
310 | ||||
311 | if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { | |||
312 | // HIP header includes standard library wrapper headers under clang | |||
313 | // cuda_wrappers directory. Since these wrapper headers include_next | |||
314 | // standard C++ headers, whereas libc++ headers include_next other clang | |||
315 | // headers. The include paths have to follow this order: | |||
316 | // - wrapper include path | |||
317 | // - standard C++ include path | |||
318 | // - other clang include path | |||
319 | // Since standard C++ and other clang include paths are added in other | |||
320 | // places after this function, here we only need to make sure wrapper | |||
321 | // include path is added. | |||
322 | // | |||
323 | // ROCm 3.5 does not fully support the wrapper headers. Therefore it needs | |||
324 | // a workaround. | |||
325 | SmallString<128> P(D.ResourceDir); | |||
326 | if (UsesRuntimeWrapper) | |||
327 | llvm::sys::path::append(P, "include", "cuda_wrappers"); | |||
328 | CC1Args.push_back("-internal-isystem"); | |||
329 | CC1Args.push_back(DriverArgs.MakeArgString(P)); | |||
330 | } | |||
331 | ||||
332 | if (DriverArgs.hasArg(options::OPT_nogpuinc)) | |||
333 | return; | |||
334 | ||||
335 | if (!hasHIPRuntime()) { | |||
336 | D.Diag(diag::err_drv_no_hip_runtime); | |||
337 | return; | |||
338 | } | |||
339 | ||||
340 | CC1Args.push_back("-internal-isystem"); | |||
341 | CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); | |||
342 | if (UsesRuntimeWrapper) | |||
343 | CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"}); | |||
344 | } | |||
345 | ||||
346 | void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, | |||
347 | const InputInfo &Output, | |||
348 | const InputInfoList &Inputs, | |||
349 | const ArgList &Args, | |||
350 | const char *LinkingOutput) const { | |||
351 | ||||
352 | std::string Linker = getToolChain().GetProgramPath(getShortName()); | |||
353 | ArgStringList CmdArgs; | |||
354 | addLinkerCompressDebugSectionsOption(getToolChain(), Args, CmdArgs); | |||
355 | AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); | |||
356 | CmdArgs.push_back("-shared"); | |||
357 | CmdArgs.push_back("-o"); | |||
358 | CmdArgs.push_back(Output.getFilename()); | |||
359 | C.addCommand(std::make_unique<Command>( | |||
360 | JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), | |||
361 | CmdArgs, Inputs, Output)); | |||
362 | } | |||
363 | ||||
364 | void amdgpu::getAMDGPUTargetFeatures(const Driver &D, | |||
365 | const llvm::Triple &Triple, | |||
366 | const llvm::opt::ArgList &Args, | |||
367 | std::vector<StringRef> &Features) { | |||
368 | // Add target ID features to -target-feature options. No diagnostics should | |||
369 | // be emitted here since invalid target ID is diagnosed at other places. | |||
370 | StringRef TargetID = Args.getLastArgValue(options::OPT_mcpu_EQ); | |||
371 | if (!TargetID.empty()) { | |||
372 | llvm::StringMap<bool> FeatureMap; | |||
373 | auto OptionalGpuArch = parseTargetID(Triple, TargetID, &FeatureMap); | |||
374 | if (OptionalGpuArch) { | |||
375 | StringRef GpuArch = OptionalGpuArch.getValue(); | |||
376 | // Iterate through all possible target ID features for the given GPU. | |||
377 | // If it is mapped to true, add +feature. | |||
378 | // If it is mapped to false, add -feature. | |||
379 | // If it is not in the map (default), do not add it | |||
380 | for (auto &&Feature : getAllPossibleTargetIDFeatures(Triple, GpuArch)) { | |||
381 | auto Pos = FeatureMap.find(Feature); | |||
382 | if (Pos == FeatureMap.end()) | |||
383 | continue; | |||
384 | Features.push_back(Args.MakeArgStringRef( | |||
385 | (Twine(Pos->second ? "+" : "-") + Feature).str())); | |||
386 | } | |||
387 | } | |||
388 | } | |||
389 | ||||
390 | if (Args.hasFlag(options::OPT_mwavefrontsize64, | |||
391 | options::OPT_mno_wavefrontsize64, false)) | |||
392 | Features.push_back("+wavefrontsize64"); | |||
393 | ||||
394 | handleTargetFeaturesGroup( | |||
395 | Args, Features, options::OPT_m_amdgpu_Features_Group); | |||
396 | } | |||
397 | ||||
398 | /// AMDGPU Toolchain | |||
399 | AMDGPUToolChain::AMDGPUToolChain(const Driver &D, const llvm::Triple &Triple, | |||
400 | const ArgList &Args) | |||
401 | : Generic_ELF(D, Triple, Args), | |||
402 | OptionsDefault( | |||
403 | {{options::OPT_O, "3"}, {options::OPT_cl_std_EQ, "CL1.2"}}) { | |||
404 | // Check code object version options. Emit warnings for legacy options | |||
405 | // and errors for the last invalid code object version options. | |||
406 | // It is done here to avoid repeated warning or error messages for | |||
407 | // each tool invocation. | |||
408 | (void)getOrCheckAMDGPUCodeObjectVersion(D, Args, /*Diagnose=*/true); | |||
409 | } | |||
410 | ||||
411 | Tool *AMDGPUToolChain::buildLinker() const { | |||
412 | return new tools::amdgpu::Linker(*this); | |||
413 | } | |||
414 | ||||
415 | DerivedArgList * | |||
416 | AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, | |||
417 | Action::OffloadKind DeviceOffloadKind) const { | |||
418 | ||||
419 | DerivedArgList *DAL = | |||
420 | Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); | |||
421 | ||||
422 | const OptTable &Opts = getDriver().getOpts(); | |||
423 | ||||
424 | if (!DAL) | |||
425 | DAL = new DerivedArgList(Args.getBaseArgs()); | |||
426 | ||||
427 | for (Arg *A : Args) { | |||
428 | if (!shouldSkipArgument(A)) | |||
429 | DAL->append(A); | |||
430 | } | |||
431 | ||||
432 | checkTargetID(*DAL); | |||
433 | ||||
434 | if (!Args.getLastArgValue(options::OPT_x).equals("cl")) | |||
435 | return DAL; | |||
436 | ||||
437 | // Phase 1 (.cl -> .bc) | |||
438 | if (Args.hasArg(options::OPT_c) && Args.hasArg(options::OPT_emit_llvm)) { | |||
439 | DAL->AddFlagArg(nullptr, Opts.getOption(getTriple().isArch64Bit() | |||
440 | ? options::OPT_m64 | |||
441 | : options::OPT_m32)); | |||
442 | ||||
443 | // Have to check OPT_O4, OPT_O0 & OPT_Ofast separately | |||
444 | // as they defined that way in Options.td | |||
445 | if (!Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4, | |||
446 | options::OPT_Ofast)) | |||
447 | DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O), | |||
448 | getOptionDefault(options::OPT_O)); | |||
449 | } | |||
450 | ||||
451 | return DAL; | |||
452 | } | |||
453 | ||||
454 | bool AMDGPUToolChain::getDefaultDenormsAreZeroForTarget( | |||
455 | llvm::AMDGPU::GPUKind Kind) { | |||
456 | ||||
457 | // Assume nothing without a specific target. | |||
458 | if (Kind == llvm::AMDGPU::GK_NONE) | |||
459 | return false; | |||
460 | ||||
461 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); | |||
462 | ||||
463 | // Default to enabling f32 denormals by default on subtargets where fma is | |||
464 | // fast with denormals | |||
465 | const bool BothDenormAndFMAFast = | |||
466 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_FMA_F32) && | |||
467 | (ArchAttr & llvm::AMDGPU::FEATURE_FAST_DENORMAL_F32); | |||
468 | return !BothDenormAndFMAFast; | |||
469 | } | |||
470 | ||||
471 | llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( | |||
472 | const llvm::opt::ArgList &DriverArgs, const JobAction &JA, | |||
473 | const llvm::fltSemantics *FPType) const { | |||
474 | // Denormals should always be enabled for f16 and f64. | |||
475 | if (!FPType || FPType != &llvm::APFloat::IEEEsingle()) | |||
476 | return llvm::DenormalMode::getIEEE(); | |||
477 | ||||
478 | if (JA.getOffloadingDeviceKind() == Action::OFK_HIP || | |||
479 | JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { | |||
480 | auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()); | |||
481 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch); | |||
482 | if (FPType && FPType == &llvm::APFloat::IEEEsingle() && | |||
483 | DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero, | |||
484 | options::OPT_fno_cuda_flush_denormals_to_zero, | |||
485 | getDefaultDenormsAreZeroForTarget(Kind))) | |||
486 | return llvm::DenormalMode::getPreserveSign(); | |||
487 | ||||
488 | return llvm::DenormalMode::getIEEE(); | |||
489 | } | |||
490 | ||||
491 | const StringRef GpuArch = getGPUArch(DriverArgs); | |||
492 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); | |||
493 | ||||
494 | // TODO: There are way too many flags that change this. Do we need to check | |||
495 | // them all? | |||
496 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || | |||
497 | getDefaultDenormsAreZeroForTarget(Kind); | |||
498 | ||||
499 | // Outputs are flushed to zero (FTZ), preserving sign. Denormal inputs are | |||
500 | // also implicit treated as zero (DAZ). | |||
501 | return DAZ ? llvm::DenormalMode::getPreserveSign() : | |||
502 | llvm::DenormalMode::getIEEE(); | |||
503 | } | |||
504 | ||||
505 | bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, | |||
506 | llvm::AMDGPU::GPUKind Kind) { | |||
507 | const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind); | |||
508 | bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32); | |||
509 | ||||
510 | return !HasWave32 || DriverArgs.hasFlag( | |||
511 | options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false); | |||
512 | } | |||
513 | ||||
514 | ||||
515 | /// ROCM Toolchain | |||
516 | ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, | |||
517 | const ArgList &Args) | |||
518 | : AMDGPUToolChain(D, Triple, Args) { | |||
519 | RocmInstallation.detectDeviceLibrary(); | |||
520 | } | |||
521 | ||||
522 | void AMDGPUToolChain::addClangTargetOptions( | |||
523 | const llvm::opt::ArgList &DriverArgs, | |||
524 | llvm::opt::ArgStringList &CC1Args, | |||
525 | Action::OffloadKind DeviceOffloadingKind) const { | |||
526 | // Default to "hidden" visibility, as object level linking will not be | |||
527 | // supported for the foreseeable future. | |||
528 | if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ, | |||
529 | options::OPT_fvisibility_ms_compat)) { | |||
530 | CC1Args.push_back("-fvisibility"); | |||
531 | CC1Args.push_back("hidden"); | |||
532 | CC1Args.push_back("-fapply-global-visibility-to-externs"); | |||
533 | } | |||
534 | } | |||
535 | ||||
536 | StringRef | |||
537 | AMDGPUToolChain::getGPUArch(const llvm::opt::ArgList &DriverArgs) const { | |||
538 | return getProcessorFromTargetID( | |||
539 | getTriple(), DriverArgs.getLastArgValue(options::OPT_mcpu_EQ)); | |||
540 | } | |||
541 | ||||
542 | void AMDGPUToolChain::checkTargetID( | |||
543 | const llvm::opt::ArgList &DriverArgs) const { | |||
544 | StringRef TargetID = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); | |||
545 | if (TargetID.empty()) | |||
546 | return; | |||
547 | ||||
548 | llvm::StringMap<bool> FeatureMap; | |||
549 | auto OptionalGpuArch = parseTargetID(getTriple(), TargetID, &FeatureMap); | |||
550 | if (!OptionalGpuArch) { | |||
551 | getDriver().Diag(clang::diag::err_drv_bad_target_id) << TargetID; | |||
552 | } | |||
553 | } | |||
554 | ||||
555 | void ROCMToolChain::addClangTargetOptions( | |||
556 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, | |||
557 | Action::OffloadKind DeviceOffloadingKind) const { | |||
558 | AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, | |||
559 | DeviceOffloadingKind); | |||
560 | ||||
561 | // For the OpenCL case where there is no offload target, accept -nostdlib to | |||
562 | // disable bitcode linking. | |||
563 | if (DeviceOffloadingKind == Action::OFK_None && | |||
564 | DriverArgs.hasArg(options::OPT_nostdlib)) | |||
565 | return; | |||
566 | ||||
567 | if (DriverArgs.hasArg(options::OPT_nogpulib)) | |||
568 | return; | |||
569 | ||||
570 | if (!RocmInstallation.hasDeviceLibrary()) { | |||
571 | getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 0; | |||
572 | return; | |||
573 | } | |||
574 | ||||
575 | // Get the device name and canonicalize it | |||
576 | const StringRef GpuArch = getGPUArch(DriverArgs); | |||
577 | auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch); | |||
578 | const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind); | |||
579 | std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch); | |||
580 | if (LibDeviceFile.empty()) { | |||
581 | getDriver().Diag(diag::err_drv_no_rocm_device_lib) << 1 << GpuArch; | |||
582 | return; | |||
583 | } | |||
584 | ||||
585 | bool Wave64 = isWave64(DriverArgs, Kind); | |||
586 | ||||
587 | // TODO: There are way too many flags that change this. Do we need to check | |||
588 | // them all? | |||
589 | bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) || | |||
590 | getDefaultDenormsAreZeroForTarget(Kind); | |||
591 | bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only); | |||
592 | ||||
593 | bool UnsafeMathOpt = | |||
594 | DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations); | |||
595 | bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math); | |||
596 | bool CorrectSqrt = | |||
597 | DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt); | |||
598 | ||||
599 | // Add the OpenCL specific bitcode library. | |||
600 | CC1Args.push_back("-mlink-builtin-bitcode"); | |||
601 | CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath())); | |||
602 | ||||
603 | // Add the generic set of libraries. | |||
604 | RocmInstallation.addCommonBitcodeLibCC1Args( | |||
605 | DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly, | |||
606 | UnsafeMathOpt, FastRelaxedMath, CorrectSqrt); | |||
607 | } | |||
608 | ||||
609 | void RocmInstallationDetector::addCommonBitcodeLibCC1Args( | |||
610 | const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, | |||
611 | StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly, | |||
612 | bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const { | |||
613 | static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode"; | |||
614 | ||||
615 | CC1Args.push_back(LinkBitcodeFlag); | |||
616 | CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath())); | |||
617 | ||||
618 | CC1Args.push_back(LinkBitcodeFlag); | |||
619 | CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath())); | |||
620 | ||||
621 | CC1Args.push_back(LinkBitcodeFlag); | |||
622 | CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ))); | |||
623 | ||||
624 | CC1Args.push_back(LinkBitcodeFlag); | |||
625 | CC1Args.push_back(DriverArgs.MakeArgString( | |||
626 | getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath))); | |||
627 | ||||
628 | CC1Args.push_back(LinkBitcodeFlag); | |||
629 | CC1Args.push_back(DriverArgs.MakeArgString( | |||
630 | getFiniteOnlyPath(FiniteOnly || FastRelaxedMath))); | |||
631 | ||||
632 | CC1Args.push_back(LinkBitcodeFlag); | |||
633 | CC1Args.push_back( | |||
634 | DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt))); | |||
635 | ||||
636 | CC1Args.push_back(LinkBitcodeFlag); | |||
637 | CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64))); | |||
638 | ||||
639 | CC1Args.push_back(LinkBitcodeFlag); | |||
640 | CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile)); | |||
641 | } | |||
642 | ||||
643 | bool AMDGPUToolChain::shouldSkipArgument(const llvm::opt::Arg *A) const { | |||
644 | Option O = A->getOption(); | |||
645 | if (O.matches(options::OPT_fPIE) || O.matches(options::OPT_fpie)) | |||
646 | return true; | |||
647 | return false; | |||
648 | } |