File: | tools/clang/lib/Sema/SemaCUDA.cpp |
Warning: | line 606, column 7 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// \file | |||
9 | /// This file implements semantic analysis for CUDA constructs. | |||
10 | /// | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "clang/AST/ASTContext.h" | |||
14 | #include "clang/AST/Decl.h" | |||
15 | #include "clang/AST/ExprCXX.h" | |||
16 | #include "clang/Basic/Cuda.h" | |||
17 | #include "clang/Lex/Preprocessor.h" | |||
18 | #include "clang/Sema/Lookup.h" | |||
19 | #include "clang/Sema/Sema.h" | |||
20 | #include "clang/Sema/SemaDiagnostic.h" | |||
21 | #include "clang/Sema/SemaInternal.h" | |||
22 | #include "clang/Sema/Template.h" | |||
23 | #include "llvm/ADT/Optional.h" | |||
24 | #include "llvm/ADT/SmallVector.h" | |||
25 | using namespace clang; | |||
26 | ||||
27 | void Sema::PushForceCUDAHostDevice() { | |||
28 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation")((getLangOpts().CUDA && "Should only be called during CUDA compilation" ) ? static_cast<void> (0) : __assert_fail ("getLangOpts().CUDA && \"Should only be called during CUDA compilation\"" , "/build/llvm-toolchain-snapshot-10~svn373517/tools/clang/lib/Sema/SemaCUDA.cpp" , 28, __PRETTY_FUNCTION__)); | |||
29 | ForceCUDAHostDeviceDepth++; | |||
30 | } | |||
31 | ||||
32 | bool Sema::PopForceCUDAHostDevice() { | |||
33 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation")((getLangOpts().CUDA && "Should only be called during CUDA compilation" ) ? static_cast<void> (0) : __assert_fail ("getLangOpts().CUDA && \"Should only be called during CUDA compilation\"" , "/build/llvm-toolchain-snapshot-10~svn373517/tools/clang/lib/Sema/SemaCUDA.cpp" , 33, __PRETTY_FUNCTION__)); | |||
34 | if (ForceCUDAHostDeviceDepth == 0) | |||
35 | return false; | |||
36 | ForceCUDAHostDeviceDepth--; | |||
37 | return true; | |||
38 | } | |||
39 | ||||
40 | ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc, | |||
41 | MultiExprArg ExecConfig, | |||
42 | SourceLocation GGGLoc) { | |||
43 | FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl(); | |||
44 | if (!ConfigDecl) | |||
45 | return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) | |||
46 | << getCudaConfigureFuncName()); | |||
47 | QualType ConfigQTy = ConfigDecl->getType(); | |||
48 | ||||
49 | DeclRefExpr *ConfigDR = new (Context) | |||
50 | DeclRefExpr(Context, ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); | |||
51 | MarkFunctionReferenced(LLLLoc, ConfigDecl); | |||
52 | ||||
53 | return BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, | |||
54 | /*IsExecConfig=*/true); | |||
55 | } | |||
56 | ||||
57 | Sema::CUDAFunctionTarget | |||
58 | Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) { | |||
59 | bool HasHostAttr = false; | |||
60 | bool HasDeviceAttr = false; | |||
61 | bool HasGlobalAttr = false; | |||
62 | bool HasInvalidTargetAttr = false; | |||
63 | for (const ParsedAttr &AL : Attrs) { | |||
64 | switch (AL.getKind()) { | |||
65 | case ParsedAttr::AT_CUDAGlobal: | |||
66 | HasGlobalAttr = true; | |||
67 | break; | |||
68 | case ParsedAttr::AT_CUDAHost: | |||
69 | HasHostAttr = true; | |||
70 | break; | |||
71 | case ParsedAttr::AT_CUDADevice: | |||
72 | HasDeviceAttr = true; | |||
73 | break; | |||
74 | case ParsedAttr::AT_CUDAInvalidTarget: | |||
75 | HasInvalidTargetAttr = true; | |||
76 | break; | |||
77 | default: | |||
78 | break; | |||
79 | } | |||
80 | } | |||
81 | ||||
82 | if (HasInvalidTargetAttr) | |||
83 | return CFT_InvalidTarget; | |||
84 | ||||
85 | if (HasGlobalAttr) | |||
86 | return CFT_Global; | |||
87 | ||||
88 | if (HasHostAttr && HasDeviceAttr) | |||
89 | return CFT_HostDevice; | |||
90 | ||||
91 | if (HasDeviceAttr) | |||
92 | return CFT_Device; | |||
93 | ||||
94 | return CFT_Host; | |||
95 | } | |||
96 | ||||
97 | template <typename A> | |||
98 | static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) { | |||
99 | return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { | |||
100 | return isa<A>(Attribute) && | |||
101 | !(IgnoreImplicitAttr && Attribute->isImplicit()); | |||
102 | }); | |||
103 | } | |||
104 | ||||
105 | /// IdentifyCUDATarget - Determine the CUDA compilation target for this function | |||
106 | Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D, | |||
107 | bool IgnoreImplicitHDAttr) { | |||
108 | // Code that lives outside a function is run on the host. | |||
109 | if (D == nullptr) | |||
110 | return CFT_Host; | |||
111 | ||||
112 | if (D->hasAttr<CUDAInvalidTargetAttr>()) | |||
113 | return CFT_InvalidTarget; | |||
114 | ||||
115 | if (D->hasAttr<CUDAGlobalAttr>()) | |||
116 | return CFT_Global; | |||
117 | ||||
118 | if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { | |||
119 | if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) | |||
120 | return CFT_HostDevice; | |||
121 | return CFT_Device; | |||
122 | } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { | |||
123 | return CFT_Host; | |||
124 | } else if (D->isImplicit() && !IgnoreImplicitHDAttr) { | |||
125 | // Some implicit declarations (like intrinsic functions) are not marked. | |||
126 | // Set the most lenient target on them for maximal flexibility. | |||
127 | return CFT_HostDevice; | |||
128 | } | |||
129 | ||||
130 | return CFT_Host; | |||
131 | } | |||
132 | ||||
133 | // * CUDA Call preference table | |||
134 | // | |||
135 | // F - from, | |||
136 | // T - to | |||
137 | // Ph - preference in host mode | |||
138 | // Pd - preference in device mode | |||
139 | // H - handled in (x) | |||
140 | // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. | |||
141 | // | |||
142 | // | F | T | Ph | Pd | H | | |||
143 | // |----+----+-----+-----+-----+ | |||
144 | // | d | d | N | N | (c) | | |||
145 | // | d | g | -- | -- | (a) | | |||
146 | // | d | h | -- | -- | (e) | | |||
147 | // | d | hd | HD | HD | (b) | | |||
148 | // | g | d | N | N | (c) | | |||
149 | // | g | g | -- | -- | (a) | | |||
150 | // | g | h | -- | -- | (e) | | |||
151 | // | g | hd | HD | HD | (b) | | |||
152 | // | h | d | -- | -- | (e) | | |||
153 | // | h | g | N | N | (c) | | |||
154 | // | h | h | N | N | (c) | | |||
155 | // | h | hd | HD | HD | (b) | | |||
156 | // | hd | d | WS | SS | (d) | | |||
157 | // | hd | g | SS | -- |(d/a)| | |||
158 | // | hd | h | SS | WS | (d) | | |||
159 | // | hd | hd | HD | HD | (b) | | |||
160 | ||||
/// Compute how preferable a call from \p Caller to \p Callee is, per the
/// preference table above.  \p Caller may be null (treated as host code by
/// IdentifyCUDATarget); \p Callee must not be.  The case labels (a)-(e)
/// below match the "H" column of the table.
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios: same side, or host launching a kernel, or a
  // kernel calling device code.
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened.  TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}
212 | ||||
213 | void Sema::EraseUnwantedCUDAMatches( | |||
214 | const FunctionDecl *Caller, | |||
215 | SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { | |||
216 | if (Matches.size() <= 1) | |||
217 | return; | |||
218 | ||||
219 | using Pair = std::pair<DeclAccessPair, FunctionDecl*>; | |||
220 | ||||
221 | // Gets the CUDA function preference for a call from Caller to Match. | |||
222 | auto GetCFP = [&](const Pair &Match) { | |||
223 | return IdentifyCUDAPreference(Caller, Match.second); | |||
224 | }; | |||
225 | ||||
226 | // Find the best call preference among the functions in Matches. | |||
227 | CUDAFunctionPreference BestCFP = GetCFP(*std::max_element( | |||
228 | Matches.begin(), Matches.end(), | |||
229 | [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); })); | |||
230 | ||||
231 | // Erase all functions with lower priority. | |||
232 | llvm::erase_if(Matches, | |||
233 | [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }); | |||
234 | } | |||
235 | ||||
236 | /// When an implicitly-declared special member has to invoke more than one | |||
237 | /// base/field special member, conflicts may occur in the targets of these | |||
238 | /// members. For example, if one base's member __host__ and another's is | |||
239 | /// __device__, it's a conflict. | |||
240 | /// This function figures out if the given targets \param Target1 and | |||
241 | /// \param Target2 conflict, and if they do not it fills in | |||
242 | /// \param ResolvedTarget with a target that resolves for both calls. | |||
243 | /// \return true if there's a conflict, false otherwise. | |||
244 | static bool | |||
245 | resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1, | |||
246 | Sema::CUDAFunctionTarget Target2, | |||
247 | Sema::CUDAFunctionTarget *ResolvedTarget) { | |||
248 | // Only free functions and static member functions may be global. | |||
249 | assert(Target1 != Sema::CFT_Global)((Target1 != Sema::CFT_Global) ? static_cast<void> (0) : __assert_fail ("Target1 != Sema::CFT_Global", "/build/llvm-toolchain-snapshot-10~svn373517/tools/clang/lib/Sema/SemaCUDA.cpp" , 249, __PRETTY_FUNCTION__)); | |||
250 | assert(Target2 != Sema::CFT_Global)((Target2 != Sema::CFT_Global) ? static_cast<void> (0) : __assert_fail ("Target2 != Sema::CFT_Global", "/build/llvm-toolchain-snapshot-10~svn373517/tools/clang/lib/Sema/SemaCUDA.cpp" , 250, __PRETTY_FUNCTION__)); | |||
251 | ||||
252 | if (Target1 == Sema::CFT_HostDevice) { | |||
253 | *ResolvedTarget = Target2; | |||
254 | } else if (Target2 == Sema::CFT_HostDevice) { | |||
255 | *ResolvedTarget = Target1; | |||
256 | } else if (Target1 != Target2) { | |||
257 | return true; | |||
258 | } else { | |||
259 | *ResolvedTarget = Target1; | |||
260 | } | |||
261 | ||||
262 | return false; | |||
263 | } | |||
264 | ||||
/// Infer the CUDA target (__host__/__device__ attributes) of an implicitly
/// defaulted special member \p MemberDecl of \p ClassDecl, based on the
/// targets of the base-class and field special members it must invoke.
/// \param CSM which special member is being defaulted.
/// \param ConstRHS whether the member takes a const-qualified argument
///        (affects special-member lookup for copy operations).
/// \param Diagnose emit a note on target conflicts when true.
/// \return true if a target conflict was found (the member is then marked
///         CUDAInvalidTargetAttr), false otherwise.
bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  // If the defaulted special member is defined lexically outside of its
  // owning class, or the special member already has explicit device or host
  // attributes, do not infer.
  bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent();
  bool HasH = MemberDecl->hasAttr<CUDAHostAttr>();
  bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>();
  bool HasExplicitAttr =
      (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) ||
      (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit());
  if (!InClass || HasExplicitAttr)
    return false;

  // Target inferred so far; unset until the first base/field lookup succeeds.
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member base on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  // Virtual bases are constructed/destroyed by the most-derived class only,
  // so they are irrelevant for abstract classes (never most-derived).
  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    // Non-record bases (e.g. dependent types) contribute nothing.
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    // No usable special member in this base; nothing to infer from.
    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        // Mark the member so later calls to it are diagnosed as invalid.
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    // Look through arrays to the element type; only record fields have
    // special members.
    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR.getMethod())
      continue;

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR.getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }


  // If no target was inferred, mark this member as __host__ __device__;
  // it's the least restrictive option that can be invoked from any target.
  bool NeedsH = true, NeedsD = true;
  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device)
      NeedsH = false;
    else if (InferredTarget.getValue() == CFT_Host)
      NeedsD = false;
  }

  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
  if (NeedsD && !HasD)
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  if (NeedsH && !HasH)
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));

  return false;
}
406 | ||||
407 | bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { | |||
408 | if (!CD->isDefined() && CD->isTemplateInstantiation()) | |||
409 | InstantiateFunctionDefinition(Loc, CD->getFirstDecl()); | |||
410 | ||||
411 | // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered | |||
412 | // empty at a point in the translation unit, if it is either a | |||
413 | // trivial constructor | |||
414 | if (CD->isTrivial()) | |||
415 | return true; | |||
416 | ||||
417 | // ... or it satisfies all of the following conditions: | |||
418 | // The constructor function has been defined. | |||
419 | // The constructor function has no parameters, | |||
420 | // and the function body is an empty compound statement. | |||
421 | if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) | |||
422 | return false; | |||
423 | ||||
424 | // Its class has no virtual functions and no virtual base classes. | |||
425 | if (CD->getParent()->isDynamicClass()) | |||
426 | return false; | |||
427 | ||||
428 | // The only form of initializer allowed is an empty constructor. | |||
429 | // This will recursively check all base classes and member initializers | |||
430 | if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) { | |||
431 | if (const CXXConstructExpr *CE = | |||
432 | dyn_cast<CXXConstructExpr>(CI->getInit())) | |||
433 | return isEmptyCudaConstructor(Loc, CE->getConstructor()); | |||
434 | return false; | |||
435 | })) | |||
436 | return false; | |||
437 | ||||
438 | return true; | |||
439 | } | |||
440 | ||||
441 | bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { | |||
442 | // No destructor -> no problem. | |||
443 | if (!DD) | |||
444 | return true; | |||
445 | ||||
446 | if (!DD->isDefined() && DD->isTemplateInstantiation()) | |||
447 | InstantiateFunctionDefinition(Loc, DD->getFirstDecl()); | |||
448 | ||||
449 | // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered | |||
450 | // empty at a point in the translation unit, if it is either a | |||
451 | // trivial constructor | |||
452 | if (DD->isTrivial()) | |||
453 | return true; | |||
454 | ||||
455 | // ... or it satisfies all of the following conditions: | |||
456 | // The destructor function has been defined. | |||
457 | // and the function body is an empty compound statement. | |||
458 | if (!DD->hasTrivialBody()) | |||
459 | return false; | |||
460 | ||||
461 | const CXXRecordDecl *ClassDecl = DD->getParent(); | |||
462 | ||||
463 | // Its class has no virtual functions and no virtual base classes. | |||
464 | if (ClassDecl->isDynamicClass()) | |||
465 | return false; | |||
466 | ||||
467 | // Only empty destructors are allowed. This will recursively check | |||
468 | // destructors for all base classes... | |||
469 | if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) { | |||
470 | if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl()) | |||
471 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); | |||
472 | return true; | |||
473 | })) | |||
474 | return false; | |||
475 | ||||
476 | // ... and member fields. | |||
477 | if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { | |||
478 | if (CXXRecordDecl *RD = Field->getType() | |||
479 | ->getBaseElementTypeUnsafe() | |||
480 | ->getAsCXXRecordDecl()) | |||
481 | return isEmptyCudaDestructor(Loc, RD->getDestructor()); | |||
482 | return true; | |||
483 | })) | |||
484 | return false; | |||
485 | ||||
486 | return true; | |||
487 | } | |||
488 | ||||
/// Check that the initializer of global variable \p VD is permitted under
/// CUDA's restrictions on dynamic initialization.  Marks \p VD invalid and
/// emits a diagnostic on violation.
void Sema::checkAllowedCUDAInitializer(VarDecl *VD) {
  // Only well-formed, initialized variables with global storage are subject
  // to these rules.
  if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage())
    return;
  const Expr *Init = VD->getInit();
  if (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>() ||
      VD->hasAttr<CUDASharedAttr>()) {
    // Static locals reach here only as __shared__ variables.
    assert(!VD->isStaticLocal() || VD->hasAttr<CUDASharedAttr>());
    bool AllowedInit = false;
    // Device-side globals may be constructed only by "empty" constructors.
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init))
      AllowedInit =
          isEmptyCudaConstructor(VD->getLocation(), CE->getConstructor());
    // We'll allow constant initializers even if it's a non-empty
    // constructor according to CUDA rules. This deviates from NVCC,
    // but allows us to handle things like constexpr constructors.
    if (!AllowedInit &&
        (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()))
      AllowedInit = VD->getInit()->isConstantInitializer(
          Context, VD->getType()->isReferenceType());

    // Also make sure that destructor, if there is one, is empty.
    if (AllowedInit)
      if (CXXRecordDecl *RD = VD->getType()->getAsCXXRecordDecl())
        AllowedInit =
            isEmptyCudaDestructor(VD->getLocation(), RD->getDestructor());

    if (!AllowedInit) {
      Diag(VD->getLocation(), VD->hasAttr<CUDASharedAttr>()
                                  ? diag::err_shared_var_init
                                  : diag::err_dynamic_var_init)
          << Init->getSourceRange();
      VD->setInvalidDecl();
    }
  } else {
    // This is a host-side global variable. Check that the initializer is
    // callable from the host side.
    const FunctionDecl *InitFn = nullptr;
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
      InitFn = CE->getDirectCallee();
    }
    if (InitFn) {
      CUDAFunctionTarget InitFnTarget = IdentifyCUDATarget(InitFn);
      if (InitFnTarget != CFT_Host && InitFnTarget != CFT_HostDevice) {
        Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer)
            << InitFnTarget << InitFn;
        Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn;
        VD->setInvalidDecl();
      }
    }
  }
}
541 | ||||
542 | // With -fcuda-host-device-constexpr, an unattributed constexpr function is | |||
543 | // treated as implicitly __host__ __device__, unless: | |||
544 | // * it is a variadic function (device-side variadic functions are not | |||
545 | // allowed), or | |||
546 | // * a __device__ function with this signature was already declared, in which | |||
547 | // case in which case we output an error, unless the __device__ decl is in a | |||
548 | // system header, in which case we leave the constexpr function unattributed. | |||
549 | // | |||
550 | // In addition, all function decls are treated as __host__ __device__ when | |||
551 | // ForceCUDAHostDeviceDepth > 0 (corresponding to code within a | |||
552 | // #pragma clang force_cuda_host_device_begin/end | |||
553 | // pair). | |||
/// Possibly add implicit __host__ __device__ attributes to \p NewD; see the
/// comment above for the exact rules.  \p Previous holds prior declarations
/// with the same name, used to detect a conflicting plain __device__
/// overload.
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  // Inside a #pragma clang force_cuda_host_device region every declaration
  // becomes __host__ __device__, regardless of the rules below.
  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  // Implicit HD applies (under -fcuda-host-device-constexpr) only to
  // non-variadic constexpr functions with no explicit CUDA target attribute.
  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    // Look through using-declarations to the underlying function.
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD;
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  // No conflicting __device__ overload: make the constexpr function HD.
  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
602 | ||||
603 | // Do we know that we will eventually codegen the given function? | |||
604 | static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) { | |||
605 | // Templates are emitted when they're instantiated. | |||
606 | if (FD->isDependentContext()) | |||
| ||||
607 | return false; | |||
608 | ||||
609 | // When compiling for device, host functions are never emitted. Similarly, | |||
610 | // when compiling for host, device and global functions are never emitted. | |||
611 | // (Technically, we do emit a host-side stub for global functions, but this | |||
612 | // doesn't count for our purposes here.) | |||
613 | Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD); | |||
614 | if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host) | |||
615 | return false; | |||
616 | if (!S.getLangOpts().CUDAIsDevice && | |||
617 | (T == Sema::CFT_Device || T == Sema::CFT_Global)) | |||
618 | return false; | |||
619 | ||||
620 | // Check whether this function is externally visible -- if so, it's | |||
621 | // known-emitted. | |||
622 | // | |||
623 | // We have to check the GVA linkage of the function's *definition* -- if we | |||
624 | // only have a declaration, we don't know whether or not the function will be | |||
625 | // emitted, because (say) the definition could include "inline". | |||
626 | FunctionDecl *Def = FD->getDefinition(); | |||
627 | ||||
628 | if (Def && | |||
629 | !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def))) | |||
630 | return true; | |||
631 | ||||
632 | // Otherwise, the function is known-emitted if it's in our set of | |||
633 | // known-emitted functions. | |||
634 | return S.DeviceKnownEmittedFns.count(FD) > 0; | |||
635 | } | |||
636 | ||||
/// Create a diagnostic builder that fires only when the current context is
/// (or may become) device code: immediately in __device__/__global__
/// functions, deferred in HD functions compiled for device until they are
/// known-emitted, and suppressed otherwise.
Sema::DeviceDiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                   unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        // NOTE(review): dyn_cast yields null when CurContext is not a
        // FunctionDecl; presumably CFT_HostDevice implies it is one, but
        // IsKnownEmitted should tolerate null (flagged by the analyzer) --
        // confirm.
        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
                   ? DeviceDiagBuilder::K_ImmediateWithCallStack
                   : DeviceDiagBuilder::K_Deferred;
      }
      return DeviceDiagBuilder::K_Nop;

    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}
663 | ||||
/// Mirror of CUDADiagIfDeviceCode for host-side diagnostics: immediate in
/// __host__ functions, deferred in HD functions compiled for host until they
/// are known-emitted, and suppressed otherwise.
Sema::DeviceDiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  DeviceDiagBuilder::Kind DiagKind = [this] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return DeviceDiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return DeviceDiagBuilder::K_Nop;

      // NOTE(review): dyn_cast yields null when CurContext is not a
      // FunctionDecl; IsKnownEmitted should tolerate null -- confirm.
      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
                 ? DeviceDiagBuilder::K_ImmediateWithCallStack
                 : DeviceDiagBuilder::K_Deferred;
    default:
      return DeviceDiagBuilder::K_Nop;
    }
  }();
  return DeviceDiagBuilder(DiagKind, Loc, DiagID,
                           dyn_cast<FunctionDecl>(CurContext), *this);
}
688 | ||||
/// Check whether a call from the current context to \p Callee is permitted
/// under CUDA's host/device target rules. Also maintains the device call
/// graph (or marks the callee known-emitted) so that deferred diagnostics can
/// be replayed once a caller is known to be emitted.
/// \returns true if the call is allowed, or if only a deferred error was
/// recorded; false if an immediate error was emitted.
bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");

  // Calls in unevaluated or constant-evaluated contexts are never emitted as
  // code, so no cross-target checking is needed for them.
  auto &ExprEvalCtx = ExprEvalContexts.back();
  if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated())
    return true;

  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
  if (CallerKnownEmitted) {
    // Host-side references to a __global__ function refer to the stub, so the
    // function itself is never emitted and therefore should not be marked.
    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
      markKnownEmitted(*this, Caller, Callee, Loc, IsKnownEmitted);
  } else {
    // If we have
    // host fn calls kernel fn calls host+device,
    // the HD function does not get instantiated on the host. We model this by
    // omitting at the call to the kernel from the callgraph. This ensures
    // that, when compiling for host, only HD functions actually called from the
    // host get marked as known-emitted.
    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
      DeviceCallGraph[Caller].insert({Callee, Loc});
  }

  // Decide how (or whether) to diagnose, based on how compatible the
  // caller/callee targets are.
  DeviceDiagBuilder::Kind DiagKind = [this, Caller, Callee,
                                      CallerKnownEmitted] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return DeviceDiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? DeviceDiagBuilder::K_ImmediateWithCallStack
                                : DeviceDiagBuilder::K_Deferred;
    default:
      // Same-side or otherwise-preferred calls are fine.
      return DeviceDiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == DeviceDiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  // Primary error plus a note pointing at the callee's declaration.
  DeviceDiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  DeviceDiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                    Caller, *this)
      << Callee;
  // Only immediate diagnostics make the call ill-formed right now; deferred
  // ones allow parsing to continue.
  return DiagKind != DeviceDiagBuilder::K_Immediate &&
         DiagKind != DeviceDiagBuilder::K_ImmediateWithCallStack;
}
757 | ||||
758 | void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) { | |||
759 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation")((getLangOpts().CUDA && "Should only be called during CUDA compilation" ) ? static_cast<void> (0) : __assert_fail ("getLangOpts().CUDA && \"Should only be called during CUDA compilation\"" , "/build/llvm-toolchain-snapshot-10~svn373517/tools/clang/lib/Sema/SemaCUDA.cpp" , 759, __PRETTY_FUNCTION__)); | |||
760 | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) | |||
761 | return; | |||
762 | FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext); | |||
763 | if (!CurFn) | |||
764 | return; | |||
765 | CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn); | |||
766 | if (Target == CFT_Global || Target == CFT_Device) { | |||
767 | Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); | |||
768 | } else if (Target == CFT_HostDevice) { | |||
769 | Method->addAttr(CUDADeviceAttr::CreateImplicit(Context)); | |||
770 | Method->addAttr(CUDAHostAttr::CreateImplicit(Context)); | |||
771 | } | |||
772 | } | |||
773 | ||||
774 | void Sema::checkCUDATargetOverload(FunctionDecl *NewFD, | |||
775 | const LookupResult &Previous) { | |||
776 | assert(getLangOpts().CUDA && "Should only be called during CUDA compilation")((getLangOpts().CUDA && "Should only be called during CUDA compilation" ) ? static_cast<void> (0) : __assert_fail ("getLangOpts().CUDA && \"Should only be called during CUDA compilation\"" , "/build/llvm-toolchain-snapshot-10~svn373517/tools/clang/lib/Sema/SemaCUDA.cpp" , 776, __PRETTY_FUNCTION__)); | |||
777 | CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD); | |||
778 | for (NamedDecl *OldND : Previous) { | |||
779 | FunctionDecl *OldFD = OldND->getAsFunction(); | |||
780 | if (!OldFD) | |||
781 | continue; | |||
782 | ||||
783 | CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD); | |||
784 | // Don't allow HD and global functions to overload other functions with the | |||
785 | // same signature. We allow overloading based on CUDA attributes so that | |||
786 | // functions can have different implementations on the host and device, but | |||
787 | // HD/global functions "exist" in some sense on both the host and device, so | |||
788 | // should have the same implementation on both sides. | |||
789 | if (NewTarget != OldTarget && | |||
790 | ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) || | |||
791 | (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) && | |||
792 | !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false, | |||
793 | /* ConsiderCudaAttrs = */ false)) { | |||
794 | Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) | |||
795 | << NewTarget << NewFD->getDeclName() << OldTarget << OldFD; | |||
796 | Diag(OldFD->getLocation(), diag::note_previous_declaration); | |||
797 | NewFD->setInvalidDecl(); | |||
798 | break; | |||
799 | } | |||
800 | } | |||
801 | } | |||
802 | ||||
803 | template <typename AttrTy> | |||
804 | static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, | |||
805 | const FunctionDecl &TemplateFD) { | |||
806 | if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { | |||
807 | AttrTy *Clone = Attribute->clone(S.Context); | |||
808 | Clone->setInherited(true); | |||
809 | FD->addAttr(Clone); | |||
810 | } | |||
811 | } | |||
812 | ||||
813 | void Sema::inheritCUDATargetAttrs(FunctionDecl *FD, | |||
814 | const FunctionTemplateDecl &TD) { | |||
815 | const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); | |||
816 | copyAttrIfPresent<CUDAGlobalAttr>(*this, FD, TemplateFD); | |||
817 | copyAttrIfPresent<CUDAHostAttr>(*this, FD, TemplateFD); | |||
818 | copyAttrIfPresent<CUDADeviceAttr>(*this, FD, TemplateFD); | |||
819 | } | |||
820 | ||||
821 | std::string Sema::getCudaConfigureFuncName() const { | |||
822 | if (getLangOpts().HIP) | |||
823 | return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration" | |||
824 | : "hipConfigureCall"; | |||
825 | ||||
826 | // New CUDA kernel launch sequence. | |||
827 | if (CudaFeatureEnabled(Context.getTargetInfo().getSDKVersion(), | |||
828 | CudaFeature::CUDA_USES_NEW_LAUNCH)) | |||
829 | return "__cudaPushCallConfiguration"; | |||
830 | ||||
831 | // Legacy CUDA kernel configuration call | |||
832 | return "cudaConfigureCall"; | |||
833 | } |