Bug Summary

File: build/source/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Warning: line 7826, column 9
2nd function call argument is an uninitialized value
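
The flagged line (7826) lies beyond the portion of the file reproduced below. The message is the analyzer's generic uninitialized-argument diagnostic from the core checkers: on some feasible path, a local variable is never written before being passed as the second argument of a call. A minimal sketch of the pattern, with hypothetical names rather than the flagged code:

    void use(int a, int b);

    void f(bool cond) {
      int Size;      // written only when 'cond' is true
      if (cond)
        Size = 8;
      use(0, Size);  // 2nd function call argument is an uninitialized value
    }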

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CGOpenMPRuntime.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm -resource-dir /usr/lib/llvm-17/lib/clang/17 -I tools/clang/lib/CodeGen -I /build/source/clang/lib/CodeGen -I /build/source/clang/include -I tools/clang/include -I include -I /build/source/llvm/include -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm=build-llvm -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm=build-llvm -fcoverage-prefix-map=/build/source/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -fdebug-prefix-map=/build/source/build-llvm=build-llvm -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-04-16-131055-16441-1 -x c++ /build/source/clang/lib/CodeGen/CGOpenMPRuntime.cpp
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
22#include "clang/AST/OpenMPClause.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/AST/StmtVisitor.h"
25#include "clang/Basic/BitmaskEnum.h"
26#include "clang/Basic/FileManager.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/SourceManager.h"
29#include "clang/CodeGen/ConstantInitBuilder.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/SmallBitVector.h"
33#include "llvm/ADT/StringExtras.h"
34#include "llvm/Bitcode/BitcodeReader.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/GlobalValue.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Format.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <numeric>
45#include <optional>
46
47using namespace clang;
48using namespace CodeGen;
49using namespace llvm::omp;
50
51namespace {
52/// Base class for handling code generation inside OpenMP regions.
53class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
54public:
55 /// Kinds of OpenMP regions used in codegen.
56 enum CGOpenMPRegionKind {
57 /// Region with outlined function for standalone 'parallel'
58 /// directive.
59 ParallelOutlinedRegion,
60 /// Region with outlined function for standalone 'task' directive.
61 TaskOutlinedRegion,
62 /// Region for constructs that do not require function outlining,
 63 /// like 'for', 'sections', 'atomic', etc.
64 InlinedRegion,
65 /// Region with outlined function for standalone 'target' directive.
66 TargetRegion,
67 };
68
69 CGOpenMPRegionInfo(const CapturedStmt &CS,
70 const CGOpenMPRegionKind RegionKind,
71 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
72 bool HasCancel)
73 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
74 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
75
76 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
77 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
78 bool HasCancel)
79 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
80 Kind(Kind), HasCancel(HasCancel) {}
81
82 /// Get a variable or parameter for storing global thread id
83 /// inside OpenMP construct.
84 virtual const VarDecl *getThreadIDVariable() const = 0;
85
86 /// Emit the captured statement body.
87 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
88
89 /// Get an LValue for the current ThreadID variable.
90 /// \return LValue for thread id variable. This LValue always has type int32*.
91 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
92
93 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
94
95 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
96
97 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
98
99 bool hasCancel() const { return HasCancel; }
100
101 static bool classof(const CGCapturedStmtInfo *Info) {
102 return Info->getKind() == CR_OpenMP;
103 }
104
105 ~CGOpenMPRegionInfo() override = default;
106
107protected:
108 CGOpenMPRegionKind RegionKind;
109 RegionCodeGenTy CodeGen;
110 OpenMPDirectiveKind Kind;
111 bool HasCancel;
112};
113
114/// API for captured statement code generation in OpenMP constructs.
115class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
116public:
117 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
118 const RegionCodeGenTy &CodeGen,
119 OpenMPDirectiveKind Kind, bool HasCancel,
120 StringRef HelperName)
121 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
122 HasCancel),
123 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
 124 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
125 }
126
127 /// Get a variable or parameter for storing global thread id
128 /// inside OpenMP construct.
129 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
130
131 /// Get the name of the capture helper.
132 StringRef getHelperName() const override { return HelperName; }
133
134 static bool classof(const CGCapturedStmtInfo *Info) {
135 return CGOpenMPRegionInfo::classof(Info) &&
136 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
137 ParallelOutlinedRegion;
138 }
139
140private:
141 /// A variable or parameter storing global thread id for OpenMP
142 /// constructs.
143 const VarDecl *ThreadIDVar;
144 StringRef HelperName;
145};
146
147/// API for captured statement code generation in OpenMP constructs.
148class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
149public:
150 class UntiedTaskActionTy final : public PrePostActionTy {
151 bool Untied;
152 const VarDecl *PartIDVar;
153 const RegionCodeGenTy UntiedCodeGen;
154 llvm::SwitchInst *UntiedSwitch = nullptr;
155
156 public:
157 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
158 const RegionCodeGenTy &UntiedCodeGen)
159 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
160 void Enter(CodeGenFunction &CGF) override {
161 if (Untied) {
162 // Emit task switching point.
163 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164 CGF.GetAddrOfLocalVar(PartIDVar),
165 PartIDVar->getType()->castAs<PointerType>());
166 llvm::Value *Res =
167 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
168 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
169 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
170 CGF.EmitBlock(DoneBB);
171 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
172 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
174 CGF.Builder.GetInsertBlock());
175 emitUntiedSwitch(CGF);
176 }
177 }
178 void emitUntiedSwitch(CodeGenFunction &CGF) const {
179 if (Untied) {
180 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
181 CGF.GetAddrOfLocalVar(PartIDVar),
182 PartIDVar->getType()->castAs<PointerType>());
183 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
184 PartIdLVal);
185 UntiedCodeGen(CGF);
186 CodeGenFunction::JumpDest CurPoint =
187 CGF.getJumpDestInCurrentScope(".untied.next.");
188 CGF.EmitBranch(CGF.ReturnBlock.getBlock());
189 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
190 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
191 CGF.Builder.GetInsertBlock());
192 CGF.EmitBranchThroughCleanup(CurPoint);
193 CGF.EmitBlock(CurPoint.getBlock());
194 }
195 }
196 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
197 };
198 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
199 const VarDecl *ThreadIDVar,
200 const RegionCodeGenTy &CodeGen,
201 OpenMPDirectiveKind Kind, bool HasCancel,
202 const UntiedTaskActionTy &Action)
203 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
204 ThreadIDVar(ThreadIDVar), Action(Action) {
 205 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
206 }
207
208 /// Get a variable or parameter for storing global thread id
209 /// inside OpenMP construct.
210 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
211
212 /// Get an LValue for the current ThreadID variable.
213 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
214
215 /// Get the name of the capture helper.
216 StringRef getHelperName() const override { return ".omp_outlined."; }
217
218 void emitUntiedSwitch(CodeGenFunction &CGF) override {
219 Action.emitUntiedSwitch(CGF);
220 }
221
222 static bool classof(const CGCapturedStmtInfo *Info) {
223 return CGOpenMPRegionInfo::classof(Info) &&
224 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
225 TaskOutlinedRegion;
226 }
227
228private:
229 /// A variable or parameter storing global thread id for OpenMP
230 /// constructs.
231 const VarDecl *ThreadIDVar;
232 /// Action for emitting code for untied tasks.
233 const UntiedTaskActionTy &Action;
234};
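
For orientation, the UntiedTaskActionTy machinery above implements the resumption points for source-level untied tasks: the runtime may suspend an untied task at a scheduling point and resume it later, so the outlined function re-enters through the part-id switch built in Enter() and emitUntiedSwitch(). A minimal example of the construct being lowered (illustrative only):

    #pragma omp task untied
    {
      phase_one();
    #pragma omp taskyield   // scheduling point: the task may be suspended here
      phase_two();          // resumed through a .untied.jmp. switch case
    }
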
235
236/// API for inlined captured statement code generation in OpenMP
237/// constructs.
238class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
239public:
240 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
241 const RegionCodeGenTy &CodeGen,
242 OpenMPDirectiveKind Kind, bool HasCancel)
243 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
244 OldCSI(OldCSI),
245 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
246
247 // Retrieve the value of the context parameter.
248 llvm::Value *getContextValue() const override {
249 if (OuterRegionInfo)
250 return OuterRegionInfo->getContextValue();
 251 llvm_unreachable("No context value for inlined OpenMP region");
252 }
253
254 void setContextValue(llvm::Value *V) override {
255 if (OuterRegionInfo) {
256 OuterRegionInfo->setContextValue(V);
257 return;
258 }
 259 llvm_unreachable("No context value for inlined OpenMP region");
260 }
261
262 /// Lookup the captured field decl for a variable.
263 const FieldDecl *lookup(const VarDecl *VD) const override {
264 if (OuterRegionInfo)
265 return OuterRegionInfo->lookup(VD);
 266 // If there is no outer outlined region, no need to look up in a list of
 267 // captured variables; we can use the original one.
268 return nullptr;
269 }
270
271 FieldDecl *getThisFieldDecl() const override {
272 if (OuterRegionInfo)
273 return OuterRegionInfo->getThisFieldDecl();
274 return nullptr;
275 }
276
277 /// Get a variable or parameter for storing global thread id
278 /// inside OpenMP construct.
279 const VarDecl *getThreadIDVariable() const override {
280 if (OuterRegionInfo)
281 return OuterRegionInfo->getThreadIDVariable();
282 return nullptr;
283 }
284
285 /// Get an LValue for the current ThreadID variable.
286 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
287 if (OuterRegionInfo)
288 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
 289 llvm_unreachable("No LValue for inlined OpenMP construct");
290 }
291
292 /// Get the name of the capture helper.
293 StringRef getHelperName() const override {
294 if (auto *OuterRegionInfo = getOldCSI())
295 return OuterRegionInfo->getHelperName();
 296 llvm_unreachable("No helper name for inlined OpenMP construct");
297 }
298
299 void emitUntiedSwitch(CodeGenFunction &CGF) override {
300 if (OuterRegionInfo)
301 OuterRegionInfo->emitUntiedSwitch(CGF);
302 }
303
304 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
305
306 static bool classof(const CGCapturedStmtInfo *Info) {
307 return CGOpenMPRegionInfo::classof(Info) &&
308 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
309 }
310
311 ~CGOpenMPInlinedRegionInfo() override = default;
312
313private:
314 /// CodeGen info about outer OpenMP region.
315 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
316 CGOpenMPRegionInfo *OuterRegionInfo;
317};
318
319/// API for captured statement code generation in OpenMP target
320/// constructs. For these captures, implicit parameters are used instead of the
321/// captured fields. The name of the target region has to be unique in a given
322/// application, so it is provided by the client, because only the client has
323/// the information to generate it.
324class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
325public:
326 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
327 const RegionCodeGenTy &CodeGen, StringRef HelperName)
328 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
329 /*HasCancel=*/false),
330 HelperName(HelperName) {}
331
332 /// This is unused for target regions because each starts executing
333 /// with a single thread.
334 const VarDecl *getThreadIDVariable() const override { return nullptr; }
335
336 /// Get the name of the capture helper.
337 StringRef getHelperName() const override { return HelperName; }
338
339 static bool classof(const CGCapturedStmtInfo *Info) {
340 return CGOpenMPRegionInfo::classof(Info) &&
341 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
342 }
343
344private:
345 StringRef HelperName;
346};
347
348static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
 349 llvm_unreachable("No codegen for expressions");
350}
351/// API for generation of expressions captured in an innermost OpenMP
352/// region.
353class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
354public:
355 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
356 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
357 OMPD_unknown,
358 /*HasCancel=*/false),
359 PrivScope(CGF) {
360 // Make sure the globals captured in the provided statement are local by
361 // using the privatization logic. We assume the same variable is not
362 // captured more than once.
363 for (const auto &C : CS.captures()) {
364 if (!C.capturesVariable() && !C.capturesVariableByCopy())
365 continue;
366
367 const VarDecl *VD = C.getCapturedVar();
368 if (VD->isLocalVarDeclOrParm())
369 continue;
370
371 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
372 /*RefersToEnclosingVariableOrCapture=*/false,
373 VD->getType().getNonReferenceType(), VK_LValue,
374 C.getLocation());
375 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
376 }
377 (void)PrivScope.Privatize();
378 }
379
380 /// Lookup the captured field decl for a variable.
381 const FieldDecl *lookup(const VarDecl *VD) const override {
382 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
383 return FD;
384 return nullptr;
385 }
386
387 /// Emit the captured statement body.
388 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
 389 llvm_unreachable("No body for expressions");
390 }
391
392 /// Get a variable or parameter for storing global thread id
393 /// inside OpenMP construct.
394 const VarDecl *getThreadIDVariable() const override {
 395 llvm_unreachable("No thread id for expressions");
396 }
397
398 /// Get the name of the capture helper.
399 StringRef getHelperName() const override {
 400 llvm_unreachable("No helper name for expressions");
401 }
402
403 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
404
405private:
406 /// Private scope to capture global variables.
407 CodeGenFunction::OMPPrivateScope PrivScope;
408};
409
410/// RAII for emitting code of OpenMP constructs.
411class InlinedOpenMPRegionRAII {
412 CodeGenFunction &CGF;
413 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
414 FieldDecl *LambdaThisCaptureField = nullptr;
415 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
416 bool NoInheritance = false;
417
418public:
419 /// Constructs region for combined constructs.
420 /// \param CodeGen Code generation sequence for combined directives. Includes
421 /// a list of functions used for code generation of implicitly inlined
422 /// regions.
423 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
424 OpenMPDirectiveKind Kind, bool HasCancel,
425 bool NoInheritance = true)
426 : CGF(CGF), NoInheritance(NoInheritance) {
427 // Start emission for the construct.
428 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
429 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
430 if (NoInheritance) {
431 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
432 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
433 CGF.LambdaThisCaptureField = nullptr;
434 BlockInfo = CGF.BlockInfo;
435 CGF.BlockInfo = nullptr;
436 }
437 }
438
439 ~InlinedOpenMPRegionRAII() {
440 // Restore original CapturedStmtInfo only if we're done with code emission.
441 auto *OldCSI =
442 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
443 delete CGF.CapturedStmtInfo;
444 CGF.CapturedStmtInfo = OldCSI;
445 if (NoInheritance) {
446 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
447 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
448 CGF.BlockInfo = BlockInfo;
449 }
450 }
451};
452
453/// Values for bit flags used in the ident_t to describe the fields.
454/// All enum elements are named and described in accordance with the code
455/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
456enum OpenMPLocationFlags : unsigned {
457 /// Use trampoline for internal microtask.
458 OMP_IDENT_IMD = 0x01,
459 /// Use c-style ident structure.
460 OMP_IDENT_KMPC = 0x02,
461 /// Atomic reduction option for kmpc_reduce.
462 OMP_ATOMIC_REDUCE = 0x10,
463 /// Explicit 'barrier' directive.
464 OMP_IDENT_BARRIER_EXPL = 0x20,
465 /// Implicit barrier in code.
466 OMP_IDENT_BARRIER_IMPL = 0x40,
467 /// Implicit barrier in 'for' directive.
468 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
469 /// Implicit barrier in 'sections' directive.
470 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
471 /// Implicit barrier in 'single' directive.
472 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
473 /// Call of __kmp_for_static_init for static loop.
474 OMP_IDENT_WORK_LOOP = 0x200,
475 /// Call of __kmp_for_static_init for sections.
476 OMP_IDENT_WORK_SECTIONS = 0x400,
477 /// Call of __kmp_for_static_init for distribute.
478 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
 479 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
480};
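
These flags are OR-ed into the flags field of the ident_t location record passed to the libomp entry points. For instance, a location for the implicit barrier that ends a worksharing loop would plausibly carry (a sketch based on the enumerators above, not a quote from the code below):

    unsigned Flags = OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR;
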
481
482namespace {
483LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
484/// Values for bit flags for marking which requires clauses have been used.
485enum OpenMPOffloadingRequiresDirFlags : int64_t {
486 /// flag undefined.
487 OMP_REQ_UNDEFINED = 0x000,
488 /// no requires clause present.
489 OMP_REQ_NONE = 0x001,
490 /// reverse_offload clause.
491 OMP_REQ_REVERSE_OFFLOAD = 0x002,
492 /// unified_address clause.
493 OMP_REQ_UNIFIED_ADDRESS = 0x004,
494 /// unified_shared_memory clause.
495 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
496 /// dynamic_allocators clause.
497 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
 498 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
499};
500
501} // anonymous namespace
502
503/// Describes the ident structure that encodes a source location.
504/// All descriptions are taken from
505/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
506/// Original structure:
507/// typedef struct ident {
508/// kmp_int32 reserved_1; /**< might be used in Fortran;
509/// see above */
510/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
511/// KMP_IDENT_KMPC identifies this union
512/// member */
513/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
514/// see above */
515///#if USE_ITT_BUILD
516/// /* but currently used for storing
517/// region-specific ITT */
518/// /* contextual information. */
519///#endif /* USE_ITT_BUILD */
520/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
521/// C++ */
522/// char const *psource; /**< String describing the source location.
523/// The string is composed of semi-colon separated
524/// fields which describe the source file,
525/// the function and a pair of line numbers that
526/// delimit the construct.
527/// */
528/// } ident_t;
529enum IdentFieldIndex {
530 /// might be used in Fortran
531 IdentField_Reserved_1,
532 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
533 IdentField_Flags,
534 /// Not really used in Fortran any more
535 IdentField_Reserved_2,
536 /// Source[4] in Fortran, do not use for C++
537 IdentField_Reserved_3,
538 /// String describing the source location. The string is composed of
539 /// semi-colon separated fields which describe the source file, the function
540 /// and a pair of line numbers that delimit the construct.
541 IdentField_PSource
542};
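
As an example of the psource convention, the location strings clang emits are typically of the shape (semicolon-separated fields as described above):

    ;unknown;unknown;0;0;;    (default when no debug info is available)
    ;file.c;foo;12;3;;        (with debug info: file, function, line, column)
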
543
544/// Schedule types for 'omp for' loops (these enumerators are taken from
545/// the enum sched_type in kmp.h).
546enum OpenMPSchedType {
547 /// Lower bound for default (unordered) versions.
548 OMP_sch_lower = 32,
549 OMP_sch_static_chunked = 33,
550 OMP_sch_static = 34,
551 OMP_sch_dynamic_chunked = 35,
552 OMP_sch_guided_chunked = 36,
553 OMP_sch_runtime = 37,
554 OMP_sch_auto = 38,
555 /// static with chunk adjustment (e.g., simd)
556 OMP_sch_static_balanced_chunked = 45,
557 /// Lower bound for 'ordered' versions.
558 OMP_ord_lower = 64,
559 OMP_ord_static_chunked = 65,
560 OMP_ord_static = 66,
561 OMP_ord_dynamic_chunked = 67,
562 OMP_ord_guided_chunked = 68,
563 OMP_ord_runtime = 69,
564 OMP_ord_auto = 70,
565 OMP_sch_default = OMP_sch_static,
566 /// dist_schedule types
567 OMP_dist_sch_static_chunked = 91,
568 OMP_dist_sch_static = 92,
569 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
570 /// Set if the monotonic schedule modifier was present.
571 OMP_sch_modifier_monotonic = (1 << 29),
572 /// Set if the nonmonotonic schedule modifier was present.
573 OMP_sch_modifier_nonmonotonic = (1 << 30),
574};
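
For orientation, these values correspond directly to schedule clauses in the source; a few illustrative mappings (assuming no monotonic/nonmonotonic modifiers):

    #pragma omp for schedule(dynamic, 4)           // OMP_sch_dynamic_chunked
    #pragma omp for ordered schedule(static)       // OMP_ord_static
    #pragma omp distribute dist_schedule(static)   // OMP_dist_sch_static
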
575
576/// A basic class for pre/post-actions in an advanced codegen sequence for an
577/// OpenMP region.
578class CleanupTy final : public EHScopeStack::Cleanup {
579 PrePostActionTy *Action;
580
581public:
582 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
583 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
584 if (!CGF.HaveInsertPoint())
585 return;
586 Action->Exit(CGF);
587 }
588};
589
590} // anonymous namespace
591
592void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
593 CodeGenFunction::RunCleanupsScope Scope(CGF);
594 if (PrePostAction) {
595 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
596 Callback(CodeGen, CGF, *PrePostAction);
597 } else {
598 PrePostActionTy Action;
599 Callback(CodeGen, CGF, Action);
600 }
601}
602
603/// Check if the combiner is a call to a UDR combiner and, if so, return the
604/// UDR decl used for reduction.
605static const OMPDeclareReductionDecl *
606getReductionInit(const Expr *ReductionOp) {
607 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
608 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
609 if (const auto *DRE =
610 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
611 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
612 return DRD;
613 return nullptr;
614}
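
As context, the user-defined reduction (UDR) shape these helpers recognize comes from a declare-reduction directive; an illustrative example:

    #pragma omp declare reduction(mymin : int :                \
        omp_out = omp_in < omp_out ? omp_in : omp_out)         \
        initializer(omp_priv = INT_MAX)

    int m = INT_MAX;
    #pragma omp parallel for reduction(mymin : m)
    for (int i = 0; i < n; ++i)
      m = a[i] < m ? a[i] : m;
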
615
616static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
617 const OMPDeclareReductionDecl *DRD,
618 const Expr *InitOp,
619 Address Private, Address Original,
620 QualType Ty) {
621 if (DRD->getInitializer()) {
622 std::pair<llvm::Function *, llvm::Function *> Reduction =
623 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
624 const auto *CE = cast<CallExpr>(InitOp);
625 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
626 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
627 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
628 const auto *LHSDRE =
629 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
630 const auto *RHSDRE =
631 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
632 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
633 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
634 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
635 (void)PrivateScope.Privatize();
636 RValue Func = RValue::get(Reduction.second);
637 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
638 CGF.EmitIgnoredExpr(InitOp);
639 } else {
640 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
641 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
642 auto *GV = new llvm::GlobalVariable(
643 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
644 llvm::GlobalValue::PrivateLinkage, Init, Name);
645 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
646 RValue InitRVal;
647 switch (CGF.getEvaluationKind(Ty)) {
648 case TEK_Scalar:
649 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
650 break;
651 case TEK_Complex:
652 InitRVal =
653 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
654 break;
655 case TEK_Aggregate: {
656 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
657 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
658 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
659 /*IsInitializer=*/false);
660 return;
661 }
662 }
663 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
664 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
665 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
666 /*IsInitializer=*/false);
667 }
668}
669
670/// Emit initialization of arrays of complex types.
671/// \param DestAddr Address of the array.
672/// \param Type Type of array.
673/// \param Init Initial expression of array.
674/// \param SrcAddr Address of the original array.
675static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
676 QualType Type, bool EmitDeclareReductionInit,
677 const Expr *Init,
678 const OMPDeclareReductionDecl *DRD,
679 Address SrcAddr = Address::invalid()) {
680 // Perform element-by-element initialization.
681 QualType ElementTy;
682
683 // Drill down to the base element type on both arrays.
684 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
685 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
686 if (DRD)
687 SrcAddr =
688 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
689
690 llvm::Value *SrcBegin = nullptr;
691 if (DRD)
692 SrcBegin = SrcAddr.getPointer();
693 llvm::Value *DestBegin = DestAddr.getPointer();
694 // Cast from pointer to array type to pointer to single element.
695 llvm::Value *DestEnd =
696 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
697 // The basic structure here is a while-do loop.
698 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
699 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
700 llvm::Value *IsEmpty =
701 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
702 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
703
704 // Enter the loop body, making that address the current address.
705 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
706 CGF.EmitBlock(BodyBB);
707
708 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
709
710 llvm::PHINode *SrcElementPHI = nullptr;
711 Address SrcElementCurrent = Address::invalid();
712 if (DRD) {
713 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
714 "omp.arraycpy.srcElementPast");
715 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
716 SrcElementCurrent =
717 Address(SrcElementPHI, SrcAddr.getElementType(),
718 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
719 }
720 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
721 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
722 DestElementPHI->addIncoming(DestBegin, EntryBB);
723 Address DestElementCurrent =
724 Address(DestElementPHI, DestAddr.getElementType(),
725 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
726
727 // Emit copy.
728 {
729 CodeGenFunction::RunCleanupsScope InitScope(CGF);
730 if (EmitDeclareReductionInit) {
731 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
732 SrcElementCurrent, ElementTy);
733 } else
734 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
735 /*IsInitializer=*/false);
736 }
737
738 if (DRD) {
739 // Shift the address forward by one element.
740 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
741 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
742 "omp.arraycpy.dest.element");
743 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
744 }
745
746 // Shift the address forward by one element.
747 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
748 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
749 "omp.arraycpy.dest.element");
750 // Check whether we've reached the end.
751 llvm::Value *Done =
752 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
753 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
754 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
755
756 // Done.
757 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
758}
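
The control flow emitted above is a guarded element-by-element loop; a conceptual C-level sketch of what the omp.arrayinit.body/omp.arrayinit.done blocks implement (not actual compiler output):

    T *d = dest, *end = dest + num_elements;
    const T *s = src;                 // tracked only when a UDR (DRD) is present
    if (d != end) {                   // omp.arrayinit.isempty guard
      do {
        init_element(d, s);           // UDR initializer or the default Init expr
        ++d;                          // both PHIs step forward one element
        ++s;                          // DRD case only
      } while (d != end);             // omp.arraycpy.done back-edge check
    }
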
759
760LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
761 return CGF.EmitOMPSharedLValue(E);
762}
763
764LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
765 const Expr *E) {
766 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
767 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
768 return LValue();
769}
770
771void ReductionCodeGen::emitAggregateInitialization(
772 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
773 const OMPDeclareReductionDecl *DRD) {
774 // Emit VarDecl with copy init for arrays.
 775 // Get the address of the original variable captured in the current
 776 // captured region.
777 const auto *PrivateVD =
778 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
779 bool EmitDeclareReductionInit =
780 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
781 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
782 EmitDeclareReductionInit,
783 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
784 : PrivateVD->getInit(),
785 DRD, SharedAddr);
786}
787
788ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
789 ArrayRef<const Expr *> Origs,
790 ArrayRef<const Expr *> Privates,
791 ArrayRef<const Expr *> ReductionOps) {
792 ClausesData.reserve(Shareds.size());
793 SharedAddresses.reserve(Shareds.size());
794 Sizes.reserve(Shareds.size());
795 BaseDecls.reserve(Shareds.size());
796 const auto *IOrig = Origs.begin();
797 const auto *IPriv = Privates.begin();
798 const auto *IRed = ReductionOps.begin();
799 for (const Expr *Ref : Shareds) {
800 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
801 std::advance(IOrig, 1);
802 std::advance(IPriv, 1);
803 std::advance(IRed, 1);
804 }
805}
806
807void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
 808 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
 809        "Number of generated lvalues must be exactly N.");
810 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
811 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
812 SharedAddresses.emplace_back(First, Second);
813 if (ClausesData[N].Shared == ClausesData[N].Ref) {
814 OrigAddresses.emplace_back(First, Second);
815 } else {
816 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
817 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
818 OrigAddresses.emplace_back(First, Second);
819 }
820}
821
822void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
823 QualType PrivateType = getPrivateType(N);
824 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
825 if (!PrivateType->isVariablyModifiedType()) {
826 Sizes.emplace_back(
827 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
828 nullptr);
829 return;
830 }
831 llvm::Value *Size;
832 llvm::Value *SizeInChars;
833 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
834 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
835 if (AsArraySection) {
836 Size = CGF.Builder.CreatePtrDiff(ElemType,
837 OrigAddresses[N].second.getPointer(CGF),
838 OrigAddresses[N].first.getPointer(CGF));
839 Size = CGF.Builder.CreateNUWAdd(
840 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
841 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
842 } else {
843 SizeInChars =
844 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
845 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
846 }
847 Sizes.emplace_back(SizeInChars, Size);
848 CodeGenFunction::OpaqueValueMapping OpaqueMap(
849 CGF,
850 cast<OpaqueValueExpr>(
851 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
852 RValue::get(Size));
853 CGF.EmitVariablyModifiedType(PrivateType);
854}
855
856void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
857 llvm::Value *Size) {
858 QualType PrivateType = getPrivateType(N);
859 if (!PrivateType->isVariablyModifiedType()) {
 860 assert(!Size && !Sizes[N].second &&
 861        "Size should be nullptr for non-variably modified reduction "
 862        "items.");
863 return;
864 }
865 CodeGenFunction::OpaqueValueMapping OpaqueMap(
866 CGF,
867 cast<OpaqueValueExpr>(
868 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
869 RValue::get(Size));
870 CGF.EmitVariablyModifiedType(PrivateType);
871}
872
873void ReductionCodeGen::emitInitialization(
874 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
875 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
 876 assert(SharedAddresses.size() > N && "No variable was generated");
877 const auto *PrivateVD =
878 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
879 const OMPDeclareReductionDecl *DRD =
880 getReductionInit(ClausesData[N].ReductionOp);
881 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
882 if (DRD && DRD->getInitializer())
883 (void)DefaultInit(CGF);
884 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
885 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
886 (void)DefaultInit(CGF);
887 QualType SharedType = SharedAddresses[N].first.getType();
888 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
889 PrivateAddr, SharedAddr, SharedType);
890 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
891 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
892 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
893 PrivateVD->getType().getQualifiers(),
894 /*IsInitializer=*/false);
895 }
896}
897
898bool ReductionCodeGen::needCleanups(unsigned N) {
899 QualType PrivateType = getPrivateType(N);
900 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
901 return DTorKind != QualType::DK_none;
902}
903
904void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
905 Address PrivateAddr) {
906 QualType PrivateType = getPrivateType(N);
907 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
908 if (needCleanups(N)) {
909 PrivateAddr = CGF.Builder.CreateElementBitCast(
910 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
911 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
912 }
913}
914
915static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
916 LValue BaseLV) {
917 BaseTy = BaseTy.getNonReferenceType();
918 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
919 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
920 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
921 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
922 } else {
923 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
924 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
925 }
926 BaseTy = BaseTy->getPointeeType();
927 }
928 return CGF.MakeAddrLValue(
929 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
930 CGF.ConvertTypeForMem(ElTy)),
931 BaseLV.getType(), BaseLV.getBaseInfo(),
932 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
933}
934
935static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
936 Address OriginalBaseAddress, llvm::Value *Addr) {
937 Address Tmp = Address::invalid();
938 Address TopTmp = Address::invalid();
939 Address MostTopTmp = Address::invalid();
940 BaseTy = BaseTy.getNonReferenceType();
941 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
942 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
943 Tmp = CGF.CreateMemTemp(BaseTy);
944 if (TopTmp.isValid())
945 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
946 else
947 MostTopTmp = Tmp;
948 TopTmp = Tmp;
949 BaseTy = BaseTy->getPointeeType();
950 }
951
952 if (Tmp.isValid()) {
953 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
954 Addr, Tmp.getElementType());
955 CGF.Builder.CreateStore(Addr, Tmp);
956 return MostTopTmp;
957 }
958
959 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
960 Addr, OriginalBaseAddress.getType());
961 return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
962}
963
964static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
965 const VarDecl *OrigVD = nullptr;
966 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
967 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
968 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
969 Base = TempOASE->getBase()->IgnoreParenImpCasts();
970 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
971 Base = TempASE->getBase()->IgnoreParenImpCasts();
972 DE = cast<DeclRefExpr>(Base);
973 OrigVD = cast<VarDecl>(DE->getDecl());
974 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
975 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
976 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
977 Base = TempASE->getBase()->IgnoreParenImpCasts();
978 DE = cast<DeclRefExpr>(Base);
979 OrigVD = cast<VarDecl>(DE->getDecl());
980 }
981 return OrigVD;
982}
983
984Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
985 Address PrivateAddr) {
986 const DeclRefExpr *DE;
987 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
988 BaseDecls.emplace_back(OrigVD);
989 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
990 LValue BaseLValue =
991 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
992 OriginalBaseLValue);
993 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
994 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
995 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
996 SharedAddr.getPointer());
997 llvm::Value *PrivatePointer =
998 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
999 PrivateAddr.getPointer(), SharedAddr.getType());
1000 llvm::Value *Ptr = CGF.Builder.CreateGEP(
1001 SharedAddr.getElementType(), PrivatePointer, Adjustment);
1002 return castToBase(CGF, OrigVD->getType(),
1003 SharedAddresses[N].first.getType(),
1004 OriginalBaseLValue.getAddress(CGF), Ptr);
1005 }
1006 BaseDecls.emplace_back(
1007 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1008 return PrivateAddr;
1009}
1010
1011bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1012 const OMPDeclareReductionDecl *DRD =
1013 getReductionInit(ClausesData[N].ReductionOp);
1014 return DRD && DRD->getInitializer();
1015}
1016
1017LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1018 return CGF.EmitLoadOfPointerLValue(
1019 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1020 getThreadIDVariable()->getType()->castAs<PointerType>());
1021}
1022
1023void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1024 if (!CGF.HaveInsertPoint())
1025 return;
1026 // 1.2.2 OpenMP Language Terminology
1027 // Structured block - An executable statement with a single entry at the
1028 // top and a single exit at the bottom.
1029 // The point of exit cannot be a branch out of the structured block.
1030 // longjmp() and throw() must not violate the entry/exit criteria.
1031 CGF.EHStack.pushTerminate();
1032 if (S)
1033 CGF.incrementProfileCounter(S);
1034 CodeGen(CGF);
1035 CGF.EHStack.popTerminate();
1036}
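
Concretely, the terminate scope pushed above enforces the structured-block rule quoted in the comment: control may not leave the region abnormally, and an exception that would escape instead terminates. An illustrative example:

    #pragma omp parallel
    {
      work();        // OK: single entry at the top, single exit at the bottom
      // goto out;   // ill-formed: would branch out of the structured block
      // throw 1;    // must not propagate out; the pushed terminate scope fires
    }
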
1037
1038LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1039 CodeGenFunction &CGF) {
1040 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1041 getThreadIDVariable()->getType(),
1042 AlignmentSource::Decl);
1043}
1044
1045static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1046 QualType FieldTy) {
1047 auto *Field = FieldDecl::Create(
1048 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1049 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1050 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1051 Field->setAccess(AS_public);
1052 DC->addDecl(Field);
1053 return Field;
1054}
1055
1056CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1057 : CGM(CGM), OMPBuilder(CGM.getModule()) {
1058 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1059 llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false,
1060 hasRequiresUnifiedSharedMemory(),
1061 CGM.getLangOpts().OpenMPOffloadMandatory);
1062 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1063 OMPBuilder.initialize();
1064 OMPBuilder.setConfig(Config);
1065 loadOffloadInfoMetadata();
1066}
1067
1068void CGOpenMPRuntime::clear() {
1069 InternalVars.clear();
1070 // Clean non-target variable declarations possibly used only in debug info.
1071 for (const auto &Data : EmittedNonTargetVariables) {
1072 if (!Data.getValue().pointsToAliveValue())
1073 continue;
1074 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1075 if (!GV)
1076 continue;
1077 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1078 continue;
1079 GV->eraseFromParent();
1080 }
1081}
1082
1083std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1084 return OMPBuilder.createPlatformSpecificName(Parts);
1085}
1086
1087static llvm::Function *
1088emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1089 const Expr *CombinerInitializer, const VarDecl *In,
1090 const VarDecl *Out, bool IsCombiner) {
1091 // void .omp_combiner.(Ty *in, Ty *out);
1092 ASTContext &C = CGM.getContext();
1093 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1094 FunctionArgList Args;
1095 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1096 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1097 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1098 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1099 Args.push_back(&OmpOutParm);
1100 Args.push_back(&OmpInParm);
1101 const CGFunctionInfo &FnInfo =
1102 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1103 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1104 std::string Name = CGM.getOpenMPRuntime().getName(
1105 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1106 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1107 Name, &CGM.getModule());
1108 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1109 if (CGM.getLangOpts().Optimize) {
1110 Fn->removeFnAttr(llvm::Attribute::NoInline);
1111 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1112 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1113 }
1114 CodeGenFunction CGF(CGM);
1115 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1116 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1117 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1118 Out->getLocation());
1119 CodeGenFunction::OMPPrivateScope Scope(CGF);
1120 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1121 Scope.addPrivate(
1122 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1123 .getAddress(CGF));
1124 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1125 Scope.addPrivate(
1126 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1127 .getAddress(CGF));
1128 (void)Scope.Privatize();
1129 if (!IsCombiner && Out->hasInit() &&
1130 !CGF.isTrivialInitializer(Out->getInit())) {
1131 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1132 Out->getType().getQualifiers(),
1133 /*IsInitializer=*/true);
1134 }
1135 if (CombinerInitializer)
1136 CGF.EmitIgnoredExpr(CombinerInitializer);
1137 Scope.ForceCleanup();
1138 CGF.FinishFunction();
1139 return Fn;
1140}
1141
1142void CGOpenMPRuntime::emitUserDefinedReduction(
1143 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1144 if (UDRMap.count(D) > 0)
1145 return;
1146 llvm::Function *Combiner = emitCombinerOrInitializer(
1147 CGM, D->getType(), D->getCombiner(),
1148 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1149 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1150 /*IsCombiner=*/true);
1151 llvm::Function *Initializer = nullptr;
1152 if (const Expr *Init = D->getInitializer()) {
1153 Initializer = emitCombinerOrInitializer(
1154 CGM, D->getType(),
1155 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1156 : nullptr,
1157 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1158 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1159 /*IsCombiner=*/false);
1160 }
1161 UDRMap.try_emplace(D, Combiner, Initializer);
1162 if (CGF) {
1163 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1164 Decls.second.push_back(D);
1165 }
1166}
1167
1168std::pair<llvm::Function *, llvm::Function *>
1169CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1170 auto I = UDRMap.find(D);
1171 if (I != UDRMap.end())
1172 return I->second;
1173 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1174 return UDRMap.lookup(D);
1175}
1176
1177namespace {
1178// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1179// Builder if one is present.
1180struct PushAndPopStackRAII {
1181 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1182 bool HasCancel, llvm::omp::Directive Kind)
1183 : OMPBuilder(OMPBuilder) {
1184 if (!OMPBuilder)
1185 return;
1186
 1187 // The following callback is the crucial part of clang's cleanup process.
1188 //
1189 // NOTE:
1190 // Once the OpenMPIRBuilder is used to create parallel regions (and
1191 // similar), the cancellation destination (Dest below) is determined via
1192 // IP. That means if we have variables to finalize we split the block at IP,
1193 // use the new block (=BB) as destination to build a JumpDest (via
1194 // getJumpDestInCurrentScope(BB)) which then is fed to
 1195 // EmitBranchThroughCleanup. Furthermore, there will be no need
 1196 // to push & pop a FinalizationInfo object.
1197 // The FiniCB will still be needed but at the point where the
1198 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1199 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
 1200 assert(IP.getBlock()->end() == IP.getPoint() &&
 1201        "Clang CG should cause non-terminated block!");
1202 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1203 CGF.Builder.restoreIP(IP);
1204 CodeGenFunction::JumpDest Dest =
1205 CGF.getOMPCancelDestination(OMPD_parallel);
1206 CGF.EmitBranchThroughCleanup(Dest);
1207 };
1208
1209 // TODO: Remove this once we emit parallel regions through the
1210 // OpenMPIRBuilder as it can do this setup internally.
1211 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1212 OMPBuilder->pushFinalizationCB(std::move(FI));
1213 }
1214 ~PushAndPopStackRAII() {
1215 if (OMPBuilder)
1216 OMPBuilder->popFinalizationCB();
1217 }
1218 llvm::OpenMPIRBuilder *OMPBuilder;
1219};
1220} // namespace
1221
1222static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1223 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1224 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1225 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
 1226 assert(ThreadIDVar->getType()->isPointerType() &&
 1227        "thread id variable must be of type kmp_int32 *");
1228 CodeGenFunction CGF(CGM, true);
1229 bool HasCancel = false;
1230 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1231 HasCancel = OPD->hasCancel();
1232 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1233 HasCancel = OPD->hasCancel();
1234 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1235 HasCancel = OPSD->hasCancel();
1236 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1237 HasCancel = OPFD->hasCancel();
1238 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1239 HasCancel = OPFD->hasCancel();
1240 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1241 HasCancel = OPFD->hasCancel();
1242 else if (const auto *OPFD =
1243 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1244 HasCancel = OPFD->hasCancel();
1245 else if (const auto *OPFD =
1246 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1247 HasCancel = OPFD->hasCancel();
1248
1249 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1250 // parallel region to make cancellation barriers work properly.
1251 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1252 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1253 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1254 HasCancel, OutlinedHelperName);
1255 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1256 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1257}
1258
1259llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1260 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1261 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1262 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1263 return emitParallelOrTeamsOutlinedFunction(
1264 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1265}
1266
1267llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1268 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1269 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1270 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1271 return emitParallelOrTeamsOutlinedFunction(
1272 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1273}
1274
1275llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1276 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1277 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1278 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1279 bool Tied, unsigned &NumberOfParts) {
1280 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1281 PrePostActionTy &) {
1282 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1283 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1284 llvm::Value *TaskArgs[] = {
1285 UpLoc, ThreadID,
1286 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1287 TaskTVar->getType()->castAs<PointerType>())
1288 .getPointer(CGF)};
1289 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1290 CGM.getModule(), OMPRTL___kmpc_omp_task),
1291 TaskArgs);
1292 };
1293 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1294 UntiedCodeGen);
1295 CodeGen.setAction(Action);
1296 assert(!ThreadIDVar->getType()->isPointerType() &&
1297        "thread id variable must be of type kmp_int32 for tasks");
1298 const OpenMPDirectiveKind Region =
1299 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1300 : OMPD_task;
1301 const CapturedStmt *CS = D.getCapturedStmt(Region);
1302 bool HasCancel = false;
1303 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1304 HasCancel = TD->hasCancel();
1305 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1306 HasCancel = TD->hasCancel();
1307 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1308 HasCancel = TD->hasCancel();
1309 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1310 HasCancel = TD->hasCancel();
1311
1312 CodeGenFunction CGF(CGM, true);
1313 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1314 InnermostKind, HasCancel, Action);
1315 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1316 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1317 if (!Tied)
1318 NumberOfParts = Action.getNumberOfParts();
1319 return Res;
1320}
1321
1322void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1323 bool AtCurrentPoint) {
1324 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1325 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1326
1327 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1328 if (AtCurrentPoint) {
1329 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1330 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1331 } else {
1332 Elem.second.ServiceInsertPt =
1333 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1334 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1335 }
1336}
1337
1338void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1339 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1340 if (Elem.second.ServiceInsertPt) {
1341 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1342 Elem.second.ServiceInsertPt = nullptr;
1343 Ptr->eraseFromParent();
1344 }
1345}
1346
1347static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1348 SourceLocation Loc,
1349 SmallString<128> &Buffer) {
1350 llvm::raw_svector_ostream OS(Buffer);
1351 // Build debug location
1352 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1353 OS << ";" << PLoc.getFilename() << ";";
1354 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1355 OS << FD->getQualifiedNameAsString();
1356 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1357 return OS.str();
1358}
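For example (hypothetical values, not from the report): a directive at line 10, column 3 of foo.c inside the function main would yield the ident string ";foo.c;main;10;3;;", following the ';'-delimited format built above.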
1359
1360llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1361 SourceLocation Loc,
1362 unsigned Flags, bool EmitLoc) {
1363 uint32_t SrcLocStrSize;
1364 llvm::Constant *SrcLocStr;
1365 if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
1366 llvm::codegenoptions::NoDebugInfo) ||
1367 Loc.isInvalid()) {
1368 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1369 } else {
1370 std::string FunctionName;
1371 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1372 FunctionName = FD->getQualifiedNameAsString();
1373 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1374 const char *FileName = PLoc.getFilename();
1375 unsigned Line = PLoc.getLine();
1376 unsigned Column = PLoc.getColumn();
1377 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1378 Column, SrcLocStrSize);
1379 }
1380 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1381 return OMPBuilder.getOrCreateIdent(
1382 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1383}
1384
1385llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1386 SourceLocation Loc) {
1387 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1388 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1389 // the clang invariants used below might be broken.
1390 if (CGM.getLangOpts().OpenMPIRBuilder) {
1391 SmallString<128> Buffer;
1392 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1393 uint32_t SrcLocStrSize;
1394 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1395 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1396 return OMPBuilder.getOrCreateThreadID(
1397 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1398 }
1399
1400 llvm::Value *ThreadID = nullptr;
1401 // Check whether we've already cached a load of the thread id in this
1402 // function.
1403 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1404 if (I != OpenMPLocThreadIDMap.end()) {
1405 ThreadID = I->second.ThreadID;
1406 if (ThreadID != nullptr)
1407 return ThreadID;
1408 }
1409 // If exceptions are enabled, do not use the parameter, to avoid a possible crash.
1410 if (auto *OMPRegionInfo =
1411 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1412 if (OMPRegionInfo->getThreadIDVariable()) {
1413 // Check if this an outlined function with thread id passed as argument.
1414 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1415 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1416 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1417 !CGF.getLangOpts().CXXExceptions ||
1418 CGF.Builder.GetInsertBlock() == TopBlock ||
1419 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1420 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1421 TopBlock ||
1422 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1423 CGF.Builder.GetInsertBlock()) {
1424 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1425 // If the value was loaded in the entry block, cache it and use it
1426 // everywhere in the function.
1427 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1428 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1429 Elem.second.ThreadID = ThreadID;
1430 }
1431 return ThreadID;
1432 }
1433 }
1434 }
1435
1436 // This is not an outlined function region - need to call kmp_int32
1437 // __kmpc_global_thread_num(ident_t *loc).
1438 // Generate the thread id value and cache it for use across the
1439 // function.
1440 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1441 if (!Elem.second.ServiceInsertPt)
1442 setLocThreadIdInsertPt(CGF);
1443 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1444 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1445 llvm::CallInst *Call = CGF.Builder.CreateCall(
1446 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1447 OMPRTL___kmpc_global_thread_num),
1448 emitUpdateLocation(CGF, Loc));
1449 Call->setCallingConv(CGF.getRuntimeCC());
1450 Elem.second.ThreadID = Call;
1451 return Call;
1452}
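Conceptually, the non-IRBuilder path above emits one cached runtime call at the function-entry service insert point; a sketch in C++ form, assuming the usual kmp runtime declarations (ident_t, kmp_int32, and __kmpc_global_thread_num belong to the runtime and are not defined here):

    // Sketch: the result is stored in OpenMPLocThreadIDMap and reused for
    // every later getThreadID query in the same function.
    kmp_int32 gtid = __kmpc_global_thread_num(&loc);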
1453
1454void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1455 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1456 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1457 clearLocThreadIdInsertPt(CGF);
1458 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1459 }
1460 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1461 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1462 UDRMap.erase(D);
1463 FunctionUDRMap.erase(CGF.CurFn);
1464 }
1465 auto I = FunctionUDMMap.find(CGF.CurFn);
1466 if (I != FunctionUDMMap.end()) {
1467 for(const auto *D : I->second)
1468 UDMMap.erase(D);
1469 FunctionUDMMap.erase(I);
1470 }
1471 LastprivateConditionalToTypes.erase(CGF.CurFn);
1472 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1473}
1474
1475llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1476 return OMPBuilder.IdentPtr;
1477}
1478
1479llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1480 if (!Kmpc_MicroTy) {
1481 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1482 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1483 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1484 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1485 }
1486 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1487}
1488
1489llvm::FunctionCallee
1490CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1491 bool IsGPUDistribute) {
1492 assert((IVSize == 32 || IVSize == 64) &&
1493        "IV size is not compatible with the omp runtime");
1494 StringRef Name;
1495 if (IsGPUDistribute)
1496 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1497 : "__kmpc_distribute_static_init_4u")
1498 : (IVSigned ? "__kmpc_distribute_static_init_8"
1499 : "__kmpc_distribute_static_init_8u");
1500 else
1501 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1502 : "__kmpc_for_static_init_4u")
1503 : (IVSigned ? "__kmpc_for_static_init_8"
1504 : "__kmpc_for_static_init_8u");
1505
1506 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1507 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1508 llvm::Type *TypeParams[] = {
1509 getIdentTyPointerTy(), // loc
1510 CGM.Int32Ty, // tid
1511 CGM.Int32Ty, // schedtype
1512 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1513 PtrTy, // p_lower
1514 PtrTy, // p_upper
1515 PtrTy, // p_stride
1516 ITy, // incr
1517 ITy // chunk
1518 };
1519 auto *FnTy =
1520 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1521 return CGM.CreateRuntimeFunction(FnTy, Name);
1522}
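For instance, with IVSize == 32, IVSigned == true, and IsGPUDistribute == false, the callee built above corresponds to a prototype of this shape (a sketch derived from the TypeParams comments; parameter names are illustrative):

    void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
                                  kmp_int32 schedtype, kmp_int32 *p_lastiter,
                                  kmp_int32 *p_lower, kmp_int32 *p_upper,
                                  kmp_int32 *p_stride, kmp_int32 incr,
                                  kmp_int32 chunk);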
1523
1524llvm::FunctionCallee
1525CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1526 assert((IVSize == 32 || IVSize == 64) &&
1527        "IV size is not compatible with the omp runtime");
1528 StringRef Name =
1529 IVSize == 32
1530 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1531 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1532 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1533 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1534 CGM.Int32Ty, // tid
1535 CGM.Int32Ty, // schedtype
1536 ITy, // lower
1537 ITy, // upper
1538 ITy, // stride
1539 ITy // chunk
1540 };
1541 auto *FnTy =
1542 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1543 return CGM.CreateRuntimeFunction(FnTy, Name);
1544}
1545
1546llvm::FunctionCallee
1547CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1548 assert((IVSize == 32 || IVSize == 64) &&
1549        "IV size is not compatible with the omp runtime");
1550 StringRef Name =
1551 IVSize == 32
1552 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1553 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1554 llvm::Type *TypeParams[] = {
1555 getIdentTyPointerTy(), // loc
1556 CGM.Int32Ty, // tid
1557 };
1558 auto *FnTy =
1559 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1560 return CGM.CreateRuntimeFunction(FnTy, Name);
1561}
1562
1563llvm::FunctionCallee
1564CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1565 assert((IVSize == 32 || IVSize == 64) &&
1566        "IV size is not compatible with the omp runtime");
1567 StringRef Name =
1568 IVSize == 32
1569 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1570 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1571 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1572 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1573 llvm::Type *TypeParams[] = {
1574 getIdentTyPointerTy(), // loc
1575 CGM.Int32Ty, // tid
1576 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1577 PtrTy, // p_lower
1578 PtrTy, // p_upper
1579 PtrTy // p_stride
1580 };
1581 auto *FnTy =
1582 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1583 return CGM.CreateRuntimeFunction(FnTy, Name);
1584}
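Analogously, for IVSize == 32 and IVSigned == true, the dispatch-next callee has this shape (a sketch from the TypeParams above; parameter names are illustrative, and the nonzero-while-work-remains return convention is an assumption about the kmp runtime):

    kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
                                     kmp_int32 *p_lastiter, kmp_int32 *p_lower,
                                     kmp_int32 *p_upper, kmp_int32 *p_stride);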
1585
1586/// Obtain information that uniquely identifies a target entry. This
1587/// consists of the file and device IDs as well as line number associated with
1588/// the relevant entry source location.
1589static llvm::TargetRegionEntryInfo
1590getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1591 StringRef ParentName = "") {
1592 SourceManager &SM = C.getSourceManager();
1593
1594 // The loc should always be valid and have a file ID (the user cannot use
1595 // #pragma directives in macros).
1596
1597 assert(Loc.isValid() && "Source location is expected to be always valid.");
1598
1599 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1600 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1601
1602 llvm::sys::fs::UniqueID ID;
1603 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1604 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1605 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1606 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1607 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1608 << PLoc.getFilename() << EC.message();
1609 }
1610
1611 return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
1612 PLoc.getLine());
1613}
1614
1615Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1616 if (CGM.getLangOpts().OpenMPSimd)
1617 return Address::invalid();
1618 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1619 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1620 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1621 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1622 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1623 HasRequiresUnifiedSharedMemory))) {
1624 SmallString<64> PtrName;
1625 {
1626 llvm::raw_svector_ostream OS(PtrName);
1627 OS << CGM.getMangledName(GlobalDecl(VD));
1628 if (!VD->isExternallyVisible()) {
1629 auto EntryInfo = getTargetEntryUniqueInfo(
1630 CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc());
1631 OS << llvm::format("_%x", EntryInfo.FileID);
1632 }
1633 OS << "_decl_tgt_ref_ptr";
1634 }
1635 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1636 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1637 llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
1638 if (!Ptr) {
1639 Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName);
1640
1641 auto *GV = cast<llvm::GlobalVariable>(Ptr);
1642 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1643
1644 if (!CGM.getLangOpts().OpenMPIsDevice)
1645 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1646 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1647 }
1648 return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
1649 }
1650 return Address::invalid();
1651}
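A hedged example of user code that takes the branch above (hypothetical; names illustrative): a link-mapped global gets a weak host-side reference pointer whose name ends in _decl_tgt_ref_ptr.

    // Hypothetical: 'x' is mapped with the link modifier, so a reference
    // pointer (roughly "x_decl_tgt_ref_ptr", modulo mangling) is created
    // and, on the host, initialized to &x.
    int x;
    #pragma omp declare target link(x)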
1652
1653llvm::Constant *
1654CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1655 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1656        !CGM.getContext().getTargetInfo().isTLSSupported());
1657 // Lookup the entry, lazily creating it if necessary.
1658 std::string Suffix = getName({"cache", ""});
1659 return OMPBuilder.getOrCreateInternalVariable(
1660 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
1661}
1662
1663Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1664 const VarDecl *VD,
1665 Address VDAddr,
1666 SourceLocation Loc) {
1667 if (CGM.getLangOpts().OpenMPUseTLS &&
1668 CGM.getContext().getTargetInfo().isTLSSupported())
1669 return VDAddr;
1670
1671 llvm::Type *VarTy = VDAddr.getElementType();
1672 llvm::Value *Args[] = {
1673 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1674 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1675 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1676 getOrCreateThreadPrivateCache(VD)};
1677 return Address(
1678 CGF.EmitRuntimeCall(
1679 OMPBuilder.getOrCreateRuntimeFunction(
1680 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1681 Args),
1682 CGF.Int8Ty, VDAddr.getAlignment());
1683}
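A sketch of the call this emits, in C++ form (assumptions: standard kmp prototypes; var and var_cache stand in for the variable and the per-variable cache created by getOrCreateThreadPrivateCache):

    // Sketch: the runtime returns the address of the calling thread's
    // private copy of 'var', lazily allocating and caching it.
    static void **var_cache; // the i8** internal cache variable
    void *priv = __kmpc_threadprivate_cached(&loc, gtid, (void *)&var,
                                             sizeof(var), &var_cache);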
1684
1685void CGOpenMPRuntime::emitThreadPrivateVarInit(
1686 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1687 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1688 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1689 // library.
1690 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1691 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1692 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1693 OMPLoc);
1694 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1695 // to register constructor/destructor for variable.
1696 llvm::Value *Args[] = {
1697 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1698 Ctor, CopyCtor, Dtor};
1699 CGF.EmitRuntimeCall(
1700 OMPBuilder.getOrCreateRuntimeFunction(
1701 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1702 Args);
1703}
1704
1705llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1706 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1707 bool PerformInit, CodeGenFunction *CGF) {
1708 if (CGM.getLangOpts().OpenMPUseTLS &&
1709 CGM.getContext().getTargetInfo().isTLSSupported())
1710 return nullptr;
1711
1712 VD = VD->getDefinition(CGM.getContext());
1713 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1714 QualType ASTTy = VD->getType();
1715
1716 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1717 const Expr *Init = VD->getAnyInitializer();
1718 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1719 // Generate function that re-emits the declaration's initializer into the
1720 // threadprivate copy of the variable VD
1721 CodeGenFunction CtorCGF(CGM);
1722 FunctionArgList Args;
1723 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1724 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1725 ImplicitParamDecl::Other);
1726 Args.push_back(&Dst);
1727
1728 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1729 CGM.getContext().VoidPtrTy, Args);
1730 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1731 std::string Name = getName({"__kmpc_global_ctor_", ""});
1732 llvm::Function *Fn =
1733 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1734 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1735 Args, Loc, Loc);
1736 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1737 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1738 CGM.getContext().VoidPtrTy, Dst.getLocation());
1739 Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
1740 Arg = CtorCGF.Builder.CreateElementBitCast(
1741 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1742 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1743 /*IsInitializer=*/true);
1744 ArgVal = CtorCGF.EmitLoadOfScalar(
1745 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1746 CGM.getContext().VoidPtrTy, Dst.getLocation());
1747 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1748 CtorCGF.FinishFunction();
1749 Ctor = Fn;
1750 }
1751 if (VD->getType().isDestructedType() != QualType::DK_none) {
1752 // Generate function that emits destructor call for the threadprivate copy
1753 // of the variable VD
1754 CodeGenFunction DtorCGF(CGM);
1755 FunctionArgList Args;
1756 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1757 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1758 ImplicitParamDecl::Other);
1759 Args.push_back(&Dst);
1760
1761 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1762 CGM.getContext().VoidTy, Args);
1763 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1764 std::string Name = getName({"__kmpc_global_dtor_", ""});
1765 llvm::Function *Fn =
1766 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1767 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1768 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1769 Loc, Loc);
1770 // Create a scope with an artificial location for the body of this function.
1771 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1772 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1773 DtorCGF.GetAddrOfLocalVar(&Dst),
1774 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1775 DtorCGF.emitDestroy(
1776 Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
1777 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1778 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1779 DtorCGF.FinishFunction();
1780 Dtor = Fn;
1781 }
1782 // Do not emit init function if it is not required.
1783 if (!Ctor && !Dtor)
1784 return nullptr;
1785
1786 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1787 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1788 /*isVarArg=*/false)
1789 ->getPointerTo();
1790 // Copying constructor for the threadprivate variable.
1791 // Must be NULL: the parameter is reserved by the runtime, which currently
1792 // requires it to always be NULL; otherwise it fires an assertion.
1793 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1794 if (Ctor == nullptr) {
1795 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1796 /*isVarArg=*/false)
1797 ->getPointerTo();
1798 Ctor = llvm::Constant::getNullValue(CtorTy);
1799 }
1800 if (Dtor == nullptr) {
1801 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1802 /*isVarArg=*/false)
1803 ->getPointerTo();
1804 Dtor = llvm::Constant::getNullValue(DtorTy);
1805 }
1806 if (!CGF) {
1807 auto *InitFunctionTy =
1808 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1809 std::string Name = getName({"__omp_threadprivate_init_", ""});
1810 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1811 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1812 CodeGenFunction InitCGF(CGM);
1813 FunctionArgList ArgList;
1814 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1815 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1816 Loc, Loc);
1817 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1818 InitCGF.FinishFunction();
1819 return InitFunction;
1820 }
1821 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1822 }
1823 return nullptr;
1824}
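A hedged example of user code driving this function (hypothetical): a C++ threadprivate variable with a dynamic initializer and a destructor makes both Ctor and Dtor non-null, so an __omp_threadprivate_init_ function is emitted to register them with the runtime.

    struct S { S(); ~S(); };
    S s;
    #pragma omp threadprivate(s)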
1825
1826bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1827 llvm::GlobalVariable *Addr,
1828 bool PerformInit) {
1829 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1830 !CGM.getLangOpts().OpenMPIsDevice)
1831 return false;
1832 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1833 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1834 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1835 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
1836 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
1837 HasRequiresUnifiedSharedMemory))
1838 return CGM.getLangOpts().OpenMPIsDevice;
1839 VD = VD->getDefinition(CGM.getContext());
1840 assert(VD && "Unknown VarDecl");
1841
1842 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1843 return CGM.getLangOpts().OpenMPIsDevice;
1844
1845 QualType ASTTy = VD->getType();
1846 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1847
1848 // Produce the unique prefix to identify the new target regions. We use
1849 // the source location of the variable declaration, which we know does not
1850 // conflict with any target region.
1851 auto EntryInfo =
1852 getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
1853 SmallString<128> Buffer, Out;
1854 OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);
1855
1856 const Expr *Init = VD->getAnyInitializer();
1857 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1858 llvm::Constant *Ctor;
1859 llvm::Constant *ID;
1860 if (CGM.getLangOpts().OpenMPIsDevice) {
1861 // Generate function that re-emits the declaration's initializer into
1862 // the threadprivate copy of the variable VD
1863 CodeGenFunction CtorCGF(CGM);
1864
1865 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1866 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1867 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1868 FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1869 llvm::GlobalValue::WeakODRLinkage);
1870 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1871 if (CGM.getTriple().isAMDGCN())
1872 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1873 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1874 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1875 FunctionArgList(), Loc, Loc);
1876 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1877 llvm::Constant *AddrInAS0 = Addr;
1878 if (Addr->getAddressSpace() != 0)
1879 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1880 Addr, llvm::PointerType::getWithSamePointeeType(
1881 cast<llvm::PointerType>(Addr->getType()), 0));
1882 CtorCGF.EmitAnyExprToMem(Init,
1883 Address(AddrInAS0, Addr->getValueType(),
1884 CGM.getContext().getDeclAlign(VD)),
1885 Init->getType().getQualifiers(),
1886 /*IsInitializer=*/true);
1887 CtorCGF.FinishFunction();
1888 Ctor = Fn;
1889 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1890 } else {
1891 Ctor = new llvm::GlobalVariable(
1892 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1893 llvm::GlobalValue::PrivateLinkage,
1894 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1895 ID = Ctor;
1896 }
1897
1898 // Register the information for the entry associated with the constructor.
1899 Out.clear();
1900 auto CtorEntryInfo = EntryInfo;
1901 CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
1902 OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1903 CtorEntryInfo, Ctor, ID,
1904 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
1905 }
1906 if (VD->getType().isDestructedType() != QualType::DK_none) {
1907 llvm::Constant *Dtor;
1908 llvm::Constant *ID;
1909 if (CGM.getLangOpts().OpenMPIsDevice) {
1910 // Generate function that emits destructor call for the threadprivate
1911 // copy of the variable VD
1912 CodeGenFunction DtorCGF(CGM);
1913
1914 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1915 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1916 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1917 FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1918 llvm::GlobalValue::WeakODRLinkage);
1919 Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
1920 if (CGM.getTriple().isAMDGCN())
1921 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1922 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1923 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1924 FunctionArgList(), Loc, Loc);
1925 // Create a scope with an artificial location for the body of this
1926 // function.
1927 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1928 llvm::Constant *AddrInAS0 = Addr;
1929 if (Addr->getAddressSpace() != 0)
1930 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1931 Addr, llvm::PointerType::getWithSamePointeeType(
1932 cast<llvm::PointerType>(Addr->getType()), 0));
1933 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1934 CGM.getContext().getDeclAlign(VD)),
1935 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1936 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1937 DtorCGF.FinishFunction();
1938 Dtor = Fn;
1939 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1940 } else {
1941 Dtor = new llvm::GlobalVariable(
1942 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1943 llvm::GlobalValue::PrivateLinkage,
1944 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1945 ID = Dtor;
1946 }
1947 // Register the information for the entry associated with the destructor.
1948 Out.clear();
1949 auto DtorEntryInfo = EntryInfo;
1950 DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
1951 OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
1952 DtorEntryInfo, Dtor, ID,
1953 llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
1954 }
1955 return CGM.getLangOpts().OpenMPIsDevice;
1956}
1957
1958Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
1959 QualType VarType,
1960 StringRef Name) {
1961 std::string Suffix = getName({"artificial", ""});
1962 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
1963 llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
1964 VarLVType, Twine(Name).concat(Suffix).str());
1965 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
1966 CGM.getTarget().isTLSSupported()) {
1967 GAddr->setThreadLocal(/*Val=*/true);
1968 return Address(GAddr, GAddr->getValueType(),
1969 CGM.getContext().getTypeAlignInChars(VarType));
1970 }
1971 std::string CacheSuffix = getName({"cache", ""});
1972 llvm::Value *Args[] = {
1973 emitUpdateLocation(CGF, SourceLocation()),
1974 getThreadID(CGF, SourceLocation()),
1975 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
1976 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
1977 /*isSigned=*/false),
1978 OMPBuilder.getOrCreateInternalVariable(
1979 CGM.VoidPtrPtrTy,
1980 Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
1981 return Address(
1982 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1983 CGF.EmitRuntimeCall(
1984 OMPBuilder.getOrCreateRuntimeFunction(
1985 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1986 Args),
1987 VarLVType->getPointerTo(/*AddrSpace=*/0)),
1988 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
1989}
1990
1991void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1992 const RegionCodeGenTy &ThenGen,
1993 const RegionCodeGenTy &ElseGen) {
1994 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1995
1996 // If the condition constant folds and can be elided, try to avoid emitting
1997 // the condition and the dead arm of the if/else.
1998 bool CondConstant;
1999 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2000 if (CondConstant)
2001 ThenGen(CGF);
2002 else
2003 ElseGen(CGF);
2004 return;
2005 }
2006
2007 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2008 // emit the conditional branch.
2009 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2010 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2011 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2012 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2013
2014 // Emit the 'then' code.
2015 CGF.EmitBlock(ThenBlock);
2016 ThenGen(CGF);
2017 CGF.EmitBranch(ContBlock);
2018 // Emit the 'else' code if present.
2019 // There is no need to emit line number for unconditional branch.
2020 (void)ApplyDebugLocation::CreateEmpty(CGF);
2021 CGF.EmitBlock(ElseBlock);
2022 ElseGen(CGF);
2023 // There is no need to emit line number for unconditional branch.
2024 (void)ApplyDebugLocation::CreateEmpty(CGF);
2025 CGF.EmitBranch(ContBlock);
2026 // Emit the continuation block for code after the if.
2027 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2028}
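For example (hypothetical user code), an if clause that folds to a constant skips the branch entirely:

    // Only the serialized (ElseGen) arm is emitted; no conditional branch
    // appears in the generated code.
    #pragma omp parallel if(0)
    { /* always runs serialized */ }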
2029
2030void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2031 llvm::Function *OutlinedFn,
2032 ArrayRef<llvm::Value *> CapturedVars,
2033 const Expr *IfCond,
2034 llvm::Value *NumThreads) {
2035 if (!CGF.HaveInsertPoint())
2036 return;
2037 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2038 auto &M = CGM.getModule();
2039 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2040 this](CodeGenFunction &CGF, PrePostActionTy &) {
2041 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2042 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2043 llvm::Value *Args[] = {
2044 RTLoc,
2045 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2046 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2047 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2048 RealArgs.append(std::begin(Args), std::end(Args));
2049 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2050
2051 llvm::FunctionCallee RTLFn =
2052 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2053 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2054 };
2055 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2056 this](CodeGenFunction &CGF, PrePostActionTy &) {
2057 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2058 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2059 // Build calls:
2060 // __kmpc_serialized_parallel(&Loc, GTid);
2061 llvm::Value *Args[] = {RTLoc, ThreadID};
2062 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2063 M, OMPRTL___kmpc_serialized_parallel),
2064 Args);
2065
2066 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2067 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2068 Address ZeroAddrBound =
2069 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2070 /*Name=*/".bound.zero.addr");
2071 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2072 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2073 // ThreadId for serialized parallels is 0.
2074 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2075 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2076 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2077
2078 // Ensure we do not inline the function. This is trivially true for the ones
2079 // passed to __kmpc_fork_call but the ones called in serialized regions
2080 // could be inlined. This is not perfect, but it is closer to the invariant
2081 // we want, namely, every data environment starts with a new function.
2082 // TODO: We should pass the if condition to the runtime function and do the
2083 // handling there. Much cleaner code.
2084 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2085 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2086 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2087
2088 // __kmpc_end_serialized_parallel(&Loc, GTid);
2089 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2090 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2091 M, OMPRTL___kmpc_end_serialized_parallel),
2092 EndArgs);
2093 };
2094 if (IfCond) {
2095 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2096 } else {
2097 RegionCodeGenTy ThenRCG(ThenGen);
2098 ThenRCG(CGF);
2099 }
2100}
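A sketch of the serialized (ElseGen) path above, in C++ form, assuming standard kmp prototypes and a single captured pointer (everything here is illustrative, not the emitted code itself):

    void run_serialized(ident_t *loc, kmp_int32 gtid,
                        void (*outlined)(kmp_int32 *, kmp_int32 *, int *),
                        int *captured) {
      kmp_int32 zero_bound = 0; // bound thread ID is 0 when serialized
      __kmpc_serialized_parallel(loc, gtid);
      outlined(&gtid, &zero_bound, captured);
      __kmpc_end_serialized_parallel(loc, gtid);
    }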
2101
2102 // If we're inside an (outlined) parallel region, use the region info's
2103 // thread-ID variable (it is passed as the first argument of the outlined
2104 // function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2105 // region but in a regular serial code region, get the thread ID by calling
2106 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash it in a temporary,
2107 // and return the address of that temporary.
2108Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2109 SourceLocation Loc) {
2110 if (auto *OMPRegionInfo =
2111 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2112 if (OMPRegionInfo->getThreadIDVariable())
2113 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2114
2115 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2116 QualType Int32Ty =
2117 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2118 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2119 CGF.EmitStoreOfScalar(ThreadID,
2120 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2121
2122 return ThreadIDTemp;
2123}
2124
2125llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2126 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2127 std::string Name = getName({Prefix, "var"});
2128 return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2129}
2130
2131namespace {
2132/// Common pre(post)-action for different OpenMP constructs.
2133class CommonActionTy final : public PrePostActionTy {
2134 llvm::FunctionCallee EnterCallee;
2135 ArrayRef<llvm::Value *> EnterArgs;
2136 llvm::FunctionCallee ExitCallee;
2137 ArrayRef<llvm::Value *> ExitArgs;
2138 bool Conditional;
2139 llvm::BasicBlock *ContBlock = nullptr;
2140
2141public:
2142 CommonActionTy(llvm::FunctionCallee EnterCallee,
2143 ArrayRef<llvm::Value *> EnterArgs,
2144 llvm::FunctionCallee ExitCallee,
2145 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2146 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2147 ExitArgs(ExitArgs), Conditional(Conditional) {}
2148 void Enter(CodeGenFunction &CGF) override {
2149 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2150 if (Conditional) {
2151 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2152 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2153 ContBlock = CGF.createBasicBlock("omp_if.end");
2154 // Generate the branch (If-stmt)
2155 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2156 CGF.EmitBlock(ThenBlock);
2157 }
2158 }
2159 void Done(CodeGenFunction &CGF) {
2160 // Emit the rest of blocks/branches
2161 CGF.EmitBranch(ContBlock);
2162 CGF.EmitBlock(ContBlock, true);
2163 }
2164 void Exit(CodeGenFunction &CGF) override {
2165 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2166 }
2167};
2168} // anonymous namespace
2169
2170void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2171 StringRef CriticalName,
2172 const RegionCodeGenTy &CriticalOpGen,
2173 SourceLocation Loc, const Expr *Hint) {
2174 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2175 // CriticalOpGen();
2176 // __kmpc_end_critical(ident_t *, gtid, Lock);
2177 // Prepare arguments and build a call to __kmpc_critical
2178 if (!CGF.HaveInsertPoint())
2179 return;
2180 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2181 getCriticalRegionLock(CriticalName)};
2182 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2183 std::end(Args));
2184 if (Hint) {
2185 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2186 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2187 }
2188 CommonActionTy Action(
2189 OMPBuilder.getOrCreateRuntimeFunction(
2190 CGM.getModule(),
2191 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2192 EnterArgs,
2193 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2194 OMPRTL___kmpc_end_critical),
2195 Args);
2196 CriticalOpGen.setAction(Action);
2197 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2198}
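A hedged user-code example (hypothetical; omp_sync_hint_contended comes from omp.h): a named critical with a hint clause makes Hint non-null above, selecting __kmpc_critical_with_hint instead of __kmpc_critical.

    #pragma omp critical(update) hint(omp_sync_hint_contended)
    counter += delta;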
2199
2200void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2201 const RegionCodeGenTy &MasterOpGen,
2202 SourceLocation Loc) {
2203 if (!CGF.HaveInsertPoint())
2204 return;
2205 // if(__kmpc_master(ident_t *, gtid)) {
2206 // MasterOpGen();
2207 // __kmpc_end_master(ident_t *, gtid);
2208 // }
2209 // Prepare arguments and build a call to __kmpc_master
2210 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2211 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2212 CGM.getModule(), OMPRTL___kmpc_master),
2213 Args,
2214 OMPBuilder.getOrCreateRuntimeFunction(
2215 CGM.getModule(), OMPRTL___kmpc_end_master),
2216 Args,
2217 /*Conditional=*/true);
2218 MasterOpGen.setAction(Action);
2219 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2220 Action.Done(CGF);
2221}
2222
2223void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2224 const RegionCodeGenTy &MaskedOpGen,
2225 SourceLocation Loc, const Expr *Filter) {
2226 if (!CGF.HaveInsertPoint())
2227 return;
2228 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2229 // MaskedOpGen();
2230 // __kmpc_end_masked(ident_t *, gtid);
2231 // }
2232 // Prepare arguments and build a call to __kmpc_masked
2233 llvm::Value *FilterVal = Filter
2234 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2235 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2236 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2237 FilterVal};
2238 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2239 getThreadID(CGF, Loc)};
2240 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2241 CGM.getModule(), OMPRTL___kmpc_masked),
2242 Args,
2243 OMPBuilder.getOrCreateRuntimeFunction(
2244 CGM.getModule(), OMPRTL___kmpc_end_masked),
2245 ArgsEnd,
2246 /*Conditional=*/true);
2247 MaskedOpGen.setAction(Action);
2248 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2249 Action.Done(CGF);
2250}
2251
2252void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2253 SourceLocation Loc) {
2254 if (!CGF.HaveInsertPoint())
2255 return;
2256 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2257 OMPBuilder.createTaskyield(CGF.Builder);
2258 } else {
2259 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2260 llvm::Value *Args[] = {
2261 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2262 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2263 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2264 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2265 Args);
2266 }
2267
2268 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2269 Region->emitUntiedSwitch(CGF);
2270}
2271
2272void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2273 const RegionCodeGenTy &TaskgroupOpGen,
2274 SourceLocation Loc) {
2275 if (!CGF.HaveInsertPoint())
2276 return;
2277 // __kmpc_taskgroup(ident_t *, gtid);
2278 // TaskgroupOpGen();
2279 // __kmpc_end_taskgroup(ident_t *, gtid);
2280 // Prepare arguments and build a call to __kmpc_taskgroup
2281 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2282 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2283 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2284 Args,
2285 OMPBuilder.getOrCreateRuntimeFunction(
2286 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2287 Args);
2288 TaskgroupOpGen.setAction(Action);
2289 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2290}
2291
2292/// Given an array of pointers to variables, project the address of a
2293/// given variable.
2294static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2295 unsigned Index, const VarDecl *Var) {
2296 // Pull out the pointer to the variable.
2297 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2298 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2299
2300 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2301 return Address(
2302 CGF.Builder.CreateBitCast(
2303 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2304 ElemTy, CGF.getContext().getDeclAlign(Var));
2305}
2306
2307static llvm::Value *emitCopyprivateCopyFunction(
2308 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2309 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2310 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2311 SourceLocation Loc) {
2312 ASTContext &C = CGM.getContext();
2313 // void copy_func(void *LHSArg, void *RHSArg);
2314 FunctionArgList Args;
2315 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2316 ImplicitParamDecl::Other);
2317 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2318 ImplicitParamDecl::Other);
2319 Args.push_back(&LHSArg);
2320 Args.push_back(&RHSArg);
2321 const auto &CGFI =
2322 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2323 std::string Name =
2324 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2325 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2326 llvm::GlobalValue::InternalLinkage, Name,
2327 &CGM.getModule());
2328 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2329 Fn->setDoesNotRecurse();
2330 CodeGenFunction CGF(CGM);
2331 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2332 // Dest = (void*[n])(LHSArg);
2333 // Src = (void*[n])(RHSArg);
2334 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2335 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2336 ArgsElemType->getPointerTo()),
2337 ArgsElemType, CGF.getPointerAlign());
2338 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2339 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2340 ArgsElemType->getPointerTo()),
2341 ArgsElemType, CGF.getPointerAlign());
2342 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2343 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2344 // ...
2345 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2346 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2347 const auto *DestVar =
2348 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2349 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2350
2351 const auto *SrcVar =
2352 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2353 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2354
2355 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2356 QualType Type = VD->getType();
2357 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2358 }
2359 CGF.FinishFunction();
2360 return Fn;
2361}
2362
2363void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2364 const RegionCodeGenTy &SingleOpGen,
2365 SourceLocation Loc,
2366 ArrayRef<const Expr *> CopyprivateVars,
2367 ArrayRef<const Expr *> SrcExprs,
2368 ArrayRef<const Expr *> DstExprs,
2369 ArrayRef<const Expr *> AssignmentOps) {
2370 if (!CGF.HaveInsertPoint())
2371 return;
2372 assert(CopyprivateVars.size() == SrcExprs.size() &&
2373        CopyprivateVars.size() == DstExprs.size() &&
2374        CopyprivateVars.size() == AssignmentOps.size());
2375 ASTContext &C = CGM.getContext();
2376 // int32 did_it = 0;
2377 // if(__kmpc_single(ident_t *, gtid)) {
2378 // SingleOpGen();
2379 // __kmpc_end_single(ident_t *, gtid);
2380 // did_it = 1;
2381 // }
2382 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2383 // <copy_func>, did_it);
2384
2385 Address DidIt = Address::invalid();
2386 if (!CopyprivateVars.empty()) {
2387 // int32 did_it = 0;
2388 QualType KmpInt32Ty =
2389 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2390 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2391 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2392 }
2393 // Prepare arguments and build a call to __kmpc_single
2394 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2395 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2396 CGM.getModule(), OMPRTL___kmpc_single),
2397 Args,
2398 OMPBuilder.getOrCreateRuntimeFunction(
2399 CGM.getModule(), OMPRTL___kmpc_end_single),
2400 Args,
2401 /*Conditional=*/true);
2402 SingleOpGen.setAction(Action);
2403 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2404 if (DidIt.isValid()) {
2405 // did_it = 1;
2406 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2407 }
2408 Action.Done(CGF);
2409 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2410 // <copy_func>, did_it);
2411 if (DidIt.isValid()) {
2412 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2413 QualType CopyprivateArrayTy = C.getConstantArrayType(
2414 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2415 /*IndexTypeQuals=*/0);
2416 // Create a list of all private variables for copyprivate.
2417 Address CopyprivateList =
2418 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2419 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2420 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2421 CGF.Builder.CreateStore(
2422 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2423 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2424 CGF.VoidPtrTy),
2425 Elem);
2426 }
2427 // Build a function that copies private values from the single region to all
2428 // other threads in the corresponding parallel region.
2429 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2430 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2431 SrcExprs, DstExprs, AssignmentOps, Loc);
2432 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2433 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2434 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2435 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2436 llvm::Value *Args[] = {
2437 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2438 getThreadID(CGF, Loc), // i32 <gtid>
2439 BufSize, // size_t <buf_size>
2440 CL.getPointer(), // void *<copyprivate list>
2441 CpyFn, // void (*) (void *, void *) <copy_func>
2442 DidItVal // i32 did_it
2443 };
2444 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2445 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2446 Args);
2447 }
2448}
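// For orientation, a minimal user-level construct that reaches this lowering
// (a hypothetical sketch, not part of this file; any 'single' with a
// 'copyprivate' clause would do, and compute() is an assumed helper):
//
//   int x;
//   #pragma omp parallel
//   {
//     #pragma omp single copyprivate(x)
//     x = compute(); // one thread writes x inside __kmpc_single(...)
//     // __kmpc_copyprivate then broadcasts x to the other threads, with
//     // did_it marking which thread's value is the source.
//   }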
2449
2450void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2451 const RegionCodeGenTy &OrderedOpGen,
2452 SourceLocation Loc, bool IsThreads) {
2453 if (!CGF.HaveInsertPoint())
2454 return;
2455 // __kmpc_ordered(ident_t *, gtid);
2456 // OrderedOpGen();
2457 // __kmpc_end_ordered(ident_t *, gtid);
2458 // Prepare arguments and build a call to __kmpc_ordered
2459 if (IsThreads) {
2460 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2461 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2462 CGM.getModule(), OMPRTL___kmpc_ordered),
2463 Args,
2464 OMPBuilder.getOrCreateRuntimeFunction(
2465 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2466 Args);
2467 OrderedOpGen.setAction(Action);
2468 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2469 return;
2470 }
2471 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2472}
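// A sketch of a construct that takes the IsThreads path above (hypothetical
// example; an 'ordered' region without a 'simd' clause synchronizes threads):
//
//   #pragma omp for ordered
//   for (int i = 0; i < n; ++i) {
//     #pragma omp ordered
//     body(i); // emitted between __kmpc_ordered and __kmpc_end_ordered
//   }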
2473
2474unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2475 unsigned Flags;
2476 if (Kind == OMPD_for)
2477 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2478 else if (Kind == OMPD_sections)
2479 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2480 else if (Kind == OMPD_single)
2481 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2482 else if (Kind == OMPD_barrier)
2483 Flags = OMP_IDENT_BARRIER_EXPL;
2484 else
2485 Flags = OMP_IDENT_BARRIER_IMPL;
2486 return Flags;
2487}
2488
2489void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2490 CodeGenFunction &CGF, const OMPLoopDirective &S,
2491 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2492 // Check if the loop directive is actually a doacross loop directive. In this
2493 // case, choose the 'static, 1' schedule.
2494 if (llvm::any_of(
2495 S.getClausesOfKind<OMPOrderedClause>(),
2496 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2497 ScheduleKind = OMPC_SCHEDULE_static;
2498 // Chunk size is 1 in this case.
2499 llvm::APInt ChunkSize(32, 1);
2500 ChunkExpr = IntegerLiteral::Create(
2501 CGF.getContext(), ChunkSize,
2502 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2503 SourceLocation());
2504 }
2505}
2506
2507void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2508 OpenMPDirectiveKind Kind, bool EmitChecks,
2509 bool ForceSimpleCall) {
2510 // Check if we should use the OMPBuilder
2511 auto *OMPRegionInfo =
2512 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2513 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2514 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2515 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2516 return;
2517 }
2518
2519 if (!CGF.HaveInsertPoint())
2520 return;
2521 // Build call __kmpc_cancel_barrier(loc, thread_id);
2522 // Build call __kmpc_barrier(loc, thread_id);
2523 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2524 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2525 // thread_id);
2526 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2527 getThreadID(CGF, Loc)};
2528 if (OMPRegionInfo) {
2529 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2530 llvm::Value *Result = CGF.EmitRuntimeCall(
2531 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2532 OMPRTL___kmpc_cancel_barrier),
2533 Args);
2534 if (EmitChecks) {
2535 // if (__kmpc_cancel_barrier()) {
2536 // exit from construct;
2537 // }
2538 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2539 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2540 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2541 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2542 CGF.EmitBlock(ExitBB);
2543 // exit from construct;
2544 CodeGenFunction::JumpDest CancelDestination =
2545 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2546 CGF.EmitBranchThroughCleanup(CancelDestination);
2547 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2548 }
2549 return;
2550 }
2551 }
2552 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2553 CGM.getModule(), OMPRTL___kmpc_barrier),
2554 Args);
2555}
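// The cancellation-aware branch above corresponds roughly to this shape
// (a sketch, assuming the enclosing region contains a cancel construct so
// that OMPRegionInfo->hasCancel() is true):
//
//   #pragma omp parallel
//   {
//     if (err) {
//       #pragma omp cancel parallel
//     }
//     #pragma omp barrier // lowered to __kmpc_cancel_barrier; a nonzero
//                         // result branches to the .cancel.exit block
//   }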
2556
2557void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2558 Expr *ME, bool IsFatal) {
2559 llvm::Value *MVL =
2560 ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
2561 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
2562 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2563 // *message)
2564 llvm::Value *Args[] = {
2565 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
2566 llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
2567 CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
2568 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2569 CGM.getModule(), OMPRTL___kmpc_error),
2570 Args);
2571}
2572
2573/// Map the OpenMP loop schedule to the runtime enumeration.
2574static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2575 bool Chunked, bool Ordered) {
2576 switch (ScheduleKind) {
2577 case OMPC_SCHEDULE_static:
2578 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2579 : (Ordered ? OMP_ord_static : OMP_sch_static);
2580 case OMPC_SCHEDULE_dynamic:
2581 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2582 case OMPC_SCHEDULE_guided:
2583 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2584 case OMPC_SCHEDULE_runtime:
2585 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2586 case OMPC_SCHEDULE_auto:
2587 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2588 case OMPC_SCHEDULE_unknown:
2589 assert(!Chunked && "chunk was specified but schedule kind not known");
2590 return Ordered ? OMP_ord_static : OMP_sch_static;
2591 }
2592 llvm_unreachable("Unexpected runtime schedule");
2593}
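// Informal examples of the mapping above (clause as written -> enum used in
// this file; the numeric values are defined by the KMP runtime):
//   schedule(static)        -> OMP_sch_static      (OMP_ord_static if ordered)
//   schedule(static, 4)     -> OMP_sch_static_chunked
//   schedule(dynamic[, c])  -> OMP_sch_dynamic_chunked (chunk defaults to 1)
//   schedule(guided[, c])   -> OMP_sch_guided_chunked
//   schedule(runtime)       -> OMP_sch_runtime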
2594
2595/// Map the OpenMP distribute schedule to the runtime enumeration.
2596static OpenMPSchedType
2597getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2598 // only static is allowed for dist_schedule
2599 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2600}
2601
2602bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2603 bool Chunked) const {
2604 OpenMPSchedType Schedule =
2605 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2606 return Schedule == OMP_sch_static;
2607}
2608
2609bool CGOpenMPRuntime::isStaticNonchunked(
2610 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2611 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2612 return Schedule == OMP_dist_sch_static;
2613}
2614
2615bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2616 bool Chunked) const {
2617 OpenMPSchedType Schedule =
2618 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619 return Schedule == OMP_sch_static_chunked;
2620}
2621
2622bool CGOpenMPRuntime::isStaticChunked(
2623 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625 return Schedule == OMP_dist_sch_static_chunked;
2626}
2627
2628bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2629 OpenMPSchedType Schedule =
2630 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2631 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2632 return Schedule != OMP_sch_static;
2633}
2634
2635static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2636 OpenMPScheduleClauseModifier M1,
2637 OpenMPScheduleClauseModifier M2) {
2638 int Modifier = 0;
2639 switch (M1) {
2640 case OMPC_SCHEDULE_MODIFIER_monotonic:
2641 Modifier = OMP_sch_modifier_monotonic;
2642 break;
2643 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2644 Modifier = OMP_sch_modifier_nonmonotonic;
2645 break;
2646 case OMPC_SCHEDULE_MODIFIER_simd:
2647 if (Schedule == OMP_sch_static_chunked)
2648 Schedule = OMP_sch_static_balanced_chunked;
2649 break;
2650 case OMPC_SCHEDULE_MODIFIER_last:
2651 case OMPC_SCHEDULE_MODIFIER_unknown:
2652 break;
2653 }
2654 switch (M2) {
2655 case OMPC_SCHEDULE_MODIFIER_monotonic:
2656 Modifier = OMP_sch_modifier_monotonic;
2657 break;
2658 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2659 Modifier = OMP_sch_modifier_nonmonotonic;
2660 break;
2661 case OMPC_SCHEDULE_MODIFIER_simd:
2662 if (Schedule == OMP_sch_static_chunked)
2663 Schedule = OMP_sch_static_balanced_chunked;
2664 break;
2665 case OMPC_SCHEDULE_MODIFIER_last:
2666 case OMPC_SCHEDULE_MODIFIER_unknown:
2667 break;
2668 }
2669 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2670 // If the static schedule kind is specified or if the ordered clause is
2671 // specified, and if the nonmonotonic modifier is not specified, the effect is
2672 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2673 // modifier is specified, the effect is as if the nonmonotonic modifier is
2674 // specified.
2675 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2676 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2677 Schedule == OMP_sch_static_balanced_chunked ||
2678 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2679 Schedule == OMP_dist_sch_static_chunked ||
2680 Schedule == OMP_dist_sch_static))
2681 Modifier = OMP_sch_modifier_nonmonotonic;
2682 }
2683 return Schedule | Modifier;
2684}
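// Worked examples of the OpenMP 5.0 default applied above (informal):
//   schedule(dynamic)            -> no explicit modifier and a non-static
//                                   schedule, so the nonmonotonic bit is OR'd in.
//   schedule(static, 4)          -> static schedule, Modifier stays 0, which
//                                   behaves as if monotonic were specified.
//   schedule(monotonic: dynamic) -> OMP_sch_modifier_monotonic; the default
//                                   rule does not apply.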
2685
2686void CGOpenMPRuntime::emitForDispatchInit(
2687 CodeGenFunction &CGF, SourceLocation Loc,
2688 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2689 bool Ordered, const DispatchRTInput &DispatchValues) {
2690 if (!CGF.HaveInsertPoint())
2691 return;
2692 OpenMPSchedType Schedule = getRuntimeSchedule(
2693 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2694 assert(Ordered ||
2695 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2696 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2697 Schedule != OMP_sch_static_balanced_chunked));
2698 // Call __kmpc_dispatch_init(
2699 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2700 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2701 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2702
2703 // If the Chunk was not specified in the clause, use the default value 1.
2704 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2705 : CGF.Builder.getIntN(IVSize, 1);
2706 llvm::Value *Args[] = {
2707 emitUpdateLocation(CGF, Loc),
2708 getThreadID(CGF, Loc),
2709 CGF.Builder.getInt32(addMonoNonMonoModifier(
2710 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2711 DispatchValues.LB, // Lower
2712 DispatchValues.UB, // Upper
2713 CGF.Builder.getIntN(IVSize, 1), // Stride
2714 Chunk // Chunk
2715 };
2716 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2717}
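// A sketch of how the dispatch init emitted above is typically consumed,
// assuming IVSize == 32 and IVSigned (the _4 entry points); the companion
// calls are produced by emitForNext and createDispatchFiniFunction:
//
//   __kmpc_dispatch_init_4(loc, tid, sched, lb, ub, /*stride=*/1, chunk);
//   while (__kmpc_dispatch_next_4(loc, tid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; i += st)
//       body(i);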
2718
2719static void emitForStaticInitCall(
2720 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2721 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2722 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2723 const CGOpenMPRuntime::StaticRTInput &Values) {
2724 if (!CGF.HaveInsertPoint())
2725 return;
2726
2727 assert(!Values.Ordered);
2728 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2729 Schedule == OMP_sch_static_balanced_chunked ||
2730 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2731 Schedule == OMP_dist_sch_static ||
2732 Schedule == OMP_dist_sch_static_chunked);
2733
2734 // Call __kmpc_for_static_init(
2735 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2736 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2737 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2738 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2739 llvm::Value *Chunk = Values.Chunk;
2740 if (Chunk == nullptr) {
2741 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2742 Schedule == OMP_dist_sch_static) &&
2743 "expected static non-chunked schedule");
2744 // If the Chunk was not specified in the clause, use the default value 1.
2745 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2746 } else {
2747 assert((Schedule == OMP_sch_static_chunked ||
2748 Schedule == OMP_sch_static_balanced_chunked ||
2749 Schedule == OMP_ord_static_chunked ||
2750 Schedule == OMP_dist_sch_static_chunked) &&
2751 "expected static chunked schedule");
2752 }
2753 llvm::Value *Args[] = {
2754 UpdateLocation,
2755 ThreadId,
2756 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2757 M2)), // Schedule type
2758 Values.IL.getPointer(), // &isLastIter
2759 Values.LB.getPointer(), // &LB
2760 Values.UB.getPointer(), // &UB
2761 Values.ST.getPointer(), // &Stride
2762 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2763 Chunk // Chunk
2764 };
2765 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2766}
2767
2768void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2769 SourceLocation Loc,
2770 OpenMPDirectiveKind DKind,
2771 const OpenMPScheduleTy &ScheduleKind,
2772 const StaticRTInput &Values) {
2773 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2774 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2775 assert(isOpenMPWorksharingDirective(DKind) &&
2776 "Expected loop-based or sections-based directive.");
2777 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2778 isOpenMPLoopDirective(DKind)
2779 ? OMP_IDENT_WORK_LOOP
2780 : OMP_IDENT_WORK_SECTIONS);
2781 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2782 llvm::FunctionCallee StaticInitFunction =
2783 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2784 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2785 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2786 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2787}
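// The static counterpart has this overall shape (a sketch, assuming the
// 32-bit signed entry point chosen by createForStaticInitFunction):
//
//   __kmpc_for_static_init_4(loc, tid, sched, &last, &lb, &ub, &st,
//                            /*incr=*/1, chunk);
//   for (i = lb; i <= ub; ++i)
//     body(i);
//   __kmpc_for_static_fini(loc, tid); // see emitForStaticFinish below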
2788
2789void CGOpenMPRuntime::emitDistributeStaticInit(
2790 CodeGenFunction &CGF, SourceLocation Loc,
2791 OpenMPDistScheduleClauseKind SchedKind,
2792 const CGOpenMPRuntime::StaticRTInput &Values) {
2793 OpenMPSchedType ScheduleNum =
2794 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2795 llvm::Value *UpdatedLocation =
2796 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2797 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2798 llvm::FunctionCallee StaticInitFunction;
2799 bool isGPUDistribute =
2800 CGM.getLangOpts().OpenMPIsDevice &&
2801 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2802 StaticInitFunction = createForStaticInitFunction(
2803 Values.IVSize, Values.IVSigned, isGPUDistribute);
2804
2805 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2806 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2807 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2808}
2809
2810void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2811 SourceLocation Loc,
2812 OpenMPDirectiveKind DKind) {
2813 if (!CGF.HaveInsertPoint())
2814 return;
2815 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2816 llvm::Value *Args[] = {
2817 emitUpdateLocation(CGF, Loc,
2818 isOpenMPDistributeDirective(DKind)
2819 ? OMP_IDENT_WORK_DISTRIBUTE
2820 : isOpenMPLoopDirective(DKind)
2821 ? OMP_IDENT_WORK_LOOP
2822 : OMP_IDENT_WORK_SECTIONS),
2823 getThreadID(CGF, Loc)};
2824 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2825 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2826 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2827 CGF.EmitRuntimeCall(
2828 OMPBuilder.getOrCreateRuntimeFunction(
2829 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2830 Args);
2831 else
2832 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2833 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2834 Args);
2835}
2836
2837void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2838 SourceLocation Loc,
2839 unsigned IVSize,
2840 bool IVSigned) {
2841 if (!CGF.HaveInsertPoint())
2842 return;
2843 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2844 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2845 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2846}
2847
2848llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2849 SourceLocation Loc, unsigned IVSize,
2850 bool IVSigned, Address IL,
2851 Address LB, Address UB,
2852 Address ST) {
2853 // Call __kmpc_dispatch_next(
2854 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2855 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2856 // kmp_int[32|64] *p_stride);
2857 llvm::Value *Args[] = {
2858 emitUpdateLocation(CGF, Loc),
2859 getThreadID(CGF, Loc),
2860 IL.getPointer(), // &isLastIter
2861 LB.getPointer(), // &Lower
2862 UB.getPointer(), // &Upper
2863 ST.getPointer() // &Stride
2864 };
2865 llvm::Value *Call =
2866 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2867 return CGF.EmitScalarConversion(
2868 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2869 CGF.getContext().BoolTy, Loc);
2870}
2871
2872void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2873 llvm::Value *NumThreads,
2874 SourceLocation Loc) {
2875 if (!CGF.HaveInsertPoint())
2876 return;
2877 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2878 llvm::Value *Args[] = {
2879 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2880 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2881 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2882 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2883 Args);
2884}
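// Source-level trigger for the call above (hypothetical example):
//
//   #pragma omp parallel num_threads(nt) // emits
//   { /* ... */ }                        // __kmpc_push_num_threads(loc, gtid, nt)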
2885
2886void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2887 ProcBindKind ProcBind,
2888 SourceLocation Loc) {
2889 if (!CGF.HaveInsertPoint())
2890 return;
2891 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2892 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2893 llvm::Value *Args[] = {
2894 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2895 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2896 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2897 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2898 Args);
2899}
2900
2901void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2902 SourceLocation Loc, llvm::AtomicOrdering AO) {
2903 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2904 OMPBuilder.createFlush(CGF.Builder);
2905 } else {
2906 if (!CGF.HaveInsertPoint())
2907 return;
2908 // Build call void __kmpc_flush(ident_t *loc)
2909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910 CGM.getModule(), OMPRTL___kmpc_flush),
2911 emitUpdateLocation(CGF, Loc));
2912 }
2913}
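// Source-level trigger (hypothetical example):
//
//   #pragma omp flush // lowered to __kmpc_flush(loc) on this path; the
//                     // IRBuilder path calls OMPBuilder.createFlush instead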
2914
2915namespace {
2916/// Indexes of fields for type kmp_task_t.
2917enum KmpTaskTFields {
2918 /// List of shared variables.
2919 KmpTaskTShareds,
2920 /// Task routine.
2921 KmpTaskTRoutine,
2922 /// Partition id for the untied tasks.
2923 KmpTaskTPartId,
2924 /// Function with call of destructors for private variables.
2925 Data1,
2926 /// Task priority.
2927 Data2,
2928 /// (Taskloops only) Lower bound.
2929 KmpTaskTLowerBound,
2930 /// (Taskloops only) Upper bound.
2931 KmpTaskTUpperBound,
2932 /// (Taskloops only) Stride.
2933 KmpTaskTStride,
2934 /// (Taskloops only) Is last iteration flag.
2935 KmpTaskTLastIter,
2936 /// (Taskloops only) Reduction data.
2937 KmpTaskTReductions,
2938};
2939} // anonymous namespace
2940
2941void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
2942 // If we are in simd mode or there are no entries, we don't need to do
2943 // anything.
2944 if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
2945 return;
2946
2947 llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
2948 [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
2949 const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
2950 SourceLocation Loc;
2951 if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
2952 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
2953 E = CGM.getContext().getSourceManager().fileinfo_end();
2954 I != E; ++I) {
2955 if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
2956 I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
2957 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
2958 I->getFirst(), EntryInfo.Line, 1);
2959 break;
2960 }
2961 }
2962 }
2963 switch (Kind) {
2964 case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
2965 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2966 DiagnosticsEngine::Error, "Offloading entry for target region in "
2967 "%0 is incorrect: either the "
2968 "address or the ID is invalid.");
2969 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2970 } break;
2971 case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
2972 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2973 DiagnosticsEngine::Error, "Offloading entry for declare target "
2974 "variable %0 is incorrect: the "
2975 "address is invalid.");
2976 CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
2977 } break;
2978 case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
2979 unsigned DiagID = CGM.getDiags().getCustomDiagID(
2980 DiagnosticsEngine::Error,
2981 "Offloading entry for declare target variable is incorrect: the "
2982 "address is invalid.");
2983 CGM.getDiags().Report(DiagID);
2984 } break;
2985 }
2986 };
2987
2988 OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
2989}
2990
2991/// Loads all the offload entries information from the host IR
2992/// metadata.
2993void CGOpenMPRuntime::loadOffloadInfoMetadata() {
2994 // If we are in target mode, load the metadata from the host IR. This code has
2995 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
2996
2997 if (!CGM.getLangOpts().OpenMPIsDevice)
2998 return;
2999
3000 if (CGM.getLangOpts().OMPHostIRFile.empty())
3001 return;
3002
3003 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3004 if (auto EC = Buf.getError()) {
3005 CGM.getDiags().Report(diag::err_cannot_open_file)
3006 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3007 return;
3008 }
3009
3010 llvm::LLVMContext C;
3011 auto ME = expectedToErrorOrAndEmitErrors(
3012 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3013
3014 if (auto EC = ME.getError()) {
3015 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3016 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3017 CGM.getDiags().Report(DiagID)
3018 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3019 return;
3020 }
3021
3022 OMPBuilder.loadOffloadInfoMetadata(*ME.get());
3023}
3024
3025void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3026 if (!KmpRoutineEntryPtrTy) {
3027 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3028 ASTContext &C = CGM.getContext();
3029 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3030 FunctionProtoType::ExtProtoInfo EPI;
3031 KmpRoutineEntryPtrQTy = C.getPointerType(
3032 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3033 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3034 }
3035}
3036
3037namespace {
3038struct PrivateHelpersTy {
3039 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3040 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3041 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3042 PrivateElemInit(PrivateElemInit) {}
3043 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3044 const Expr *OriginalRef = nullptr;
3045 const VarDecl *Original = nullptr;
3046 const VarDecl *PrivateCopy = nullptr;
3047 const VarDecl *PrivateElemInit = nullptr;
3048 bool isLocalPrivate() const {
3049 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3050 }
3051};
3052typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3053} // anonymous namespace
3054
3055static bool isAllocatableDecl(const VarDecl *VD) {
3056 const VarDecl *CVD = VD->getCanonicalDecl();
3057 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3058 return false;
3059 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3060 // Use the default allocation.
3061 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3062 !AA->getAllocator());
3063}
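// A declaration this predicate accepts (hypothetical example; any allocate
// directive naming a non-default allocator, or the default allocator with an
// explicit allocator expression, qualifies):
//
//   int v;
//   #pragma omp allocate(v) allocator(omp_large_cap_mem_alloc)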
3064
3065static RecordDecl *
3066createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3067 if (!Privates.empty()) {
3068 ASTContext &C = CGM.getContext();
3069 // Build struct .kmp_privates_t. {
3070 // /* private vars */
3071 // };
3072 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3073 RD->startDefinition();
3074 for (const auto &Pair : Privates) {
3075 const VarDecl *VD = Pair.second.Original;
3076 QualType Type = VD->getType().getNonReferenceType();
3077 // If the private variable is a local variable with lvalue ref type,
3078 // allocate the pointer instead of the pointee type.
3079 if (Pair.second.isLocalPrivate()) {
3080 if (VD->getType()->isLValueReferenceType())
3081 Type = C.getPointerType(Type);
3082 if (isAllocatableDecl(VD))
3083 Type = C.getPointerType(Type);
3084 }
3085 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3086 if (VD->hasAttrs()) {
3087 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3088 E(VD->getAttrs().end());
3089 I != E; ++I)
3090 FD->addAttr(*I);
3091 }
3092 }
3093 RD->completeDefinition();
3094 return RD;
3095 }
3096 return nullptr;
3097}
3098
3099static RecordDecl *
3100createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3101 QualType KmpInt32Ty,
3102 QualType KmpRoutineEntryPointerQTy) {
3103 ASTContext &C = CGM.getContext();
3104 // Build struct kmp_task_t {
3105 // void * shareds;
3106 // kmp_routine_entry_t routine;
3107 // kmp_int32 part_id;
3108 // kmp_cmplrdata_t data1;
3109 // kmp_cmplrdata_t data2;
3110 // For taskloops additional fields:
3111 // kmp_uint64 lb;
3112 // kmp_uint64 ub;
3113 // kmp_int64 st;
3114 // kmp_int32 liter;
3115 // void * reductions;
3116 // };
3117 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3118 UD->startDefinition();
3119 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3120 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3121 UD->completeDefinition();
3122 QualType KmpCmplrdataTy = C.getRecordType(UD);
3123 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3124 RD->startDefinition();
3125 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3126 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3127 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3128 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3129 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3130 if (isOpenMPTaskLoopDirective(Kind)) {
3131 QualType KmpUInt64Ty =
3132 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3133 QualType KmpInt64Ty =
3134 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3135 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3136 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3137 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3138 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3139 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3140 }
3141 RD->completeDefinition();
3142 return RD;
3143}
3144
3145static RecordDecl *
3146createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3147 ArrayRef<PrivateDataTy> Privates) {
3148 ASTContext &C = CGM.getContext();
3149 // Build struct kmp_task_t_with_privates {
3150 // kmp_task_t task_data;
3151 // .kmp_privates_t. privates;
3152 // };
3153 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3154 RD->startDefinition();
3155 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3156 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3157 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3158 RD->completeDefinition();
3159 return RD;
3160}
3161
3162/// Emit a proxy function which accepts kmp_task_t as the second
3163/// argument.
3164/// \code
3165/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3166/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3167/// For taskloops:
3168/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3169/// tt->reductions, tt->shareds);
3170/// return 0;
3171/// }
3172/// \endcode
3173static llvm::Function *
3174emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3175 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3176 QualType KmpTaskTWithPrivatesPtrQTy,
3177 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3178 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3179 llvm::Value *TaskPrivatesMap) {
3180 ASTContext &C = CGM.getContext();
3181 FunctionArgList Args;
3182 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3183 ImplicitParamDecl::Other);
3184 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3185 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3186 ImplicitParamDecl::Other);
3187 Args.push_back(&GtidArg);
3188 Args.push_back(&TaskTypeArg);
3189 const auto &TaskEntryFnInfo =
3190 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3191 llvm::FunctionType *TaskEntryTy =
3192 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3193 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3194 auto *TaskEntry = llvm::Function::Create(
3195 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3196 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3197 TaskEntry->setDoesNotRecurse();
3198 CodeGenFunction CGF(CGM);
3199 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3200 Loc, Loc);
3201
3202 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3203 // tt,
3204 // For taskloops:
3205 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3206 // tt->task_data.shareds);
3207 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3208 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3209 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3210 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3211 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3212 const auto *KmpTaskTWithPrivatesQTyRD =
3213 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3214 LValue Base =
3215 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3216 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3217 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3218 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3219 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3220
3221 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3222 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3223 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3224 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3225 CGF.ConvertTypeForMem(SharedsPtrTy));
3226
3227 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3228 llvm::Value *PrivatesParam;
3229 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3230 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3231 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3232 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3233 } else {
3234 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3235 }
3236
3237 llvm::Value *CommonArgs[] = {
3238 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3239 CGF.Builder
3240 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3241 CGF.VoidPtrTy, CGF.Int8Ty)
3242 .getPointer()};
3243 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3244 std::end(CommonArgs));
3245 if (isOpenMPTaskLoopDirective(Kind)) {
3246 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3247 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3248 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3249 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3250 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3251 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3252 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3253 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3254 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3255 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3256 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3257 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3258 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3259 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3260 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3261 CallArgs.push_back(LBParam);
3262 CallArgs.push_back(UBParam);
3263 CallArgs.push_back(StParam);
3264 CallArgs.push_back(LIParam);
3265 CallArgs.push_back(RParam);
3266 }
3267 CallArgs.push_back(SharedsParam);
3268
3269 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3270 CallArgs);
3271 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3272 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3273 CGF.FinishFunction();
3274 return TaskEntry;
3275}
3276
3277static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3278 SourceLocation Loc,
3279 QualType KmpInt32Ty,
3280 QualType KmpTaskTWithPrivatesPtrQTy,
3281 QualType KmpTaskTWithPrivatesQTy) {
3282 ASTContext &C = CGM.getContext();
3283 FunctionArgList Args;
3284 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3285 ImplicitParamDecl::Other);
3286 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3287 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3288 ImplicitParamDecl::Other);
3289 Args.push_back(&GtidArg);
3290 Args.push_back(&TaskTypeArg);
3291 const auto &DestructorFnInfo =
3292 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3293 llvm::FunctionType *DestructorFnTy =
3294 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3295 std::string Name =
3296 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3297 auto *DestructorFn =
3298 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3299 Name, &CGM.getModule());
3300 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3301 DestructorFnInfo);
3302 DestructorFn->setDoesNotRecurse();
3303 CodeGenFunction CGF(CGM);
3304 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3305 Args, Loc, Loc);
3306
3307 LValue Base = CGF.EmitLoadOfPointerLValue(
3308 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3309 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3310 const auto *KmpTaskTWithPrivatesQTyRD =
3311 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3312 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3313 Base = CGF.EmitLValueForField(Base, *FI);
3314 for (const auto *Field :
3315 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3316 if (QualType::DestructionKind DtorKind =
3317 Field->getType().isDestructedType()) {
3318 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3319 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3320 }
3321 }
3322 CGF.FinishFunction();
3323 return DestructorFn;
3324}
3325
3326/// Emit a privates mapping function for correct handling of private and
3327/// firstprivate variables.
3328/// \code
3329/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3330/// **noalias priv1,..., <tyn> **noalias privn) {
3331/// *priv1 = &.privates.priv1;
3332/// ...;
3333/// *privn = &.privates.privn;
3334/// }
3335/// \endcode
3336static llvm::Value *
3337emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3338 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3339 ArrayRef<PrivateDataTy> Privates) {
3340 ASTContext &C = CGM.getContext();
3341 FunctionArgList Args;
3342 ImplicitParamDecl TaskPrivatesArg(
3343 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3344 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3345 ImplicitParamDecl::Other);
3346 Args.push_back(&TaskPrivatesArg);
3347 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3348 unsigned Counter = 1;
3349 for (const Expr *E : Data.PrivateVars) {
3350 Args.push_back(ImplicitParamDecl::Create(
3351 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3352 C.getPointerType(C.getPointerType(E->getType()))
3353 .withConst()
3354 .withRestrict(),
3355 ImplicitParamDecl::Other));
3356 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3357 PrivateVarsPos[VD] = Counter;
3358 ++Counter;
3359 }
3360 for (const Expr *E : Data.FirstprivateVars) {
3361 Args.push_back(ImplicitParamDecl::Create(
3362 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3363 C.getPointerType(C.getPointerType(E->getType()))
3364 .withConst()
3365 .withRestrict(),
3366 ImplicitParamDecl::Other));
3367 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3368 PrivateVarsPos[VD] = Counter;
3369 ++Counter;
3370 }
3371 for (const Expr *E : Data.LastprivateVars) {
3372 Args.push_back(ImplicitParamDecl::Create(
3373 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3374 C.getPointerType(C.getPointerType(E->getType()))
3375 .withConst()
3376 .withRestrict(),
3377 ImplicitParamDecl::Other));
3378 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3379 PrivateVarsPos[VD] = Counter;
3380 ++Counter;
3381 }
3382 for (const VarDecl *VD : Data.PrivateLocals) {
3383 QualType Ty = VD->getType().getNonReferenceType();
3384 if (VD->getType()->isLValueReferenceType())
3385 Ty = C.getPointerType(Ty);
3386 if (isAllocatableDecl(VD))
3387 Ty = C.getPointerType(Ty);
3388 Args.push_back(ImplicitParamDecl::Create(
3389 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3390 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3391 ImplicitParamDecl::Other));
3392 PrivateVarsPos[VD] = Counter;
3393 ++Counter;
3394 }
3395 const auto &TaskPrivatesMapFnInfo =
3396 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3397 llvm::FunctionType *TaskPrivatesMapTy =
3398 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3399 std::string Name =
3400 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3401 auto *TaskPrivatesMap = llvm::Function::Create(
3402 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3403 &CGM.getModule());
3404 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3405 TaskPrivatesMapFnInfo);
3406 if (CGM.getLangOpts().Optimize) {
3407 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3408 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3409 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3410 }
3411 CodeGenFunction CGF(CGM);
3412 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3413 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3414
3415 // *privi = &.privates.privi;
3416 LValue Base = CGF.EmitLoadOfPointerLValue(
3417 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3418 TaskPrivatesArg.getType()->castAs<PointerType>());
3419 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3420 Counter = 0;
3421 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3422 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3423 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3424 LValue RefLVal =
3425 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3426 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3427 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3428 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3429 ++Counter;
3430 }
3431 CGF.FinishFunction();
3432 return TaskPrivatesMap;
3433}
3434
3435/// Emit initialization for private variables in task-based directives.
3436static void emitPrivatesInit(CodeGenFunction &CGF,
3437 const OMPExecutableDirective &D,
3438 Address KmpTaskSharedsPtr, LValue TDBase,
3439 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3440 QualType SharedsTy, QualType SharedsPtrTy,
3441 const OMPTaskDataTy &Data,
3442 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3443 ASTContext &C = CGF.getContext();
3444 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3445 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3446 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3447 ? OMPD_taskloop
3448 : OMPD_task;
3449 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3450 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3451 LValue SrcBase;
3452 bool IsTargetTask =
3453 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3454 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3455 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3456 // PointersArray, SizesArray, and MappersArray. The original variables for
3457 // these arrays are not captured and we get their addresses explicitly.
3458 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3459 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3460 SrcBase = CGF.MakeAddrLValue(
3461 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3462 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3463 CGF.ConvertTypeForMem(SharedsTy)),
3464 SharedsTy);
3465 }
3466 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3467 for (const PrivateDataTy &Pair : Privates) {
3468 // Do not initialize private locals.
3469 if (Pair.second.isLocalPrivate()) {
3470 ++FI;
3471 continue;
3472 }
3473 const VarDecl *VD = Pair.second.PrivateCopy;
3474 const Expr *Init = VD->getAnyInitializer();
3475 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3476 !CGF.isTrivialInitializer(Init)))) {
3477 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3478 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3479 const VarDecl *OriginalVD = Pair.second.Original;
3480 // Check if the variable is the target-based BasePointersArray,
3481 // PointersArray, SizesArray, or MappersArray.
3482 LValue SharedRefLValue;
3483 QualType Type = PrivateLValue.getType();
3484 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3485 if (IsTargetTask && !SharedField) {
3486 assert(isa<ImplicitParamDecl>(OriginalVD) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
3487 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
3488 cast<CapturedDecl>(OriginalVD->getDeclContext())(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
3489 ->getNumParams() == 0 &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
3490 isa<TranslationUnitDecl>((static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
3491 cast<CapturedDecl>(OriginalVD->getDeclContext())(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
3492 ->getDeclContext()) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
3493 "Expected artificial target data variable.")(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3493, __extension__
__PRETTY_FUNCTION__))
;
3494 SharedRefLValue =
3495 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3496 } else if (ForDup) {
3497 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3498 SharedRefLValue = CGF.MakeAddrLValue(
3499 SharedRefLValue.getAddress(CGF).withAlignment(
3500 C.getDeclAlign(OriginalVD)),
3501 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3502 SharedRefLValue.getTBAAInfo());
3503 } else if (CGF.LambdaCaptureFields.count(
3504 Pair.second.Original->getCanonicalDecl()) > 0 ||
3505 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3506 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3507 } else {
3508 // Processing for implicitly captured variables.
3509 InlinedOpenMPRegionRAII Region(
3510 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3511 /*HasCancel=*/false, /*NoInheritance=*/true);
3512 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3513 }
3514 if (Type->isArrayType()) {
3515 // Initialize firstprivate array.
3516 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3517 // Perform simple memcpy.
3518 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3519 } else {
3520 // Initialize firstprivate array using element-by-element
3521 // initialization.
3522 CGF.EmitOMPAggregateAssign(
3523 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3524 Type,
3525 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3526 Address SrcElement) {
3527 // Clean up any temporaries needed by the initialization.
3528 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3529 InitScope.addPrivate(Elem, SrcElement);
3530 (void)InitScope.Privatize();
3531 // Emit initialization for single element.
3532 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3533 CGF, &CapturesInfo);
3534 CGF.EmitAnyExprToMem(Init, DestElement,
3535 Init->getType().getQualifiers(),
3536 /*IsInitializer=*/false);
3537 });
3538 }
3539 } else {
3540 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3541 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3542 (void)InitScope.Privatize();
3543 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3544 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3545 /*capturedByInit=*/false);
3546 }
3547 } else {
3548 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3549 }
3550 }
3551 ++FI;
3552 }
3553}
3554
3555/// Check if a task duplication function is required for taskloops, i.e.
/// whether any private copy has a non-trivial initializer.
3556static bool checkInitIsRequired(CodeGenFunction &CGF,
3557 ArrayRef<PrivateDataTy> Privates) {
3558 bool InitRequired = false;
3559 for (const PrivateDataTy &Pair : Privates) {
3560 if (Pair.second.isLocalPrivate())
3561 continue;
3562 const VarDecl *VD = Pair.second.PrivateCopy;
3563 const Expr *Init = VD->getAnyInitializer();
3564 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3565 !CGF.isTrivialInitializer(Init));
3566 if (InitRequired)
3567 break;
3568 }
3569 return InitRequired;
3570}
3571
3572
3573/// Emit task_dup function (for initialization of
3574/// private/firstprivate/lastprivate vars and last_iter flag)
3575/// \code
3576/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3577/// lastpriv) {
3578/// // setup lastprivate flag
3579/// task_dst->last = lastpriv;
3580/// // could be constructor calls here...
3581/// }
3582/// \endcode
3583static llvm::Value *
3584emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3585 const OMPExecutableDirective &D,
3586 QualType KmpTaskTWithPrivatesPtrQTy,
3587 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3588 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3589 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
3590 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
3591 ASTContext &C = CGM.getContext();
3592 FunctionArgList Args;
3593 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3594 KmpTaskTWithPrivatesPtrQTy,
3595 ImplicitParamDecl::Other);
3596 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3597 KmpTaskTWithPrivatesPtrQTy,
3598 ImplicitParamDecl::Other);
3599 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
3600 ImplicitParamDecl::Other);
3601 Args.push_back(&DstArg);
3602 Args.push_back(&SrcArg);
3603 Args.push_back(&LastprivArg);
3604 const auto &TaskDupFnInfo =
3605 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3606 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
3607 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
3608 auto *TaskDup = llvm::Function::Create(
3609 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3610 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
3611 TaskDup->setDoesNotRecurse();
3612 CodeGenFunction CGF(CGM);
3613 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
3614 Loc);
3615
3616 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3617 CGF.GetAddrOfLocalVar(&DstArg),
3618 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3619 // task_dst->liter = lastpriv;
3620 if (WithLastIter) {
3621 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3622 LValue Base = CGF.EmitLValueForField(
3623 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3624 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3625 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
3626 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
3627 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
3628 }
3629
3630 // Emit initial values for private copies (if any).
3631 assert(!Privates.empty());
3632 Address KmpTaskSharedsPtr = Address::invalid();
3633 if (!Data.FirstprivateVars.empty()) {
3634 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3635 CGF.GetAddrOfLocalVar(&SrcArg),
3636 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3637 LValue Base = CGF.EmitLValueForField(
3638 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3639 KmpTaskSharedsPtr = Address(
3640 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
3641 Base, *std::next(KmpTaskTQTyRD->field_begin(),
3642 KmpTaskTShareds)),
3643 Loc),
3644 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
3645 }
3646 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
3647 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
3648 CGF.FinishFunction();
3649 return TaskDup;
3650}
3651
3652/// Checks if destructor function is required to be generated.
3653/// \return true if cleanups are required, false otherwise.
3654static bool
3655checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3656 ArrayRef<PrivateDataTy> Privates) {
3657 for (const PrivateDataTy &P : Privates) {
3658 if (P.second.isLocalPrivate())
3659 continue;
3660 QualType Ty = P.second.Original->getType().getNonReferenceType();
3661 if (Ty.isDestructedType())
3662 return true;
3663 }
3664 return false;
3665}
3666
3667namespace {
3668/// Loop generator for OpenMP iterator expression.
3669class OMPIteratorGeneratorScope final
3670 : public CodeGenFunction::OMPPrivateScope {
3671 CodeGenFunction &CGF;
3672 const OMPIteratorExpr *E = nullptr;
3673 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
3674 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
3675 OMPIteratorGeneratorScope() = delete;
3676 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
3677
3678public:
3679 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
3680 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
3681 if (!E)
3682 return;
3683 SmallVector<llvm::Value *, 4> Uppers;
3684 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3685 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
3686 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
3687 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
3688 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3689 addPrivate(
3690 HelperData.CounterVD,
3691 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
3692 }
3693 Privatize();
3694
3695 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
3696 const OMPIteratorHelperData &HelperData = E->getHelper(I);
3697 LValue CLVal =
3698 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
3699 HelperData.CounterVD->getType());
3700 // Counter = 0;
3701 CGF.EmitStoreOfScalar(
3702 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
3703 CLVal);
3704 CodeGenFunction::JumpDest &ContDest =
3705 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
3706 CodeGenFunction::JumpDest &ExitDest =
3707 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
3708 // N = <number-of-iterations>;
3709 llvm::Value *N = Uppers[I];
3710 // cont:
3711 // if (Counter < N) goto body; else goto exit;
3712 CGF.EmitBlock(ContDest.getBlock());
3713 auto *CVal =
3714 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
3715 llvm::Value *Cmp =
3716 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
3717 ? CGF.Builder.CreateICmpSLT(CVal, N)
3718 : CGF.Builder.CreateICmpULT(CVal, N);
3719 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
3720 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
3721 // body:
3722 CGF.EmitBlock(BodyBB);
3723 // Iter_i = Begin_i + Counter * Step_i;
3724 CGF.EmitIgnoredExpr(HelperData.Update);
3725 }
3726 }
3727 ~OMPIteratorGeneratorScope() {
3728 if (!E)
3729 return;
3730 for (unsigned I = E->numOfIterators(); I > 0; --I) {
3731 // Counter = Counter + 1;
3732 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
3733 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
3734 // goto cont;
3735 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
3736 // exit:
3737 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
3738 }
3739 }
3740};
3741} // namespace
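// Illustrative sketch (not part of this file): OMPIteratorGeneratorScope
// wraps the emission of each list item in one counted loop per iterator, so
// a clause such as
// \code
// #pragma omp task depend(iterator(i = 0 : n), in : a[i])
// \endcode
// is lowered roughly as
// \code
// counter = 0;
// cont: if (!(counter < n)) goto exit;
// body: i = 0 + counter * 1;        // HelperData.Update
//       /* emit one dependence entry for a[i] */
//       counter = counter + 1;      // HelperData.CounterUpdate
//       goto cont;
// exit: ;
// \endcode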
3742
3743static std::pair<llvm::Value *, llvm::Value *>
3744getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
3745 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
3746 llvm::Value *Addr;
3747 if (OASE) {
3748 const Expr *Base = OASE->getBase();
3749 Addr = CGF.EmitScalarExpr(Base);
3750 } else {
3751 Addr = CGF.EmitLValue(E).getPointer(CGF);
3752 }
3753 llvm::Value *SizeVal;
3754 QualType Ty = E->getType();
3755 if (OASE) {
3756 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
3757 for (const Expr *SE : OASE->getDimensions()) {
3758 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
3759 Sz = CGF.EmitScalarConversion(
3760 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
3761 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
3762 }
3763 } else if (const auto *ASE =
3764 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
3765 LValue UpAddrLVal =
3766 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
3767 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
3768 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
3769 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
3770 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
3771 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
3772 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
3773 } else {
3774 SizeVal = CGF.getTypeSize(Ty);
3775 }
3776 return std::make_pair(Addr, SizeVal);
3777}
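// Illustrative sketch (not part of this file): the three size computations
// above correspond to the following source forms (names are examples):
// \code
// double *p; int a[100];
// ([n][m])p    // shaping:  size = sizeof(double) * n * m
// a[low : len] // section:  size = (char *)&a[low + len] - (char *)&a[low]
// a            // plain:    size = sizeof(a)
// \endcode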
3778
3779/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
3780static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
3781 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
3782 if (KmpTaskAffinityInfoTy.isNull()) {
3783 RecordDecl *KmpAffinityInfoRD =
3784 C.buildImplicitRecord("kmp_task_affinity_info_t");
3785 KmpAffinityInfoRD->startDefinition();
3786 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
3787 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
3788 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
3789 KmpAffinityInfoRD->completeDefinition();
3790 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
3791 }
3792}
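// Illustrative sketch (not part of this file): the implicit record built
// above matches the runtime's affinity descriptor, roughly (assuming a
// 64-bit target):
// \code
// struct kmp_task_affinity_info_t {
//   intptr_t base_addr; // C.getIntPtrType()
//   size_t len;         // C.getSizeType()
//   uint32_t flags;     // 32-bit unsigned FlagsTy
// };
// \endcode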
3793
3794CGOpenMPRuntime::TaskResultTy
3795CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3796 const OMPExecutableDirective &D,
3797 llvm::Function *TaskFunction, QualType SharedsTy,
3798 Address Shareds, const OMPTaskDataTy &Data) {
3799 ASTContext &C = CGM.getContext();
3800 llvm::SmallVector<PrivateDataTy, 4> Privates;
3801 // Aggregate privates and sort them by the alignment.
3802 const auto *I = Data.PrivateCopies.begin();
3803 for (const Expr *E : Data.PrivateVars) {
3804 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3805 Privates.emplace_back(
3806 C.getDeclAlign(VD),
3807 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3808 /*PrivateElemInit=*/nullptr));
3809 ++I;
3810 }
3811 I = Data.FirstprivateCopies.begin();
3812 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3813 for (const Expr *E : Data.FirstprivateVars) {
3814 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3815 Privates.emplace_back(
3816 C.getDeclAlign(VD),
3817 PrivateHelpersTy(
3818 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3819 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
3820 ++I;
3821 ++IElemInitRef;
3822 }
3823 I = Data.LastprivateCopies.begin();
3824 for (const Expr *E : Data.LastprivateVars) {
3825 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3826 Privates.emplace_back(
3827 C.getDeclAlign(VD),
3828 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
3829 /*PrivateElemInit=*/nullptr));
3830 ++I;
3831 }
3832 for (const VarDecl *VD : Data.PrivateLocals) {
3833 if (isAllocatableDecl(VD))
3834 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
3835 else
3836 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
3837 }
3838 llvm::stable_sort(Privates,
3839 [](const PrivateDataTy &L, const PrivateDataTy &R) {
3840 return L.first > R.first;
3841 });
3842 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3843 // Build type kmp_routine_entry_t (if not built yet).
3844 emitKmpRoutineEntryT(KmpInt32Ty);
3845 // Build type kmp_task_t (if not built yet).
3846 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
3847 if (SavedKmpTaskloopTQTy.isNull()) {
3848 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3849 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3850 }
3851 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3852 } else {
3853 assert((D.getDirectiveKind() == OMPD_task ||
3854 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3855 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3856 "Expected taskloop, task or target directive");
3857 if (SavedKmpTaskTQTy.isNull()) {
3858 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
3859 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
3860 }
3861 KmpTaskTQTy = SavedKmpTaskTQTy;
3862 }
3863 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3864 // Build particular struct kmp_task_t for the given task.
3865 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3866 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3867 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
3868 QualType KmpTaskTWithPrivatesPtrQTy =
3869 C.getPointerType(KmpTaskTWithPrivatesQTy);
3870 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
3871 llvm::Type *KmpTaskTWithPrivatesPtrTy =
3872 KmpTaskTWithPrivatesTy->getPointerTo();
3873 llvm::Value *KmpTaskTWithPrivatesTySize =
3874 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
3875 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
3876
3877 // Emit initial values for private copies (if any).
3878 llvm::Value *TaskPrivatesMap = nullptr;
3879 llvm::Type *TaskPrivatesMapTy =
3880 std::next(TaskFunction->arg_begin(), 3)->getType();
3881 if (!Privates.empty()) {
3882 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3883 TaskPrivatesMap =
3884 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
3885 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3886 TaskPrivatesMap, TaskPrivatesMapTy);
3887 } else {
3888 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3889 cast<llvm::PointerType>(TaskPrivatesMapTy));
3890 }
3891 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3892 // kmp_task_t *tt);
3893 llvm::Function *TaskEntry = emitProxyTaskFunction(
3894 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3895 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3896 TaskPrivatesMap);
3897
3898 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3899 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3900 // kmp_routine_entry_t *task_entry);
3901 // Task flags. Format is taken from
3902 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3903 // description of kmp_tasking_flags struct.
3904 enum {
3905 TiedFlag = 0x1,
3906 FinalFlag = 0x2,
3907 DestructorsFlag = 0x8,
3908 PriorityFlag = 0x20,
3909 DetachableFlag = 0x40,
3910 };
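// Illustrative sketch (not part of this file): the flag word is a plain
// bitwise OR of the values above, e.g. a tied task with a priority clause
// whose privates need cleanups gets
// \code
// unsigned Flags = TiedFlag | DestructorsFlag | PriorityFlag; // 0x1|0x8|0x20 == 0x29
// \endcode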
3911 unsigned Flags = Data.Tied ? TiedFlag : 0;
3912 bool NeedsCleanup = false;
3913 if (!Privates.empty()) {
3914 NeedsCleanup =
3915 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3916 if (NeedsCleanup)
3917 Flags = Flags | DestructorsFlag;
3918 }
3919 if (Data.Priority.getInt())
3920 Flags = Flags | PriorityFlag;
3921 if (D.hasClausesOfKind<OMPDetachClause>())
3922 Flags = Flags | DetachableFlag;
3923 llvm::Value *TaskFlags =
3924 Data.Final.getPointer()
3925 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
3926 CGF.Builder.getInt32(FinalFlag),
3927 CGF.Builder.getInt32(/*C=*/0))
3928 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
3929 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
3930 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
3931 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3932 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3933 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3934 TaskEntry, KmpRoutineEntryPtrTy)};
3935 llvm::Value *NewTask;
3936 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3937 // Check if we have any device clause associated with the directive.
3938 const Expr *Device = nullptr;
3939 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3940 Device = C->getDevice();
3941 // Emit the device ID if present; otherwise use the default value.
3942 llvm::Value *DeviceID;
3943 if (Device)
3944 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
3945 CGF.Int64Ty, /*isSigned=*/true);
3946 else
3947 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
3948 AllocArgs.push_back(DeviceID);
3949 NewTask = CGF.EmitRuntimeCall(
3950 OMPBuilder.getOrCreateRuntimeFunction(
3951 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
3952 AllocArgs);
3953 } else {
3954 NewTask =
3955 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
3956 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
3957 AllocArgs);
3958 }
3959 // Emit detach clause initialization.
3960 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3961 // task_descriptor);
3962 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3963 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3964 LValue EvtLVal = CGF.EmitLValue(Evt);
3965
3966 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3967 // int gtid, kmp_task_t *task);
3968 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
3969 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
3970 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
3971 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3972 OMPBuilder.getOrCreateRuntimeFunction(
3973 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
3974 {Loc, Tid, NewTask});
3975 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
3976 Evt->getExprLoc());
3977 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
3978 }
3979 // Process affinity clauses.
3980 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3981 // Process list of affinity data.
3982 ASTContext &C = CGM.getContext();
3983 Address AffinitiesArray = Address::invalid();
3984 // Calculate number of elements to form the array of affinity data.
3985 llvm::Value *NumOfElements = nullptr;
3986 unsigned NumAffinities = 0;
3987 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3988 if (const Expr *Modifier = C->getModifier()) {
3989 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
3990 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3991 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
3992 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
3993 NumOfElements =
3994 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
3995 }
3996 } else {
3997 NumAffinities += C->varlist_size();
3998 }
3999 }
4000 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4001 // Field ids in the kmp_task_affinity_info record.
4002 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4003
4004 QualType KmpTaskAffinityInfoArrayTy;
4005 if (NumOfElements) {
4006 NumOfElements = CGF.Builder.CreateNUWAdd(
4007 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4008 auto *OVE = new (C) OpaqueValueExpr(
4009 Loc,
4010 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4011 VK_PRValue);
4012 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4013 RValue::get(NumOfElements));
4014 KmpTaskAffinityInfoArrayTy =
4015 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4016 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4017 // Properly emit variable-sized array.
4018 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4019 ImplicitParamDecl::Other);
4020 CGF.EmitVarDecl(*PD);
4021 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4022 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4023 /*isSigned=*/false);
4024 } else {
4025 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4026 KmpTaskAffinityInfoTy,
4027 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4028 ArrayType::Normal, /*IndexTypeQuals=*/0);
4029 AffinitiesArray =
4030 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4031 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4032 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4033 /*isSigned=*/false);
4034 }
4035
4036 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4037 // Fill the array with the elements that have no iterator modifier.
4038 unsigned Pos = 0;
4039 bool HasIterator = false;
4040 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4041 if (C->getModifier()) {
4042 HasIterator = true;
4043 continue;
4044 }
4045 for (const Expr *E : C->varlists()) {
4046 llvm::Value *Addr;
4047 llvm::Value *Size;
4048 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4049 LValue Base =
4050 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4051 KmpTaskAffinityInfoTy);
4052 // affs[i].base_addr = &<Affinities[i].second>;
4053 LValue BaseAddrLVal = CGF.EmitLValueForField(
4054 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4055 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4056 BaseAddrLVal);
4057 // affs[i].len = sizeof(<Affinities[i].second>);
4058 LValue LenLVal = CGF.EmitLValueForField(
4059 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4060 CGF.EmitStoreOfScalar(Size, LenLVal);
4061 ++Pos;
4062 }
4063 }
4064 LValue PosLVal;
4065 if (HasIterator) {
4066 PosLVal = CGF.MakeAddrLValue(
4067 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4068 C.getSizeType());
4069 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4070 }
4071 // Process elements with iterators.
4072 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4073 const Expr *Modifier = C->getModifier();
4074 if (!Modifier)
4075 continue;
4076 OMPIteratorGeneratorScope IteratorScope(
4077 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4078 for (const Expr *E : C->varlists()) {
4079 llvm::Value *Addr;
4080 llvm::Value *Size;
4081 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4082 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4083 LValue Base = CGF.MakeAddrLValue(
4084 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4085 // affs[i].base_addr = &<Affinities[i].second>;
4086 LValue BaseAddrLVal = CGF.EmitLValueForField(
4087 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4088 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4089 BaseAddrLVal);
4090 // affs[i].len = sizeof(<Affinities[i].second>);
4091 LValue LenLVal = CGF.EmitLValueForField(
4092 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4093 CGF.EmitStoreOfScalar(Size, LenLVal);
4094 Idx = CGF.Builder.CreateNUWAdd(
4095 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4096 CGF.EmitStoreOfScalar(Idx, PosLVal);
4097 }
4098 }
4099 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4100 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4101 // naffins, kmp_task_affinity_info_t *affin_list);
4102 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4103 llvm::Value *GTid = getThreadID(CGF, Loc);
4104 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4105 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4106 // FIXME: Emit the function and ignore its result for now unless the
4107 // runtime function is properly implemented.
4108 (void)CGF.EmitRuntimeCall(
4109 OMPBuilder.getOrCreateRuntimeFunction(
4110 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4111 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4112 }
4113 llvm::Value *NewTaskNewTaskTTy =
4114 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4115 NewTask, KmpTaskTWithPrivatesPtrTy);
4116 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4117 KmpTaskTWithPrivatesQTy);
4118 LValue TDBase =
4119 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4120 // Fill the data in the resulting kmp_task_t record.
4121 // Copy shareds if there are any.
4122 Address KmpTaskSharedsPtr = Address::invalid();
4123 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4124 KmpTaskSharedsPtr = Address(
4125 CGF.EmitLoadOfScalar(
4126 CGF.EmitLValueForField(
4127 TDBase,
4128 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4129 Loc),
4130 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4131 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4132 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4133 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4134 }
4135 // Emit initial values for private copies (if any).
4136 TaskResultTy Result;
4137 if (!Privates.empty()) {
4138 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4139 SharedsTy, SharedsPtrTy, Data, Privates,
4140 /*ForDup=*/false);
4141 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4142 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4143 Result.TaskDupFn = emitTaskDupFunction(
4144 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4145 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4146 /*WithLastIter=*/!Data.LastprivateVars.empty());
4147 }
4148 }
4149 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4150 enum { Priority = 0, Destructors = 1 };
4151 // Provide pointer to function with destructors for privates.
4152 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4153 const RecordDecl *KmpCmplrdataUD =
4154 (*FI)->getType()->getAsUnionType()->getDecl();
4155 if (NeedsCleanup) {
4156 llvm::Value *DestructorFn = emitDestructorsFunction(
4157 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4158 KmpTaskTWithPrivatesQTy);
4159 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4160 LValue DestructorsLV = CGF.EmitLValueForField(
4161 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4162 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4163 DestructorFn, KmpRoutineEntryPtrTy),
4164 DestructorsLV);
4165 }
4166 // Set priority.
4167 if (Data.Priority.getInt()) {
4168 LValue Data2LV = CGF.EmitLValueForField(
4169 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4170 LValue PriorityLV = CGF.EmitLValueForField(
4171 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4172 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4173 }
4174 Result.NewTask = NewTask;
4175 Result.TaskEntry = TaskEntry;
4176 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4177 Result.TDBase = TDBase;
4178 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4179 return Result;
4180}
4181
4182/// Translates internal dependency kind into the runtime kind.
4183static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4184 RTLDependenceKindTy DepKind;
4185 switch (K) {
4186 case OMPC_DEPEND_in:
4187 DepKind = RTLDependenceKindTy::DepIn;
4188 break;
4189 // Out and InOut dependencies must use the same code.
4190 case OMPC_DEPEND_out:
4191 case OMPC_DEPEND_inout:
4192 DepKind = RTLDependenceKindTy::DepInOut;
4193 break;
4194 case OMPC_DEPEND_mutexinoutset:
4195 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4196 break;
4197 case OMPC_DEPEND_inoutset:
4198 DepKind = RTLDependenceKindTy::DepInOutSet;
4199 break;
4200 case OMPC_DEPEND_outallmemory:
4201 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4202 break;
4203 case OMPC_DEPEND_source:
4204 case OMPC_DEPEND_sink:
4205 case OMPC_DEPEND_depobj:
4206 case OMPC_DEPEND_inoutallmemory:
4207 case OMPC_DEPEND_unknown:
4208 llvm_unreachable("Unknown task dependence type");
4209 }
4210 return DepKind;
4211}
4212
4213/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4214static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4215 QualType &FlagsTy) {
4216 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4217 if (KmpDependInfoTy.isNull()) {
4218 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4219 KmpDependInfoRD->startDefinition();
4220 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4221 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4222 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4223 KmpDependInfoRD->completeDefinition();
4224 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4225 }
4226}
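// Illustrative sketch (not part of this file): the implicit record built
// above mirrors the runtime's kmp_depend_info, roughly (FlagsTy has the
// bit-width of bool, typically 8 bits):
// \code
// struct kmp_depend_info {
//   intptr_t base_addr; // address of the item (or the count; see depobj)
//   size_t len;         // length in bytes
//   uint8_t flags;      // RTLDependenceKindTy value
// };
// \endcode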
4227
4228std::pair<llvm::Value *, LValue>
4229CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4230 SourceLocation Loc) {
4231 ASTContext &C = CGM.getContext();
4232 QualType FlagsTy;
4233 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4234 RecordDecl *KmpDependInfoRD =
4235 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4236 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4237 LValue Base = CGF.EmitLoadOfPointerLValue(
4238 CGF.Builder.CreateElementBitCast(
4239 DepobjLVal.getAddress(CGF),
4240 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4241 KmpDependInfoPtrTy->castAs<PointerType>());
4242 Address DepObjAddr = CGF.Builder.CreateGEP(
4243 Base.getAddress(CGF),
4244 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4245 LValue NumDepsBase = CGF.MakeAddrLValue(
4246 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4247 // NumDeps = deps[i].base_addr;
4248 LValue BaseAddrLVal = CGF.EmitLValueForField(
4249 NumDepsBase,
4250 *std::next(KmpDependInfoRD->field_begin(),
4251 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4252 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4253 return std::make_pair(NumDeps, Base);
4254}
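// Illustrative sketch (not part of this file): a depobj array carries a
// hidden header element in front, so the element count is read through a
// GEP of -1 from the stored pointer:
// \code
// kmp_depend_info *deps = *(kmp_depend_info **)&depobj; // points at deps[0]
// size_t num_deps = deps[-1].base_addr;                 // hidden header
// \endcode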
4255
4256static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4257 llvm::PointerUnion<unsigned *, LValue *> Pos,
4258 const OMPTaskDataTy::DependData &Data,
4259 Address DependenciesArray) {
4260 CodeGenModule &CGM = CGF.CGM;
4261 ASTContext &C = CGM.getContext();
4262 QualType FlagsTy;
4263 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4264 RecordDecl *KmpDependInfoRD =
4265 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4266 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4267
4268 OMPIteratorGeneratorScope IteratorScope(
4269 CGF, cast_or_null<OMPIteratorExpr>(
4270 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4271 : nullptr));
4272 for (const Expr *E : Data.DepExprs) {
4273 llvm::Value *Addr;
4274 llvm::Value *Size;
4275
4276 // The expression will be a nullptr in the 'omp_all_memory' case.
4277 if (E) {
4278 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4279 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4280 } else {
4281 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4282 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4283 }
4284 LValue Base;
4285 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4286 Base = CGF.MakeAddrLValue(
4287 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4288 } else {
4289 assert(E && "Expected a non-null expression");
4290 LValue &PosLVal = *Pos.get<LValue *>();
4291 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4292 Base = CGF.MakeAddrLValue(
4293 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4294 }
4295 // deps[i].base_addr = &<Dependencies[i].second>;
4296 LValue BaseAddrLVal = CGF.EmitLValueForField(
4297 Base,
4298 *std::next(KmpDependInfoRD->field_begin(),
4299 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4300 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4301 // deps[i].len = sizeof(<Dependencies[i].second>);
4302 LValue LenLVal = CGF.EmitLValueForField(
4303 Base, *std::next(KmpDependInfoRD->field_begin(),
4304 static_cast<unsigned int>(RTLDependInfoFields::Len)));
4305 CGF.EmitStoreOfScalar(Size, LenLVal);
4306 // deps[i].flags = <Dependencies[i].first>;
4307 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4308 LValue FlagsLVal = CGF.EmitLValueForField(
4309 Base,
4310 *std::next(KmpDependInfoRD->field_begin(),
4311 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4312 CGF.EmitStoreOfScalar(
4313 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4314 FlagsLVal);
4315 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4316 ++(*P);
4317 } else {
4318 LValue &PosLVal = *Pos.get<LValue *>();
4319 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4320 Idx = CGF.Builder.CreateNUWAdd(Idx,
4321 llvm::ConstantInt::get(Idx->getType(), 1));
4322 CGF.EmitStoreOfScalar(Idx, PosLVal);
4323 }
4324 }
4325}
4326
4327SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4328 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4329 const OMPTaskDataTy::DependData &Data) {
4330 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4331 "Expected depobj dependency kind.");
4332 SmallVector<llvm::Value *, 4> Sizes;
4333 SmallVector<LValue, 4> SizeLVals;
4334 ASTContext &C = CGF.getContext();
4335 {
4336 OMPIteratorGeneratorScope IteratorScope(
4337 CGF, cast_or_null<OMPIteratorExpr>(
4338 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4339 : nullptr));
4340 for (const Expr *E : Data.DepExprs) {
4341 llvm::Value *NumDeps;
4342 LValue Base;
4343 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4344 std::tie(NumDeps, Base) =
4345 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4346 LValue NumLVal = CGF.MakeAddrLValue(
4347 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4348 C.getUIntPtrType());
4349 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4350 NumLVal.getAddress(CGF));
4351 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4352 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4353 CGF.EmitStoreOfScalar(Add, NumLVal);
4354 SizeLVals.push_back(NumLVal);
4355 }
4356 }
4357 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4358 llvm::Value *Size =
4359 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4360 Sizes.push_back(Size);
4361 }
4362 return Sizes;
4363}
4364
4365void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4366 QualType &KmpDependInfoTy,
4367 LValue PosLVal,
4368 const OMPTaskDataTy::DependData &Data,
4369 Address DependenciesArray) {
4370 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4371 "Expected depobj dependency kind.");
4372 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4373 {
4374 OMPIteratorGeneratorScope IteratorScope(
4375 CGF, cast_or_null<OMPIteratorExpr>(
4376 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4377 : nullptr));
4378 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4379 const Expr *E = Data.DepExprs[I];
4380 llvm::Value *NumDeps;
4381 LValue Base;
4382 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4383 std::tie(NumDeps, Base) =
4384 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4385
4386 // Memcpy the dependency data.
4387 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4388 ElSize,
4389 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4390 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4391 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4392 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4393
4394 // Increase pos.
4395 // pos += numDeps;
4396 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4397 CGF.EmitStoreOfScalar(Add, PosLVal);
4398 }
4399 }
4400}
4401
4402std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4403 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4404 SourceLocation Loc) {
4405 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4406 return D.DepExprs.empty();
4407 }))
4408 return std::make_pair(nullptr, Address::invalid());
4409 // Process list of dependencies.
4410 ASTContext &C = CGM.getContext();
4411 Address DependenciesArray = Address::invalid();
4412 llvm::Value *NumOfElements = nullptr;
4413 unsigned NumDependencies = std::accumulate(
4414 Dependencies.begin(), Dependencies.end(), 0,
4415 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4416 return D.DepKind == OMPC_DEPEND_depobj
4417 ? V
4418 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4419 });
4420 QualType FlagsTy;
4421 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4422 bool HasDepobjDeps = false;
4423 bool HasRegularWithIterators = false;
4424 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4425 llvm::Value *NumOfRegularWithIterators =
4426 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4427 // Calculate number of depobj dependencies and regular deps with the
4428 // iterators.
4429 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4430 if (D.DepKind == OMPC_DEPEND_depobj) {
4431 SmallVector<llvm::Value *, 4> Sizes =
4432 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4433 for (llvm::Value *Size : Sizes) {
4434 NumOfDepobjElements =
4435 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4436 }
4437 HasDepobjDeps = true;
4438 continue;
4439 }
4440 // Include number of iterations, if any.
4441
4442 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4443 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4444 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4445 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4446 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4447 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4448 NumOfRegularWithIterators =
4449 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4450 }
4451 HasRegularWithIterators = true;
4452 continue;
4453 }
4454 }
4455
4456 QualType KmpDependInfoArrayTy;
4457 if (HasDepobjDeps || HasRegularWithIterators) {
4458 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4459 /*isSigned=*/false);
4460 if (HasDepobjDeps) {
4461 NumOfElements =
4462 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4463 }
4464 if (HasRegularWithIterators) {
4465 NumOfElements =
4466 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4467 }
4468 auto *OVE = new (C) OpaqueValueExpr(
4469 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4470 VK_PRValue);
4471 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4472 RValue::get(NumOfElements));
4473 KmpDependInfoArrayTy =
4474 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4475 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4476 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4477 // Properly emit variable-sized array.
4478 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4479 ImplicitParamDecl::Other);
4480 CGF.EmitVarDecl(*PD);
4481 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4482 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4483 /*isSigned=*/false);
4484 } else {
4485 KmpDependInfoArrayTy = C.getConstantArrayType(
4486 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4487 ArrayType::Normal, /*IndexTypeQuals=*/0);
4488 DependenciesArray =
4489 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4490 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4491 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4492 /*isSigned=*/false);
4493 }
4494 unsigned Pos = 0;
4495 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4496 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4497 Dependencies[I].IteratorExpr)
4498 continue;
4499 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4500 DependenciesArray);
4501 }
4502 // Copy regular dependencies with iterators.
4503 LValue PosLVal = CGF.MakeAddrLValue(
4504 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4505 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4506 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4507 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4508 !Dependencies[I].IteratorExpr)
4509 continue;
4510 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4511 DependenciesArray);
4512 }
4513 // Copy final depobj arrays without iterators.
4514 if (HasDepobjDeps) {
4515 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4516 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4517 continue;
4518 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4519 DependenciesArray);
4520 }
4521 }
4522 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4523 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4524 return std::make_pair(NumOfElements, DependenciesArray);
4525}
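// Illustrative sketch (not part of this file): the three emission passes
// above fill one flat array bucket-by-bucket. For a directive such as
// \code
// #pragma omp task depend(in : x)                          \
//                  depend(iterator(i = 0 : n), out : b[i]) \
//                  depend(depobj : d)
// \endcode
// the plain 'in' entry is stored first at a compile-time position, the
// iterator-expanded 'out' entries follow via the runtime counter in
// dep.counter.addr, and the elements of 'd' are memcpy'd in last.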
4526
4527Address CGOpenMPRuntime::emitDepobjDependClause(
4528 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4529 SourceLocation Loc) {
4530 if (Dependencies.DepExprs.empty())
4531 return Address::invalid();
4532 // Process list of dependencies.
4533 ASTContext &C = CGM.getContext();
4534 Address DependenciesArray = Address::invalid();
4535 unsigned NumDependencies = Dependencies.DepExprs.size();
4536 QualType FlagsTy;
4537 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4538 RecordDecl *KmpDependInfoRD =
4539 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4540
4541 llvm::Value *Size;
4542 // Define type kmp_depend_info[<Dependencies.size()>];
4543 // For depobj reserve one extra element to store the number of elements.
4544 // It is required to handle depobj(x) update(in) construct.
4545 // kmp_depend_info[<Dependencies.size()>] deps;
4546 llvm::Value *NumDepsVal;
4547 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4548 if (const auto *IE =
4549 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4550 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4551 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4552 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4553 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4554 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4555 }
4556 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4557 NumDepsVal);
4558 CharUnits SizeInBytes =
4559 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4560 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4561 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4562 NumDepsVal =
4563 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4564 } else {
4565 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4566 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4567 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4568 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4569 Size = CGM.getSize(Sz.alignTo(Align));
4570 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4571 }
4572 // Needs to be allocated in dynamic memory.
4573 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4574 // Use default allocator.
4575 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4576 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4577
4578 llvm::Value *Addr =
4579 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4580 CGM.getModule(), OMPRTL___kmpc_alloc),
4581 Args, ".dep.arr.addr");
4582 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4583 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4584 Addr, KmpDependInfoLlvmTy->getPointerTo());
4585 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4586 // Write number of elements in the first element of array for depobj.
4587 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4588 // deps[i].base_addr = NumDependencies;
4589 LValue BaseAddrLVal = CGF.EmitLValueForField(
4590 Base,
4591 *std::next(KmpDependInfoRD->field_begin(),
4592 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4593 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4594 llvm::PointerUnion<unsigned *, LValue *> Pos;
4595 unsigned Idx = 1;
4596 LValue PosLVal;
4597 if (Dependencies.IteratorExpr) {
4598 PosLVal = CGF.MakeAddrLValue(
4599 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4600 C.getSizeType());
4601 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4602 /*IsInit=*/true);
4603 Pos = &PosLVal;
4604 } else {
4605 Pos = &Idx;
4606 }
4607 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4608 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4609 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4610 CGF.Int8Ty);
4611 return DependenciesArray;
4612}
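// Illustrative sketch (not part of this file): this is the codegen behind
// the depobj construct, e.g.
// \code
// omp_depend_t d;
// #pragma omp depobj(d) depend(in : x) // allocates [header | dep0]; the
//                                      // returned &dep0 ends up stored in d
// \endcode
// The extra header element written above is what getDepobjElements() and
// emitUpdateClause() later read back through deps[-1].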
4613
4614void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4615 SourceLocation Loc) {
4616 ASTContext &C = CGM.getContext();
4617 QualType FlagsTy;
4618 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4619 LValue Base = CGF.EmitLoadOfPointerLValue(
4620 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4621 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4622 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4623 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4624 CGF.ConvertTypeForMem(KmpDependInfoTy));
4625 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4626 Addr.getElementType(), Addr.getPointer(),
4627 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4628 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4629 CGF.VoidPtrTy);
4630 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4631 // Use default allocator.
4632 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4633 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4634
4635 // __kmpc_free(gtid, addr, nullptr);
4636 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4637 CGM.getModule(), OMPRTL___kmpc_free),
4638 Args);
4639}
4640
4641void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4642 OpenMPDependClauseKind NewDepKind,
4643 SourceLocation Loc) {
4644 ASTContext &C = CGM.getContext();
4645 QualType FlagsTy;
4646 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4647 RecordDecl *KmpDependInfoRD =
4648 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4649 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4650 llvm::Value *NumDeps;
4651 LValue Base;
4652 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4653
4654 Address Begin = Base.getAddress(CGF);
4655 // Cast from pointer to array type to pointer to single element.
4656 llvm::Value *End = CGF.Builder.CreateGEP(
4657 Begin.getElementType(), Begin.getPointer(), NumDeps);
4658 // The basic structure here is a while-do loop.
4659 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4660 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4661 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4662 CGF.EmitBlock(BodyBB);
4663 llvm::PHINode *ElementPHI =
4664 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4665 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4666 Begin = Begin.withPointer(ElementPHI, KnownNonNull);
4667 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4668 Base.getTBAAInfo());
4669 // deps[i].flags = NewDepKind;
4670 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4671 LValue FlagsLVal = CGF.EmitLValueForField(
4672 Base, *std::next(KmpDependInfoRD->field_begin(),
4673 static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4674 CGF.EmitStoreOfScalar(
4675 llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
4676 FlagsLVal);
4677
4678 // Shift the address forward by one element.
4679 Address ElementNext =
4680 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
4681 ElementPHI->addIncoming(ElementNext.getPointer(),
4682 CGF.Builder.GetInsertBlock());
4683 llvm::Value *IsEmpty =
4684 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
4685 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4686 // Done.
4687 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4688}
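// Illustrative usage (sketch; `obj` is a placeholder name):
//   #pragma omp depobj(obj) update(in)
// reaches emitUpdateClause: the loop above visits every kmp_depend_info
// element stored in the depobj and rewrites only its flags field with the
// translated dependence kind.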
4689
4690void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4691 const OMPExecutableDirective &D,
4692 llvm::Function *TaskFunction,
4693 QualType SharedsTy, Address Shareds,
4694 const Expr *IfCond,
4695 const OMPTaskDataTy &Data) {
4696 if (!CGF.HaveInsertPoint())
4697 return;
4698
4699 TaskResultTy Result =
4700 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4701 llvm::Value *NewTask = Result.NewTask;
4702 llvm::Function *TaskEntry = Result.TaskEntry;
4703 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4704 LValue TDBase = Result.TDBase;
4705 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4706 // Process list of dependences.
4707 Address DependenciesArray = Address::invalid();
4708 llvm::Value *NumOfElements;
4709 std::tie(NumOfElements, DependenciesArray) =
4710 emitDependClause(CGF, Data.Dependences, Loc);
4711
4712 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4713 // libcall.
4714 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4715 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4716 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4717 // list is not empty
4718 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4719 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4720 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4721 llvm::Value *DepTaskArgs[7];
4722 if (!Data.Dependences.empty()) {
4723 DepTaskArgs[0] = UpLoc;
4724 DepTaskArgs[1] = ThreadID;
4725 DepTaskArgs[2] = NewTask;
4726 DepTaskArgs[3] = NumOfElements;
4727 DepTaskArgs[4] = DependenciesArray.getPointer();
4728 DepTaskArgs[5] = CGF.Builder.getInt32(0);
4729 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4730 }
4731 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4732 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4733 if (!Data.Tied) {
4734 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4735 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4736 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4737 }
4738 if (!Data.Dependences.empty()) {
4739 CGF.EmitRuntimeCall(
4740 OMPBuilder.getOrCreateRuntimeFunction(
4741 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
4742 DepTaskArgs);
4743 } else {
4744 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4745 CGM.getModule(), OMPRTL___kmpc_omp_task),
4746 TaskArgs);
4747 }
4748 // Check if the parent region is untied and build a return for the untied task.
4749 if (auto *Region =
4750 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4751 Region->emitUntiedSwitch(CGF);
4752 };
4753
4754 llvm::Value *DepWaitTaskArgs[7];
4755 if (!Data.Dependences.empty()) {
4756 DepWaitTaskArgs[0] = UpLoc;
4757 DepWaitTaskArgs[1] = ThreadID;
4758 DepWaitTaskArgs[2] = NumOfElements;
4759 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4760 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4761 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4762 DepWaitTaskArgs[6] =
4763 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
4764 }
4765 auto &M = CGM.getModule();
4766 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4767 TaskEntry, &Data, &DepWaitTaskArgs,
4768 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4769 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4770 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4771 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4772 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4773 // is specified.
4774 if (!Data.Dependences.empty())
4775 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4776 M, OMPRTL___kmpc_omp_taskwait_deps_51),
4777 DepWaitTaskArgs);
4778 // Call proxy_task_entry(gtid, new_task);
4779 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4780 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4781 Action.Enter(CGF);
4782 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4783 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
4784 OutlinedFnArgs);
4785 };
4786
4787 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4788 // kmp_task_t *new_task);
4789 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4790 // kmp_task_t *new_task);
4791 RegionCodeGenTy RCG(CodeGen);
4792 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4793 M, OMPRTL___kmpc_omp_task_begin_if0),
4794 TaskArgs,
4795 OMPBuilder.getOrCreateRuntimeFunction(
4796 M, OMPRTL___kmpc_omp_task_complete_if0),
4797 TaskArgs);
4798 RCG.setAction(Action);
4799 RCG(CGF);
4800 };
4801
4802 if (IfCond) {
4803 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4804 } else {
4805 RegionCodeGenTy ThenRCG(ThenCodeGen);
4806 ThenRCG(CGF);
4807 }
4808}
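// Illustrative usage (sketch; `x` and `cond` are placeholder names):
//   int x = 0;
//   #pragma omp task depend(inout : x) if(cond)
//   x += 1;
// With a non-empty dependence list, ThenCodeGen launches the task through
// __kmpc_omp_task_with_deps; when the if clause evaluates to false,
// ElseCodeGen first waits on the dependences and then runs the task entry
// inline between __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.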
4809
4810void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4811 const OMPLoopDirective &D,
4812 llvm::Function *TaskFunction,
4813 QualType SharedsTy, Address Shareds,
4814 const Expr *IfCond,
4815 const OMPTaskDataTy &Data) {
4816 if (!CGF.HaveInsertPoint())
4817 return;
4818 TaskResultTy Result =
4819 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4820 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4821 // libcall.
4822 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4823 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4824 // sched, kmp_uint64 grainsize, void *task_dup);
4825 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4826 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4827 llvm::Value *IfVal;
4828 if (IfCond) {
4829 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4830 /*isSigned=*/true);
4831 } else {
4832 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4833 }
4834
4835 LValue LBLVal = CGF.EmitLValueForField(
4836 Result.TDBase,
4837 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4838 const auto *LBVar =
4839 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4840 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
4841 LBLVal.getQuals(),
4842 /*IsInitializer=*/true);
4843 LValue UBLVal = CGF.EmitLValueForField(
4844 Result.TDBase,
4845 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4846 const auto *UBVar =
4847 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4848 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
4849 UBLVal.getQuals(),
4850 /*IsInitializer=*/true);
4851 LValue StLVal = CGF.EmitLValueForField(
4852 Result.TDBase,
4853 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4854 const auto *StVar =
4855 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4856 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
4857 StLVal.getQuals(),
4858 /*IsInitializer=*/true);
4859 // Store reductions address.
4860 LValue RedLVal = CGF.EmitLValueForField(
4861 Result.TDBase,
4862 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4863 if (Data.Reductions) {
4864 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4865 } else {
4866 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
4867 CGF.getContext().VoidPtrTy);
4868 }
4869 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4870 llvm::Value *TaskArgs[] = {
4871 UpLoc,
4872 ThreadID,
4873 Result.NewTask,
4874 IfVal,
4875 LBLVal.getPointer(CGF),
4876 UBLVal.getPointer(CGF),
4877 CGF.EmitLoadOfScalar(StLVal, Loc),
4878 llvm::ConstantInt::getSigned(
4879 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
4880 llvm::ConstantInt::getSigned(
4881 CGF.IntTy, Data.Schedule.getPointer()
4882 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4883 : NoSchedule),
4884 Data.Schedule.getPointer()
4885 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4886 /*isSigned=*/false)
4887 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4888 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4889 Result.TaskDupFn, CGF.VoidPtrTy)
4890 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4891 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4892 CGM.getModule(), OMPRTL___kmpc_taskloop),
4893 TaskArgs);
4894}
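// Illustrative usage (sketch; `n` and `work` are placeholder names):
//   #pragma omp taskloop grainsize(4)
//   for (int i = 0; i < n; ++i)
//     work(i);
// A grainsize clause selects sched == Grainsize and a num_tasks clause
// selects sched == NumTasks in the __kmpc_taskloop arguments above; with
// neither clause, sched == NoSchedule and the grainsize argument is 0.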
4895
4896/// Emit the reduction operation for each element of an array (required for
4897/// array sections): LHS op = RHS.
4898/// \param Type Type of array.
4899/// \param LHSVar Variable on the left side of the reduction operation
4900/// (references element of array in original variable).
4901/// \param RHSVar Variable on the right side of the reduction operation
4902/// (references element of array in original variable).
4903/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4904/// RHSVar.
4905static void EmitOMPAggregateReduction(
4906 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4907 const VarDecl *RHSVar,
4908 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4909 const Expr *, const Expr *)> &RedOpGen,
4910 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4911 const Expr *UpExpr = nullptr) {
4912 // Perform element-by-element initialization.
4913 QualType ElementTy;
4914 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4915 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4916
4917 // Drill down to the base element type on both arrays.
4918 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4919 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4920
4921 llvm::Value *RHSBegin = RHSAddr.getPointer();
4922 llvm::Value *LHSBegin = LHSAddr.getPointer();
4923 // Cast from pointer to array type to pointer to single element.
4924 llvm::Value *LHSEnd =
4925 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
4926 // The basic structure here is a while-do loop.
4927 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4928 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4929 llvm::Value *IsEmpty =
4930 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4931 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4932
4933 // Enter the loop body, making that address the current address.
4934 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4935 CGF.EmitBlock(BodyBB);
4936
4937 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4938
4939 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4940 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4941 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4942 Address RHSElementCurrent(
4943 RHSElementPHI, RHSAddr.getElementType(),
4944 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4945
4946 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4947 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4948 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4949 Address LHSElementCurrent(
4950 LHSElementPHI, LHSAddr.getElementType(),
4951 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4952
4953 // Emit copy.
4954 CodeGenFunction::OMPPrivateScope Scope(CGF);
4955 Scope.addPrivate(LHSVar, LHSElementCurrent);
4956 Scope.addPrivate(RHSVar, RHSElementCurrent);
4957 Scope.Privatize();
4958 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4959 Scope.ForceCleanup();
4960
4961 // Shift the address forward by one element.
4962 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4963 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
4964 "omp.arraycpy.dest.element");
4965 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4966 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
4967 "omp.arraycpy.src.element");
4968 // Check whether we've reached the end.
4969 llvm::Value *Done =
4970 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4971 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4972 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4973 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4974
4975 // Done.
4976 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4977}
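// Illustrative usage (sketch; `a` and `n` are placeholder names): an array
// section reduction such as
//   #pragma omp parallel for reduction(+ : a[0:n])
//   for (int i = 0; i < n; ++i)
//     a[i] += 1;
// requires this element-by-element loop, because the combiner must be applied
// to every element of the section rather than to a single scalar.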
4978
4979/// Emit the reduction combiner. If the combiner is a simple expression, emit
4980/// it as is; otherwise treat it as the combiner of a UDR decl and emit it as
4981/// a call to the UDR combiner function.
4982static void emitReductionCombiner(CodeGenFunction &CGF,
4983 const Expr *ReductionOp) {
4984 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
4985 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4986 if (const auto *DRE =
4987 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4988 if (const auto *DRD =
4989 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4990 std::pair<llvm::Function *, llvm::Function *> Reduction =
4991 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4992 RValue Func = RValue::get(Reduction.first);
4993 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4994 CGF.EmitIgnoredExpr(ReductionOp);
4995 return;
4996 }
4997 CGF.EmitIgnoredExpr(ReductionOp);
4998}
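// Illustrative usage (sketch; `T`, `combine`, and `v` are placeholder names):
// the UDR path above is taken for user-defined reductions such as
//   #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
//   #pragma omp parallel for reduction(merge : v)
// where the opaque callee of the combiner CallExpr is remapped to the
// compiler-generated UDR combiner function before the call is emitted.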
4999
5000llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5001 SourceLocation Loc, llvm::Type *ArgsElemType,
5002 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
5003 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
5004 ASTContext &C = CGM.getContext();
5005
5006 // void reduction_func(void *LHSArg, void *RHSArg);
5007 FunctionArgList Args;
5008 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5009 ImplicitParamDecl::Other);
5010 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5011 ImplicitParamDecl::Other);
5012 Args.push_back(&LHSArg);
5013 Args.push_back(&RHSArg);
5014 const auto &CGFI =
5015 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5016 std::string Name = getName({"omp", "reduction", "reduction_func"});
5017 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5018 llvm::GlobalValue::InternalLinkage, Name,
5019 &CGM.getModule());
5020 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5021 Fn->setDoesNotRecurse();
5022 CodeGenFunction CGF(CGM);
5023 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5024
5025 // Dst = (void*[n])(LHSArg);
5026 // Src = (void*[n])(RHSArg);
5027 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5028 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5029 ArgsElemType->getPointerTo()),
5030 ArgsElemType, CGF.getPointerAlign());
5031 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5032 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5033 ArgsElemType->getPointerTo()),
5034 ArgsElemType, CGF.getPointerAlign());
5035
5036 // ...
5037 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5038 // ...
5039 CodeGenFunction::OMPPrivateScope Scope(CGF);
5040 const auto *IPriv = Privates.begin();
5041 unsigned Idx = 0;
5042 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5043 const auto *RHSVar =
5044 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5045 Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
5046 const auto *LHSVar =
5047 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5048 Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
5049 QualType PrivTy = (*IPriv)->getType();
5050 if (PrivTy->isVariablyModifiedType()) {
5051 // Get array size and emit VLA type.
5052 ++Idx;
5053 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5054 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5055 const VariableArrayType *VLA =
5056 CGF.getContext().getAsVariableArrayType(PrivTy);
5057 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5058 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5059 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5060 CGF.EmitVariablyModifiedType(PrivTy);
5061 }
5062 }
5063 Scope.Privatize();
5064 IPriv = Privates.begin();
5065 const auto *ILHS = LHSExprs.begin();
5066 const auto *IRHS = RHSExprs.begin();
5067 for (const Expr *E : ReductionOps) {
5068 if ((*IPriv)->getType()->isArrayType()) {
5069 // Emit reduction for array section.
5070 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5071 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5072 EmitOMPAggregateReduction(
5073 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5074 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5075 emitReductionCombiner(CGF, E);
5076 });
5077 } else {
5078 // Emit reduction for array subscript or single variable.
5079 emitReductionCombiner(CGF, E);
5080 }
5081 ++IPriv;
5082 ++ILHS;
5083 ++IRHS;
5084 }
5085 Scope.ForceCleanup();
5086 CGF.FinishFunction();
5087 return Fn;
5088}
5089
5090void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5091 const Expr *ReductionOp,
5092 const Expr *PrivateRef,
5093 const DeclRefExpr *LHS,
5094 const DeclRefExpr *RHS) {
5095 if (PrivateRef->getType()->isArrayType()) {
5096 // Emit reduction for array section.
5097 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5098 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5099 EmitOMPAggregateReduction(
5100 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5101 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5102 emitReductionCombiner(CGF, ReductionOp);
5103 });
5104 } else {
5105 // Emit reduction for array subscript or single variable.
5106 emitReductionCombiner(CGF, ReductionOp);
5107 }
5108}
5109
5110void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5111 ArrayRef<const Expr *> Privates,
5112 ArrayRef<const Expr *> LHSExprs,
5113 ArrayRef<const Expr *> RHSExprs,
5114 ArrayRef<const Expr *> ReductionOps,
5115 ReductionOptionsTy Options) {
5116 if (!CGF.HaveInsertPoint())
5117 return;
5118
5119 bool WithNowait = Options.WithNowait;
5120 bool SimpleReduction = Options.SimpleReduction;
5121
5122 // The following code should be emitted for the reduction:
5123 //
5124 // static kmp_critical_name lock = { 0 };
5125 //
5126 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5127 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5128 // ...
5129 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5130 // *(Type<n>-1*)rhs[<n>-1]);
5131 // }
5132 //
5133 // ...
5134 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5135 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5136 // RedList, reduce_func, &<lock>)) {
5137 // case 1:
5138 // ...
5139 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5140 // ...
5141 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5142 // break;
5143 // case 2:
5144 // ...
5145 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5146 // ...
5147 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5148 // break;
5149 // default:;
5150 // }
5151 //
5152 // If SimpleReduction is true, only the following code is generated:
5153 // ...
5154 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5155 // ...
5156
5157 ASTContext &C = CGM.getContext();
5158
5159 if (SimpleReduction) {
5160 CodeGenFunction::RunCleanupsScope Scope(CGF);
5161 const auto *IPriv = Privates.begin();
5162 const auto *ILHS = LHSExprs.begin();
5163 const auto *IRHS = RHSExprs.begin();
5164 for (const Expr *E : ReductionOps) {
5165 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5166 cast<DeclRefExpr>(*IRHS));
5167 ++IPriv;
5168 ++ILHS;
5169 ++IRHS;
5170 }
5171 return;
5172 }
5173
5174 // 1. Build a list of reduction variables.
5175 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5176 auto Size = RHSExprs.size();
5177 for (const Expr *E : Privates) {
5178 if (E->getType()->isVariablyModifiedType())
5179 // Reserve a slot for the array size.
5180 ++Size;
5181 }
5182 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5183 QualType ReductionArrayTy =
5184 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5185 /*IndexTypeQuals=*/0);
5186 Address ReductionList =
5187 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5188 const auto *IPriv = Privates.begin();
5189 unsigned Idx = 0;
5190 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5191 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5192 CGF.Builder.CreateStore(
5193 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5194 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5195 Elem);
5196 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5197 // Store array size.
5198 ++Idx;
5199 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5200 llvm::Value *Size = CGF.Builder.CreateIntCast(
5201 CGF.getVLASize(
5202 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5203 .NumElts,
5204 CGF.SizeTy, /*isSigned=*/false);
5205 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5206 Elem);
5207 }
5208 }
5209
5210 // 2. Emit reduce_func().
5211 llvm::Function *ReductionFn =
5212 emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
5213 Privates, LHSExprs, RHSExprs, ReductionOps);
5214
5215 // 3. Create static kmp_critical_name lock = { 0 };
5216 std::string Name = getName({"reduction"});
5217 llvm::Value *Lock = getCriticalRegionLock(Name);
5218
5219 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5220 // RedList, reduce_func, &<lock>);
5221 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5222 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5223 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5224 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5225 ReductionList.getPointer(), CGF.VoidPtrTy);
5226 llvm::Value *Args[] = {
5227 IdentTLoc, // ident_t *<loc>
5228 ThreadId, // i32 <gtid>
5229 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5230 ReductionArrayTySize, // size_type sizeof(RedList)
5231 RL, // void *RedList
5232 ReductionFn, // void (*) (void *, void *) <reduce_func>
5233 Lock // kmp_critical_name *&<lock>
5234 };
5235 llvm::Value *Res = CGF.EmitRuntimeCall(
5236 OMPBuilder.getOrCreateRuntimeFunction(
5237 CGM.getModule(),
5238 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5239 Args);
5240
5241 // 5. Build switch(res)
5242 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5243 llvm::SwitchInst *SwInst =
5244 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5245
5246 // 6. Build case 1:
5247 // ...
5248 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5249 // ...
5250 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5251 // break;
5252 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5253 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5254 CGF.EmitBlock(Case1BB);
5255
5256 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5257 llvm::Value *EndArgs[] = {
5258 IdentTLoc, // ident_t *<loc>
5259 ThreadId, // i32 <gtid>
5260 Lock // kmp_critical_name *&<lock>
5261 };
5262 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5263 CodeGenFunction &CGF, PrePostActionTy &Action) {
5264 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5265 const auto *IPriv = Privates.begin();
5266 const auto *ILHS = LHSExprs.begin();
5267 const auto *IRHS = RHSExprs.begin();
5268 for (const Expr *E : ReductionOps) {
5269 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5270 cast<DeclRefExpr>(*IRHS));
5271 ++IPriv;
5272 ++ILHS;
5273 ++IRHS;
5274 }
5275 };
5276 RegionCodeGenTy RCG(CodeGen);
5277 CommonActionTy Action(
5278 nullptr, std::nullopt,
5279 OMPBuilder.getOrCreateRuntimeFunction(
5280 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5281 : OMPRTL___kmpc_end_reduce),
5282 EndArgs);
5283 RCG.setAction(Action);
5284 RCG(CGF);
5285
5286 CGF.EmitBranch(DefaultBB);
5287
5288 // 7. Build case 2:
5289 // ...
5290 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5291 // ...
5292 // break;
5293 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5294 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5295 CGF.EmitBlock(Case2BB);
5296
5297 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5298 CodeGenFunction &CGF, PrePostActionTy &Action) {
5299 const auto *ILHS = LHSExprs.begin();
5300 const auto *IRHS = RHSExprs.begin();
5301 const auto *IPriv = Privates.begin();
5302 for (const Expr *E : ReductionOps) {
5303 const Expr *XExpr = nullptr;
5304 const Expr *EExpr = nullptr;
5305 const Expr *UpExpr = nullptr;
5306 BinaryOperatorKind BO = BO_Comma;
5307 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5308 if (BO->getOpcode() == BO_Assign) {
5309 XExpr = BO->getLHS();
5310 UpExpr = BO->getRHS();
5311 }
5312 }
5313 // Try to emit update expression as a simple atomic.
5314 const Expr *RHSExpr = UpExpr;
5315 if (RHSExpr) {
5316 // Analyze RHS part of the whole expression.
5317 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5318 RHSExpr->IgnoreParenImpCasts())) {
5319 // If this is a conditional operator, analyze its condition for
5320 // min/max reduction operator.
5321 RHSExpr = ACO->getCond();
5322 }
5323 if (const auto *BORHS =
5324 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5325 EExpr = BORHS->getRHS();
5326 BO = BORHS->getOpcode();
5327 }
5328 }
5329 if (XExpr) {
5330 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5331 auto &&AtomicRedGen = [BO, VD,
5332 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5333 const Expr *EExpr, const Expr *UpExpr) {
5334 LValue X = CGF.EmitLValue(XExpr);
5335 RValue E;
5336 if (EExpr)
5337 E = CGF.EmitAnyExpr(EExpr);
5338 CGF.EmitOMPAtomicSimpleUpdateExpr(
5339 X, E, BO, /*IsXLHSInRHSPart=*/true,
5340 llvm::AtomicOrdering::Monotonic, Loc,
5341 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5342 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5343 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5344 CGF.emitOMPSimpleStore(
5345 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5346 VD->getType().getNonReferenceType(), Loc);
5347 PrivateScope.addPrivate(VD, LHSTemp);
5348 (void)PrivateScope.Privatize();
5349 return CGF.EmitAnyExpr(UpExpr);
5350 });
5351 };
5352 if ((*IPriv)->getType()->isArrayType()) {
5353 // Emit atomic reduction for array section.
5354 const auto *RHSVar =
5355 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5356 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5357 AtomicRedGen, XExpr, EExpr, UpExpr);
5358 } else {
5359 // Emit atomic reduction for array subscript or single variable.
5360 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5361 }
5362 } else {
5363 // Emit as a critical region.
5364 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5365 const Expr *, const Expr *) {
5366 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5367 std::string Name = RT.getName({"atomic_reduction"});
5368 RT.emitCriticalRegion(
5369 CGF, Name,
5370 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5371 Action.Enter(CGF);
5372 emitReductionCombiner(CGF, E);
5373 },
5374 Loc);
5375 };
5376 if ((*IPriv)->getType()->isArrayType()) {
5377 const auto *LHSVar =
5378 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5379 const auto *RHSVar =
5380 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5381 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5382 CritRedGen);
5383 } else {
5384 CritRedGen(CGF, nullptr, nullptr, nullptr);
5385 }
5386 }
5387 ++ILHS;
5388 ++IRHS;
5389 ++IPriv;
5390 }
5391 };
5392 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5393 if (!WithNowait) {
5394 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5395 llvm::Value *EndArgs[] = {
5396 IdentTLoc, // ident_t *<loc>
5397 ThreadId, // i32 <gtid>
5398 Lock // kmp_critical_name *&<lock>
5399 };
5400 CommonActionTy Action(nullptr, std::nullopt,
5401 OMPBuilder.getOrCreateRuntimeFunction(
5402 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5403 EndArgs);
5404 AtomicRCG.setAction(Action);
5405 AtomicRCG(CGF);
5406 } else {
5407 AtomicRCG(CGF);
5408 }
5409
5410 CGF.EmitBranch(DefaultBB);
5411 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5412}
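// Illustrative usage (sketch; `sum` and `n` are placeholder names):
//   int sum = 0;
//   #pragma omp parallel for reduction(+ : sum)
//   for (int i = 0; i < n; ++i)
//     sum += i;
// __kmpc_reduce{_nowait} returns 1 when this thread should combine the
// private copies directly (case 1), 2 when it must combine them atomically
// (case 2), and 0 when it has nothing left to do (default).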
5413
5414/// Generates unique name for artificial threadprivate variables.
5415/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5416static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5417 const Expr *Ref) {
5418 SmallString<256> Buffer;
5419 llvm::raw_svector_ostream Out(Buffer);
5420 const clang::DeclRefExpr *DE;
5421 const VarDecl *D = ::getBaseDecl(Ref, DE);
5422 if (!D)
5423 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5424 D = D->getCanonicalDecl();
5425 std::string Name = CGM.getOpenMPRuntime().getName(
5426 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5427 Out << Prefix << Name << "_"
5428 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5429 return std::string(Out.str());
5430}
5431
5432/// Emits reduction initializer function:
5433/// \code
5434/// void @.red_init(void* %arg, void* %orig) {
5435/// %0 = bitcast void* %arg to <type>*
5436/// store <type> <init>, <type>* %0
5437/// ret void
5438/// }
5439/// \endcode
5440static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5441 SourceLocation Loc,
5442 ReductionCodeGen &RCG, unsigned N) {
5443 ASTContext &C = CGM.getContext();
5444 QualType VoidPtrTy = C.VoidPtrTy;
5445 VoidPtrTy.addRestrict();
5446 FunctionArgList Args;
5447 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5448 ImplicitParamDecl::Other);
5449 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5450 ImplicitParamDecl::Other);
5451 Args.emplace_back(&Param);
5452 Args.emplace_back(&ParamOrig);
5453 const auto &FnInfo =
5454 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5455 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5456 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5457 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5458 Name, &CGM.getModule());
5459 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5460 Fn->setDoesNotRecurse();
5461 CodeGenFunction CGF(CGM);
5462 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5463 QualType PrivateType = RCG.getPrivateType(N);
5464 Address PrivateAddr = CGF.EmitLoadOfPointer(
5465 CGF.Builder.CreateElementBitCast(
5466 CGF.GetAddrOfLocalVar(&Param),
5467 CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
5468 C.getPointerType(PrivateType)->castAs<PointerType>());
5469 llvm::Value *Size = nullptr;
5470 // If the size of the reduction item is non-constant, load it from global
5471 // threadprivate variable.
5472 if (RCG.getSizes(N).second) {
5473 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5474 CGF, CGM.getContext().getSizeType(),
5475 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5476 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5477 CGM.getContext().getSizeType(), Loc);
5478 }
5479 RCG.emitAggregateType(CGF, N, Size);
5480 Address OrigAddr = Address::invalid();
5481 // If the initialization uses the initializer from a declare reduction
5482 // construct, emit a pointer to the address of the original reduction item
5483 // (required by the reduction initializer).
5484 if (RCG.usesReductionInitializer(N)) {
5485 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5486 OrigAddr = CGF.EmitLoadOfPointer(
5487 SharedAddr,
5488 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5489 }
5490 // Emit the initializer:
5491 // %0 = bitcast void* %arg to <type>*
5492 // store <type> <init>, <type>* %0
5493 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5494 [](CodeGenFunction &) { return false; });
5495 CGF.FinishFunction();
5496 return Fn;
5497}
5498
5499/// Emits reduction combiner function:
5500/// \code
5501/// void @.red_comb(void* %arg0, void* %arg1) {
5502/// %lhs = bitcast void* %arg0 to <type>*
5503/// %rhs = bitcast void* %arg1 to <type>*
5504/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5505/// store <type> %2, <type>* %lhs
5506/// ret void
5507/// }
5508/// \endcode
5509static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5510 SourceLocation Loc,
5511 ReductionCodeGen &RCG, unsigned N,
5512 const Expr *ReductionOp,
5513 const Expr *LHS, const Expr *RHS,
5514 const Expr *PrivateRef) {
5515 ASTContext &C = CGM.getContext();
5516 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5517 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5518 FunctionArgList Args;
5519 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5520 C.VoidPtrTy, ImplicitParamDecl::Other);
5521 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5522 ImplicitParamDecl::Other);
5523 Args.emplace_back(&ParamInOut);
5524 Args.emplace_back(&ParamIn);
5525 const auto &FnInfo =
5526 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5527 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5528 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5529 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5530 Name, &CGM.getModule());
5531 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5532 Fn->setDoesNotRecurse();
5533 CodeGenFunction CGF(CGM);
5534 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5535 llvm::Value *Size = nullptr;
5536 // If the size of the reduction item is non-constant, load it from global
5537 // threadprivate variable.
5538 if (RCG.getSizes(N).second) {
5539 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5540 CGF, CGM.getContext().getSizeType(),
5541 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5542 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5543 CGM.getContext().getSizeType(), Loc);
5544 }
5545 RCG.emitAggregateType(CGF, N, Size);
5546 // Remap lhs and rhs variables to the addresses of the function arguments.
5547 // %lhs = bitcast void* %arg0 to <type>*
5548 // %rhs = bitcast void* %arg1 to <type>*
5549 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5550 PrivateScope.addPrivate(
5551 LHSVD,
5552 // Pull out the pointer to the variable.
5553 CGF.EmitLoadOfPointer(
5554 CGF.Builder.CreateElementBitCast(
5555 CGF.GetAddrOfLocalVar(&ParamInOut),
5556 CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
5557 C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
5558 PrivateScope.addPrivate(
5559 RHSVD,
5560 // Pull out the pointer to the variable.
5561 CGF.EmitLoadOfPointer(
5562 CGF.Builder.CreateElementBitCast(
5563 CGF.GetAddrOfLocalVar(&ParamIn),
5564 CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
5565 C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
5566 PrivateScope.Privatize();
5567 // Emit the combiner body:
5568 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5569 // store <type> %2, <type>* %lhs
5570 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5571 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5572 cast<DeclRefExpr>(RHS));
5573 CGF.FinishFunction();
5574 return Fn;
5575}
5576
5577/// Emits reduction finalizer function:
5578/// \code
5579/// void @.red_fini(void* %arg) {
5580/// %0 = bitcast void* %arg to <type>*
5581/// <destroy>(<type>* %0)
5582/// ret void
5583/// }
5584/// \endcode
5585static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5586 SourceLocation Loc,
5587 ReductionCodeGen &RCG, unsigned N) {
5588 if (!RCG.needCleanups(N))
5589 return nullptr;
5590 ASTContext &C = CGM.getContext();
5591 FunctionArgList Args;
5592 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5593 ImplicitParamDecl::Other);
5594 Args.emplace_back(&Param);
5595 const auto &FnInfo =
5596 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5597 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5598 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
5599 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5600 Name, &CGM.getModule());
5601 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5602 Fn->setDoesNotRecurse();
5603 CodeGenFunction CGF(CGM);
5604 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5605 Address PrivateAddr = CGF.EmitLoadOfPointer(
5606 CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
5607 llvm::Value *Size = nullptr;
5608 // If the size of the reduction item is non-constant, load it from global
5609 // threadprivate variable.
5610 if (RCG.getSizes(N).second) {
5611 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5612 CGF, CGM.getContext().getSizeType(),
5613 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5614 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5615 CGM.getContext().getSizeType(), Loc);
5616 }
5617 RCG.emitAggregateType(CGF, N, Size);
5618 // Emit the finalizer body:
5619 // <destroy>(<type>* %0)
5620 RCG.emitCleanups(CGF, N, PrivateAddr);
5621 CGF.FinishFunction(Loc);
5622 return Fn;
5623}
5624
5625llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5626 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5627 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5628 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5629 return nullptr;
5630
5631 // Build typedef struct:
5632 // kmp_taskred_input {
5633 // void *reduce_shar; // shared reduction item
5634 // void *reduce_orig; // original reduction item used for initialization
5635 // size_t reduce_size; // size of data item
5636 // void *reduce_init; // data initialization routine
5637 // void *reduce_fini; // data finalization routine
5638 // void *reduce_comb; // data combiner routine
5639 // kmp_task_red_flags_t flags; // flags for additional info from compiler
5640 // } kmp_taskred_input_t;
5641 ASTContext &C = CGM.getContext();
5642 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
5643 RD->startDefinition();
5644 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5645 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5646 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5647 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5648 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5649 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5650 const FieldDecl *FlagsFD = addFieldToRecordDecl(
5651 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5652 RD->completeDefinition();
5653 QualType RDType = C.getRecordType(RD);
5654 unsigned Size = Data.ReductionVars.size();
5655 llvm::APInt ArraySize(/*numBits=*/64, Size);
5656 QualType ArrayRDType = C.getConstantArrayType(
5657 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5658 // kmp_task_red_input_t .rd_input.[Size];
5659 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5660 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
5661 Data.ReductionCopies, Data.ReductionOps);
5662 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5663 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5664 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5665 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5666 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5667 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
5668 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5669 ".rd_input.gep.");
5670 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5671 // ElemLVal.reduce_shar = &Shareds[Cnt];
5672 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5673 RCG.emitSharedOrigLValue(CGF, Cnt);
5674 llvm::Value *CastedShared =
5675 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
5676 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5677 // ElemLVal.reduce_orig = &Origs[Cnt];
5678 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
5679 llvm::Value *CastedOrig =
5680 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
5681 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
5682 RCG.emitAggregateType(CGF, Cnt);
5683 llvm::Value *SizeValInChars;
5684 llvm::Value *SizeVal;
5685 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5686 // We use delayed creation/initialization for VLAs and array sections. It is
5687 // required because the runtime does not provide a way to pass the sizes of
5688 // VLAs/array sections to the initializer/combiner/finalizer functions.
5689 // Instead, threadprivate global variables are used to store these values
5690 // for use in those functions.
5691 bool DelayedCreation = !!SizeVal;
5692 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5693 /*isSigned=*/false);
5694 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5695 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5696 // ElemLVal.reduce_init = init;
5697 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5698 llvm::Value *InitAddr =
5699 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5700 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5701 // ElemLVal.reduce_fini = fini;
5702 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5703 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5704 llvm::Value *FiniAddr = Fini
5705 ? CGF.EmitCastToVoidPtr(Fini)
5706 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5707 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5708 // ElemLVal.reduce_comb = comb;
5709 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5710 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5711 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5712 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5713 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5714 // ElemLVal.flags = 0;
5715 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5716 if (DelayedCreation) {
5717 CGF.EmitStoreOfScalar(
5718 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
5719 FlagsLVal);
5720 } else
5721 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
5722 FlagsLVal.getType());
5723 }
5724 if (Data.IsReductionWithTaskMod) {
5725 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
5726 // is_ws, int num, void *data);
5727 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5728 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5729 CGM.IntTy, /*isSigned=*/true);
5730 llvm::Value *Args[] = {
5731 IdentTLoc, GTid,
5732 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
5733 /*isSigned=*/true),
5734 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5735 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5736 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
5737 return CGF.EmitRuntimeCall(
5738 OMPBuilder.getOrCreateRuntimeFunction(
5739 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
5740 Args);
5741 }
5742 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
5743 llvm::Value *Args[] = {
5744 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5745 /*isSigned=*/true),
5746 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5747 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5748 CGM.VoidPtrTy)};
5749 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5750 CGM.getModule(), OMPRTL___kmpc_taskred_init),
5751 Args);
5752}
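// Illustrative usage (sketch; `r` is a placeholder name):
//   #pragma omp taskgroup task_reduction(+ : r)
//   {
//     #pragma omp task in_reduction(+ : r)
//     r += 1;
//   }
// lowers to the __kmpc_taskred_init call above, while the task reduction
// modifier form `reduction(task, + : r)` on a parallel or worksharing
// construct takes the __kmpc_taskred_modifier_init branch instead.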
5753
5754void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
5755 SourceLocation Loc,
5756 bool IsWorksharingReduction) {
5757 // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
5758 // gtid, int is_ws);
5759 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
5760 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5761 CGM.IntTy, /*isSigned=*/true);
5762 llvm::Value *Args[] = {IdentTLoc, GTid,
5763 llvm::ConstantInt::get(CGM.IntTy,
5764 IsWorksharingReduction ? 1 : 0,
5765 /*isSigned=*/true)};
5766 (void)CGF.EmitRuntimeCall(
5767 OMPBuilder.getOrCreateRuntimeFunction(
5768 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
5769 Args);
5770}
5771
5772void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5773 SourceLocation Loc,
5774 ReductionCodeGen &RCG,
5775 unsigned N) {
5776 auto Sizes = RCG.getSizes(N);
5777 // Emit the threadprivate global variable if the size is non-constant
5778 // (Sizes.second != nullptr).
5779 if (Sizes.second) {
5780 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5781 /*isSigned=*/false);
5782 Address SizeAddr = getAddrOfArtificialThreadPrivate(
5783 CGF, CGM.getContext().getSizeType(),
5784 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5785 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5786 }
5787}
5788
5789Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5790 SourceLocation Loc,
5791 llvm::Value *ReductionsPtr,
5792 LValue SharedLVal) {
5793 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5794 // *d);
5795 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
5796 CGM.IntTy,
5797 /*isSigned=*/true),
5798 ReductionsPtr,
5799 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5800 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
5801 return Address(
5802 CGF.EmitRuntimeCall(
5803 OMPBuilder.getOrCreateRuntimeFunction(
5804 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
5805 Args),
5806 CGF.Int8Ty, SharedLVal.getAlignment());
5807}
5808
5809void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
5810 const OMPTaskDataTy &Data) {
5811 if (!CGF.HaveInsertPoint())
5812 return;
5813
5814 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
5815 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
5816 OMPBuilder.createTaskwait(CGF.Builder);
5817 } else {
5818 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5819 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5820 auto &M = CGM.getModule();
5821 Address DependenciesArray = Address::invalid();
5822 llvm::Value *NumOfElements;
5823 std::tie(NumOfElements, DependenciesArray) =
5824 emitDependClause(CGF, Data.Dependences, Loc);
5825 if (!Data.Dependences.empty()) {
5826 llvm::Value *DepWaitTaskArgs[7];
5827 DepWaitTaskArgs[0] = UpLoc;
5828 DepWaitTaskArgs[1] = ThreadID;
5829 DepWaitTaskArgs[2] = NumOfElements;
5830 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5831 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5832 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5833 DepWaitTaskArgs[6] =
5834 llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
5835
5836 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5837
5838 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
5839 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5840 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
5841 // kmp_int32 has_no_wait); if dependence info is specified.
5842 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5843 M, OMPRTL___kmpc_omp_taskwait_deps_51),
5844 DepWaitTaskArgs);
5845
5846 } else {
5847
5848 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5849 // global_tid);
5850 llvm::Value *Args[] = {UpLoc, ThreadID};
5851 // Ignore return result until untied tasks are supported.
5852 CGF.EmitRuntimeCall(
5853 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
5854 Args);
5855 }
5856 }
5857
5858 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5859 Region->emitUntiedSwitch(CGF);
5860}
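// Illustrative usage (sketch; `x` is a placeholder name):
//   #pragma omp taskwait depend(in : x)
// takes the __kmpc_omp_taskwait_deps_51 branch above, while a plain
//   #pragma omp taskwait
// is emitted either through the OpenMPIRBuilder or as a
// __kmpc_omp_taskwait call.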
5861
5862void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5863 OpenMPDirectiveKind InnerKind,
5864 const RegionCodeGenTy &CodeGen,
5865 bool HasCancel) {
5866 if (!CGF.HaveInsertPoint())
5867 return;
5868 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
5869 InnerKind != OMPD_critical &&
5870 InnerKind != OMPD_master &&
5871 InnerKind != OMPD_masked);
5872 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5873}
5874
5875namespace {
5876enum RTCancelKind {
5877 CancelNoreq = 0,
5878 CancelParallel = 1,
5879 CancelLoop = 2,
5880 CancelSections = 3,
5881 CancelTaskgroup = 4
5882};
5883} // anonymous namespace
5884
5885static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5886 RTCancelKind CancelKind = CancelNoreq;
5887 if (CancelRegion == OMPD_parallel)
5888 CancelKind = CancelParallel;
5889 else if (CancelRegion == OMPD_for)
5890 CancelKind = CancelLoop;
5891 else if (CancelRegion == OMPD_sections)
5892 CancelKind = CancelSections;
5893 else {
5894 assert(CancelRegion == OMPD_taskgroup);
5895 CancelKind = CancelTaskgroup;
5896 }
5897 return CancelKind;
5898}
5899
5900void CGOpenMPRuntime::emitCancellationPointCall(
5901 CodeGenFunction &CGF, SourceLocation Loc,
5902 OpenMPDirectiveKind CancelRegion) {
5903 if (!CGF.HaveInsertPoint())
5904 return;
5905 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5906 // global_tid, kmp_int32 cncl_kind);
5907 if (auto *OMPRegionInfo =
5908 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5909 // For 'cancellation point taskgroup', the task region info may not have a
5910 // cancel. This may instead happen in another adjacent task.
5911 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5912 llvm::Value *Args[] = {
5913 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5914 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5915 // Check the result of the call to decide whether to exit the construct.
5916 llvm::Value *Result = CGF.EmitRuntimeCall(
5917 OMPBuilder.getOrCreateRuntimeFunction(
5918 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
5919 Args);
5920 // if (__kmpc_cancellationpoint()) {
5921 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5922 // exit from construct;
5923 // }
5924 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5925 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5926 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5927 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5928 CGF.EmitBlock(ExitBB);
5929 if (CancelRegion == OMPD_parallel)
5930 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5931 // exit from construct;
5932 CodeGenFunction::JumpDest CancelDest =
5933 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5934 CGF.EmitBranchThroughCleanup(CancelDest);
5935 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5936 }
5937 }
5938}
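// Illustrative usage (sketch; `n` and `work` are placeholder names):
//   #pragma omp for
//   for (int i = 0; i < n; ++i) {
//     #pragma omp cancellation point for
//     work(i);
//   }
// When __kmpc_cancellationpoint returns non-zero, control branches to the
// cleanup destination of the enclosing construct, with an additional cancel
// barrier for parallel regions.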
5939
5940void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5941 const Expr *IfCond,
5942 OpenMPDirectiveKind CancelRegion) {
5943 if (!CGF.HaveInsertPoint())
5944 return;
5945 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5946 // kmp_int32 cncl_kind);
5947 auto &M = CGM.getModule();
5948 if (auto *OMPRegionInfo =
5949 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5950 auto &&ThenGen = [this, &M, Loc, CancelRegion,
5951 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
5952 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5953 llvm::Value *Args[] = {
5954 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5955 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5956 // Check the result of the call to decide whether to exit the construct.
5957 llvm::Value *Result = CGF.EmitRuntimeCall(
5958 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
5959 // if (__kmpc_cancel()) {
5960 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
5961 // exit from construct;
5962 // }
5963 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
5964 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
5965 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
5966 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5967 CGF.EmitBlock(ExitBB);
5968 if (CancelRegion == OMPD_parallel)
5969 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
5970 // exit from construct;
5971 CodeGenFunction::JumpDest CancelDest =
5972 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5973 CGF.EmitBranchThroughCleanup(CancelDest);
5974 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5975 };
5976 if (IfCond) {
5977 emitIfClause(CGF, IfCond, ThenGen,
5978 [](CodeGenFunction &, PrePostActionTy &) {});
5979 } else {
5980 RegionCodeGenTy ThenRCG(ThenGen);
5981 ThenRCG(CGF);
5982 }
5983 }
5984}
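// Illustrative usage (sketch; `err` is a placeholder name):
//   #pragma omp cancel parallel if(err)
// The if clause maps onto emitIfClause above: __kmpc_cancel is emitted only
// when the condition holds and, on a non-zero result, the construct is
// exited through its cleanup destination.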
5985
5986namespace {
5987/// Cleanup action for uses_allocators support.
5988class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
5989 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
5990
5991public:
5992 OMPUsesAllocatorsActionTy(
5993 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
5994 : Allocators(Allocators) {}
5995 void Enter(CodeGenFunction &CGF) override {
5996 if (!CGF.HaveInsertPoint())
5997 return;
5998 for (const auto &AllocatorData : Allocators) {
5999 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6000 CGF, AllocatorData.first, AllocatorData.second);
6001 }
6002 }
6003 void Exit(CodeGenFunction &CGF) override {
6004 if (!CGF.HaveInsertPoint())
6005 return;
6006 for (const auto &AllocatorData : Allocators) {
6007 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6008 AllocatorData.first);
6009 }
6010 }
6011};
6012} // namespace
6013
6014void CGOpenMPRuntime::emitTargetOutlinedFunction(
6015 const OMPExecutableDirective &D, StringRef ParentName,
6016 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6017 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6018 assert(!ParentName.empty() && "Invalid target entry parent name!");
6019 HasEmittedTargetRegion = true;
6020 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6021 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6022 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6023 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6024 if (!D.AllocatorTraits)
6025 continue;
6026 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6027 }
6028 }
6029 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6030 CodeGen.setAction(UsesAllocatorAction);
6031 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6032 IsOffloadEntry, CodeGen);
6033}
6034
6035void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6036 const Expr *Allocator,
6037 const Expr *AllocatorTraits) {
6038 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6039 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6040 // Use default memspace handle.
6041 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6042 llvm::Value *NumTraits = llvm::ConstantInt::get(
6043 CGF.IntTy, cast<ConstantArrayType>(
6044 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6045 ->getSize()
6046 .getLimitedValue());
6047 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6048 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6049 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
6050 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6051 AllocatorTraitsLVal.getBaseInfo(),
6052 AllocatorTraitsLVal.getTBAAInfo());
6053 llvm::Value *Traits =
6054 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6055
6056 llvm::Value *AllocatorVal =
6057 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6058 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6059 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6060 // Store to allocator.
6061 CGF.EmitVarDecl(*cast<VarDecl>(
6062 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6063 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6064 AllocatorVal =
6065 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6066 Allocator->getType(), Allocator->getExprLoc());
6067 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6068}
6069
6070void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6071 const Expr *Allocator) {
6072 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6073 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6074 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6075 llvm::Value *AllocatorVal =
6076 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6077 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6078 CGF.getContext().VoidPtrTy,
6079 Allocator->getExprLoc());
6080 (void)CGF.EmitRuntimeCall(
6081 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6082 OMPRTL___kmpc_destroy_allocator),
6083 {ThreadId, AllocatorVal});
6084}
6085
6086void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6087 const OMPExecutableDirective &D, StringRef ParentName,
6088 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6089 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6090
6091 auto EntryInfo =
6092 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName);
6093
6094 CodeGenFunction CGF(CGM, true);
6095 llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
6096 [&CGF, &D, &CodeGen](StringRef EntryFnName) {
6097 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6098
6099 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6100 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6101 return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6102 };
6103
6104 // Get NumTeams and ThreadLimit attributes
6105 int32_t DefaultValTeams = -1;
6106 int32_t DefaultValThreads = -1;
6107 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6108 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6109
6110 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
6111 DefaultValTeams, DefaultValThreads,
6112 IsOffloadEntry, OutlinedFn, OutlinedFnID);
6113
6114 if (OutlinedFn != nullptr)
6115 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6116}
6117
6118/// Checks if the expression is constant or does not have non-trivial function
6119/// calls.
6120static bool isTrivial(ASTContext &Ctx, const Expr *E) {
6121 // We can skip constant expressions.
6122 // We can skip expressions with trivial calls or simple expressions.
6123 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6124 !E->hasNonTrivialCall(Ctx)) &&
6125 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6126}
6127
6128const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6129 const Stmt *Body) {
6130 const Stmt *Child = Body->IgnoreContainers();
6131 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6132 Child = nullptr;
6133 for (const Stmt *S : C->body()) {
6134 if (const auto *E = dyn_cast<Expr>(S)) {
6135 if (isTrivial(Ctx, E))
6136 continue;
6137 }
6138 // Some of the statements can be ignored.
6139 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6140 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6141 continue;
6142 // Analyze declarations.
6143 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6144 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6145 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6146 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6147 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6148 isa<UsingDirectiveDecl>(D) ||
6149 isa<OMPDeclareReductionDecl>(D) ||
6150 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6151 return true;
6152 const auto *VD = dyn_cast<VarDecl>(D);
6153 if (!VD)
6154 return false;
6155 return VD->hasGlobalStorage() || !VD->isUsed();
6156 }))
6157 continue;
6158 }
6159 // Found multiple children - cannot get the one child only.
6160 if (Child)
6161 return nullptr;
6162 Child = S;
6163 }
6164 if (Child)
6165 Child = Child->IgnoreContainers();
6166 }
6167 return Child;
6168}
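// Illustrative sketch (assumed example): for a captured body such as
//
//   {
//     ;                       // NullStmt - ignored
//     int Unused;             // unused local - ignored
//     #pragma omp teams       // the only non-trivial child
//     { /* ... */ }
//   }
//
// getSingleCompoundChild returns the teams directive; had a second
// non-trivial statement been present, it would return nullptr.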
6169
6170const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6171 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6172 int32_t &DefaultVal) {
6173
6174 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6175 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6176 "Expected target-based executable directive.");
6177 switch (DirectiveKind) {
6178 case OMPD_target: {
6179 const auto *CS = D.getInnermostCapturedStmt();
6180 const auto *Body =
6181 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6182 const Stmt *ChildStmt =
6183 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6184 if (const auto *NestedDir =
6185 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6186 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6187 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6188 const Expr *NumTeams =
6189 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6190 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6191 if (auto Constant =
6192 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6193 DefaultVal = Constant->getExtValue();
6194 return NumTeams;
6195 }
6196 DefaultVal = 0;
6197 return nullptr;
6198 }
6199 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6200 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6201 DefaultVal = 1;
6202 return nullptr;
6203 }
6204 DefaultVal = 1;
6205 return nullptr;
6206 }
6207 // A value of -1 is used to indicate that no explicit teams region needs to be emitted.
6208 DefaultVal = -1;
6209 return nullptr;
6210 }
6211 case OMPD_target_teams:
6212 case OMPD_target_teams_distribute:
6213 case OMPD_target_teams_distribute_simd:
6214 case OMPD_target_teams_distribute_parallel_for:
6215 case OMPD_target_teams_distribute_parallel_for_simd: {
6216 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6217 const Expr *NumTeams =
6218 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6219 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6220 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6221 DefaultVal = Constant->getExtValue();
6222 return NumTeams;
6223 }
6224 DefaultVal = 0;
6225 return nullptr;
6226 }
6227 case OMPD_target_parallel:
6228 case OMPD_target_parallel_for:
6229 case OMPD_target_parallel_for_simd:
6230 case OMPD_target_simd:
6231 DefaultVal = 1;
6232 return nullptr;
6233 case OMPD_parallel:
6234 case OMPD_for:
6235 case OMPD_parallel_for:
6236 case OMPD_parallel_master:
6237 case OMPD_parallel_sections:
6238 case OMPD_for_simd:
6239 case OMPD_parallel_for_simd:
6240 case OMPD_cancel:
6241 case OMPD_cancellation_point:
6242 case OMPD_ordered:
6243 case OMPD_threadprivate:
6244 case OMPD_allocate:
6245 case OMPD_task:
6246 case OMPD_simd:
6247 case OMPD_tile:
6248 case OMPD_unroll:
6249 case OMPD_sections:
6250 case OMPD_section:
6251 case OMPD_single:
6252 case OMPD_master:
6253 case OMPD_critical:
6254 case OMPD_taskyield:
6255 case OMPD_barrier:
6256 case OMPD_taskwait:
6257 case OMPD_taskgroup:
6258 case OMPD_atomic:
6259 case OMPD_flush:
6260 case OMPD_depobj:
6261 case OMPD_scan:
6262 case OMPD_teams:
6263 case OMPD_target_data:
6264 case OMPD_target_exit_data:
6265 case OMPD_target_enter_data:
6266 case OMPD_distribute:
6267 case OMPD_distribute_simd:
6268 case OMPD_distribute_parallel_for:
6269 case OMPD_distribute_parallel_for_simd:
6270 case OMPD_teams_distribute:
6271 case OMPD_teams_distribute_simd:
6272 case OMPD_teams_distribute_parallel_for:
6273 case OMPD_teams_distribute_parallel_for_simd:
6274 case OMPD_target_update:
6275 case OMPD_declare_simd:
6276 case OMPD_declare_variant:
6277 case OMPD_begin_declare_variant:
6278 case OMPD_end_declare_variant:
6279 case OMPD_declare_target:
6280 case OMPD_end_declare_target:
6281 case OMPD_declare_reduction:
6282 case OMPD_declare_mapper:
6283 case OMPD_taskloop:
6284 case OMPD_taskloop_simd:
6285 case OMPD_master_taskloop:
6286 case OMPD_master_taskloop_simd:
6287 case OMPD_parallel_master_taskloop:
6288 case OMPD_parallel_master_taskloop_simd:
6289 case OMPD_requires:
6290 case OMPD_metadirective:
6291 case OMPD_unknown:
6292 break;
6293 default:
6294 break;
6295 }
6296 llvm_unreachable("Unexpected directive kind.");
6297}
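// Illustrative sketch (assumed examples): for
// '#pragma omp target teams num_teams(4)' this returns the num_teams
// expression and sets DefaultVal to 4; for '#pragma omp target parallel' it
// returns nullptr with DefaultVal = 1; for a bare '#pragma omp target' whose
// body has no recognizable nested directive it leaves DefaultVal at -1.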
6298
6299llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6300 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6301 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6302 "Clauses associated with the teams directive expected to be emitted "
6303 "only for the host!");
6304 CGBuilderTy &Bld = CGF.Builder;
6305 int32_t DefaultNT = -1;
6306 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6307 if (NumTeams != nullptr) {
6308 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6309
6310 switch (DirectiveKind) {
6311 case OMPD_target: {
6312 const auto *CS = D.getInnermostCapturedStmt();
6313 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6314 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6315 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6316 /*IgnoreResultAssign*/ true);
6317 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6318 /*isSigned=*/true);
6319 }
6320 case OMPD_target_teams:
6321 case OMPD_target_teams_distribute:
6322 case OMPD_target_teams_distribute_simd:
6323 case OMPD_target_teams_distribute_parallel_for:
6324 case OMPD_target_teams_distribute_parallel_for_simd: {
6325 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6326 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6327 /*IgnoreResultAssign*/ true);
6328 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6329 /*isSigned=*/true);
6330 }
6331 default:
6332 break;
6333 }
6334 }
6335
6336 return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
6337}
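// Illustrative sketch (assumed examples): on the host,
// '#pragma omp target teams num_teams(n)' makes this emit the evaluation of
// 'n' cast to i32, while a bare '#pragma omp target' with no nested teams
// construct yields the constant -1, deferring the team count to the runtime.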
6338
6339static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6340 llvm::Value *DefaultThreadLimitVal) {
6341 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6342 CGF.getContext(), CS->getCapturedStmt());
6343 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6344 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6345 llvm::Value *NumThreads = nullptr;
6346 llvm::Value *CondVal = nullptr;
6347 // Handle the if clause. If the if clause is present, the number of threads
6348 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6349 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6350 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6351 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6352 const OMPIfClause *IfClause = nullptr;
6353 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6354 if (C->getNameModifier() == OMPD_unknown ||
6355 C->getNameModifier() == OMPD_parallel) {
6356 IfClause = C;
6357 break;
6358 }
6359 }
6360 if (IfClause) {
6361 const Expr *Cond = IfClause->getCondition();
6362 bool Result;
6363 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6364 if (!Result)
6365 return CGF.Builder.getInt32(1);
6366 } else {
6367 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6368 if (const auto *PreInit =
6369 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6370 for (const auto *I : PreInit->decls()) {
6371 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6372 CGF.EmitVarDecl(cast<VarDecl>(*I));
6373 } else {
6374 CodeGenFunction::AutoVarEmission Emission =
6375 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6376 CGF.EmitAutoVarCleanups(Emission);
6377 }
6378 }
6379 }
6380 CondVal = CGF.EvaluateExprAsBool(Cond);
6381 }
6382 }
6383 }
6384 // Check the value of the num_threads clause iff the if clause was not
6385 // specified or does not evaluate to false.
6386 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6387 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6388 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6389 const auto *NumThreadsClause =
6390 Dir->getSingleClause<OMPNumThreadsClause>();
6391 CodeGenFunction::LexicalScope Scope(
6392 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6393 if (const auto *PreInit =
6394 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6395 for (const auto *I : PreInit->decls()) {
6396 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6397 CGF.EmitVarDecl(cast<VarDecl>(*I));
6398 } else {
6399 CodeGenFunction::AutoVarEmission Emission =
6400 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6401 CGF.EmitAutoVarCleanups(Emission);
6402 }
6403 }
6404 }
6405 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6406 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6407 /*isSigned=*/false);
6408 if (DefaultThreadLimitVal)
6409 NumThreads = CGF.Builder.CreateSelect(
6410 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6411 DefaultThreadLimitVal, NumThreads);
6412 } else {
6413 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6414 : CGF.Builder.getInt32(0);
6415 }
6416 // Process condition of the if clause.
6417 if (CondVal) {
6418 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6419 CGF.Builder.getInt32(1));
6420 }
6421 return NumThreads;
6422 }
6423 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6424 return CGF.Builder.getInt32(1);
6425 }
6426 return DefaultThreadLimitVal;
6427}
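// Illustrative sketch (assumed example): when the single child of the
// captured statement is
//
//   #pragma omp parallel if (flag) num_threads(n)
//
// the helper emits roughly
//
//   threads = flag ? min(DefaultThreadLimitVal, (uint32_t)n) : 1;
//
// where the min() is applied only if a default thread limit was passed in,
// and a constant-false if clause short-circuits the whole computation to 1.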
6428
6429const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6430 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6431 int32_t &DefaultVal) {
6432 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6433 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6434 "Expected target-based executable directive.");
6435
6436 switch (DirectiveKind) {
6437 case OMPD_target:
6438 // Teams have no thread_limit clause.
6439 return nullptr;
6440 case OMPD_target_teams:
6441 case OMPD_target_teams_distribute:
6442 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6443 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6444 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6445 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6446 if (auto Constant =
6447 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6448 DefaultVal = Constant->getExtValue();
6449 return ThreadLimit;
6450 }
6451 return nullptr;
6452 case OMPD_target_parallel:
6453 case OMPD_target_parallel_for:
6454 case OMPD_target_parallel_for_simd:
6455 case OMPD_target_teams_distribute_parallel_for:
6456 case OMPD_target_teams_distribute_parallel_for_simd: {
6457 Expr *ThreadLimit = nullptr;
6458 Expr *NumThreads = nullptr;
6459 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6460 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6461 ThreadLimit = ThreadLimitClause->getThreadLimit();
6462 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6463 if (auto Constant =
6464 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6465 DefaultVal = Constant->getExtValue();
6466 }
6467 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6468 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6469 NumThreads = NumThreadsClause->getNumThreads();
6470 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6471 if (auto Constant =
6472 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6473 if (Constant->getExtValue() < DefaultVal) {
6474 DefaultVal = Constant->getExtValue();
6475 ThreadLimit = NumThreads;
6476 }
6477 }
6478 }
6479 }
6480 return ThreadLimit;
6481 }
6482 case OMPD_target_teams_distribute_simd:
6483 case OMPD_target_simd:
6484 DefaultVal = 1;
6485 return nullptr;
6486 case OMPD_parallel:
6487 case OMPD_for:
6488 case OMPD_parallel_for:
6489 case OMPD_parallel_master:
6490 case OMPD_parallel_sections:
6491 case OMPD_for_simd:
6492 case OMPD_parallel_for_simd:
6493 case OMPD_cancel:
6494 case OMPD_cancellation_point:
6495 case OMPD_ordered:
6496 case OMPD_threadprivate:
6497 case OMPD_allocate:
6498 case OMPD_task:
6499 case OMPD_simd:
6500 case OMPD_tile:
6501 case OMPD_unroll:
6502 case OMPD_sections:
6503 case OMPD_section:
6504 case OMPD_single:
6505 case OMPD_master:
6506 case OMPD_critical:
6507 case OMPD_taskyield:
6508 case OMPD_barrier:
6509 case OMPD_taskwait:
6510 case OMPD_taskgroup:
6511 case OMPD_atomic:
6512 case OMPD_flush:
6513 case OMPD_depobj:
6514 case OMPD_scan:
6515 case OMPD_teams:
6516 case OMPD_target_data:
6517 case OMPD_target_exit_data:
6518 case OMPD_target_enter_data:
6519 case OMPD_distribute:
6520 case OMPD_distribute_simd:
6521 case OMPD_distribute_parallel_for:
6522 case OMPD_distribute_parallel_for_simd:
6523 case OMPD_teams_distribute:
6524 case OMPD_teams_distribute_simd:
6525 case OMPD_teams_distribute_parallel_for:
6526 case OMPD_teams_distribute_parallel_for_simd:
6527 case OMPD_target_update:
6528 case OMPD_declare_simd:
6529 case OMPD_declare_variant:
6530 case OMPD_begin_declare_variant:
6531 case OMPD_end_declare_variant:
6532 case OMPD_declare_target:
6533 case OMPD_end_declare_target:
6534 case OMPD_declare_reduction:
6535 case OMPD_declare_mapper:
6536 case OMPD_taskloop:
6537 case OMPD_taskloop_simd:
6538 case OMPD_master_taskloop:
6539 case OMPD_master_taskloop_simd:
6540 case OMPD_parallel_master_taskloop:
6541 case OMPD_parallel_master_taskloop_simd:
6542 case OMPD_requires:
6543 case OMPD_unknown:
6544 break;
6545 default:
6546 break;
6547 }
6548 llvm_unreachable("Unsupported directive kind.");
6549}
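// Illustrative sketch (assumed example): for
// '#pragma omp target parallel thread_limit(8) num_threads(4)' both clauses
// are inspected; the constant num_threads value (4) is below the
// thread_limit constant (8), so DefaultVal becomes 4 and the num_threads
// expression is returned as the effective limit.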
6550
6551llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6552 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6553 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6554 "Clauses associated with the teams directive expected to be emitted "
6555 "only for the host!");
6556 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6557 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6558 "Expected target-based executable directive.");
6559 CGBuilderTy &Bld = CGF.Builder;
6560 llvm::Value *ThreadLimitVal = nullptr;
6561 llvm::Value *NumThreadsVal = nullptr;
6562 switch (DirectiveKind) {
6563 case OMPD_target: {
6564 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6565 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6566 return NumThreads;
6567 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6568 CGF.getContext(), CS->getCapturedStmt());
6569 // TODO: The standard is not clear how to resolve two thread limit clauses,
6570 // let's pick the teams one if it's present, otherwise the target one.
6571 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6572 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6573 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6574 ThreadLimitClause = TLC;
6575 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6576 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6577 CodeGenFunction::LexicalScope Scope(
6578 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6579 if (const auto *PreInit =
6580 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6581 for (const auto *I : PreInit->decls()) {
6582 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6583 CGF.EmitVarDecl(cast<VarDecl>(*I));
6584 } else {
6585 CodeGenFunction::AutoVarEmission Emission =
6586 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6587 CGF.EmitAutoVarCleanups(Emission);
6588 }
6589 }
6590 }
6591 }
6592 }
6593 if (ThreadLimitClause) {
6594 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6595 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6596 ThreadLimitVal =
6597 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6598 }
6599 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6600 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6601 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6602 CS = Dir->getInnermostCapturedStmt();
6603 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6604 CGF.getContext(), CS->getCapturedStmt());
6605 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6606 }
6607 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6608 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6609 CS = Dir->getInnermostCapturedStmt();
6610 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6611 return NumThreads;
6612 }
6613 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6614 return Bld.getInt32(1);
6615 }
6616 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6617 }
6618 case OMPD_target_teams: {
6619 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6620 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6621 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6622 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6623 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6624 ThreadLimitVal =
6625 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6626 }
6627 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6628 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6629 return NumThreads;
6630 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6631 CGF.getContext(), CS->getCapturedStmt());
6632 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6633 if (Dir->getDirectiveKind() == OMPD_distribute) {
6634 CS = Dir->getInnermostCapturedStmt();
6635 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6636 return NumThreads;
6637 }
6638 }
6639 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6640 }
6641 case OMPD_target_teams_distribute:
6642 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6643 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6644 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6645 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6646 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6647 ThreadLimitVal =
6648 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6649 }
6650 if (llvm::Value *NumThreads =
6651 getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
6652 return NumThreads;
6653 return Bld.getInt32(0);
6654 case OMPD_target_parallel:
6655 case OMPD_target_parallel_for:
6656 case OMPD_target_parallel_for_simd:
6657 case OMPD_target_teams_distribute_parallel_for:
6658 case OMPD_target_teams_distribute_parallel_for_simd: {
6659 llvm::Value *CondVal = nullptr;
6660 // Handle the if clause. If the if clause is present, the number of threads
6661 // is calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
6662 if (D.hasClausesOfKind<OMPIfClause>()) {
6663 const OMPIfClause *IfClause = nullptr;
6664 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6665 if (C->getNameModifier() == OMPD_unknown ||
6666 C->getNameModifier() == OMPD_parallel) {
6667 IfClause = C;
6668 break;
6669 }
6670 }
6671 if (IfClause) {
6672 const Expr *Cond = IfClause->getCondition();
6673 bool Result;
6674 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6675 if (!Result)
6676 return Bld.getInt32(1);
6677 } else {
6678 CodeGenFunction::RunCleanupsScope Scope(CGF);
6679 CondVal = CGF.EvaluateExprAsBool(Cond);
6680 }
6681 }
6682 }
6683 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6684 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6685 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6686 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6687 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6688 ThreadLimitVal =
6689 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6690 }
6691 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6692 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6693 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6694 llvm::Value *NumThreads = CGF.EmitScalarExpr(
6695 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6696 NumThreadsVal =
6697 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
6698 ThreadLimitVal = ThreadLimitVal
6699 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6700 ThreadLimitVal),
6701 NumThreadsVal, ThreadLimitVal)
6702 : NumThreadsVal;
6703 }
6704 if (!ThreadLimitVal)
6705 ThreadLimitVal = Bld.getInt32(0);
6706 if (CondVal)
6707 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6708 return ThreadLimitVal;
6709 }
6710 case OMPD_target_teams_distribute_simd:
6711 case OMPD_target_simd:
6712 return Bld.getInt32(1);
6713 case OMPD_parallel:
6714 case OMPD_for:
6715 case OMPD_parallel_for:
6716 case OMPD_parallel_master:
6717 case OMPD_parallel_sections:
6718 case OMPD_for_simd:
6719 case OMPD_parallel_for_simd:
6720 case OMPD_cancel:
6721 case OMPD_cancellation_point:
6722 case OMPD_ordered:
6723 case OMPD_threadprivate:
6724 case OMPD_allocate:
6725 case OMPD_task:
6726 case OMPD_simd:
6727 case OMPD_tile:
6728 case OMPD_unroll:
6729 case OMPD_sections:
6730 case OMPD_section:
6731 case OMPD_single:
6732 case OMPD_master:
6733 case OMPD_critical:
6734 case OMPD_taskyield:
6735 case OMPD_barrier:
6736 case OMPD_taskwait:
6737 case OMPD_taskgroup:
6738 case OMPD_atomic:
6739 case OMPD_flush:
6740 case OMPD_depobj:
6741 case OMPD_scan:
6742 case OMPD_teams:
6743 case OMPD_target_data:
6744 case OMPD_target_exit_data:
6745 case OMPD_target_enter_data:
6746 case OMPD_distribute:
6747 case OMPD_distribute_simd:
6748 case OMPD_distribute_parallel_for:
6749 case OMPD_distribute_parallel_for_simd:
6750 case OMPD_teams_distribute:
6751 case OMPD_teams_distribute_simd:
6752 case OMPD_teams_distribute_parallel_for:
6753 case OMPD_teams_distribute_parallel_for_simd:
6754 case OMPD_target_update:
6755 case OMPD_declare_simd:
6756 case OMPD_declare_variant:
6757 case OMPD_begin_declare_variant:
6758 case OMPD_end_declare_variant:
6759 case OMPD_declare_target:
6760 case OMPD_end_declare_target:
6761 case OMPD_declare_reduction:
6762 case OMPD_declare_mapper:
6763 case OMPD_taskloop:
6764 case OMPD_taskloop_simd:
6765 case OMPD_master_taskloop:
6766 case OMPD_master_taskloop_simd:
6767 case OMPD_parallel_master_taskloop:
6768 case OMPD_parallel_master_taskloop_simd:
6769 case OMPD_requires:
6770 case OMPD_metadirective:
6771 case OMPD_unknown:
6772 break;
6773 default:
6774 break;
6775 }
6776 llvm_unreachable("Unsupported directive kind.");
6777}
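// Illustrative sketch (assumed example): for
// '#pragma omp target parallel if (c) thread_limit(t) num_threads(n)' the
// emitted value is roughly 'c ? min((uint32_t)n, (uint32_t)t) : 1', built
// from the unsigned-compare selects above; with no clauses at all the
// result is 0, which lets the runtime choose its default.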
6778
6779namespace {
6780LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6781
6782// Utility to handle information from clauses associated with a given
6783// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6784// It provides a convenient interface to obtain the information and generate
6785// code for that information.
6786class MappableExprsHandler {
6787public:
6788 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6789 static unsigned getFlagMemberOffset() {
6790 unsigned Offset = 0;
6791 for (uint64_t Remain =
6792 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6793 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6794 !(Remain & 1); Remain = Remain >> 1)
6795 Offset++;
6796 return Offset;
6797 }
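  // Illustrative sketch (assuming the current flag layout, where
  // OMP_MAP_MEMBER_OF occupies the topmost 16 bits of the 64-bit flag word):
  // the loop counts the trailing zero bits of the mask, so for
  // OMP_MAP_MEMBER_OF == 0xFFFF000000000000 it returns 48, and a 1-based
  // member position P would be encoded as (uint64_t)P << getFlagMemberOffset().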
6798
6799 /// Class that holds debugging information for a data mapping to be passed to
6800 /// the runtime library.
6801 class MappingExprInfo {
6802 /// The variable declaration used for the data mapping.
6803 const ValueDecl *MapDecl = nullptr;
6804 /// The original expression used in the map clause, or null if there is
6805 /// none.
6806 const Expr *MapExpr = nullptr;
6807
6808 public:
6809 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6810 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6811
6812 const ValueDecl *getMapDecl() const { return MapDecl; }
6813 const Expr *getMapExpr() const { return MapExpr; }
6814 };
6815
6816 /// Class that associates information with a base pointer to be passed to the
6817 /// runtime library.
6818 class BasePointerInfo {
6819 /// The base pointer.
6820 llvm::Value *Ptr = nullptr;
6821 /// The base declaration that refers to this device pointer, or null if
6822 /// there is none.
6823 const ValueDecl *DevPtrDecl = nullptr;
6824
6825 public:
6826 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
6827 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
6828 llvm::Value *operator*() const { return Ptr; }
6829 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
6830 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
6831 };
6832
6833 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6834 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
6835 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
6836 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
6837 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
6838 using MapDimArrayTy = SmallVector<uint64_t, 4>;
6839 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
6840
6841 /// This structure contains combined information generated for mappable
6842 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6843 /// mappers, and non-contiguous information.
6844 struct MapCombinedInfoTy {
6845 struct StructNonContiguousInfo {
6846 bool IsNonContiguous = false;
6847 MapDimArrayTy Dims;
6848 MapNonContiguousArrayTy Offsets;
6849 MapNonContiguousArrayTy Counts;
6850 MapNonContiguousArrayTy Strides;
6851 };
6852 MapExprsArrayTy Exprs;
6853 MapBaseValuesArrayTy BasePointers;
6854 MapValuesArrayTy Pointers;
6855 MapValuesArrayTy Sizes;
6856 MapFlagsArrayTy Types;
6857 MapMappersArrayTy Mappers;
6858 StructNonContiguousInfo NonContigInfo;
6859
6860 /// Append arrays in \a CurInfo.
6861 void append(MapCombinedInfoTy &CurInfo) {
6862 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
6863 BasePointers.append(CurInfo.BasePointers.begin(),
6864 CurInfo.BasePointers.end());
6865 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
6866 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
6867 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
6868 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
6869 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
6870 CurInfo.NonContigInfo.Dims.end());
6871 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
6872 CurInfo.NonContigInfo.Offsets.end());
6873 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
6874 CurInfo.NonContigInfo.Counts.end());
6875 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
6876 CurInfo.NonContigInfo.Strides.end());
6877 }
6878 };
6879
6880 /// Map between a struct and its lowest & highest elements which have been
6881 /// mapped.
6882 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6883 /// HE(FieldIndex, Pointer)}
6884 struct StructRangeInfoTy {
6885 MapCombinedInfoTy PreliminaryMapData;
6886 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6887 0, Address::invalid()};
6888 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6889 0, Address::invalid()};
6890 Address Base = Address::invalid();
6891 Address LB = Address::invalid();
6892 bool IsArraySection = false;
6893 bool HasCompleteRecord = false;
6894 };
6895
6896private:
6897 /// Holds the information gathered for a single mappable expression.
6898 struct MapInfo {
6899 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6900 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6901 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6902 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6903 bool ReturnDevicePointer = false;
6904 bool IsImplicit = false;
6905 const ValueDecl *Mapper = nullptr;
6906 const Expr *VarRef = nullptr;
6907 bool ForDeviceAddr = false;
6908
6909 MapInfo() = default;
6910 MapInfo(
6911 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6912 OpenMPMapClauseKind MapType,
6913 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6914 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6915 bool ReturnDevicePointer, bool IsImplicit,
6916 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6917 bool ForDeviceAddr = false)
6918 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6919 MotionModifiers(MotionModifiers),
6920 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6921 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6922 };
6923
6924 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6925 /// member and there is no map information about it, then emission of that
6926 /// entry is deferred until the whole struct has been processed.
6927 struct DeferredDevicePtrEntryTy {
6928 const Expr *IE = nullptr;
6929 const ValueDecl *VD = nullptr;
6930 bool ForDeviceAddr = false;
6931
6932 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
6933 bool ForDeviceAddr)
6934 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
6935 };
6936
6937 /// The target directive from where the mappable clauses were extracted. It
6938 /// is either an executable directive or a user-defined mapper directive.
6939 llvm::PointerUnion<const OMPExecutableDirective *,
6940 const OMPDeclareMapperDecl *>
6941 CurDir;
6942
6943 /// Function the directive is being generated for.
6944 CodeGenFunction &CGF;
6945
6946 /// Set of all first private variables in the current directive.
6947 /// bool data is set to true if the variable is implicitly marked as
6948 /// firstprivate, false otherwise.
6949 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6950
6951 /// Map between device pointer declarations and their expression components.
6952 /// The key value for declarations in 'this' is null.
6953 llvm::DenseMap<
6954 const ValueDecl *,
6955 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6956 DevPointersMap;
6957
6958 /// Map between device addr declarations and their expression components.
6959 /// The key value for declarations in 'this' is null.
6960 llvm::DenseMap<
6961 const ValueDecl *,
6962 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6963 HasDevAddrsMap;
6964
6965 /// Map between lambda declarations and their map type.
6966 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6967
6968 llvm::Value *getExprTypeSize(const Expr *E) const {
6969 QualType ExprTy = E->getType().getCanonicalType();
6970
6971 // Calculate the size for array shaping expression.
6972 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
6973 llvm::Value *Size =
6974 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
6975 for (const Expr *SE : OAE->getDimensions()) {
6976 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
6977 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
6978 CGF.getContext().getSizeType(),
6979 SE->getExprLoc());
6980 Size = CGF.Builder.CreateNUWMul(Size, Sz);
6981 }
6982 return Size;
6983 }
6984
6985 // Reference types are ignored for mapping purposes.
6986 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
6987 ExprTy = RefTy->getPointeeType().getCanonicalType();
6988
6989 // Given that an array section is considered a built-in type, we need to
6990 // do the calculation based on the length of the section instead of relying
6991 // on CGF.getTypeSize(E->getType()).
6992 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
6993 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
6994 OAE->getBase()->IgnoreParenImpCasts())
6995 .getCanonicalType();
6996
6997 // If there is no length associated with the expression and the lower bound
6998 // is not specified either, that means we are using the whole length of the
6999 // base.
7000 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7001 !OAE->getLowerBound())
7002 return CGF.getTypeSize(BaseTy);
7003
7004 llvm::Value *ElemSize;
7005 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7006 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7007 } else {
7008 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7009 assert(ATy && "Expecting array type if not a pointer type.");
7010 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7011 }
7012
7013 // If we don't have a length at this point, that is because we have an
7014 // array section with a single element.
7015 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7016 return ElemSize;
7017
7018 if (const Expr *LenExpr = OAE->getLength()) {
7019 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7020 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7021 CGF.getContext().getSizeType(),
7022 LenExpr->getExprLoc());
7023 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7024 }
7025 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7026 OAE->getLowerBound() && "expected array_section[lb:].");
7027 // Size = max(sizeof(base) - lb * sizeof(elem), 0);
7028 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7029 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7030 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7031 CGF.getContext().getSizeType(),
7032 OAE->getLowerBound()->getExprLoc());
7033 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7034 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7035 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7036 LengthVal = CGF.Builder.CreateSelect(
7037 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7038 return LengthVal;
7039 }
7040 return CGF.getTypeSize(ExprTy);
7041 }
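  // Illustrative sketch (assumed examples): given 'double *p' and
  // 'int arr[100]', the section p[2:10] sizes to 10 * sizeof(double), the
  // open-ended section arr[lb:] sizes to
  // max(sizeof(arr) - lb * sizeof(int), 0) via the select above, and a plain
  // scalar falls through to CGF.getTypeSize of its type.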
7042
7043 /// Return the corresponding bits for a given map clause modifier. Add
7044 /// a flag marking the map as a pointer if requested. Add a flag marking the
7045 /// map as the first one of a series of maps that relate to the same map
7046 /// expression.
7047 OpenMPOffloadMappingFlags getMapTypeBits(
7048 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7049 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7050 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7051 OpenMPOffloadMappingFlags Bits =
7052 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7053 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7054 switch (MapType) {
7055 case OMPC_MAP_alloc:
7056 case OMPC_MAP_release:
7057 // alloc and release are the default behavior in the runtime library, i.e.,
7058 // if we don't pass any bits, alloc/release is what the runtime is going to
7059 // do. Therefore, we don't need to signal anything for these two type
7060 // modifiers.
7061 break;
7062 case OMPC_MAP_to:
7063 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7064 break;
7065 case OMPC_MAP_from:
7066 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7067 break;
7068 case OMPC_MAP_tofrom:
7069 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7070 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7071 break;
7072 case OMPC_MAP_delete:
7073 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7074 break;
7075 case OMPC_MAP_unknown:
7076 llvm_unreachable("Unexpected map type!");
7077 }
7078 if (AddPtrFlag)
7079 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7080 if (AddIsTargetParamFlag)
7081 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7082 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7083 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7084 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7085 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7086 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7087 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7088 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7089 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7090 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7091 if (IsNonContiguous)
7092 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7093 return Bits;
7094 }
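  // Illustrative sketch (assumed example): 'map(always, tofrom: x)' on a
  // captured target parameter yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_TARGET_PARAM, while
  // 'map(alloc: x)' contributes no type bits beyond the requested
  // pointer/param flags.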
7095
7096 /// Return true if the provided expression is a final array section. A
7097 /// final array section is one whose length can't be proved to be one.
7098 bool isFinalArraySectionExpression(const Expr *E) const {
7099 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7100
7101 // It is not an array section and therefore not a unity-size one.
7102 if (!OASE)
7103 return false;
7104
7105 // An array section with no colon always refers to a single element.
7106 if (OASE->getColonLocFirst().isInvalid())
7107 return false;
7108
7109 const Expr *Length = OASE->getLength();
7110
7111 // If we don't have a length we have to check if the array has size 1
7112 // for this dimension. Also, we should always expect a length if the
7113 // base type is a pointer.
7114 if (!Length) {
7115 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7116 OASE->getBase()->IgnoreParenImpCasts())
7117 .getCanonicalType();
7118 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7119 return ATy->getSize().getSExtValue() != 1;
7120 // If we don't have a constant dimension length, we have to consider
7121 // the current section as having any size, so it is not necessarily
7122 // unitary. If it happens to be unity size, that's the user's fault.
7123 return true;
7124 }
7125
7126 // Check if the length evaluates to 1.
7127 Expr::EvalResult Result;
7128 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7129 return true; // Can have a size other than 1.
7130
7131 llvm::APSInt ConstLength = Result.Val.getInt();
7132 return ConstLength.getSExtValue() != 1;
7133 }
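  // Illustrative sketch (assumed examples): for 'int a[10]', the section
  // a[0:1] has a provable unit length and is not final, a[0:n] is treated as
  // final because 'n' cannot be proved to equal 1, and a plain subscript a[3]
  // is not an array section at all.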
7134
7135 /// Generate the base pointers, section pointers, sizes, map type bits, and
7136 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7137 /// map type, map or motion modifiers, and expression components.
7138 /// \a IsFirstComponent should be set to true if the provided set of
7139 /// components is the first associated with a capture.
7140 void generateInfoForComponentList(
7141 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7142 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7143 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7144 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7145 bool IsFirstComponentList, bool IsImplicit,
7146 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7147 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7148 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7149 OverlappedElements = std::nullopt) const {
7150 // The following summarizes what has to be generated for each map and the
7151 // types below. The generated information is expressed in this order:
7152 // base pointer, section pointer, size, flags
7153 // (to add to the ones that come from the map type and modifier).
7154 //
7155 // double d;
7156 // int i[100];
7157 // float *p;
7158 // int **a = &i;
7159 //
7160 // struct S1 {
7161 // int i;
7162 // float f[50];
7163 // }
7164 // struct S2 {
7165 // int i;
7166 // float f[50];
7167 // S1 s;
7168 // double *p;
7169 // struct S2 *ps;
7170 // int &ref;
7171 // }
7172 // S2 s;
7173 // S2 *ps;
7174 //
7175 // map(d)
7176 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7177 //
7178 // map(i)
7179 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7180 //
7181 // map(i[1:23])
7182 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7183 //
7184 // map(p)
7185 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7186 //
7187 // map(p[1:24])
7188 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7189 // in unified shared memory mode or for local pointers
7190 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7191 //
7192 // map((*a)[0:3])
7193 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7194 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7195 //
7196 // map(**a)
7197 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7198 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7199 //
7200 // map(s)
7201 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7202 //
7203 // map(s.i)
7204 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7205 //
7206 // map(s.s.f)
7207 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7208 //
7209 // map(s.p)
7210 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7211 //
7212 // map(to: s.p[:22])
7213 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7214 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7215 // &(s.p), &(s.p[0]), 22*sizeof(double),
7216 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7217 // (*) alloc space for struct members, only this is a target parameter
7218 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7219 // optimizes this entry out, same in the examples below)
7220 // (***) map the pointee (map: to)
7221 //
7222 // map(to: s.ref)
7223 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7224 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7225 // (*) alloc space for struct members, only this is a target parameter
7226 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7227 // optimizes this entry out, same in the examples below)
7228 // (***) map the pointee (map: to)
7229 //
7230 // map(s.ps)
7231 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7232 //
7233 // map(from: s.ps->s.i)
7234 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7235 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7236 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7237 //
7238 // map(to: s.ps->ps)
7239 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7240 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7241 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7242 //
7243 // map(s.ps->ps->ps)
7244 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7245 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7246 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7247 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7248 //
7249 // map(to: s.ps->ps->s.f[:22])
7250 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7251 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7252 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7253 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7254 //
7255 // map(ps)
7256 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7257 //
7258 // map(ps->i)
7259 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7260 //
7261 // map(ps->s.f)
7262 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7263 //
7264 // map(from: ps->p)
7265 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7266 //
7267 // map(to: ps->p[:22])
7268 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7269 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7270 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7271 //
7272 // map(ps->ps)
7273 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7274 //
7275 // map(from: ps->ps->s.i)
7276 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7277 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7278 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7279 //
7280 // map(from: ps->ps->ps)
7281 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7282 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7283 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7284 //
7285 // map(ps->ps->ps->ps)
7286 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7287 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7288 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7289 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7290 //
7291 // map(to: ps->ps->ps->s.f[:22])
7292 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7293 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7294 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7295 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7296 //
7297 // map(to: s.f[:22]) map(from: s.p[:33])
7298 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7299 // sizeof(double*) (**), TARGET_PARAM
7300 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7301 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7302 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7303 // (*) allocate contiguous space needed to fit all mapped members even if
7304 // we allocate space for members not mapped (in this example,
7305 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7306 // them as well because they fall between &s.f[0] and &s.p)
7307 //
7308 // map(from: s.f[:22]) map(to: ps->p[:33])
7309 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7310 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7311 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7312 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7313 // (*) the struct this entry pertains to is the 2nd element in the list of
7314 // arguments, hence MEMBER_OF(2)
7315 //
7316 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7317 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7318 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7319 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7320 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7321 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7322 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7323 // (*) the struct this entry pertains to is the 4th element in the list
7324 // of arguments, hence MEMBER_OF(4)
7325
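[Editor's note] To make the tables above concrete, here is a minimal sketch of the source-level clauses they describe. The field order of S1/S2 is inferred from the entries above (the real definitions appear earlier in this comment block), and the function name is a hypothetical placeholder:

    // Hedged sketch -- layout inferred from the entries above, not copied
    // from the original declarations.
    struct S1 { int i; float f[50]; };
    struct S2 { int i; float f[50]; S1 s; double *p; S2 *ps; };

    void foo(S2 &s, S2 *ps) {
      // Expected to produce the combined TARGET_PARAM entry spanning
      // &s.f[0]..&s.p plus the MEMBER_OF(1) sub-entries listed above.
      #pragma omp target map(to: s.f[:22]) map(from: s.p[:33])
      { s.f[0] += 1.0f; }
    }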
7326 // Track if the map information being generated is the first for a capture.
7327 bool IsCaptureFirstInfo = IsFirstComponentList;
7328 // When the variable is on a declare target link or in a to clause with
7329 // unified memory, a reference is needed to hold the host/device address
7330 // of the variable.
7331 bool RequiresReference = false;
7332
7333 // Scan the components from the base to the complete expression.
7334 auto CI = Components.rbegin();
7335 auto CE = Components.rend();
7336 auto I = CI;
7337
7338 // Track if the map information being generated is the first for a list of
7339 // components.
7340 bool IsExpressionFirstInfo = true;
7341 bool FirstPointerInComplexData = false;
7342 Address BP = Address::invalid();
7343 const Expr *AssocExpr = I->getAssociatedExpression();
7344 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
       [1] Assuming 'AssocExpr' is not a 'CastReturnType'
7345 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
       [2] Assuming 'AssocExpr' is not a 'CastReturnType'
7346 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
       [3] Assuming 'AssocExpr' is not a 'CastReturnType'
7347
7348 if (isa<MemberExpr>(AssocExpr)) {
       [4] Assuming 'AssocExpr' is not a 'MemberExpr'
7349 // The base is the 'this' pointer. The content of the pointer is going
7350 // to be the base of the field being mapped.
7351 BP = CGF.LoadCXXThisAddress();
7352 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
       [4.1] 'AE' is null
7353 (OASE &&
7354 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
       [4.2] 'OASE' is null
7355 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7356 } else if (OAShE &&
       [4.3] 'OAShE' is null
7357 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
       [5] Taking false branch
7358 BP = Address(
7359 CGF.EmitScalarExpr(OAShE->getBase()),
7360 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7361 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7362 } else {
7363 // The base is the reference to the variable.
7364 // BP = &Var.
7365 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7366 if (const auto *VD =
7367 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
       [6] Assuming null pointer is passed into cast
       [6.1] 'VD' is null
       [7] Taking false branch
7368 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7369 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7370 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7371 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7372 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7373 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7374 RequiresReference = true;
7375 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7376 }
7377 }
7378 }
7379
7380 // If the variable is a pointer and is being dereferenced (i.e. is not
7381 // the last component), the base has to be the pointer itself, not its
7382 // reference. References are ignored for mapping purposes.
7383 QualType Ty =
7384 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7385 if (Ty->isAnyPointerType() && std::next(I) != CE) {
       [8] Taking false branch
7386 // No need to generate individual map information for the pointer, it
7387 // can be associated with the combined storage if shared memory mode is
7388 // active or the base declaration is not global variable.
7389 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7390 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7391 !VD || VD->hasLocalStorage())
7392 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7393 else
7394 FirstPointerInComplexData = true;
7395 ++I;
7396 }
7397 }
7398
7399 // Track whether a component of the list should be marked as MEMBER_OF some
7400 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7401 // in a component list should be marked as MEMBER_OF, all subsequent entries
7402 // do not belong to the base struct. E.g.
7403 // struct S2 s;
7404 // s.ps->ps->ps->f[:]
7405 // (1) (2) (3) (4)
7406 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7407 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7408 // is the pointee of ps(2) which is not a member of struct s, so it should not
7409 // be marked as such (it is still PTR_AND_OBJ).
7410 // The variable is initialized to false so that PTR_AND_OBJ entries which
7411 // are not struct members are not considered (e.g. array of pointers to
7412 // data).
7413 bool ShouldBeMemberOf = false;
7414
7415 // Variable keeping track of whether or not we have encountered a component
7416 // in the component list which is a member expression. Useful when we have a
7417 // pointer or a final array section, in which case it is the previous
7418 // component in the list which tells us whether we have a member expression.
7419 // E.g. X.f[:]
7420 // While processing the final array section "[:]" it is "f" which tells us
7421 // whether we are dealing with a member of a declared struct.
7422 const MemberExpr *EncounteredME = nullptr;
7423
7424 // Track the total number of dimensions. Start from one for the dummy
7425 // dimension.
7426 uint64_t DimSize = 1;
7427
7428 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7429 bool IsPrevMemberReference = false;
7430
7431 for (; I != CE; ++I) {
       [9] Loop condition is false. Execution continues on line 7748
7432 // If the current component is member of a struct (parent struct) mark it.
7433 if (!EncounteredME) {
7434 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7435 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7436 // as MEMBER_OF the parent struct.
7437 if (EncounteredME) {
7438 ShouldBeMemberOf = true;
7439 // Do not emit as complex pointer if this is actually not array-like
7440 // expression.
7441 if (FirstPointerInComplexData) {
7442 QualType Ty = std::prev(I)
7443 ->getAssociatedDeclaration()
7444 ->getType()
7445 .getNonReferenceType();
7446 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7447 FirstPointerInComplexData = false;
7448 }
7449 }
7450 }
7451
7452 auto Next = std::next(I);
7453
7454 // We need to generate the addresses and sizes if this is the last
7455 // component, if the component is a pointer or if it is an array section
7456 // whose length can't be proved to be one. If this is a pointer, it
7457 // becomes the base address for the following components.
7458
7459 // A final array section is one whose length can't be proved to be one.
7460 // If the map item is non-contiguous then we don't treat any array section
7461 // as final array section.
7462 bool IsFinalArraySection =
7463 !IsNonContiguous &&
7464 isFinalArraySectionExpression(I->getAssociatedExpression());
7465
7466 // If we have a declaration for the mapping, use that; otherwise use
7467 // the base declaration of the map clause.
7468 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7469 ? I->getAssociatedDeclaration()
7470 : BaseDecl;
7471 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7472 : MapExpr;
7473
7474 // Get information on whether the element is a pointer. Have to do a
7475 // special treatment for array sections given that they are built-in
7476 // types.
7477 const auto *OASE =
7478 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7479 const auto *OAShE =
7480 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7481 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7482 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7483 bool IsPointer =
7484 OAShE ||
7485 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7486 .getCanonicalType()
7487 ->isAnyPointerType()) ||
7488 I->getAssociatedExpression()->getType()->isAnyPointerType();
7489 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7490 MapDecl &&
7491 MapDecl->getType()->isLValueReferenceType();
7492 bool IsNonDerefPointer = IsPointer &&
7493 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7494 !IsNonContiguous;
7495
7496 if (OASE)
7497 ++DimSize;
7498
7499 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7500 IsFinalArraySection) {
7501 // If this is not the last component, we expect the pointer to be
7502 // associated with an array expression or member expression.
7503 assert((Next == CE ||
7504 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7505 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7506 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
7507 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7508 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7509 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7510 "Unexpected expression");
7511
7512 Address LB = Address::invalid();
7513 Address LowestElem = Address::invalid();
7514 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7515 const MemberExpr *E) {
7516 const Expr *BaseExpr = E->getBase();
7517 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7518 // scalar.
7519 LValue BaseLV;
7520 if (E->isArrow()) {
7521 LValueBaseInfo BaseInfo;
7522 TBAAAccessInfo TBAAInfo;
7523 Address Addr =
7524 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7525 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7526 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7527 } else {
7528 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7529 }
7530 return BaseLV;
7531 };
7532 if (OAShE) {
7533 LowestElem = LB =
7534 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7535 CGF.ConvertTypeForMem(
7536 OAShE->getBase()->getType()->getPointeeType()),
7537 CGF.getContext().getTypeAlignInChars(
7538 OAShE->getBase()->getType()));
7539 } else if (IsMemberReference) {
7540 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7541 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7542 LowestElem = CGF.EmitLValueForFieldInitialization(
7543 BaseLVal, cast<FieldDecl>(MapDecl))
7544 .getAddress(CGF);
7545 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7546 .getAddress(CGF);
7547 } else {
7548 LowestElem = LB =
7549 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7550 .getAddress(CGF);
7551 }
7552
7553 // If this component is a pointer inside the base struct then we don't
7554 // need to create any entry for it - it will be combined with the object
7555 // it is pointing to into a single PTR_AND_OBJ entry.
7556 bool IsMemberPointerOrAddr =
7557 EncounteredME &&
7558 (((IsPointer || ForDeviceAddr) &&
7559 I->getAssociatedExpression() == EncounteredME) ||
7560 (IsPrevMemberReference && !IsPointer) ||
7561 (IsMemberReference && Next != CE &&
7562 !Next->getAssociatedExpression()->getType()->isPointerType()));
7563 if (!OverlappedElements.empty() && Next == CE) {
7564 // Handle base element with the info for overlapped elements.
7565 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7566 assert(!IsPointer &&
7567 "Unexpected base element with the pointer type.");
7568 // Mark the whole struct as the struct that requires allocation on the
7569 // device.
7570 PartialStruct.LowestElem = {0, LowestElem};
7571 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7572 I->getAssociatedExpression()->getType());
7573 Address HB = CGF.Builder.CreateConstGEP(
7574 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7575 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7576 TypeSize.getQuantity() - 1);
7577 PartialStruct.HighestElem = {
7578 std::numeric_limits<decltype(
7579 PartialStruct.HighestElem.first)>::max(),
7580 HB};
7581 PartialStruct.Base = BP;
7582 PartialStruct.LB = LB;
7583 assert(
7584 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7585 "Overlapped elements must be used only once for the variable.");
7586 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
7587 // Emit entries for the non-overlapped data.
7588 OpenMPOffloadMappingFlags Flags =
7589 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7590 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7591 /*AddPtrFlag=*/false,
7592 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7593 llvm::Value *Size = nullptr;
7594 // Do bitcopy of all non-overlapped structure elements.
7595 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7596 Component : OverlappedElements) {
7597 Address ComponentLB = Address::invalid();
7598 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7599 Component) {
7600 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7601 const auto *FD = dyn_cast<FieldDecl>(VD);
7602 if (FD && FD->getType()->isLValueReferenceType()) {
7603 const auto *ME =
7604 cast<MemberExpr>(MC.getAssociatedExpression());
7605 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7606 ComponentLB =
7607 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7608 .getAddress(CGF);
7609 } else {
7610 ComponentLB =
7611 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7612 .getAddress(CGF);
7613 }
7614 Size = CGF.Builder.CreatePtrDiff(
7615 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7616 CGF.EmitCastToVoidPtr(LB.getPointer()));
7617 break;
7618 }
7619 }
7620 assert(Size && "Failed to determine structure size");
7621 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7622 CombinedInfo.BasePointers.push_back(BP.getPointer());
7623 CombinedInfo.Pointers.push_back(LB.getPointer());
7624 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7625 Size, CGF.Int64Ty, /*isSigned=*/true));
7626 CombinedInfo.Types.push_back(Flags);
7627 CombinedInfo.Mappers.push_back(nullptr);
7628 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7629 : 1);
7630 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7631 }
7632 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7633 CombinedInfo.BasePointers.push_back(BP.getPointer());
7634 CombinedInfo.Pointers.push_back(LB.getPointer());
7635 Size = CGF.Builder.CreatePtrDiff(
7636 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
7637 CGF.EmitCastToVoidPtr(LB.getPointer()));
7638 CombinedInfo.Sizes.push_back(
7639 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7640 CombinedInfo.Types.push_back(Flags);
7641 CombinedInfo.Mappers.push_back(nullptr);
7642 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7643 : 1);
7644 break;
7645 }
7646 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7647 if (!IsMemberPointerOrAddr ||
7648 (Next == CE && MapType != OMPC_MAP_unknown)) {
7649 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7650 CombinedInfo.BasePointers.push_back(BP.getPointer());
7651 CombinedInfo.Pointers.push_back(LB.getPointer());
7652 CombinedInfo.Sizes.push_back(
7653 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7654 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7655 : 1);
7656
7657 // If Mapper is valid, the last component inherits the mapper.
7658 bool HasMapper = Mapper && Next == CE;
7659 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
7660
7661 // We need to add a pointer flag for each map that comes from the
7662 // same expression except for the first one. We also need to signal
7663 // this map is the first one that relates with the current capture
7664 // (there is a set of entries for each capture).
7665 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7666 MapType, MapModifiers, MotionModifiers, IsImplicit,
7667 !IsExpressionFirstInfo || RequiresReference ||
7668 FirstPointerInComplexData || IsMemberReference,
7669 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
7670
7671 if (!IsExpressionFirstInfo || IsMemberReference) {
7672 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7673 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7674 if (IsPointer || (IsMemberReference && Next != CE))
7675 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7676 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7677 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7678 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7679 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7680
7681 if (ShouldBeMemberOf) {
7682 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7683 // should be later updated with the correct value of MEMBER_OF.
7684 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7685 // From now on, all subsequent PTR_AND_OBJ entries should not be
7686 // marked as MEMBER_OF.
7687 ShouldBeMemberOf = false;
7688 }
7689 }
7690
7691 CombinedInfo.Types.push_back(Flags);
7692 }
7693
7694 // If we have encountered a member expression so far, keep track of the
7695 // mapped member. If the parent is "*this", then the value declaration
7696 // is nullptr.
7697 if (EncounteredME) {
7698 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
7699 unsigned FieldIndex = FD->getFieldIndex();
7700
7701 // Update info about the lowest and highest elements for this struct
7702 if (!PartialStruct.Base.isValid()) {
7703 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7704 if (IsFinalArraySection) {
7705 Address HB =
7706 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
7707 .getAddress(CGF);
7708 PartialStruct.HighestElem = {FieldIndex, HB};
7709 } else {
7710 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7711 }
7712 PartialStruct.Base = BP;
7713 PartialStruct.LB = BP;
7714 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7715 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7716 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7717 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7718 }
7719 }
7720
7721 // Need to emit combined struct for array sections.
7722 if (IsFinalArraySection || IsNonContiguous)
7723 PartialStruct.IsArraySection = true;
7724
7725 // If we have a final array section, we are done with this expression.
7726 if (IsFinalArraySection)
7727 break;
7728
7729 // The pointer becomes the base for the next element.
7730 if (Next != CE)
7731 BP = IsMemberReference ? LowestElem : LB;
7732
7733 IsExpressionFirstInfo = false;
7734 IsCaptureFirstInfo = false;
7735 FirstPointerInComplexData = false;
7736 IsPrevMemberReference = IsMemberReference;
7737 } else if (FirstPointerInComplexData) {
7738 QualType Ty = Components.rbegin()
7739 ->getAssociatedDeclaration()
7740 ->getType()
7741 .getNonReferenceType();
7742 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7743 FirstPointerInComplexData = false;
7744 }
7745 }
7746 // If we ran over the whole component list, allocate space for the
7747 // whole record.
7748 if (!EncounteredME)
       [9.1] 'EncounteredME' is null
       [10] Taking true branch
7749 PartialStruct.HasCompleteRecord = true;
7750
7751 if (!IsNonContiguous)
       [11] Assuming 'IsNonContiguous' is true
       [12] Taking false branch
7752 return;
7753
7754 const ASTContext &Context = CGF.getContext();
7755
7756 // For supporting stride in array section, we need to initialize the first
7757 // dimension size as 1, first offset as 0, and first count as 1
7758 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
7759 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7760 MapValuesArrayTy CurStrides;
7761 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
7762 uint64_t ElementTypeSize;
       [13] 'ElementTypeSize' declared without an initial value
7763
7764 // Collect Size information for each dimension and get the element size as
7765 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7766 // should be [10, 10] and the first stride is 4 bytes.
7767 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7768 Components) {
       [14] Assuming '__begin2' is equal to '__end2'
7769 const Expr *AssocExpr = Component.getAssociatedExpression();
7770 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7771
7772 if (!OASE)
7773 continue;
7774
7775 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
7776 auto *CAT = Context.getAsConstantArrayType(Ty);
7777 auto *VAT = Context.getAsVariableArrayType(Ty);
7778
7779 // We need all the dimension sizes except for the last dimension.
7780 assert((VAT || CAT || &Component == &*Components.begin()) &&
7781 "Should be either ConstantArray or VariableArray if not the "
7782 "first Component");
7783
7784 // Get element size if CurStrides is empty.
7785 if (CurStrides.empty()) {
7786 const Type *ElementType = nullptr;
7787 if (CAT)
7788 ElementType = CAT->getElementType().getTypePtr();
7789 else if (VAT)
7790 ElementType = VAT->getElementType().getTypePtr();
7791 else
7792 assert(&Component == &*Components.begin() &&
7793 "Only expect pointer (non CAT or VAT) when this is the "
7794 "first Component");
7795 // If ElementType is null, then it means the base is a pointer
7796 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7797 // for the next iteration.
7798 if (ElementType) {
7799 // When the base is a pointer, we need to remove one
7800 // level of indirection.
7801 if (&Component != &*Components.begin())
7802 ElementType = ElementType->getPointeeOrArrayElementType();
7803 ElementTypeSize =
7804 Context.getTypeSizeInChars(ElementType).getQuantity();
7805 CurStrides.push_back(
7806 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
7807 }
7808 }
7809 // Get the dimension value for every dimension except the last, since
7810 // we don't need it.
7811 if (DimSizes.size() < Components.size() - 1) {
7812 if (CAT)
7813 DimSizes.push_back(llvm::ConstantInt::get(
7814 CGF.Int64Ty, CAT->getSize().getZExtValue()));
7815 else if (VAT)
7816 DimSizes.push_back(CGF.Builder.CreateIntCast(
7817 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
7818 /*IsSigned=*/false));
7819 }
7820 }
7821
7822 // Skip the dummy dimension since we already have its information.
7823 auto *DI = DimSizes.begin() + 1;
7824 // Product of dimensions.
7825 llvm::Value *DimProd =
7826 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
       [15] 2nd function call argument is an uninitialized value
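[Editor's note] This is the defect the report flags: on the path reconstructed above, step [14] assumes the Components range is empty, so the loop that would assign ElementTypeSize (source line 7803) never runs, and the value read here at line 7826 is uninitialized. A reduced, self-contained sketch of the same pattern (hypothetical code, not from LLVM):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> components;  // empty, mirroring step [14]
      uint64_t elementTypeSize;     // declared without an initial value ([13])
      for (int c : components)
        elementTypeSize = static_cast<uint64_t>(c);  // never executes
      // Reading elementTypeSize here is undefined behavior -- the same shape
      // as passing it to llvm::ConstantInt::get above (step [15]).
      std::printf("%llu\n", static_cast<unsigned long long>(elementTypeSize));
    }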
7827
7828 // Collect info for non-contiguous maps. Notice that offset, count, and
7829 // stride are only meaningful for array sections, so we insert a null for
7830 // anything other than an array section.
7831 // Also, the sizes of offset, count, and stride are not the same as those
7832 // of pointers, base_pointers, sizes, or dims. Instead, the sizes of
7833 // offset, count, and stride equal the number of non-contiguous
7834 // declarations in the target update to/from clause.
7835 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7836 Components) {
7837 const Expr *AssocExpr = Component.getAssociatedExpression();
7838
7839 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
7840 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7841 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
7842 /*isSigned=*/false);
7843 CurOffsets.push_back(Offset);
7844 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
7845 CurStrides.push_back(CurStrides.back());
7846 continue;
7847 }
7848
7849 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7850
7851 if (!OASE)
7852 continue;
7853
7854 // Offset
7855 const Expr *OffsetExpr = OASE->getLowerBound();
7856 llvm::Value *Offset = nullptr;
7857 if (!OffsetExpr) {
7858 // If offset is absent, then we just set it to zero.
7859 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
7860 } else {
7861 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
7862 CGF.Int64Ty,
7863 /*isSigned=*/false);
7864 }
7865 CurOffsets.push_back(Offset);
7866
7867 // Count
7868 const Expr *CountExpr = OASE->getLength();
7869 llvm::Value *Count = nullptr;
7870 if (!CountExpr) {
7871 // In Clang, once a high dimension is an array section, we construct all
7872 // the lower dimensions as array sections too; however, for a case like
7873 // arr[0:2][2], Clang constructs the inner dimension as an array section
7874 // but it actually is not in array section form according to the spec.
7875 if (!OASE->getColonLocFirst().isValid() &&
7876 !OASE->getColonLocSecond().isValid()) {
7877 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
7878 } else {
7879 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7880 // When the length is absent it defaults to ⌈(size −
7881 // lower-bound)/stride⌉, where size is the size of the array
7882 // dimension.
7883 const Expr *StrideExpr = OASE->getStride();
7884 llvm::Value *Stride =
7885 StrideExpr
7886 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7887 CGF.Int64Ty, /*isSigned=*/false)
7888 : nullptr;
7889 if (Stride)
7890 Count = CGF.Builder.CreateUDiv(
7891 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
7892 else
7893 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
7894 }
7895 } else {
7896 Count = CGF.EmitScalarExpr(CountExpr);
7897 }
7898 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
7899 CurCounts.push_back(Count);
7900
7901 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7902 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7903 // Offset Count Stride
7904 // D0 0 1 4 (int) <- dummy dimension
7905 // D1 0 2 8 (2 * (1) * 4)
7906 // D2 1 2 20 (1 * (1 * 5) * 4)
7907 // D3 0 2 200 (2 * (1 * 5 * 5) * 4)
7908 const Expr *StrideExpr = OASE->getStride();
7909 llvm::Value *Stride =
7910 StrideExpr
7911 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
7912 CGF.Int64Ty, /*isSigned=*/false)
7913 : nullptr;
7914 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
7915 if (Stride)
7916 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
7917 else
7918 CurStrides.push_back(DimProd);
7919 if (DI != DimSizes.end())
7920 ++DI;
7921 }
7922
7923 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
7924 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
7925 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
7926 }
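[Editor's note] As a usage illustration of the non-contiguous path above: the worked D0-D3 table corresponds to a strided target update such as the following sketch (a standalone, assumed example matching the comment's `int arr[5][5][5]`):

    int arr[5][5][5];

    void update_strided() {
      // Each strided dimension contributes an (offset, count, stride) triple;
      // strides are multiplied out into bytes as in the D0-D3 table above.
      #pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])
    }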
7927
7928 /// Return the adjusted map modifiers if the declaration a capture refers to
7929 /// appears in a first-private clause. This is expected to be used only with
7930 /// directives that start with 'target'.
7931 OpenMPOffloadMappingFlags
7932 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7933 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7934
7935 // A first private variable captured by reference will use only the
7936 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7937 // declaration is known as first-private in this handler.
7938 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7939 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7940 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7941 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7942 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7943 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7944 }
7945 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
7946 if (I != LambdasMap.end())
7947 // For map(to: lambda): use the user-specified map type.
7948 return getMapTypeBits(
7949 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
7950 /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
7951 /*AddPtrFlag=*/false,
7952 /*AddIsTargetParamFlag=*/false,
7953 /*isNonContiguous=*/false);
7954 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7955 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7956 }
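[Editor's note] For illustration, a sketch of the captures this handler classifies (hypothetical example; the flags named in the comments are the ones returned above):

    void use(int *p, int x) {
      // 'p' is a firstprivate pointer: expected TO | PTR_AND_OBJ.
      // 'x' is a non-pointer firstprivate: expected PRIVATE | TO.
      #pragma omp target firstprivate(p, x)
      { x += p ? 1 : 0; }
    }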
7957
7958 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7959 // Shift the position left by getFlagMemberOffset() bits.
7960 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7961 << getFlagMemberOffset());
7962 }
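[Editor's note] A worked illustration of the encoding (the 48-bit offset is an assumption made here only to show the arithmetic; the real value comes from getFlagMemberOffset()):

    // MEMBER_OF(n) stores n in the high bits of the 64-bit flag word, so
    // position 1 (the 2nd argument) becomes MEMBER_OF(2) = 2 << 48.
    uint64_t memberOfFlag(unsigned position, unsigned offset = 48) {
      return (static_cast<uint64_t>(position) + 1) << offset;
    }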
7963
7964 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7965 OpenMPOffloadMappingFlags MemberOfFlag) {
7966 // If the entry is PTR_AND_OBJ but has not been marked with the special
7967 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7968 // marked as MEMBER_OF.
7969 if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7970 Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
7971 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
7972 (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
7973 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
7974 return;
7975
7976 // Reset the placeholder value to prepare the flag for the assignment of the
7977 // proper MEMBER_OF value.
7978 Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7979 Flags |= MemberOfFlag;
7980 }
7981
7982 void getPlainLayout(const CXXRecordDecl *RD,
7983 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7984 bool AsBase) const {
7985 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7986
7987 llvm::StructType *St =
7988 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7989
7990 unsigned NumElements = St->getNumElements();
7991 llvm::SmallVector<
7992 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7993 RecordLayout(NumElements);
7994
7995 // Fill bases.
7996 for (const auto &I : RD->bases()) {
7997 if (I.isVirtual())
7998 continue;
7999 const auto *Base = I.getType()->getAsCXXRecordDecl();
8000 // Ignore empty bases.
8001 if (Base->isEmpty() || CGF.getContext()
8002 .getASTRecordLayout(Base)
8003 .getNonVirtualSize()
8004 .isZero())
8005 continue;
8006
8007 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8008 RecordLayout[FieldIndex] = Base;
8009 }
8010 // Fill in virtual bases.
8011 for (const auto &I : RD->vbases()) {
8012 const auto *Base = I.getType()->getAsCXXRecordDecl();
8013 // Ignore empty bases.
8014 if (Base->isEmpty())
8015 continue;
8016 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8017 if (RecordLayout[FieldIndex])
8018 continue;
8019 RecordLayout[FieldIndex] = Base;
8020 }
8021 // Fill in all the fields.
8022 assert(!RD->isUnion() && "Unexpected union.");
8023 for (const auto *Field : RD->fields()) {
8024 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8025 // will fill in later.)
8026 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8027 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8028 RecordLayout[FieldIndex] = Field;
8029 }
8030 }
8031 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8032 &Data : RecordLayout) {
8033 if (Data.isNull())
8034 continue;
8035 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8036 getPlainLayout(Base, Layout, /*AsBase=*/true);
8037 else
8038 Layout.push_back(Data.get<const FieldDecl *>());
8039 }
8040 }
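[Editor's note] A small sketch of what this flattening yields (hypothetical types; empty bases, bitfields, and zero-size fields are skipped exactly as the loops above do):

    struct A { int a; };
    struct B { double b; };
    struct C : A, B { float c; };
    // getPlainLayout on C would visit the bases recursively and emit the
    // FieldDecls in LLVM layout order: A::a, B::b, C::c.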
8041
8042 /// Generate all the base pointers, section pointers, sizes, map types, and
8043 /// mappers for the extracted mappable expressions (all included in \a
8044 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8045 /// pair of the relevant declaration and index where it occurs is appended to
8046 /// the device pointers info array.
8047 void generateAllInfoForClauses(
8048 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8049 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8050 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8051 // We have to process the component lists that relate with the same
8052 // declaration in a single chunk so that we can generate the map flags
8053 // correctly. Therefore, we organize all lists in a map.
8054 enum MapKind { Present, Allocs, Other, Total };
8055 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8056 SmallVector<SmallVector<MapInfo, 8>, 4>>
8057 Info;
8058
8059 // Helper function to fill the information map for the different supported
8060 // clauses.
8061 auto &&InfoGen =
8062 [&Info, &SkipVarSet](
8063 const ValueDecl *D, MapKind Kind,
8064 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8065 OpenMPMapClauseKind MapType,
8066 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8067 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8068 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8069 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8070 if (SkipVarSet.contains(D))
8071 return;
8072 auto It = Info.find(D);
8073 if (It == Info.end())
8074 It = Info
8075 .insert(std::make_pair(
8076 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8077 .first;
8078 It->second[Kind].emplace_back(
8079 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8080 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8081 };
8082
8083 for (const auto *Cl : Clauses) {
8084 const auto *C = dyn_cast<OMPMapClause>(Cl);
8085 if (!C)
8086 continue;
8087 MapKind Kind = Other;
8088 if (llvm::is_contained(C->getMapTypeModifiers(),
8089 OMPC_MAP_MODIFIER_present))
8090 Kind = Present;
8091 else if (C->getMapType() == OMPC_MAP_alloc)
8092 Kind = Allocs;
8093 const auto *EI = C->getVarRefs().begin();
8094 for (const auto L : C->component_lists()) {
8095 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8096 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8097 C->getMapTypeModifiers(), std::nullopt,
8098 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8099 E);
8100 ++EI;
8101 }
8102 }
8103 for (const auto *Cl : Clauses) {
8104 const auto *C = dyn_cast<OMPToClause>(Cl);
8105 if (!C)
8106 continue;
8107 MapKind Kind = Other;
8108 if (llvm::is_contained(C->getMotionModifiers(),
8109 OMPC_MOTION_MODIFIER_present))
8110 Kind = Present;
8111 const auto *EI = C->getVarRefs().begin();
8112 for (const auto L : C->component_lists()) {
8113 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
8114 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8115 C->isImplicit(), std::get<2>(L), *EI);
8116 ++EI;
8117 }
8118 }
8119 for (const auto *Cl : Clauses) {
8120 const auto *C = dyn_cast<OMPFromClause>(Cl);
8121 if (!C)
8122 continue;
8123 MapKind Kind = Other;
8124 if (llvm::is_contained(C->getMotionModifiers(),
8125 OMPC_MOTION_MODIFIER_present))
8126 Kind = Present;
8127 const auto *EI = C->getVarRefs().begin();
8128 for (const auto L : C->component_lists()) {
8129 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
8130 std::nullopt, C->getMotionModifiers(),
8131 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8132 *EI);
8133 ++EI;
8134 }
8135 }
8136
8137 // Look at the use_device_ptr and use_device_addr clauses information and
8138 // mark the existing map entries as such. If there is no map information for
8139 // an entry in the use_device_ptr and use_device_addr list, we create one
8140 // with map type 'alloc' and zero size section. It is the user's fault if that
8141 // was not mapped before. If there is no map information and the pointer is
8142 // a struct member, then we defer the emission of that entry until the whole
8143 // struct has been processed.
8144 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8145 SmallVector<DeferredDevicePtrEntryTy, 4>>
8146 DeferredInfo;
8147 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8148
8149 auto &&UseDeviceDataCombinedInfoGen =
8150 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8151 CodeGenFunction &CGF) {
8152 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8153 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8154 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8155 UseDeviceDataCombinedInfo.Sizes.push_back(
8156 llvm::Constant::getNullValue(CGF.Int64Ty));
8157 UseDeviceDataCombinedInfo.Types.push_back(
8158 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8159 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8160 };
8161
8162 auto &&MapInfoGen =
8163 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8164 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8165 OMPClauseMappableExprCommon::MappableExprComponentListRef
8166 Components,
8167 bool IsImplicit, bool IsDevAddr) {
8168 // We didn't find any match in our map information, so we generate a
8169 // zero-size array section; if the pointer is a struct member, we defer
8170 // this action until the whole struct has been processed.
8171 if (isa<MemberExpr>(IE)) {
8172 // Insert the pointer into Info to be processed by
8173 // generateInfoForComponentList. Because it is a member pointer
8174 // without a pointee, no entry will be generated for it, therefore
8175 // we need to generate one after the whole struct has been
8176 // processed. Nonetheless, generateInfoForComponentList must be
8177 // called to take the pointer into account for the calculation of
8178 // the range of the partial struct.
8179 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
8180 std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
8181 nullptr, nullptr, IsDevAddr);
8182 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8183 } else {
8184 llvm::Value *Ptr;
8185 if (IsDevAddr) {
8186 if (IE->isGLValue())
8187 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8188 else
8189 Ptr = CGF.EmitScalarExpr(IE);
8190 } else {
8191 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8192 }
8193 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
8194 }
8195 };
8196
8197 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8198 const Expr *IE, bool IsDevAddr) -> bool {
8199 // We potentially have map information for this declaration already.
8200 // Look for the first set of components that refer to it. If found,
8201 // return true.
8202 // If the first component is a member expression, we have to look into
8203 // 'this', which maps to null in the map of map information. Otherwise
8204 // look directly for the information.
8205 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8206 if (It != Info.end()) {
8207 bool Found = false;
8208 for (auto &Data : It->second) {
8209 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8210 return MI.Components.back().getAssociatedDeclaration() == VD;
8211 });
8212 // If we found a map entry, signal that the pointer has to be
8213 // returned and move on to the next declaration. Exclude cases where
8214 // the base pointer is mapped as array subscript, array section or
8215 // array shaping. The base address is passed as a pointer to base in
8216 // this case and cannot be used as a base for use_device_ptr list
8217 // item.
8218 if (CI != Data.end()) {
8219 if (IsDevAddr) {
8220 CI->ReturnDevicePointer = true;
8221 Found = true;
8222 break;
8223 } else {
8224 auto PrevCI = std::next(CI->Components.rbegin());
8225 const auto *VarD = dyn_cast<VarDecl>(VD);
8226 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8227 isa<MemberExpr>(IE) ||
8228 !VD->getType().getNonReferenceType()->isPointerType() ||
8229 PrevCI == CI->Components.rend() ||
8230 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8231 VarD->hasLocalStorage()) {
8232 CI->ReturnDevicePointer = true;
8233 Found = true;
8234 break;
8235 }
8236 }
8237 }
8238 }
8239 return Found;
8240 }
8241 return false;
8242 };
8243
8244 // Look at the use_device_ptr clause information and mark the existing map
8245 // entries as such. If there is no map information for an entry in the
8246 // use_device_ptr list, we create one with map type 'alloc' and zero size
8247 // section. It is the user's fault if that was not mapped before. If there is
8248 // no map information and the pointer is a struct member, then we defer the
8249 // emission of that entry until the whole struct has been processed.
8250 for (const auto *Cl : Clauses) {
8251 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8252 if (!C)
8253 continue;
8254 for (const auto L : C->component_lists()) {
8255 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8256 std::get<1>(L);
8257 assert(!Components.empty() &&
8258 "Not expecting empty list of components!");
8259 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8260 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8261 const Expr *IE = Components.back().getAssociatedExpression();
8262 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8263 continue;
8264 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8265 /*IsDevAddr=*/false);
8266 }
8267 }
8268
8269 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8270 for (const auto *Cl : Clauses) {
8271 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8272 if (!C)
8273 continue;
8274 for (const auto L : C->component_lists()) {
8275 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8276 std::get<1>(L);
8277 assert(!std::get<1>(L).empty() &&
8278 "Not expecting empty list of components!");
8279 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8280 if (!Processed.insert(VD).second)
8281 continue;
8282 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8283 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8284 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8285 continue;
8286 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8287 /*IsDevAddr=*/true);
8288 }
8289 }
8290
8291 for (const auto &Data : Info) {
8292 StructRangeInfoTy PartialStruct;
8293 // Temporary generated information.
8294 MapCombinedInfoTy CurInfo;
8295 const Decl *D = Data.first;
8296 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8297 for (const auto &M : Data.second) {
8298 for (const MapInfo &L : M) {
8299 assert(!L.Components.empty() &&
8300 "Not expecting declaration with no component lists.");
8301
8302 // Remember the current base pointer index.
8303 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8304 CurInfo.NonContigInfo.IsNonContiguous =
8305 L.Components.back().isNonContiguous();
8306 generateInfoForComponentList(
8307 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8308 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8309 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8310
8311 // If this entry relates with a device pointer, set the relevant
8312 // declaration and add the 'return pointer' flag.
8313 if (L.ReturnDevicePointer) {
8314 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8315 "Unexpected number of mapped base pointers.");
8316
8317 const ValueDecl *RelevantVD =
8318 L.Components.back().getAssociatedDeclaration();
8319 assert(RelevantVD &&
8320 "No relevant declaration related with device pointer??");
8321
8322 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8323 RelevantVD);
8324 CurInfo.Types[CurrentBasePointersIdx] |=
8325 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8326 }
8327 }
8328 }
8329
8330 // Append any pending zero-length pointers which are struct members and
8331 // used with use_device_ptr or use_device_addr.
8332 auto CI = DeferredInfo.find(Data.first);
8333 if (CI != DeferredInfo.end()) {
8334 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8335 llvm::Value *BasePtr;
8336 llvm::Value *Ptr;
8337 if (L.ForDeviceAddr) {
8338 if (L.IE->isGLValue())
8339 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8340 else
8341 Ptr = this->CGF.EmitScalarExpr(L.IE);
8342 BasePtr = Ptr;
8343 // Entry is RETURN_PARAM. Also, set the placeholder value
8344 // MEMBER_OF=FFFF so that the entry is later updated with the
8345 // correct value of MEMBER_OF.
8346 CurInfo.Types.push_back(
8347 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8348 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8349 } else {
8350 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8351 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8352 L.IE->getExprLoc());
8353 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8354 // placeholder value MEMBER_OF=FFFF so that the entry is later
8355 // updated with the correct value of MEMBER_OF.
8356 CurInfo.Types.push_back(
8357 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8358 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8359 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8360 }
8361 CurInfo.Exprs.push_back(L.VD);
8362 CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8363 CurInfo.Pointers.push_back(Ptr);
8364 CurInfo.Sizes.push_back(
8365 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8366 CurInfo.Mappers.push_back(nullptr);
8367 }
8368 }
8369 // If there is an entry in PartialStruct it means we have a struct with
8370 // individual members mapped. Emit an extra combined entry.
8371 if (PartialStruct.Base.isValid()) {
8372 CurInfo.NonContigInfo.Dims.push_back(0);
8373 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
8374 /*IsMapThis*/ !VD, VD);
8375 }
8376
8377 // We need to append the results of this capture to what we already
8378 // have.
8379 CombinedInfo.append(CurInfo);
8380 }
8381 // Append data for use_device_ptr clauses.
8382 CombinedInfo.append(UseDeviceDataCombinedInfo);
8383 }
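// Illustrative usage (an assumed example, not part of the analyzed file):
// the use_device_ptr/use_device_addr handling above corresponds to source
// such as
//
//   int *p = make_buffer(N);
//   #pragma omp target data map(to: p[0:N]) use_device_ptr(p)
//   { launch(p); } // inside the region, p holds the device address
//
// For use_device_addr, the address of the list item itself is translated
// rather than the pointer value it stores.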
8384
8385public:
8386 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8387 : CurDir(&Dir), CGF(CGF) {
8388 // Extract firstprivate clause information.
8389 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8390 for (const auto *D : C->varlists())
8391 FirstPrivateDecls.try_emplace(
8392 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8393 // Extract implicit firstprivates from uses_allocators clauses.
8394 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8395 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8396 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8397 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8398 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8399 /*Implicit=*/true);
8400 else if (const auto *VD = dyn_cast<VarDecl>(
8401 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8402 ->getDecl()))
8403 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8404 }
8405 }
8406 // Extract device pointer clause information.
8407 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8408 for (auto L : C->component_lists())
8409 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8410 // Extract device addr clause information.
8411 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8412 for (auto L : C->component_lists())
8413 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8414 // Extract map information.
8415 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8416 if (C->getMapType() != OMPC_MAP_to)
8417 continue;
8418 for (auto L : C->component_lists()) {
8419 const ValueDecl *VD = std::get<0>(L);
8420 const auto *RD = VD ? VD->getType()
8421 .getCanonicalType()
8422 .getNonReferenceType()
8423 ->getAsCXXRecordDecl()
8424 : nullptr;
8425 if (RD && RD->isLambda())
8426 LambdasMap.try_emplace(std::get<0>(L), C);
8427 }
8428 }
8429 }
8430
8431 /// Constructor for the declare mapper directive.
8432 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8433 : CurDir(&Dir), CGF(CGF) {}
8434
8435 /// Generate code for the combined entry if we have a partially mapped struct
8436 /// and take care of the mapping flags of the arguments corresponding to
8437 /// individual struct members.
8438 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8439 MapFlagsArrayTy &CurTypes,
8440 const StructRangeInfoTy &PartialStruct, bool IsMapThis,
8441 const ValueDecl *VD = nullptr,
8442 bool NotTargetParams = true) const {
8443 if (CurTypes.size() == 1 &&
8444 ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
8445 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
8446 !PartialStruct.IsArraySection)
8447 return;
8448 Address LBAddr = PartialStruct.LowestElem.second;
8449 Address HBAddr = PartialStruct.HighestElem.second;
8450 if (PartialStruct.HasCompleteRecord) {
8451 LBAddr = PartialStruct.LB;
8452 HBAddr = PartialStruct.LB;
8453 }
8454 CombinedInfo.Exprs.push_back(VD);
8455 // Base is the base of the struct
8456 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8457 // Pointer is the address of the lowest element
8458 llvm::Value *LB = LBAddr.getPointer();
8459 const CXXMethodDecl *MD =
8460 CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
8461 const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
8462 bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
8463 // There should not be a mapper for a combined entry.
8464 if (HasBaseClass) {
8465 // OpenMP 5.2 148:21:
8466 // If the target construct is within a class non-static member function,
8467 // and a variable is an accessible data member of the object for which the
8468 // non-static data member function is invoked, the variable is treated as
8469 // if the this[:1] expression had appeared in a map clause with a map-type
8470 // of tofrom.
8471 // Emit this[:1]
8472 CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
8473 QualType Ty = MD->getThisType()->getPointeeType();
8474 llvm::Value *Size =
8475 CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
8476 /*isSigned=*/true);
8477 CombinedInfo.Sizes.push_back(Size);
8478 } else {
8479 CombinedInfo.Pointers.push_back(LB);
8480 // Size is (addr of {highest+1} element) - (addr of lowest element)
8481 llvm::Value *HB = HBAddr.getPointer();
8482 llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
8483 HBAddr.getElementType(), HB, /*Idx0=*/1);
8484 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8485 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8486 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8487 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8488 /*isSigned=*/false);
8489 CombinedInfo.Sizes.push_back(Size);
8490 }
8491 CombinedInfo.Mappers.push_back(nullptr);
8492 // Map type is always TARGET_PARAM when generating info for captures.
8493 CombinedInfo.Types.push_back(
8494 NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
8495 : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8496 // If any element has the present modifier, then make sure the runtime
8497 // doesn't attempt to allocate the struct.
8498 if (CurTypes.end() !=
8499 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8500 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8501 Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
8502 }))
8503 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
8504 // Remove TARGET_PARAM flag from the first element
8505 (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8506 // If any element has the ompx_hold modifier, then make sure the runtime
8507 // uses the hold reference count for the struct as a whole so that it won't
8508 // be unmapped by an extra dynamic reference count decrement. Add it to all
8509 // elements as well so the runtime knows which reference count to check
8510 // when determining whether it's time for device-to-host transfers of
8511 // individual elements.
8512 if (CurTypes.end() !=
8513 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8514 return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8515 Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
8516 })) {
8517 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8518 for (auto &M : CurTypes)
8519 M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
8520 }
8521
8522 // All other current entries will be MEMBER_OF the combined entry
8523 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8524 // 0xFFFF in the MEMBER_OF field).
8525 OpenMPOffloadMappingFlags MemberOfFlag =
8526 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8527 for (auto &M : CurTypes)
8528 setCorrectMemberOfFlag(M, MemberOfFlag);
8529 }
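// Illustrative sketch (an assumed example, not part of the analyzed file):
// mapping individual members of a struct, e.g.
//
//   struct S { int a; double b; } s;
//   #pragma omp target map(tofrom: s.a, s.b)
//
// produces one combined parent entry spanning from the lowest mapped member
// (&s.a) to one past the highest (&s.b + 1), with each member entry carrying
// a MEMBER_OF flag that points back at it, as computed above.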
8530
8531 /// Generate all the base pointers, section pointers, sizes, map types, and
8532 /// mappers for the extracted mappable expressions (all included in \a
8533 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8534 /// pair of the relevant declaration and index where it occurs is appended to
8535 /// the device pointers info array.
8536 void generateAllInfo(
8537 MapCombinedInfoTy &CombinedInfo,
8538 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8539 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8540 assert(CurDir.is<const OMPExecutableDirective *>() &&
8541 "Expect a executable directive");
8542 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8543 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8544 }
8545
8546 /// Generate all the base pointers, section pointers, sizes, map types, and
8547 /// mappers for the extracted map clauses of user-defined mapper (all included
8548 /// in \a CombinedInfo).
8549 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8550 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
8551 "Expect a declare mapper directive");
8552 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8553 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8554 }
8555
8556 /// Emit capture info for lambdas for variables captured by reference.
8557 void generateInfoForLambdaCaptures(
8558 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8559 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8560 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
8561 const auto *RD = VDType->getAsCXXRecordDecl();
8562 if (!RD || !RD->isLambda())
8563 return;
8564 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
8565 CGF.getContext().getDeclAlign(VD));
8566 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
8567 llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
8568 FieldDecl *ThisCapture = nullptr;
8569 RD->getCaptureFields(Captures, ThisCapture);
8570 if (ThisCapture) {
8571 LValue ThisLVal =
8572 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8573 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8574 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
8575 VDLVal.getPointer(CGF));
8576 CombinedInfo.Exprs.push_back(VD);
8577 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
8578 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
8579 CombinedInfo.Sizes.push_back(
8580 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8581 CGF.Int64Ty, /*isSigned=*/true));
8582 CombinedInfo.Types.push_back(
8583 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8584 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8585 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8586 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8587 CombinedInfo.Mappers.push_back(nullptr);
8588 }
8589 for (const LambdaCapture &LC : RD->captures()) {
8590 if (!LC.capturesVariable())
8591 continue;
8592 const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
8593 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8594 continue;
8595 auto It = Captures.find(VD);
8596 assert(It != Captures.end() && "Found lambda capture without field.");
8597 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8598 if (LC.getCaptureKind() == LCK_ByRef) {
8599 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8600 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8601 VDLVal.getPointer(CGF));
8602 CombinedInfo.Exprs.push_back(VD);
8603 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8604 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
8605 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8606 CGF.getTypeSize(
8607 VD->getType().getCanonicalType().getNonReferenceType()),
8608 CGF.Int64Ty, /*isSigned=*/true));
8609 } else {
8610 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8611 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
8612 VDLVal.getPointer(CGF));
8613 CombinedInfo.Exprs.push_back(VD);
8614 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
8615 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
8616 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8617 }
8618 CombinedInfo.Types.push_back(
8619 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8620 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8621 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8622 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
8623 CombinedInfo.Mappers.push_back(nullptr);
8624 }
8625 }
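// Illustrative sketch (an assumed example, not part of the analyzed file):
// a by-reference capture such as
//
//   int x = 0;
//   auto l = [&x]() { return x; };
//   #pragma omp target map(to: l)
//
// requires the capture field inside the device copy of the lambda object to
// be rewired to the device copy of x; the PTR_AND_OBJ | LITERAL | MEMBER_OF |
// IMPLICIT entries emitted above express that relationship.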
8626
8627 /// Set correct indices for lambdas captures.
8628 void adjustMemberOfForLambdaCaptures(
8629 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8630 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8631 MapFlagsArrayTy &Types) const {
8632 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8633 // Set correct member_of idx for all implicit lambda captures.
8634 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8635 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8636 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8637 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8638 continue;
8639 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8640 assert(BasePtr && "Unable to find base lambda address.");
8641 int TgtIdx = -1;
8642 for (unsigned J = I; J > 0; --J) {
8643 unsigned Idx = J - 1;
8644 if (Pointers[Idx] != BasePtr)
8645 continue;
8646 TgtIdx = Idx;
8647 break;
8648 }
8649 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8650 // All other current entries will be MEMBER_OF the combined entry
8651 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8652 // 0xFFFF in the MEMBER_OF field).
8653 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8654 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8655 }
8656 }
8657
8658 /// Generate the base pointers, section pointers, sizes, map types, and
8659 /// mappers associated to a given capture (all included in \a CombinedInfo).
8660 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8661 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
8662 StructRangeInfoTy &PartialStruct) const {
8663 assert(!Cap->capturesVariableArrayType() &&
8664 "Not expecting to generate map info for a variable array type!");
8665
8666 // We need to know when we are generating information for the first component.
8667 const ValueDecl *VD = Cap->capturesThis()
8668 ? nullptr
8669 : Cap->getCapturedVar()->getCanonicalDecl();
8670
8671 // For map(to: lambda): skip here; it is processed in
8672 // generateDefaultMapInfo.
8673 if (LambdasMap.count(VD))
8674 return;
8675
8676 // If this declaration appears in an is_device_ptr clause, we just have to
8677 // pass the pointer by value. If it is a reference to a declaration, we just
8678 // pass its value.
8679 if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
8680 CombinedInfo.Exprs.push_back(VD);
8681 CombinedInfo.BasePointers.emplace_back(Arg, VD);
8682 CombinedInfo.Pointers.push_back(Arg);
8683 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8684 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
8685 /*isSigned=*/true));
8686 CombinedInfo.Types.push_back(
8687 (Cap->capturesVariable()
8688 ? OpenMPOffloadMappingFlags::OMP_MAP_TO
8689 : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) |
8690 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8691 CombinedInfo.Mappers.push_back(nullptr);
8692 return;
8693 }
8694
8695 using MapData =
8696 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
8697 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
8698 const ValueDecl *, const Expr *>;
8699 SmallVector<MapData, 4> DeclComponentLists;
8700 // For member field lists in is_device_ptr, store them in
8701 // DeclComponentLists for generating component info.
8702 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8703 auto It = DevPointersMap.find(VD);
8704 if (It != DevPointersMap.end())
8705 for (const auto &MCL : It->second)
8706 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
8707 /*IsImplicit=*/ true, nullptr,
8708 nullptr);
8709 auto I = HasDevAddrsMap.find(VD);
8710 if (I != HasDevAddrsMap.end())
8711 for (const auto &MCL : I->second)
8712 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
8713 /*IsImplicit=*/ true, nullptr,
8714 nullptr);
8715 assert(CurDir.is<const OMPExecutableDirective *>() &&
8716 "Expect a executable directive");
8717 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8718 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8719 const auto *EI = C->getVarRefs().begin();
8720 for (const auto L : C->decl_component_lists(VD)) {
8721 const ValueDecl *VDecl, *Mapper;
8722 // The expression is not correct if the mapping is implicit.
8723 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8724 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8725 std::tie(VDecl, Components, Mapper) = L;
8726 assert(VDecl == VD && "We got information for the wrong declaration??");
8727 assert(!Components.empty() &&
8728 "Not expecting declaration with no component lists.");
8729 DeclComponentLists.emplace_back(Components, C->getMapType(),
8730 C->getMapTypeModifiers(),
8731 C->isImplicit(), Mapper, E);
8732 ++EI;
8733 }
8734 }
8735 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
8736 const MapData &RHS) {
8737 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
8738 OpenMPMapClauseKind MapType = std::get<1>(RHS);
8739 bool HasPresent =
8740 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8741 bool HasAllocs = MapType == OMPC_MAP_alloc;
8742 MapModifiers = std::get<2>(RHS);
8743 MapType = std::get<1>(LHS);
8744 bool HasPresentR =
8745 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
8746 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8747 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8748 });
8749
8750 // Find overlapping elements (including the offset from the base element).
8751 llvm::SmallDenseMap<
8752 const MapData *,
8753 llvm::SmallVector<
8754 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
8755 4>
8756 OverlappedData;
8757 size_t Count = 0;
8758 for (const MapData &L : DeclComponentLists) {
8759 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8760 OpenMPMapClauseKind MapType;
8761 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8762 bool IsImplicit;
8763 const ValueDecl *Mapper;
8764 const Expr *VarRef;
8765 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8766 L;
8767 ++Count;
8768 for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
8769 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
8770 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
8771 VarRef) = L1;
8772 auto CI = Components.rbegin();
8773 auto CE = Components.rend();
8774 auto SI = Components1.rbegin();
8775 auto SE = Components1.rend();
8776 for (; CI != CE && SI != SE; ++CI, ++SI) {
8777 if (CI->getAssociatedExpression()->getStmtClass() !=
8778 SI->getAssociatedExpression()->getStmtClass())
8779 break;
8780 // Are we dealing with different variables/fields?
8781 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8782 break;
8783 }
8784 // Found overlapping if, at least for one component, reached the head
8785 // of the components list.
8786 if (CI == CE || SI == SE) {
8787 // Ignore it if it is the same component.
8788 if (CI == CE && SI == SE)
8789 continue;
8790 const auto It = (SI == SE) ? CI : SI;
8791 // If one component is a pointer and another one is a kind of
8792 // dereference of this pointer (array subscript, section, dereference,
8793 // etc.), it is not an overlapping.
8794 // Same, if one component is a base and another component is a
8795 // dereferenced pointer memberexpr with the same base.
8796 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
8797 (std::prev(It)->getAssociatedDeclaration() &&
8798 std::prev(It)
8799 ->getAssociatedDeclaration()
8800 ->getType()
8801 ->isPointerType()) ||
8802 (It->getAssociatedDeclaration() &&
8803 It->getAssociatedDeclaration()->getType()->isPointerType() &&
8804 std::next(It) != CE && std::next(It) != SE))
8805 continue;
8806 const MapData &BaseData = CI == CE ? L : L1;
8807 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8808 SI == SE ? Components : Components1;
8809 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8810 OverlappedElements.getSecond().push_back(SubData);
8811 }
8812 }
8813 }
8814 // Sort the overlapped elements for each item.
8815 llvm::SmallVector<const FieldDecl *, 4> Layout;
8816 if (!OverlappedData.empty()) {
8817 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
8818 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
8819 while (BaseType != OrigType) {
8820 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
8821 OrigType = BaseType->getPointeeOrArrayElementType();
8822 }
8823
8824 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
8825 getPlainLayout(CRD, Layout, /*AsBase=*/false);
8826 else {
8827 const auto *RD = BaseType->getAsRecordDecl();
8828 Layout.append(RD->field_begin(), RD->field_end());
8829 }
8830 }
8831 for (auto &Pair : OverlappedData) {
8832 llvm::stable_sort(
8833 Pair.getSecond(),
8834 [&Layout](
8835 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8836 OMPClauseMappableExprCommon::MappableExprComponentListRef
8837 Second) {
8838 auto CI = First.rbegin();
8839 auto CE = First.rend();
8840 auto SI = Second.rbegin();
8841 auto SE = Second.rend();
8842 for (; CI != CE && SI != SE; ++CI, ++SI) {
8843 if (CI->getAssociatedExpression()->getStmtClass() !=
8844 SI->getAssociatedExpression()->getStmtClass())
8845 break;
8846 // Are we dealing with different variables/fields?
8847 if (CI->getAssociatedDeclaration() !=
8848 SI->getAssociatedDeclaration())
8849 break;
8850 }
8851
8852 // Lists contain the same elements.
8853 if (CI == CE && SI == SE)
8854 return false;
8855
8856 // List with less elements is less than list with more elements.
8857 if (CI == CE || SI == SE)
8858 return CI == CE;
8859
8860 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8861 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8862 if (FD1->getParent() == FD2->getParent())
8863 return FD1->getFieldIndex() < FD2->getFieldIndex();
8864 const auto *It =
8865 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8866 return FD == FD1 || FD == FD2;
8867 });
8868 return *It == FD1;
8869 });
8870 }
8871
8872 // Associated with a capture, because the mapping flags depend on it.
8873 // Go through all of the elements that have overlapped elements.
8874 bool IsFirstComponentList = true;
8875 for (const auto &Pair : OverlappedData) {
8876 const MapData &L = *Pair.getFirst();
8877 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8878 OpenMPMapClauseKind MapType;
8879 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8880 bool IsImplicit;
8881 const ValueDecl *Mapper;
8882 const Expr *VarRef;
8883 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8884 L;
8885 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8886 OverlappedComponents = Pair.getSecond();
8887 generateInfoForComponentList(
8888 MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
8889 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
8890 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
8891 IsFirstComponentList = false;
8892 }
8893 // Go through other elements without overlapped elements.
8894 for (const MapData &L : DeclComponentLists) {
8895 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8896 OpenMPMapClauseKind MapType;
8897 ArrayRef<OpenMPMapModifierKind> MapModifiers;
8898 bool IsImplicit;
8899 const ValueDecl *Mapper;
8900 const Expr *VarRef;
8901 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
8902 L;
8903 auto It = OverlappedData.find(&L);
8904 if (It == OverlappedData.end())
8905 generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
8906 Components, CombinedInfo, PartialStruct,
8907 IsFirstComponentList, IsImplicit, Mapper,
8908 /*ForDeviceAddr=*/false, VD, VarRef);
8909 IsFirstComponentList = false;
8910 }
8911 }
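// Illustrative sketch (an assumed example, not part of the analyzed file):
// overlapping component lists arise from clauses such as
//
//   struct S { int a; int b; } s;
//   #pragma omp target map(tofrom: s) map(to: s.a)
//
// where the list for s.a is contained in the list for s. The overlap
// machinery above emits the base mapping with the overlapped region carved
// out, so each byte ends up with a single, well-defined map type.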
8912
8913 /// Generate the default map information for a given capture \a CI,
8914 /// record field declaration \a RI and captured value \a CV.
8915 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8916 const FieldDecl &RI, llvm::Value *CV,
8917 MapCombinedInfoTy &CombinedInfo) const {
8918 bool IsImplicit = true;
8919 // Do the default mapping.
8920 if (CI.capturesThis()) {
8921 CombinedInfo.Exprs.push_back(nullptr);
8922 CombinedInfo.BasePointers.push_back(CV);
8923 CombinedInfo.Pointers.push_back(CV);
8924 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8925 CombinedInfo.Sizes.push_back(
8926 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8927 CGF.Int64Ty, /*isSigned=*/true));
8928 // Default map type.
8929 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
8930 OpenMPOffloadMappingFlags::OMP_MAP_FROM);
8931 } else if (CI.capturesVariableByCopy()) {
8932 const VarDecl *VD = CI.getCapturedVar();
8933 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8934 CombinedInfo.BasePointers.push_back(CV);
8935 CombinedInfo.Pointers.push_back(CV);
8936 if (!RI.getType()->isAnyPointerType()) {
8937 // We have to signal to the runtime captures passed by value that are
8938 // not pointers.
8939 CombinedInfo.Types.push_back(
8940 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
8941 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8942 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8943 } else {
8944 // Pointers are implicitly mapped with a zero size and no flags
8945 // (other than first map that is added for all implicit maps).
8946 CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
8947 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8948 }
8949 auto I = FirstPrivateDecls.find(VD);
8950 if (I != FirstPrivateDecls.end())
8951 IsImplicit = I->getSecond();
8952 } else {
8953 assert(CI.capturesVariable() && "Expected captured reference.");
8954 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8955 QualType ElementType = PtrTy->getPointeeType();
8956 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8957 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8958 // The default map type for a scalar/complex type is 'to' because by
8959 // default the value doesn't have to be retrieved. For an aggregate
8960 // type, the default is 'tofrom'.
8961 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
8962 const VarDecl *VD = CI.getCapturedVar();
8963 auto I = FirstPrivateDecls.find(VD);
8964 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
8965 CombinedInfo.BasePointers.push_back(CV);
8966 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8967 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8968 CV, ElementType, CGF.getContext().getDeclAlign(VD),
8969 AlignmentSource::Decl));
8970 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
8971 } else {
8972 CombinedInfo.Pointers.push_back(CV);
8973 }
8974 if (I != FirstPrivateDecls.end())
8975 IsImplicit = I->getSecond();
8976 }
8977 // Every default map produces a single argument which is a target parameter.
8978 CombinedInfo.Types.back() |=
8979 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
8980
8981 // Add flag stating this is an implicit map.
8982 if (IsImplicit)
8983 CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;
8984
8985 // No user-defined mapper for default mapping.
8986 CombinedInfo.Mappers.push_back(nullptr);
8987 }
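// Illustrative sketch (an assumed example, not part of the analyzed file):
// with no explicit map clause, e.g.
//
//   int n = 10; double d[8];
//   #pragma omp target
//   { d[0] = n; }
//
// n is captured by copy and passed as a LITERAL target parameter, while the
// aggregate d is captured by reference and mapped tofrom by default,
// matching the branches above.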
8988};
8989} // anonymous namespace
8990
8991static void emitNonContiguousDescriptor(
8992 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
8993 CGOpenMPRuntime::TargetDataInfo &Info) {
8994 CodeGenModule &CGM = CGF.CGM;
8995 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
8996 &NonContigInfo = CombinedInfo.NonContigInfo;
8997
8998 // Build an array of struct descriptor_dim and then assign it to
8999 // offload_args.
9000 //
9001 // struct descriptor_dim {
9002 // uint64_t offset;
9003 // uint64_t count;
9004 // uint64_t stride;
9005 // };
9006 ASTContext &C = CGF.getContext();
9007 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9008 RecordDecl *RD;
9009 RD = C.buildImplicitRecord("descriptor_dim");
9010 RD->startDefinition();
9011 addFieldToRecordDecl(C, RD, Int64Ty);
9012 addFieldToRecordDecl(C, RD, Int64Ty);
9013 addFieldToRecordDecl(C, RD, Int64Ty);
9014 RD->completeDefinition();
9015 QualType DimTy = C.getRecordType(RD);
9016
9017 enum { OffsetFD = 0, CountFD, StrideFD };
9018 // We need two index variables here since the size of "Dims" is the same as
9019 // the size of Components; however, the sizes of offset, count, and stride
9020 // equal the number of base declarations that are non-contiguous.
9021 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9022 // Skip emitting IR if the dimension size is 1, since it cannot be
9023 // non-contiguous.
9024 if (NonContigInfo.Dims[I] == 1)
9025 continue;
9026 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9027 QualType ArrayTy =
9028 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9029 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9030 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9031 unsigned RevIdx = EE - II - 1;
9032 LValue DimsLVal = CGF.MakeAddrLValue(
9033 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9034 // Offset
9035 LValue OffsetLVal = CGF.EmitLValueForField(
9036 DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9037 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9038 // Count
9039 LValue CountLVal = CGF.EmitLValueForField(
9040 DimsLVal, *std::next(RD->field_begin(), CountFD));
9041 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9042 // Stride
9043 LValue StrideLVal = CGF.EmitLValueForField(
9044 DimsLVal, *std::next(RD->field_begin(), StrideFD));
9045 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9046 }
9047 // args[I] = &dims
9048 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9049 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9050 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9051 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9052 Info.RTArgs.PointersArray, 0, I);
9053 Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9054 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9055 ++L;
9056 }
9057}
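// Illustrative sketch (an assumed example, not part of the analyzed file):
// a strided section such as
//
//   double a[100];
//   #pragma omp target update to(a[0:50:2])
//
// is non-contiguous, so one {offset, count, stride} descriptor_dim triple is
// emitted per dimension and the corresponding entry of the pointers array is
// redirected to the dims buffer built above.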
9058
9059// Try to extract the base declaration from a `this->x` expression if possible.
9060static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9061 if (!E)
9062 return nullptr;
9063
9064 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9065 if (const MemberExpr *ME =
9066 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9067 return ME->getMemberDecl();
9068 return nullptr;
9069}
9070
9071/// Emit a string constant containing the names of the values mapped to the
9072/// offloading runtime library.
9073llvm::Constant *
9074emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9075 MappableExprsHandler::MappingExprInfo &MapExprs) {
9076
9077 uint32_t SrcLocStrSize;
9078 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9079 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9080
9081 SourceLocation Loc;
9082 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9083 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9084 Loc = VD->getLocation();
9085 else
9086 Loc = MapExprs.getMapExpr()->getExprLoc();
9087 } else {
9088 Loc = MapExprs.getMapDecl()->getLocation();
9089 }
9090
9091 std::string ExprName;
9092 if (MapExprs.getMapExpr()) {
9093 PrintingPolicy P(CGF.getContext().getLangOpts());
9094 llvm::raw_string_ostream OS(ExprName);
9095 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9096 OS.flush();
9097 } else {
9098 ExprName = MapExprs.getMapDecl()->getNameAsString();
9099 }
9100
9101 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9102 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9103 PLoc.getLine(), PLoc.getColumn(),
9104 SrcLocStrSize);
9105}
9106
9107/// Emit the arrays used to pass the captures and map information to the
9108/// offloading runtime library. If there is no map or capture information,
9109/// return nullptr by reference.
9110static void emitOffloadingArrays(
9111 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9112 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9113 bool IsNonContiguous = false) {
9114 CodeGenModule &CGM = CGF.CGM;
9115 ASTContext &Ctx = CGF.getContext();
9116
9117 // Reset the array information.
9118 Info.clearArrayInfo();
9119 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9120
9121 if (Info.NumberOfPtrs) {
9122 // Detect whether any capture size requires runtime evaluation of the
9123 // size, so that a constant array can eventually be used.
9124
9125 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9126 QualType PointerArrayType = Ctx.getConstantArrayType(
9127 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9128 /*IndexTypeQuals=*/0);
9129
9130 Info.RTArgs.BasePointersArray =
9131 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9132 Info.RTArgs.PointersArray =
9133 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9134 Address MappersArray =
9135 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9136 Info.RTArgs.MappersArray = MappersArray.getPointer();
9137
9138 // If we don't have any VLA types or other types that require runtime
9139 // evaluation, we can use a constant array for the map sizes, otherwise we
9140 // need to fill up the arrays as we do for the pointers.
9141 QualType Int64Ty =
9142 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9143 SmallVector<llvm::Constant *> ConstSizes(
9144 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
9145 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
9146 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9147 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
9148 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
9149 if (IsNonContiguous &&
9150 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9151 CombinedInfo.Types[I] &
9152 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9153 ConstSizes[I] = llvm::ConstantInt::get(
9154 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
9155 else
9156 ConstSizes[I] = CI;
9157 continue;
9158 }
9159 }
9160 RuntimeSizes.set(I);
9161 }
9162
9163 if (RuntimeSizes.all()) {
9164 QualType SizeArrayType = Ctx.getConstantArrayType(
9165 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9166 /*IndexTypeQuals=*/0);
9167 Info.RTArgs.SizesArray =
9168 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9169 } else {
9170 auto *SizesArrayInit = llvm::ConstantArray::get(
9171 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9172 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9173 auto *SizesArrayGbl = new llvm::GlobalVariable(
9174 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
9175 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
9176 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9177 if (RuntimeSizes.any()) {
9178 QualType SizeArrayType = Ctx.getConstantArrayType(
9179 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9180 /*IndexTypeQuals=*/0);
9181 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
9182 llvm::Value *GblConstPtr =
9183 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9184 SizesArrayGbl, CGM.Int64Ty->getPointerTo());
9185 CGF.Builder.CreateMemCpy(
9186 Buffer,
9187 Address(GblConstPtr, CGM.Int64Ty,
9188 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
9189 /*DestWidth=*/64, /*Signed=*/false))),
9190 CGF.getTypeSize(SizeArrayType));
9191 Info.RTArgs.SizesArray = Buffer.getPointer();
9192 } else {
9193 Info.RTArgs.SizesArray = SizesArrayGbl;
9194 }
9195 }
9196
9197 // The map types are always constant so we don't need to generate code to
9198 // fill arrays. Instead, we create an array constant.
9199 SmallVector<uint64_t, 4> Mapping;
9200 for (auto mapFlag : CombinedInfo.Types)
9201 Mapping.push_back(
9202 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9203 mapFlag));
9204 std::string MaptypesName =
9205 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9206 auto *MapTypesArrayGbl =
9207 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9208 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9209
9210 // The information types are only built if there is debug information
9211 // requested.
9212 if (CGM.getCodeGenOpts().getDebugInfo() ==
9213 llvm::codegenoptions::NoDebugInfo) {
9214 Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue(
9215 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9216 } else {
9217 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9218 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9219 };
9220 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9221 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9222 std::string MapnamesName =
9223 CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9224 auto *MapNamesArrayGbl =
9225 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9226 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9227 }
9228
9229 // If there's a present map type modifier, it must not be applied to the end
9230 // of a region, so generate a separate map type array in that case.
9231 if (Info.separateBeginEndCalls()) {
9232 bool EndMapTypesDiffer = false;
9233 for (uint64_t &Type : Mapping) {
9234 if (Type &
9235 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9236 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9237 Type &=
9238 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9239 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9240 EndMapTypesDiffer = true;
9241 }
9242 }
9243 if (EndMapTypesDiffer) {
9244 MapTypesArrayGbl =
9245 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9246 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9247 }
9248 }
9249
9250 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9251 llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9252 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9253 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9254 Info.RTArgs.BasePointersArray, 0, I);
9255 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9256 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9257 Address BPAddr(BP, BPVal->getType(),
9258 Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9259 CGF.Builder.CreateStore(BPVal, BPAddr);
9260
9261 if (Info.requiresDevicePointerInfo())
9262 if (const ValueDecl *DevVD =
9263 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9264 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9265
9266 llvm::Value *PVal = CombinedInfo.Pointers[I];
9267 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9268 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9269 Info.RTArgs.PointersArray, 0, I);
9270 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9271 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9272 Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9273 CGF.Builder.CreateStore(PVal, PAddr);
9274
9275 if (RuntimeSizes.test(I)) {
9276 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9277 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9278 Info.RTArgs.SizesArray,
9279 /*Idx0=*/0,
9280 /*Idx1=*/I);
9281 Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
9282 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9283 CGM.Int64Ty,
9284 /*isSigned=*/true),
9285 SAddr);
9286 }
9287
9288 // Fill up the mapper array.
9289 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9290 if (CombinedInfo.Mappers[I]) {
9291 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9292 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9293 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9294 Info.HasMapper = true;
9295 }
9296 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9297 CGF.Builder.CreateStore(MFunc, MAddr);
9298 }
9299 }
9300
9301 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9302 Info.NumberOfPtrs == 0)
9303 return;
9304
9305 emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9306}
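// Illustrative sketch (an assumed example, not part of the analyzed file):
// for a directive like
//
//   int x; double v[4];
//   #pragma omp target map(tofrom: x, v)
//
// this routine materializes parallel arrays conceptually equivalent to
//
//   void *offload_baseptrs[2] = {&x, &v};
//   void *offload_ptrs[2]     = {&x, &v};
//   int64_t offload_sizes[2]  = {sizeof(x), sizeof(v)};
//
// plus a constant offload_maptypes array (and, when debug info is enabled,
// an offload_mapnames array), which are then handed to the runtime.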
9307
9308/// Check for inner distribute directive.
9309static const OMPExecutableDirective *
9310getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9311 const auto *CS = D.getInnermostCapturedStmt();
9312 const auto *Body =
9313 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9314 const Stmt *ChildStmt =
9315 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9316
9317 if (const auto *NestedDir =
9318 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9319 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9320 switch (D.getDirectiveKind()) {
9321 case OMPD_target:
9322 if (isOpenMPDistributeDirective(DKind))
9323 return NestedDir;
9324 if (DKind == OMPD_teams) {
9325 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9326 /*IgnoreCaptured=*/true);
9327 if (!Body)
9328 return nullptr;
9329 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9330 if (const auto *NND =
9331 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9332 DKind = NND->getDirectiveKind();
9333 if (isOpenMPDistributeDirective(DKind))
9334 return NND;
9335 }
9336 }
9337 return nullptr;
9338 case OMPD_target_teams:
9339 if (isOpenMPDistributeDirective(DKind))
9340 return NestedDir;
9341 return nullptr;
9342 case OMPD_target_parallel:
9343 case OMPD_target_simd:
9344 case OMPD_target_parallel_for:
9345 case OMPD_target_parallel_for_simd:
9346 return nullptr;
9347 case OMPD_target_teams_distribute:
9348 case OMPD_target_teams_distribute_simd:
9349 case OMPD_target_teams_distribute_parallel_for:
9350 case OMPD_target_teams_distribute_parallel_for_simd:
9351 case OMPD_parallel:
9352 case OMPD_for:
9353 case OMPD_parallel_for:
9354 case OMPD_parallel_master:
9355 case OMPD_parallel_sections:
9356 case OMPD_for_simd:
9357 case OMPD_parallel_for_simd:
9358 case OMPD_cancel:
9359 case OMPD_cancellation_point:
9360 case OMPD_ordered:
9361 case OMPD_threadprivate:
9362 case OMPD_allocate:
9363 case OMPD_task:
9364 case OMPD_simd:
9365 case OMPD_tile:
9366 case OMPD_unroll:
9367 case OMPD_sections:
9368 case OMPD_section:
9369 case OMPD_single:
9370 case OMPD_master:
9371 case OMPD_critical:
9372 case OMPD_taskyield:
9373 case OMPD_barrier:
9374 case OMPD_taskwait:
9375 case OMPD_taskgroup:
9376 case OMPD_atomic:
9377 case OMPD_flush:
9378 case OMPD_depobj:
9379 case OMPD_scan:
9380 case OMPD_teams:
9381 case OMPD_target_data:
9382 case OMPD_target_exit_data:
9383 case OMPD_target_enter_data:
9384 case OMPD_distribute:
9385 case OMPD_distribute_simd:
9386 case OMPD_distribute_parallel_for:
9387 case OMPD_distribute_parallel_for_simd:
9388 case OMPD_teams_distribute:
9389 case OMPD_teams_distribute_simd:
9390 case OMPD_teams_distribute_parallel_for:
9391 case OMPD_teams_distribute_parallel_for_simd:
9392 case OMPD_target_update:
9393 case OMPD_declare_simd:
9394 case OMPD_declare_variant:
9395 case OMPD_begin_declare_variant:
9396 case OMPD_end_declare_variant:
9397 case OMPD_declare_target:
9398 case OMPD_end_declare_target:
9399 case OMPD_declare_reduction:
9400 case OMPD_declare_mapper:
9401 case OMPD_taskloop:
9402 case OMPD_taskloop_simd:
9403 case OMPD_master_taskloop:
9404 case OMPD_master_taskloop_simd:
9405 case OMPD_parallel_master_taskloop:
9406 case OMPD_parallel_master_taskloop_simd:
9407 case OMPD_requires:
9408 case OMPD_metadirective:
9409 case OMPD_unknown:
9410 default:
9411 llvm_unreachable("Unexpected directive.");
9412 }
9413 }
9414
9415 return nullptr;
9416}
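// Illustrative sketch (an assumed example, not part of the analyzed file):
// this lookup recognizes nestings such as
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute
//   for (int i = 0; i < n; ++i) ...
//
// and returns the inner distribute directive; any other nesting (or a
// combined target directive that cannot contain one) yields nullptr.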
9417
9418/// Emit the user-defined mapper function. The code generation follows the
9419/// pattern in the example below.
9420/// \code
9421/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9422/// void *base, void *begin,
9423/// int64_t size, int64_t type,
9424/// void *name = nullptr) {
9425/// // Allocate space for an array section first or add a base/begin for
9426/// // pointer dereference.
9427/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9428/// !maptype.IsDelete)
9429/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9430/// size*sizeof(Ty), clearToFromMember(type));
9431/// // Map members.
9432/// for (unsigned i = 0; i < size; i++) {
9433/// // For each component specified by this mapper:
9434/// for (auto c : begin[i]->all_components) {
9435/// if (c.hasMapper())
9436/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9437/// c.arg_type, c.arg_name);
9438/// else
9439/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9440/// c.arg_begin, c.arg_size, c.arg_type,
9441/// c.arg_name);
9442/// }
9443/// }
9444/// // Delete the array section.
9445/// if (size > 1 && maptype.IsDelete)
9446/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9447/// size*sizeof(Ty), clearToFromMember(type));
9448/// }
9449/// \endcode
9450void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9451 CodeGenFunction *CGF) {
9452 if (UDMMap.count(D) > 0)
9453 return;
9454 ASTContext &C = CGM.getContext();
9455 QualType Ty = D->getType();
9456 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9457 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9458 auto *MapperVarDecl =
9459 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9460 SourceLocation Loc = D->getLocation();
9461 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9462 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9463
9464 // Prepare mapper function arguments and attributes.
9465 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9466 C.VoidPtrTy, ImplicitParamDecl::Other);
9467 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9468 ImplicitParamDecl::Other);
9469 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9470 C.VoidPtrTy, ImplicitParamDecl::Other);
9471 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9472 ImplicitParamDecl::Other);
9473 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9474 ImplicitParamDecl::Other);
9475 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9476 ImplicitParamDecl::Other);
9477 FunctionArgList Args;
9478 Args.push_back(&HandleArg);
9479 Args.push_back(&BaseArg);
9480 Args.push_back(&BeginArg);
9481 Args.push_back(&SizeArg);
9482 Args.push_back(&TypeArg);
9483 Args.push_back(&NameArg);
9484 const CGFunctionInfo &FnInfo =
9485 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9486 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9487 SmallString<64> TyStr;
9488 llvm::raw_svector_ostream Out(TyStr);
9489 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9490 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9491 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9492 Name, &CGM.getModule());
9493 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9494 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9495 // Start the mapper function code generation.
9496 CodeGenFunction MapperCGF(CGM);
9497 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9498 // Compute the starting and end addresses of array elements.
9499 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9500 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9501 C.getPointerType(Int64Ty), Loc);
9502 // Prepare common arguments for array initiation and deletion.
9503 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9504 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9505 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9506 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9507 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9508 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9509 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9510 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9511 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9512 // Convert the size in bytes into the number of array elements.
9513 Size = MapperCGF.Builder.CreateExactUDiv(
9514 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9515 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9516 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9517 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9518 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9519 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9520 C.getPointerType(Int64Ty), Loc);
9521 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9522 MapperCGF.GetAddrOfLocalVar(&NameArg),
9523 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9524
9525 // Emit array initialization if this is an array section and \p MapType
9526 // indicates that memory allocation is required.
9527 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9528 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9529 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9530
9531 // Emit a for loop that iterates over SizeArg elements and maps each of them.
9532
9533 // Emit the loop header block.
9534 MapperCGF.EmitBlock(HeadBB);
9535 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9536 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9537 // Evaluate whether the initial condition is satisfied.
9538 llvm::Value *IsEmpty =
9539 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9540 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9541 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9542
9543 // Emit the loop body block.
9544 MapperCGF.EmitBlock(BodyBB);
9545 llvm::BasicBlock *LastBB = BodyBB;
9546 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
9547 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9548 PtrPHI->addIncoming(PtrBegin, EntryBB);
9549 Address PtrCurrent(PtrPHI, ElemTy,
9550 MapperCGF.GetAddrOfLocalVar(&BeginArg)
9551 .getAlignment()
9552 .alignmentOfArrayElement(ElementSize));
9553 // Privatize the mapper's declared variable so that it refers to the current array element.
9554 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
9555 Scope.addPrivate(MapperVarDecl, PtrCurrent);
9556 (void)Scope.Privatize();
9557
9558 // Get map clause information. Fill up the arrays with all mapped variables.
9559 MappableExprsHandler::MapCombinedInfoTy Info;
9560 MappableExprsHandler MEHandler(*D, MapperCGF);
9561 MEHandler.generateAllInfoForMapper(Info);
9562
9563 // Call the runtime API __tgt_mapper_num_components to get the number of
9564 // pre-existing components.
9565 llvm::Value *OffloadingArgs[] = {Handle};
9566 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
9567 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9568 OMPRTL___tgt_mapper_num_components),
9569 OffloadingArgs);
9570 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
9571 PreviousSize,
9572 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
9573
9574 // Fill up the runtime mapper handle for all components.
9575 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
9576 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
9577 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9578 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
9579 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
9580 llvm::Value *CurSizeArg = Info.Sizes[I];
9581 llvm::Value *CurNameArg =
9582 (CGM.getCodeGenOpts().getDebugInfo() ==
9583 llvm::codegenoptions::NoDebugInfo)
9584 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
9585 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
9586
9587 // Extract the MEMBER_OF field from the map type.
9588 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
9589 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9590 Info.Types[I]));
9591 llvm::Value *MemberMapType =
9592 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9593
9594 // Combine the map type inherited from the user-defined mapper with the
9595 // one specified in the program. Based on the OMP_MAP_TO and OMP_MAP_FROM
9596 // bits of \a MapType, which is the input argument of the mapper
9597 // function, the following code sets the OMP_MAP_TO and OMP_MAP_FROM
9598 // bits of MemberMapType.
9599 // [OpenMP 5.0], 1.2.6. map-type decay.
9600 // | alloc | to | from | tofrom | release | delete
9601 // ----------------------------------------------------------
9602 // alloc | alloc | alloc | alloc | alloc | release | delete
9603 // to | alloc | to | alloc | to | release | delete
9604 // from | alloc | alloc | from | from | release | delete
9605 // tofrom | alloc | to | from | tofrom | release | delete
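    // Illustrative example (editorial note, applying the table above): if the
    // mapper is invoked with MapType == to and the member is declared tofrom,
    // the branch below keeps OMP_MAP_TO and clears OMP_MAP_FROM, yielding an
    // effective map type of to.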
9606 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
9607 MapType,
9608 MapperCGF.Builder.getInt64(
9609 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9610 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9611 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9612 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
9613 llvm::BasicBlock *AllocElseBB =
9614 MapperCGF.createBasicBlock("omp.type.alloc.else");
9615 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
9616 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
9617 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
9618 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
9619 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
9620 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9621 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
9622 MapperCGF.EmitBlock(AllocBB);
9623 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
9624 MemberMapType,
9625 MapperCGF.Builder.getInt64(
9626 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9627 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9628 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9629 MapperCGF.Builder.CreateBr(EndBB);
9630 MapperCGF.EmitBlock(AllocElseBB);
9631 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
9632 LeftToFrom,
9633 MapperCGF.Builder.getInt64(
9634 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9635 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9636 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9637 // In case of to, clear OMP_MAP_FROM.
9638 MapperCGF.EmitBlock(ToBB);
9639 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
9640 MemberMapType,
9641 MapperCGF.Builder.getInt64(
9642 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9643 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9644 MapperCGF.Builder.CreateBr(EndBB);
9645 MapperCGF.EmitBlock(ToElseBB);
9646 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
9647 LeftToFrom,
9648 MapperCGF.Builder.getInt64(
9649 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9650 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9651 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9652 // In case of from, clear OMP_MAP_TO.
9653 MapperCGF.EmitBlock(FromBB);
9654 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
9655 MemberMapType,
9656 MapperCGF.Builder.getInt64(
9657 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9658 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9659 // In case of tofrom, do nothing.
9660 MapperCGF.EmitBlock(EndBB);
9661 LastBB = EndBB;
9662 llvm::PHINode *CurMapType =
9663 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
9664 CurMapType->addIncoming(AllocMapType, AllocBB);
9665 CurMapType->addIncoming(ToMapType, ToBB);
9666 CurMapType->addIncoming(FromMapType, FromBB);
9667 CurMapType->addIncoming(MemberMapType, ToElseBB);
9668
9669 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
9670 CurSizeArg, CurMapType, CurNameArg};
9671 if (Info.Mappers[I]) {
9672 // Call the corresponding mapper function.
9673 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
9674 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
9675 assert(MapperFunc && "Expect a valid mapper function is available.");
9676 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
9677 } else {
9678 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9679 // data structure.
9680 MapperCGF.EmitRuntimeCall(
9681 OMPBuilder.getOrCreateRuntimeFunction(
9682 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
9683 OffloadingArgs);
9684 }
9685 }
9686
9687 // Update the pointer to point to the next element that needs to be mapped,
9688 // and check whether we have mapped all elements.
9689 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
9690 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
9691 PtrPHI->addIncoming(PtrNext, LastBB);
9692 llvm::Value *IsDone =
9693 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9694 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
9695 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9696
9697 MapperCGF.EmitBlock(ExitBB);
9698 // Emit array deletion if this is an array section and \p MapType indicates
9699 // that deletion is required.
9700 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9701 MapName, ElementSize, DoneBB, /*IsInit=*/false);
9702
9703 // Emit the function exit block.
9704 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
9705 MapperCGF.FinishFunction();
9706 UDMMap.try_emplace(D, Fn);
9707 if (CGF) {
9708 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
9709 Decls.second.push_back(D);
9710 }
9711}
9712
9713/// Emit the array initialization or deletion portion for user-defined mapper
9714/// code generation. First, it evaluates whether an array section is mapped and
9715/// whether the \a MapType instructs to delete this section. If \a IsInit is
9716/// true, and \a MapType indicates to not delete this array, array
9717/// initialization code is generated. If \a IsInit is false, and \a MapType
9718 /// indicates to delete this array, array deletion code is generated.
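/// A compact sketch of the emitted guard (editorial illustration, not
/// verbatim IR), with \p Size measured in elements:
/// \code
/// // IsInit:  if ((Size > 1 || (Base != Begin && IsPtrAndObj)) && !Delete)
/// // !IsInit: if (Size > 1 && Delete)
/// //   __tgt_push_mapper_component(Handle, Base, Begin, Size * sizeof(Ty),
/// //                               (MapType & ~(TO|FROM)) | IMPLICIT, Name);
/// \endcode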
9719void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
9720 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
9721 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
9722 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
9723 bool IsInit) {
9724 StringRef Prefix = IsInit ? ".init" : ".del";
9725
9726 // Evaluate if this is an array section.
9727 llvm::BasicBlock *BodyBB =
9728 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
9729 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
9730 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
9731 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
9732 MapType,
9733 MapperCGF.Builder.getInt64(
9734 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9735 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9736 llvm::Value *DeleteCond;
9737 llvm::Value *Cond;
9738 if (IsInit) {
9739 // base != begin?
9740 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
9741 // IsPtrAndObj?
9742 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
9743 MapType,
9744 MapperCGF.Builder.getInt64(
9745 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9746 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9747 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
9748 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9749 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
9750 DeleteCond = MapperCGF.Builder.CreateIsNull(
9751 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9752 } else {
9753 Cond = IsArray;
9754 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
9755 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
9756 }
9757 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
9758 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
9759
9760 MapperCGF.EmitBlock(BodyBB);
9761 // Get the array size by multiplying element size and element number (i.e., \p
9762 // Size).
9763 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
9764 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9765 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that the entry
9766 // serves memory allocation/deletion purposes only.
9767 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
9768 MapType,
9769 MapperCGF.Builder.getInt64(
9770 ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9771 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9772 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9773 MapTypeArg = MapperCGF.Builder.CreateOr(
9774 MapTypeArg,
9775 MapperCGF.Builder.getInt64(
9776 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9777 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9778
9779 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
9780 // data structure.
9781 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
9782 ArraySize, MapTypeArg, MapName};
9783 MapperCGF.EmitRuntimeCall(
9784 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
9785 OMPRTL___tgt_push_mapper_component),
9786 OffloadingArgs);
9787}
9788
9789llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9790 const OMPDeclareMapperDecl *D) {
9791 auto I = UDMMap.find(D);
9792 if (I != UDMMap.end())
9793 return I->second;
9794 emitUserDefinedMapper(D);
9795 return UDMMap.lookup(D);
9796}
9797
9798llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9799 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9800 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9801 const OMPLoopDirective &D)>
9802 SizeEmitter) {
9803 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9804 const OMPExecutableDirective *TD = &D;
9805 // Get nested teams distribute kind directive, if any.
9806 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
9807 TD = getNestedDistributeDirective(CGM.getContext(), D);
9808 if (!TD)
9809 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9810
9811 const auto *LD = cast<OMPLoopDirective>(TD);
9812 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9813 return NumIterations;
9814 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9815}
9816
9817void CGOpenMPRuntime::emitTargetCall(
9818 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9819 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9820 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9821 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9822 const OMPLoopDirective &D)>
9823 SizeEmitter) {
9824 if (!CGF.HaveInsertPoint())
9825 return;
9826
9827 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
9828 CGM.getLangOpts().OpenMPOffloadMandatory;
9829
9830 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9831
9832 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
9833 D.hasClausesOfKind<OMPNowaitClause>() ||
9834 D.hasClausesOfKind<OMPInReductionClause>();
9835 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9836 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9837 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9838 PrePostActionTy &) {
9839 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9840 };
9841 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9842
9843 CodeGenFunction::OMPTargetDataInfo InputInfo;
9844 llvm::Value *MapTypesArray = nullptr;
9845 llvm::Value *MapNamesArray = nullptr;
9846 // Generate code for the host fallback function.
9847 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
9848 &CS, OffloadingMandatory](CodeGenFunction &CGF) {
9849 if (OffloadingMandatory) {
9850 CGF.Builder.CreateUnreachable();
9851 } else {
9852 if (RequiresOuterTask) {
9853 CapturedVars.clear();
9854 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9855 }
9856 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
9857 }
9858 };
9859 // Fill up the pointer arrays and transfer execution to the device.
9860 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
9861 &MapNamesArray, SizeEmitter,
9862 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
9863 if (Device.getInt() == OMPC_DEVICE_ancestor) {
9864 // Reverse offloading is not supported, so just execute on the host.
9865 FallbackGen(CGF);
9866 return;
9867 }
9868
9869 // On top of the arrays that were filled up, the target offloading call
9870 // takes as arguments the device id as well as the host pointer. The host
9871 // pointer is used by the runtime library to identify the current target
9872 // region, so it only has to be unique and not necessarily point to
9873 // anything. It could be the pointer to the outlined function that
9874 // implements the target region, but we aren't using that, so the
9875 // compiler doesn't need to keep it alive and can therefore inline the host
9876 // function if that proves worthwhile during optimization.
9877
9878 // From this point on, we need to have an ID of the target region defined.
9879 assert(OutlinedFnID && "Invalid outlined function ID!");
9880 (void)OutlinedFnID;
9881
9882 // Emit device ID if any.
9883 llvm::Value *DeviceID;
9884 if (Device.getPointer()) {
9885 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9886 Device.getInt() == OMPC_DEVICE_device_num) &&
9887 "Expected device_num modifier.");
9888 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
9889 DeviceID =
9890 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
9891 } else {
9892 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9893 }
9894
9895 // Emit the number of elements in the offloading arrays.
9896 llvm::Value *PointerNum =
9897 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9898
9899 // Return value of the runtime offloading call.
9900 llvm::Value *Return;
9901
9902 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
9903 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
9904
9905 // Source location for the ident struct
9906 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
9907
9908 // Get tripcount for the target loop-based directive.
9909 llvm::Value *NumIterations =
9910 emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9911
9912 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
9913 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9914 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9915 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9916 DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9917 DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
9918 /*isSigned=*/false);
9919 }
9920
9921 llvm::Value *ZeroArray =
9922 llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3));
9923
9924 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9925 llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait);
9926
9927 llvm::Value *NumTeams3D =
9928 CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0});
9929 llvm::Value *NumThreads3D =
9930 CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0});
9931
9932 // Arguments for the target kernel.
9933 SmallVector<llvm::Value *> KernelArgs{
9934 CGF.Builder.getInt32(/* Version */ 2),
9935 PointerNum,
9936 InputInfo.BasePointersArray.getPointer(),
9937 InputInfo.PointersArray.getPointer(),
9938 InputInfo.SizesArray.getPointer(),
9939 MapTypesArray,
9940 MapNamesArray,
9941 InputInfo.MappersArray.getPointer(),
9942 NumIterations,
9943 Flags,
9944 NumTeams3D,
9945 NumThreads3D,
9946 DynCGroupMem,
9947 };
9948
9949 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9950 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9951
9952 // The target region is an outlined function launched by the runtime
9953 // via calls to __tgt_target_kernel().
9954 //
9955 // Note that on the host and CPU targets, the runtime implementation of
9956 // these calls simply call the outlined function without forking threads.
9957 // The outlined functions themselves have runtime calls to
9958 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
9959 // the compiler in emitTeamsCall() and emitParallelCall().
9960 //
9961 // In contrast, on the NVPTX target, the implementation of
9962 // __tgt_target_teams() launches a GPU kernel with the requested number
9963 // of teams and threads so no additional calls to the runtime are required.
9964 // Check the error code and execute the host version if required.
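    // A sketch of the emitted control flow (editorial illustration, not
    // verbatim IR; the block names match the createBasicBlock calls below):
    //   %rc = __tgt_target_kernel(loc, device_id, num_teams, num_threads,
    //                             host_ptr, &kernel_args)
    //   if (%rc != 0) goto omp_offload.failed;   // host fallback (FallbackGen)
    //   omp_offload.cont: ...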
9965 CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel(
9966 CGF.Builder, AllocaIP, Return, RTLoc, DeviceID, NumTeams, NumThreads,
9967 OutlinedFnID, KernelArgs));
9968
9969 llvm::BasicBlock *OffloadFailedBlock =
9970 CGF.createBasicBlock("omp_offload.failed");
9971 llvm::BasicBlock *OffloadContBlock =
9972 CGF.createBasicBlock("omp_offload.cont");
9973 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
9974 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
9975
9976 CGF.EmitBlock(OffloadFailedBlock);
9977 FallbackGen(CGF);
9978
9979 CGF.EmitBranch(OffloadContBlock);
9980
9981 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
9982 };
9983
9984 // Notify that the host version must be executed.
9985 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
9986 FallbackGen(CGF);
9987 };
9988
9989 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
9990 &MapNamesArray, &CapturedVars, RequiresOuterTask,
9991 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
9992 // Fill up the arrays with all the captured variables.
9993 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9994
9995 // Get mappable expression information.
9996 MappableExprsHandler MEHandler(D, CGF);
9997 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9998 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9999
10000 auto RI = CS.getCapturedRecordDecl()->field_begin();
10001 auto *CV = CapturedVars.begin();
10002 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10003 CE = CS.capture_end();
10004 CI != CE; ++CI, ++RI, ++CV) {
10005 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10006 MappableExprsHandler::StructRangeInfoTy PartialStruct;
10007
10008 // VLA sizes are passed to the outlined region by copy and do not have map
10009 // information associated.
10010 if (CI->capturesVariableArrayType()) {
10011 CurInfo.Exprs.push_back(nullptr);
10012 CurInfo.BasePointers.push_back(*CV);
10013 CurInfo.Pointers.push_back(*CV);
10014 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10015 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10016 // Copy to the device as an argument. No need to retrieve it.
10017 CurInfo.Types.push_back(
10018 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
10019 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
10020 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
10021 CurInfo.Mappers.push_back(nullptr);
10022 } else {
10023 // If we have any information in the map clause, we use it; otherwise we
10024 // just do a default mapping.
10025 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10026 if (!CI->capturesThis())
10027 MappedVarSet.insert(CI->getCapturedVar());
10028 else
10029 MappedVarSet.insert(nullptr);
10030 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10031 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10032 // Generate correct mapping for variables captured by reference in
10033 // lambdas.
10034 if (CI->capturesVariable())
10035 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10036 CurInfo, LambdaPointers);
10037 }
10038 // We expect to have at least one element of information for this capture.
10039 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10040 "Non-existing map pointer for capture!");
10041 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10042 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10043 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10044 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10045 "Inconsistent map information sizes!");
10046
10047 // If there is an entry in PartialStruct it means we have a struct with
10048 // individual members mapped. Emit an extra combined entry.
10049 if (PartialStruct.Base.isValid()) {
10050 CombinedInfo.append(PartialStruct.PreliminaryMapData);
10051 MEHandler.emitCombinedEntry(
10052 CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
10053 nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty());
10054 }
10055
10056 // We need to append the results of this capture to what we already have.
10057 CombinedInfo.append(CurInfo);
10058 }
10059 // Adjust MEMBER_OF flags for the lambda captures.
10060 MEHandler.adjustMemberOfForLambdaCaptures(
10061 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10062 CombinedInfo.Types);
10063 // Map any list items in a map clause that were not captured because they
10064 // weren't referenced within the construct.
10065 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10066
10067 CGOpenMPRuntime::TargetDataInfo Info;
10068 // Fill up the arrays and create the arguments.
10069 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10070 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10071 llvm::codegenoptions::NoDebugInfo;
10072 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10073 EmitDebug,
10074 /*ForEndCall=*/false);
10075
10076 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10077 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10078 CGF.VoidPtrTy, CGM.getPointerAlign());
10079 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10080 CGM.getPointerAlign());
10081 InputInfo.SizesArray =
10082 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10083 InputInfo.MappersArray =
10084 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10085 MapTypesArray = Info.RTArgs.MapTypesArray;
10086 MapNamesArray = Info.RTArgs.MapNamesArray;
10087 if (RequiresOuterTask)
10088 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10089 else
10090 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10091 };
10092
10093 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10094 CodeGenFunction &CGF, PrePostActionTy &) {
10095 if (RequiresOuterTask) {
10096 CodeGenFunction::OMPTargetDataInfo InputInfo;
10097 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10098 } else {
10099 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10100 }
10101 };
10102
10103 // If we have a target function ID, it means that we need to support
10104 // offloading; otherwise, just execute on the host. We need to execute on the
10105 // host regardless of the conditional in the if clause if, e.g., the user does
10106 // not specify target triples.
10107 if (OutlinedFnID) {
10108 if (IfCond) {
10109 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10110 } else {
10111 RegionCodeGenTy ThenRCG(TargetThenGen);
10112 ThenRCG(CGF);
10113 }
10114 } else {
10115 RegionCodeGenTy ElseRCG(TargetElseGen);
10116 ElseRCG(CGF);
10117 }
10118}
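// For illustration only (hypothetical user code), a directive that exercises
// the offload-then-fallback logic above:
//   #pragma omp target if(use_gpu) device(dev) nowait map(tofrom: a[0:n])
//   for (int i = 0; i < n; ++i) a[i] += 1;
// The nowait clause makes RequiresOuterTask true, and when the if(use_gpu)
// condition evaluates to false at run time, the TargetElseGen path executes
// the host fallback.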
10119
10120void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10121 StringRef ParentName) {
10122 if (!S)
10123 return;
10124
10125 // Codegen OMP target directives that offload compute to the device.
10126 bool RequiresDeviceCodegen =
10127 isa<OMPExecutableDirective>(S) &&
10128 isOpenMPTargetExecutionDirective(
10129 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10130
10131 if (RequiresDeviceCodegen) {
10132 const auto &E = *cast<OMPExecutableDirective>(S);
10133 auto EntryInfo =
10134 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), ParentName);
10135
10136 // Is this a target region that should not be emitted as an entry point? If
10137 // so, just signal that we are done with this target region.
10138 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
10139 return;
10140
10141 switch (E.getDirectiveKind()) {
10142 case OMPD_target:
10143 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10144 cast<OMPTargetDirective>(E));
10145 break;
10146 case OMPD_target_parallel:
10147 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10148 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10149 break;
10150 case OMPD_target_teams:
10151 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10152 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10153 break;
10154 case OMPD_target_teams_distribute:
10155 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10156 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10157 break;
10158 case OMPD_target_teams_distribute_simd:
10159 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10160 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10161 break;
10162 case OMPD_target_parallel_for:
10163 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10164 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10165 break;
10166 case OMPD_target_parallel_for_simd:
10167 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10168 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10169 break;
10170 case OMPD_target_simd:
10171 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10172 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10173 break;
10174 case OMPD_target_teams_distribute_parallel_for:
10175 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10176 CGM, ParentName,
10177 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10178 break;
10179 case OMPD_target_teams_distribute_parallel_for_simd:
10180 CodeGenFunction::
10181 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10182 CGM, ParentName,
10183 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10184 break;
10185 case OMPD_parallel:
10186 case OMPD_for:
10187 case OMPD_parallel_for:
10188 case OMPD_parallel_master:
10189 case OMPD_parallel_sections:
10190 case OMPD_for_simd:
10191 case OMPD_parallel_for_simd:
10192 case OMPD_cancel:
10193 case OMPD_cancellation_point:
10194 case OMPD_ordered:
10195 case OMPD_threadprivate:
10196 case OMPD_allocate:
10197 case OMPD_task:
10198 case OMPD_simd:
10199 case OMPD_tile:
10200 case OMPD_unroll:
10201 case OMPD_sections:
10202 case OMPD_section:
10203 case OMPD_single:
10204 case OMPD_master:
10205 case OMPD_critical:
10206 case OMPD_taskyield:
10207 case OMPD_barrier:
10208 case OMPD_taskwait:
10209 case OMPD_taskgroup:
10210 case OMPD_atomic:
10211 case OMPD_flush:
10212 case OMPD_depobj:
10213 case OMPD_scan:
10214 case OMPD_teams:
10215 case OMPD_target_data:
10216 case OMPD_target_exit_data:
10217 case OMPD_target_enter_data:
10218 case OMPD_distribute:
10219 case OMPD_distribute_simd:
10220 case OMPD_distribute_parallel_for:
10221 case OMPD_distribute_parallel_for_simd:
10222 case OMPD_teams_distribute:
10223 case OMPD_teams_distribute_simd:
10224 case OMPD_teams_distribute_parallel_for:
10225 case OMPD_teams_distribute_parallel_for_simd:
10226 case OMPD_target_update:
10227 case OMPD_declare_simd:
10228 case OMPD_declare_variant:
10229 case OMPD_begin_declare_variant:
10230 case OMPD_end_declare_variant:
10231 case OMPD_declare_target:
10232 case OMPD_end_declare_target:
10233 case OMPD_declare_reduction:
10234 case OMPD_declare_mapper:
10235 case OMPD_taskloop:
10236 case OMPD_taskloop_simd:
10237 case OMPD_master_taskloop:
10238 case OMPD_master_taskloop_simd:
10239 case OMPD_parallel_master_taskloop:
10240 case OMPD_parallel_master_taskloop_simd:
10241 case OMPD_requires:
10242 case OMPD_metadirective:
10243 case OMPD_unknown:
10244 default:
10245 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10246 }
10247 return;
10248 }
10249
10250 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10251 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10252 return;
10253
10254 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10255 return;
10256 }
10257
10258 // If this is a lambda function, look into its body.
10259 if (const auto *L = dyn_cast<LambdaExpr>(S))
10260 S = L->getBody();
10261
10262 // Keep looking for target regions recursively.
10263 for (const Stmt *II : S->children())
10264 scanForTargetRegionsFunctions(II, ParentName);
10265}
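// For illustration only (hypothetical user code): the recursive scan above
// also finds target regions reachable through lambda bodies, e.g.
//   auto f = [&]() {
//   #pragma omp target
//     work();
//   };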
10266
10267static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10268 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10269 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10270 if (!DevTy)
10271 return false;
10272 // Do not emit device_type(nohost) functions for the host.
10273 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10274 return true;
10275 // Do not emit device_type(host) functions for the device.
10276 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10277 return true;
10278 return false;
10279}
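// For illustration only (hypothetical user code) of the device_type filtering
// above:
//   #pragma omp begin declare target device_type(nohost)
//   void gpu_only();                  // not emitted when compiling for the host
//   #pragma omp end declare target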
10280
10281bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10282 // If emitting code for the host, we do not process FD here. Instead we do
10283 // the normal code generation.
10284 if (!CGM.getLangOpts().OpenMPIsDevice) {
10285 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10286 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10287 CGM.getLangOpts().OpenMPIsDevice))
10288 return true;
10289 return false;
10290 }
10291
10292 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10293 // Try to detect target regions in the function.
10294 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10295 StringRef Name = CGM.getMangledName(GD);
10296 scanForTargetRegionsFunctions(FD->getBody(), Name);
10297 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10298 CGM.getLangOpts().OpenMPIsDevice))
10299 return true;
10300 }
10301
10302 // Do not emit the function if it is not marked as declare target.
10303 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10304 AlreadyEmittedTargetDecls.count(VD) == 0;
10305}
10306
10307bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10308 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10309 CGM.getLangOpts().OpenMPIsDevice))
10310 return true;
10311
10312 if (!CGM.getLangOpts().OpenMPIsDevice)
10313 return false;
10314
10315 // Check if there are Ctors/Dtors in this declaration and look for target
10316 // regions in it. We use the complete variant to produce the kernel name
10317 // mangling.
10318 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10319 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10320 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10321 StringRef ParentName =
10322 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10323 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10324 }
10325 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10326 StringRef ParentName =
10327 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10328 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10329 }
10330 }
10331
10332 // Do not emit the variable if it is not marked as declare target.
10333 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10334 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10335 cast<VarDecl>(GD.getDecl()));
10336 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10337 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10338 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10339 HasRequiresUnifiedSharedMemory)) {
10340 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10341 return true;
10342 }
10343 return false;
10344}
10345
10346void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10347 llvm::Constant *Addr) {
10348 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10349 !CGM.getLangOpts().OpenMPIsDevice)
10350 return;
10351
10352 // If we have host/nohost variables, they do not need to be registered.
10353 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10354 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10355 if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
10356 return;
10357
10358 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10359 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10360 if (!Res) {
10361 if (CGM.getLangOpts().OpenMPIsDevice) {
10362 // Register non-target variables being emitted in device code (debug info
10363 // may cause this).
10364 StringRef VarName = CGM.getMangledName(VD);
10365 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10366 }
10367 return;
10368 }
10369 // Register declare target variables.
10370 llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10371 StringRef VarName;
10372 int64_t VarSize;
10373 llvm::GlobalValue::LinkageTypes Linkage;
10374
10375 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10376 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10377 !HasRequiresUnifiedSharedMemory) {
10378 Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10379 VarName = CGM.getMangledName(VD);
10380 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10381 VarSize =
10382 CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
10383 assert(VarSize != 0 && "Expected non-zero size of the variable");
10384 } else {
10385 VarSize = 0;
10386 }
10387 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10388 // Temporary solution to prevent optimizations of the internal variables.
10389 if (CGM.getLangOpts().OpenMPIsDevice &&
10390 (!VD->isExternallyVisible() ||
10391 Linkage == llvm::GlobalValue::LinkOnceODRLinkage)) {
10392 // Do not create a "ref-variable" if the original is not also available
10393 // on the host.
10394 if (!OMPBuilder.OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10395 return;
10396 std::string RefName = getName({VarName, "ref"});
10397 if (!CGM.GetGlobalValue(RefName)) {
10398 llvm::Constant *AddrRef =
10399 OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName);
10400 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10401 GVAddrRef->setConstant(/*Val=*/true);
10402 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10403 GVAddrRef->setInitializer(Addr);
10404 CGM.addCompilerUsedGlobal(GVAddrRef);
10405 }
10406 }
10407 } else {
10408 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10409 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10410 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10411 HasRequiresUnifiedSharedMemory)) &&
10412 "Declare target attribute must link or to with unified memory.");
10413 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10414 Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10415 else
10416 Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10417
10418 if (CGM.getLangOpts().OpenMPIsDevice) {
10419 VarName = Addr->getName();
10420 Addr = nullptr;
10421 } else {
10422 VarName = getAddrOfDeclareTargetVar(VD).getName();
10423 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10424 }
10425 VarSize = CGM.getPointerSize().getQuantity();
10426 Linkage = llvm::GlobalValue::WeakAnyLinkage;
10427 }
10428
10429 OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
10430 VarName, Addr, VarSize, Flags, Linkage);
10431}
10432
10433bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10434 if (isa<FunctionDecl>(GD.getDecl()) ||
10435 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10436 return emitTargetFunctions(GD);
10437
10438 return emitTargetGlobalVariable(GD);
10439}
10440
10441void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10442 for (const VarDecl *VD : DeferredGlobalVariables) {
10443 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10444 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10445 if (!Res)
10446 continue;
10447 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10448 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10449 !HasRequiresUnifiedSharedMemory) {
10450 CGM.EmitGlobal(VD);
10451 } else {
10452 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10453 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10454 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10455 HasRequiresUnifiedSharedMemory)) &&
10456 "Expected link clause or to clause with unified memory.");
10457 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10458 }
10459 }
10460}
10461
10462void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10463 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10464 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10465 " Expected target-based directive.");
10466}
10467
10468void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10469 for (const OMPClause *Clause : D->clauselists()) {
10470 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10471 HasRequiresUnifiedSharedMemory = true;
10472 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10473 } else if (const auto *AC =
10474 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10475 switch (AC->getAtomicDefaultMemOrderKind()) {
10476 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10477 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10478 break;
10479 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10480 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10481 break;
10482 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10483 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10484 break;
10485 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10486 break;
10487 }
10488 }
10489 }
10490}
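// For illustration only (hypothetical user code) of the clauses handled above:
//   #pragma omp requires unified_shared_memory
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// The first sets HasRequiresUnifiedSharedMemory; the second selects
// llvm::AtomicOrdering::SequentiallyConsistent as the default ordering.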
10491
10492llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10493 return RequiresAtomicOrdering;
10494}
10495
10496bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10497 LangAS &AS) {
10498 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10499 return false;
10500 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10501 switch(A->getAllocatorType()) {
10502 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10503 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10504 // Not supported, fallback to the default mem space.
10505 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10506 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10507 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10508 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10509 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10510 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10511 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10512 AS = LangAS::Default;
10513 return true;
10514 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10515 llvm_unreachable("Expected predefined allocator for the variables with the "
10516 "static storage.");
10517 }
10518 return false;
10519}
10520
10521bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10522 return HasRequiresUnifiedSharedMemory;
10523}
10524
10525CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10526 CodeGenModule &CGM)
10527 : CGM(CGM) {
10528 if (CGM.getLangOpts().OpenMPIsDevice) {
10529 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10530 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10531 }
10532}
10533
10534CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10535 if (CGM.getLangOpts().OpenMPIsDevice)
10536 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10537}
10538
10539bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10540 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10541 return true;
10542
10543 const auto *D = cast<FunctionDecl>(GD.getDecl());
10544 // Do not emit the function if it is marked as declare target, as it was
10545 // already emitted.
10546 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10547 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10548 if (auto *F = dyn_cast_or_null<llvm::Function>(
10549 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10550 return !F->isDeclaration();
10551 return false;
10552 }
10553 return true;
10554 }
10555
10556 return !AlreadyEmittedTargetDecls.insert(D).second;
10557}
10558
10559llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10560 // If we don't have entries or if we are emitting code for the device, we
10561 // don't need to do anything.
10562 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10563 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10564 (OMPBuilder.OffloadInfoManager.empty() &&
10565 !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
10566 return nullptr;
10567
10568 // Create and register the function that handles the requires directives.
10569 ASTContext &C = CGM.getContext();
10570
10571 llvm::Function *RequiresRegFn;
10572 {
10573 CodeGenFunction CGF(CGM);
10574 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10575 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10576 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10577 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10578 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10579 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10580 // TODO: check for other requires clauses.
10581 // The requires directive takes effect only when a target region is
10582 // present in the compilation unit. Otherwise it is ignored and not
10583 // passed to the runtime. This prevents the runtime from throwing an error
10584 // for mismatched requires clauses across compilation units that don't
10585 // contain at least one target region.
10586 assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
10587 !OMPBuilder.OffloadInfoManager.empty()) &&
10588 "Target or declare target region expected.");
10589 if (HasRequiresUnifiedSharedMemory)
10590 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
10591 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10592 CGM.getModule(), OMPRTL___tgt_register_requires),
10593 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
10594 CGF.FinishFunction();
10595 }
10596 return RequiresRegFn;
10597}
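// Illustrative sketch (not in the original source) of the registration
// function emitted above when the TU contains
//   #pragma omp requires unified_shared_memory
// The function body reduces to a single runtime call:
//   __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY);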
10598
10599void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10600 const OMPExecutableDirective &D,
10601 SourceLocation Loc,
10602 llvm::Function *OutlinedFn,
10603 ArrayRef<llvm::Value *> CapturedVars) {
10604 if (!CGF.HaveInsertPoint())
10605 return;
10606
10607 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10608 CodeGenFunction::RunCleanupsScope Scope(CGF);
10609
10610 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10611 llvm::Value *Args[] = {
10612 RTLoc,
10613 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
10614 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
10615 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10616 RealArgs.append(std::begin(Args), std::end(Args));
10617 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
10618
10619 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10620 CGM.getModule(), OMPRTL___kmpc_fork_teams);
10621 CGF.EmitRuntimeCall(RTLFn, RealArgs);
10622}
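// Example of the emitted call (illustrative): with two captured variables,
// the runtime call above is effectively
//   __kmpc_fork_teams(&loc, 2, (kmpc_micro)outlined_fn, var1, var2);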
10623
10624void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10625 const Expr *NumTeams,
10626 const Expr *ThreadLimit,
10627 SourceLocation Loc) {
10628 if (!CGF.HaveInsertPoint())
10629 return;
10630
10631 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10632
10633 llvm::Value *NumTeamsVal =
10634 NumTeams
10635 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
10636 CGF.CGM.Int32Ty, /* isSigned = */ true)
10637 : CGF.Builder.getInt32(0);
10638
10639 llvm::Value *ThreadLimitVal =
10640 ThreadLimit
10641 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
10642 CGF.CGM.Int32Ty, /* isSigned = */ true)
10643 : CGF.Builder.getInt32(0);
10644
10645 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
10646 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10647 ThreadLimitVal};
10648 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
10649 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
10650 PushNumTeamsArgs);
10651}
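// Example (illustrative): `#pragma omp teams num_teams(4) thread_limit(8)`
// emits __kmpc_push_num_teams(&loc, gtid, 4, 8); an absent clause is passed
// as 0, meaning "unspecified" to the runtime.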
10652
10653void CGOpenMPRuntime::emitTargetDataCalls(
10654 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10655 const Expr *Device, const RegionCodeGenTy &CodeGen,
10656 CGOpenMPRuntime::TargetDataInfo &Info) {
10657 if (!CGF.HaveInsertPoint())
10658 return;
10659
10660 // Action used to replace the default codegen action and turn privatization
10661 // off.
10662 PrePostActionTy NoPrivAction;
10663
10664 // Generate the code for the opening of the data environment. Capture all the
10665 // arguments of the runtime call by reference because they are used in the
10666 // closing of the region.
10667 auto &&BeginThenGen = [this, &D, Device, &Info,
10668 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
10669 // Fill up the arrays with all the mapped variables.
10670 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10671
10672 // Get map clause information.
10673 MappableExprsHandler MEHandler(D, CGF);
10674 MEHandler.generateAllInfo(CombinedInfo);
10675
10676 // Fill up the arrays and create the arguments.
10677 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10678 /*IsNonContiguous=*/true);
10679
10680 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10681 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10682 llvm::codegenoptions::NoDebugInfo;
10683 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10684 EmitDebug);
10685
10686 // Emit device ID if any.
10687 llvm::Value *DeviceID = nullptr;
10688 if (Device) {
10689 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10690 CGF.Int64Ty, /*isSigned=*/true);
10691 } else {
10692 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10693 }
10694
10695 // Emit the number of elements in the offloading arrays.
10696 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10697
10698 // Source location for the ident struct
10699 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10700
10701 llvm::Value *OffloadingArgs[] = {RTLoc,
10702 DeviceID,
10703 PointerNum,
10704 RTArgs.BasePointersArray,
10705 RTArgs.PointersArray,
10706 RTArgs.SizesArray,
10707 RTArgs.MapTypesArray,
10708 RTArgs.MapNamesArray,
10709 RTArgs.MappersArray};
10710 CGF.EmitRuntimeCall(
10711 OMPBuilder.getOrCreateRuntimeFunction(
10712 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
10713 OffloadingArgs);
10714
10715 // If device pointer privatization is required, emit the body of the region
10716 // here. It will have to be duplicated: with and without privatization.
10717 if (!Info.CaptureDeviceAddrMap.empty())
10718 CodeGen(CGF);
10719 };
10720
10721 // Generate code for the closing of the data region.
10722 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
10723 PrePostActionTy &) {
10724 assert(Info.isValid() && "Invalid data environment closing arguments.");
10725
10726 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
10727 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10728 llvm::codegenoptions::NoDebugInfo;
10729 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
10730 EmitDebug,
10731 /*ForEndCall=*/true);
10732
10733 // Emit device ID if any.
10734 llvm::Value *DeviceID = nullptr;
10735 if (Device) {
10736 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10737 CGF.Int64Ty, /*isSigned=*/true);
10738 } else {
10739 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10740 }
10741
10742 // Emit the number of elements in the offloading arrays.
10743 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
10744
10745 // Source location for the ident struct
10746 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10747
10748 llvm::Value *OffloadingArgs[] = {RTLoc,
10749 DeviceID,
10750 PointerNum,
10751 RTArgs.BasePointersArray,
10752 RTArgs.PointersArray,
10753 RTArgs.SizesArray,
10754 RTArgs.MapTypesArray,
10755 RTArgs.MapNamesArray,
10756 RTArgs.MappersArray};
10757 CGF.EmitRuntimeCall(
10758 OMPBuilder.getOrCreateRuntimeFunction(
10759 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
10760 OffloadingArgs);
10761 };
10762
10763 // If we need device pointer privatization, we need to emit the body of the
10764 // region with no privatization in the 'else' branch of the conditional.
10765 // Otherwise, we don't have to do anything.
10766 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
10767 PrePostActionTy &) {
10768 if (!Info.CaptureDeviceAddrMap.empty()) {
10769 CodeGen.setAction(NoPrivAction);
10770 CodeGen(CGF);
10771 }
10772 };
10773
10774 // We don't have to do anything to close the region if the if clause evaluates
10775 // to false.
10776 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
10777
10778 if (IfCond) {
10779 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
10780 } else {
10781 RegionCodeGenTy RCG(BeginThenGen);
10782 RCG(CGF);
10783 }
10784
10785 // If we don't require privatization of device pointers, we emit the body in
10786 // between the runtime calls. This avoids duplicating the body code.
10787 if (Info.CaptureDeviceAddrMap.empty()) {
10788 CodeGen.setAction(NoPrivAction);
10789 CodeGen(CGF);
10790 }
10791
10792 if (IfCond) {
10793 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
10794 } else {
10795 RegionCodeGenTy RCG(EndThenGen);
10796 RCG(CGF);
10797 }
10798}
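// Overall shape of the code emitted above (illustrative sketch):
//   __tgt_target_data_begin_mapper(&loc, device, n, base_ptrs, ptrs, sizes,
//                                  map_types, map_names, mappers);
//   ... body of the target data region ...
//   __tgt_target_data_end_mapper(&loc, device, n, base_ptrs, ptrs, sizes,
//                                map_types, map_names, mappers);
// with both calls guarded by the if clause condition when one is present.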
10799
10800void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10801 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10802 const Expr *Device) {
10803 if (!CGF.HaveInsertPoint())
10804 return;
10805
10806 assert((isa<OMPTargetEnterDataDirective>(D) ||
10807 isa<OMPTargetExitDataDirective>(D) ||
10808 isa<OMPTargetUpdateDirective>(D)) &&
10809 "Expecting either target enter, exit data, or update directives.");
10810
10811 CodeGenFunction::OMPTargetDataInfo InputInfo;
10812 llvm::Value *MapTypesArray = nullptr;
10813 llvm::Value *MapNamesArray = nullptr;
10814 // Generate the code for the opening of the data environment.
10815 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10816 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10817 // Emit device ID if any.
10818 llvm::Value *DeviceID = nullptr;
10819 if (Device) {
10820 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
10821 CGF.Int64Ty, /*isSigned=*/true);
10822 } else {
10823 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10824 }
10825
10826 // Emit the number of elements in the offloading arrays.
10827 llvm::Constant *PointerNum =
10828 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10829
10830 // Source location for the ident struct
10831 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10832
10833 llvm::Value *OffloadingArgs[] = {RTLoc,
10834 DeviceID,
10835 PointerNum,
10836 InputInfo.BasePointersArray.getPointer(),
10837 InputInfo.PointersArray.getPointer(),
10838 InputInfo.SizesArray.getPointer(),
10839 MapTypesArray,
10840 MapNamesArray,
10841 InputInfo.MappersArray.getPointer()};
10842
10843 // Select the right runtime function call for each standalone
10844 // directive.
10845 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10846 RuntimeFunction RTLFn;
10847 switch (D.getDirectiveKind()) {
10848 case OMPD_target_enter_data:
10849 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10850 : OMPRTL___tgt_target_data_begin_mapper;
10851 break;
10852 case OMPD_target_exit_data:
10853 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10854 : OMPRTL___tgt_target_data_end_mapper;
10855 break;
10856 case OMPD_target_update:
10857 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10858 : OMPRTL___tgt_target_data_update_mapper;
10859 break;
10860 case OMPD_parallel:
10861 case OMPD_for:
10862 case OMPD_parallel_for:
10863 case OMPD_parallel_master:
10864 case OMPD_parallel_sections:
10865 case OMPD_for_simd:
10866 case OMPD_parallel_for_simd:
10867 case OMPD_cancel:
10868 case OMPD_cancellation_point:
10869 case OMPD_ordered:
10870 case OMPD_threadprivate:
10871 case OMPD_allocate:
10872 case OMPD_task:
10873 case OMPD_simd:
10874 case OMPD_tile:
10875 case OMPD_unroll:
10876 case OMPD_sections:
10877 case OMPD_section:
10878 case OMPD_single:
10879 case OMPD_master:
10880 case OMPD_critical:
10881 case OMPD_taskyield:
10882 case OMPD_barrier:
10883 case OMPD_taskwait:
10884 case OMPD_taskgroup:
10885 case OMPD_atomic:
10886 case OMPD_flush:
10887 case OMPD_depobj:
10888 case OMPD_scan:
10889 case OMPD_teams:
10890 case OMPD_target_data:
10891 case OMPD_distribute:
10892 case OMPD_distribute_simd:
10893 case OMPD_distribute_parallel_for:
10894 case OMPD_distribute_parallel_for_simd:
10895 case OMPD_teams_distribute:
10896 case OMPD_teams_distribute_simd:
10897 case OMPD_teams_distribute_parallel_for:
10898 case OMPD_teams_distribute_parallel_for_simd:
10899 case OMPD_declare_simd:
10900 case OMPD_declare_variant:
10901 case OMPD_begin_declare_variant:
10902 case OMPD_end_declare_variant:
10903 case OMPD_declare_target:
10904 case OMPD_end_declare_target:
10905 case OMPD_declare_reduction:
10906 case OMPD_declare_mapper:
10907 case OMPD_taskloop:
10908 case OMPD_taskloop_simd:
10909 case OMPD_master_taskloop:
10910 case OMPD_master_taskloop_simd:
10911 case OMPD_parallel_master_taskloop:
10912 case OMPD_parallel_master_taskloop_simd:
10913 case OMPD_target:
10914 case OMPD_target_simd:
10915 case OMPD_target_teams_distribute:
10916 case OMPD_target_teams_distribute_simd:
10917 case OMPD_target_teams_distribute_parallel_for:
10918 case OMPD_target_teams_distribute_parallel_for_simd:
10919 case OMPD_target_teams:
10920 case OMPD_target_parallel:
10921 case OMPD_target_parallel_for:
10922 case OMPD_target_parallel_for_simd:
10923 case OMPD_requires:
10924 case OMPD_metadirective:
10925 case OMPD_unknown:
10926 default:
10927 llvm_unreachable("Unexpected standalone target data directive.")::llvm::llvm_unreachable_internal("Unexpected standalone target data directive."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10927)
;
10928 break;
10929 }
10930 CGF.EmitRuntimeCall(
10931 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
10932 OffloadingArgs);
10933 };
10934
10935 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10936 &MapNamesArray](CodeGenFunction &CGF,
10937 PrePostActionTy &) {
10938 // Fill up the arrays with all the mapped variables.
10939 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10940
10941 // Get map clause information.
10942 MappableExprsHandler MEHandler(D, CGF);
10943 MEHandler.generateAllInfo(CombinedInfo);
10944
10945 CGOpenMPRuntime::TargetDataInfo Info;
10946 // Fill up the arrays and create the arguments.
10947 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
10948 /*IsNonContiguous=*/true);
10949 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10950 D.hasClausesOfKind<OMPNowaitClause>();
10951 bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
10952 llvm::codegenoptions::NoDebugInfo;
10953 OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
10954 EmitDebug,
10955 /*ForEndCall=*/false);
10956 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10957 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10958 CGF.VoidPtrTy, CGM.getPointerAlign());
10959 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10960 CGM.getPointerAlign());
10961 InputInfo.SizesArray =
10962 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10963 InputInfo.MappersArray =
10964 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10965 MapTypesArray = Info.RTArgs.MapTypesArray;
10966 MapNamesArray = Info.RTArgs.MapNamesArray;
10967 if (RequiresOuterTask)
10968 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10969 else
10970 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10971 };
10972
10973 if (IfCond) {
10974 emitIfClause(CGF, IfCond, TargetThenGen,
10975 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10976 } else {
10977 RegionCodeGenTy ThenRCG(TargetThenGen);
10978 ThenRCG(CGF);
10979 }
10980}
10981
10982namespace {
10983 /// Kind of parameter in a function with 'declare simd' directive.
10984enum ParamKindTy {
10985 Linear,
10986 LinearRef,
10987 LinearUVal,
10988 LinearVal,
10989 Uniform,
10990 Vector,
10991};
10992/// Attribute set of the parameter.
10993struct ParamAttrTy {
10994 ParamKindTy Kind = Vector;
10995 llvm::APSInt StrideOrArg;
10996 llvm::APSInt Alignment;
10997 bool HasVarStride = false;
10998};
10999} // namespace
11000
11001static unsigned evaluateCDTSize(const FunctionDecl *FD,
11002 ArrayRef<ParamAttrTy> ParamAttrs) {
11003 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11004 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11005 // of that clause. The VLEN value must be a power of 2.
11006 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11007 // is used to compute the vector length.
11008 // CDT is defined in the following order:
11009 // a) For a non-void function, the CDT is the return type.
11010 // b) If the function has any non-uniform, non-linear parameters, then the
11011 // CDT is the type of the first such parameter.
11012 // c) If the CDT determined by a) or b) above is a struct, union, or class
11013 // type which is passed by value (except for the type that maps to the
11014 // built-in complex data type), the characteristic data type is int.
11015 // d) If none of the above three cases is applicable, the CDT is int.
11016 // The VLEN is then determined based on the CDT and the size of the vector
11017 // register of the ISA for which the current vector version is generated. The
11018 // VLEN is computed using the formula below:
11019 // VLEN = sizeof(vector_register) / sizeof(CDT),
11020 // where the vector register size is specified in section 3.2.1 ("Registers
11021 // and the Stack Frame") of the original AMD64 ABI document.
11022 QualType RetType = FD->getReturnType();
11023 if (RetType.isNull())
11024 return 0;
11025 ASTContext &C = FD->getASTContext();
11026 QualType CDT;
11027 if (!RetType.isNull() && !RetType->isVoidType()) {
11028 CDT = RetType;
11029 } else {
11030 unsigned Offset = 0;
11031 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11032 if (ParamAttrs[Offset].Kind == Vector)
11033 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11034 ++Offset;
11035 }
11036 if (CDT.isNull()) {
11037 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11038 if (ParamAttrs[I + Offset].Kind == Vector) {
11039 CDT = FD->getParamDecl(I)->getType();
11040 break;
11041 }
11042 }
11043 }
11044 }
11045 if (CDT.isNull())
11046 CDT = C.IntTy;
11047 CDT = CDT->getCanonicalTypeUnqualified();
11048 if (CDT->isRecordType() || CDT->isUnionType())
11049 CDT = C.IntTy;
11050 return C.getTypeSize(CDT);
11051}
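// Worked example (illustrative): for `double foo(double x, double y)` the
// CDT is the return type `double`, so this returns 64; with 256-bit vector
// registers (AVX/AVX2) the formula above gives VLEN = 256 / 64 = 4.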
11052
11053/// Mangle the parameter part of the vector function name according to
11054 /// the parameters' OpenMP classification. The mangling scheme is defined in
11055/// section 4.5 of the AAVFABI(2021Q1).
11056static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11057 SmallString<256> Buffer;
11058 llvm::raw_svector_ostream Out(Buffer);
11059 for (const auto &ParamAttr : ParamAttrs) {
11060 switch (ParamAttr.Kind) {
11061 case Linear:
11062 Out << 'l';
11063 break;
11064 case LinearRef:
11065 Out << 'R';
11066 break;
11067 case LinearUVal:
11068 Out << 'U';
11069 break;
11070 case LinearVal:
11071 Out << 'L';
11072 break;
11073 case Uniform:
11074 Out << 'u';
11075 break;
11076 case Vector:
11077 Out << 'v';
11078 break;
11079 }
11080 if (ParamAttr.HasVarStride)
11081 Out << "s" << ParamAttr.StrideOrArg;
11082 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11083 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11084 // Don't print the step value if it is not present or if it is
11085 // equal to 1.
11086 if (ParamAttr.StrideOrArg < 0)
11087 Out << 'n' << -ParamAttr.StrideOrArg;
11088 else if (ParamAttr.StrideOrArg != 1)
11089 Out << ParamAttr.StrideOrArg;
11090 }
11091
11092 if (!!ParamAttr.Alignment)
11093 Out << 'a' << ParamAttr.Alignment;
11094 }
11095
11096 return std::string(Out.str());
11097}
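// Example (illustrative): parameters classified as {Linear with stride 4,
// Uniform, Vector with 32-byte alignment} mangle to "l4uva32": 'l' plus the
// non-unit stride, then 'u', then 'v' followed by "a32".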
11098
11099static void
11100emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11101 const llvm::APSInt &VLENVal,
11102 ArrayRef<ParamAttrTy> ParamAttrs,
11103 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11104 struct ISADataTy {
11105 char ISA;
11106 unsigned VecRegSize;
11107 };
11108 ISADataTy ISAData[] = {
11109 {
11110 'b', 128
11111 }, // SSE
11112 {
11113 'c', 256
11114 }, // AVX
11115 {
11116 'd', 256
11117 }, // AVX2
11118 {
11119 'e', 512
11120 }, // AVX512
11121 };
11122 llvm::SmallVector<char, 2> Masked;
11123 switch (State) {
11124 case OMPDeclareSimdDeclAttr::BS_Undefined:
11125 Masked.push_back('N');
11126 Masked.push_back('M');
11127 break;
11128 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11129 Masked.push_back('N');
11130 break;
11131 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11132 Masked.push_back('M');
11133 break;
11134 }
11135 for (char Mask : Masked) {
11136 for (const ISADataTy &Data : ISAData) {
11137 SmallString<256> Buffer;
11138 llvm::raw_svector_ostream Out(Buffer);
11139 Out << "_ZGV" << Data.ISA << Mask;
11140 if (!VLENVal) {
11141 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11142 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11143 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11144 } else {
11145 Out << VLENVal;
11146 }
11147 Out << mangleVectorParameters(ParamAttrs);
11148 Out << '_' << Fn->getName();
11149 Fn->addFnAttr(Out.str());
11150 }
11151 }
11152}
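// Example (illustrative): a notinbranch `declare simd` on
// `double foo(double, double)` with no simdlen has a CDT size of 64, so the
// loop above adds "_ZGVbN2vv_foo" (SSE, 128/64 = 2 lanes), "_ZGVcN4vv_foo"
// (AVX), "_ZGVdN4vv_foo" (AVX2) and "_ZGVeN8vv_foo" (AVX512).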
11153
11154 // These are the functions needed to mangle the names of the
11155 // vector functions generated by the compiler, according to the rules
11156// defined in the "Vector Function ABI specifications for AArch64",
11157// available at
11158// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11159
11160/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11161static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11162 QT = QT.getCanonicalType();
11163
11164 if (QT->isVoidType())
11165 return false;
11166
11167 if (Kind == ParamKindTy::Uniform)
11168 return false;
11169
11170 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
11171 return false;
11172
11173 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11174 !QT->isReferenceType())
11175 return false;
11176
11177 return true;
11178}
11179
11180/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11181static bool getAArch64PBV(QualType QT, ASTContext &C) {
11182 QT = QT.getCanonicalType();
11183 unsigned Size = C.getTypeSize(QT);
11184
11185 // Only scalars and complex types at most 16 bytes wide set PBV to true.
11186 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11187 return false;
11188
11189 if (QT->isFloatingType())
11190 return true;
11191
11192 if (QT->isIntegerType())
11193 return true;
11194
11195 if (QT->isPointerType())
11196 return true;
11197
11198 // TODO: Add support for complex types (section 3.1.2, item 2).
11199
11200 return false;
11201}
11202
11203/// Computes the lane size (LS) of a return type or of an input parameter,
11204/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11205/// TODO: Add support for references, section 3.2.1, item 1.
11206static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11207 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11208 QualType PTy = QT.getCanonicalType()->getPointeeType();
11209 if (getAArch64PBV(PTy, C))
11210 return C.getTypeSize(PTy);
11211 }
11212 if (getAArch64PBV(QT, C))
11213 return C.getTypeSize(QT);
11214
11215 return C.getTypeSize(C.getUIntPtrType());
11216}
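// Example (illustrative): a uniform `double *p` parameter does not map to a
// vector (MTV is false), so its lane size comes from the pointee: LS = 64.
// A vector `double` parameter is pass-by-value and likewise has LS = 64.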
11217
11218// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11219// signature of the scalar function, as defined in 3.2.2 of the
11220// AAVFABI.
11221static std::tuple<unsigned, unsigned, bool>
11222getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11223 QualType RetType = FD->getReturnType().getCanonicalType();
11224
11225 ASTContext &C = FD->getASTContext();
11226
11227 bool OutputBecomesInput = false;
11228
11229 llvm::SmallVector<unsigned, 8> Sizes;
11230 if (!RetType->isVoidType()) {
11231 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11232 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11233 OutputBecomesInput = true;
11234 }
11235 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11236 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11237 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11238 }
11239
11240 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11241 // The LS of a function parameter / return value can only be a power
11242 // of 2, starting from 8 bits, up to 128.
11243 assert(llvm::all_of(Sizes,
11244 [](unsigned Size) {
11245 return Size == 8 || Size == 16 || Size == 32 ||
11246 Size == 64 || Size == 128;
11247 }) &&
11248 "Invalid size");
11249
11250 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11251 *std::max_element(std::begin(Sizes), std::end(Sizes)),
11252 OutputBecomesInput);
11253}
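// Worked example (illustrative): for `int32_t f(int64_t a, int16_t b)` with
// both parameters classified as Vector, Sizes is {32, 64, 16}, so this
// returns NDS = 16, WDS = 64, and OutputBecomesInput = false (the int32_t
// return value is pass-by-value).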
11254
11255// Function used to add the attribute. The parameter `VLEN` is
11256// templated to allow the use of "x" when targeting scalable functions
11257// for SVE.
11258template <typename T>
11259static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11260 char ISA, StringRef ParSeq,
11261 StringRef MangledName, bool OutputBecomesInput,
11262 llvm::Function *Fn) {
11263 SmallString<256> Buffer;
11264 llvm::raw_svector_ostream Out(Buffer);
11265 Out << Prefix << ISA << LMask << VLEN;
11266 if (OutputBecomesInput)
11267 Out << "v";
11268 Out << ParSeq << "_" << MangledName;
11269 Fn->addFnAttr(Out.str());
11270}
11271
11272// Helper function to generate the Advanced SIMD names depending on
11273// the value of the NDS when simdlen is not present.
11274static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11275 StringRef Prefix, char ISA,
11276 StringRef ParSeq, StringRef MangledName,
11277 bool OutputBecomesInput,
11278 llvm::Function *Fn) {
11279 switch (NDS) {
11280 case 8:
11281 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11282 OutputBecomesInput, Fn);
11283 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11284 OutputBecomesInput, Fn);
11285 break;
11286 case 16:
11287 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11288 OutputBecomesInput, Fn);
11289 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11290 OutputBecomesInput, Fn);
11291 break;
11292 case 32:
11293 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11294 OutputBecomesInput, Fn);
11295 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11296 OutputBecomesInput, Fn);
11297 break;
11298 case 64:
11299 case 128:
11300 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11301 OutputBecomesInput, Fn);
11302 break;
11303 default:
11304 llvm_unreachable("Scalar type is too wide.")::llvm::llvm_unreachable_internal("Scalar type is too wide.",
"clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11304)
;
11305 }
11306}
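// Example (illustrative): with NDS = 32 and mask "N", the switch above adds
// both a 2-lane and a 4-lane Advanced SIMD variant, e.g. "_ZGVnN2v_foo" and
// "_ZGVnN4v_foo" for a function with a single vector parameter.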
11307
11308/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11309static void emitAArch64DeclareSimdFunction(
11310 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11311 ArrayRef<ParamAttrTy> ParamAttrs,
11312 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11313 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11314
11315 // Get basic data for building the vector signature.
11316 const auto Data = getNDSWDS(FD, ParamAttrs);
11317 const unsigned NDS = std::get<0>(Data);
11318 const unsigned WDS = std::get<1>(Data);
11319 const bool OutputBecomesInput = std::get<2>(Data);
11320
11321 // Check the values provided via `simdlen` by the user.
11322 // 1. A `simdlen(1)` doesn't produce vector signatures.
11323 if (UserVLEN == 1) {
11324 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11325 DiagnosticsEngine::Warning,
11326 "The clause simdlen(1) has no effect when targeting aarch64.");
11327 CGM.getDiags().Report(SLoc, DiagID);
11328 return;
11329 }
11330
11331 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11332 // Advanced SIMD output.
11333 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11334 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11335 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11336 "power of 2 when targeting Advanced SIMD.");
11337 CGM.getDiags().Report(SLoc, DiagID);
11338 return;
11339 }
11340
11341 // 3. Section 3.4.1: SVE fixed lengths must obey the architectural
11342 // limits.
11343 if (ISA == 's' && UserVLEN != 0) {
11344 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11345 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11346 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11347 "lanes in the architectural constraints "
11348 "for SVE (min is 128-bit, max is "
11349 "2048-bit, by steps of 128-bit)");
11350 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11351 return;
11352 }
11353 }
11354
11355 // Sort out parameter sequence.
11356 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11357 StringRef Prefix = "_ZGV";
11358 // Generate simdlen from user input (if any).
11359 if (UserVLEN) {
11360 if (ISA == 's') {
11361 // SVE generates only a masked function.
11362 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11363 OutputBecomesInput, Fn);
11364 } else {
11365 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11366 // Advanced SIMD generates one or two functions, depending on
11367 // the `[not]inbranch` clause.
11368 switch (State) {
11369 case OMPDeclareSimdDeclAttr::BS_Undefined:
11370 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11371 OutputBecomesInput, Fn);
11372 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11373 OutputBecomesInput, Fn);
11374 break;
11375 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11376 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11377 OutputBecomesInput, Fn);
11378 break;
11379 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11380 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11381 OutputBecomesInput, Fn);
11382 break;
11383 }
11384 }
11385 } else {
11386 // If no user simdlen is provided, follow the AAVFABI rules for
11387 // generating the vector length.
11388 if (ISA == 's') {
11389 // SVE, section 3.4.1, item 1.
11390 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11391 OutputBecomesInput, Fn);
11392 } else {
11393 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11394 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11395 // two vector names depending on the use of the clause
11396 // `[not]inbranch`.
11397 switch (State) {
11398 case OMPDeclareSimdDeclAttr::BS_Undefined:
11399 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11400 OutputBecomesInput, Fn);
11401 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11402 OutputBecomesInput, Fn);
11403 break;
11404 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11405 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11406 OutputBecomesInput, Fn);
11407 break;
11408 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11409 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11410 OutputBecomesInput, Fn);
11411 break;
11412 }
11413 }
11414 }
11415}
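// Example (illustrative): for SVE ('s') with no user-specified simdlen, the
// function receives a single masked, vector-length-agnostic variant such as
// "_ZGVsMxv_foo" (section 3.4.1, item 1 of the AAVFABI).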
11416
11417void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11418 llvm::Function *Fn) {
11419 ASTContext &C = CGM.getContext();
11420 FD = FD->getMostRecentDecl();
11421 while (FD) {
11422 // Map params to their positions in function decl.
11423 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11424 if (isa<CXXMethodDecl>(FD))
11425 ParamPositions.try_emplace(FD, 0);
11426 unsigned ParamPos = ParamPositions.size();
11427 for (const ParmVarDecl *P : FD->parameters()) {
11428 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11429 ++ParamPos;
11430 }
11431 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11432 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11433 // Mark uniform parameters.
11434 for (const Expr *E : Attr->uniforms()) {
11435 E = E->IgnoreParenImpCasts();
11436 unsigned Pos;
11437 if (isa<CXXThisExpr>(E)) {
11438 Pos = ParamPositions[FD];
11439 } else {
11440 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11441 ->getCanonicalDecl();
11442 auto It = ParamPositions.find(PVD);
11443 assert(It != ParamPositions.end() && "Function parameter not found");
11444 Pos = It->second;
11445 }
11446 ParamAttrs[Pos].Kind = Uniform;
11447 }
11448 // Get alignment info.
11449 auto *NI = Attr->alignments_begin();
11450 for (const Expr *E : Attr->aligneds()) {
11451 E = E->IgnoreParenImpCasts();
11452 unsigned Pos;
11453 QualType ParmTy;
11454 if (isa<CXXThisExpr>(E)) {
11455 Pos = ParamPositions[FD];
11456 ParmTy = E->getType();
11457 } else {
11458 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11459 ->getCanonicalDecl();
11460 auto It = ParamPositions.find(PVD);
11461 assert(It != ParamPositions.end() && "Function parameter not found");
11462 Pos = It->second;
11463 ParmTy = PVD->getType();
11464 }
11465 ParamAttrs[Pos].Alignment =
11466 (*NI)
11467 ? (*NI)->EvaluateKnownConstInt(C)
11468 : llvm::APSInt::getUnsigned(
11469 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11470 .getQuantity());
11471 ++NI;
11472 }
11473 // Mark linear parameters.
11474 auto *SI = Attr->steps_begin();
11475 auto *MI = Attr->modifiers_begin();
11476 for (const Expr *E : Attr->linears()) {
11477 E = E->IgnoreParenImpCasts();
11478 unsigned Pos;
11479 bool IsReferenceType = false;
11480 // Rescaling factor needed to compute the linear parameter
11481 // value in the mangled name.
11482 unsigned PtrRescalingFactor = 1;
11483 if (isa<CXXThisExpr>(E)) {
11484 Pos = ParamPositions[FD];
11485 auto *P = cast<PointerType>(E->getType());
11486 PtrRescalingFactor = CGM.getContext()
11487 .getTypeSizeInChars(P->getPointeeType())
11488 .getQuantity();
11489 } else {
11490 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11491 ->getCanonicalDecl();
11492 auto It = ParamPositions.find(PVD);
11493 assert(It != ParamPositions.end() && "Function parameter not found");
11494 Pos = It->second;
11495 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11496 PtrRescalingFactor = CGM.getContext()
11497 .getTypeSizeInChars(P->getPointeeType())
11498 .getQuantity();
11499 else if (PVD->getType()->isReferenceType()) {
11500 IsReferenceType = true;
11501 PtrRescalingFactor =
11502 CGM.getContext()
11503 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11504 .getQuantity();
11505 }
11506 }
11507 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11508 if (*MI == OMPC_LINEAR_ref)
11509 ParamAttr.Kind = LinearRef;
11510 else if (*MI == OMPC_LINEAR_uval)
11511 ParamAttr.Kind = LinearUVal;
11512 else if (IsReferenceType)
11513 ParamAttr.Kind = LinearVal;
11514 else
11515 ParamAttr.Kind = Linear;
11516 // Assume a stride of 1 for `linear` without modifiers.
11517 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11518 if (*SI) {
11519 Expr::EvalResult Result;
11520 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11521 if (const auto *DRE =
11522 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11523 if (const auto *StridePVD =
11524 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11525 ParamAttr.HasVarStride = true;
11526 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11527 assert(It != ParamPositions.end() &&
11528 "Function parameter not found");
11529 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11530 }
11531 }
11532 } else {
11533 ParamAttr.StrideOrArg = Result.Val.getInt();
11534 }
11535 }
11536 // If we are using a linear clause on a pointer, we need to
11537 // rescale the value of linear_step with the byte size of the
11538 // pointee type.
11539 if (!ParamAttr.HasVarStride &&
11540 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11541 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11542 ++SI;
11543 ++MI;
11544 }
11545 llvm::APSInt VLENVal;
11546 SourceLocation ExprLoc;
11547 const Expr *VLENExpr = Attr->getSimdlen();
11548 if (VLENExpr) {
11549 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11550 ExprLoc = VLENExpr->getExprLoc();
11551 }
11552 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11553 if (CGM.getTriple().isX86()) {
11554 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11555 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11556 unsigned VLEN = VLENVal.getExtValue();
11557 StringRef MangledName = Fn->getName();
11558 if (CGM.getTarget().hasFeature("sve"))
11559 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11560 MangledName, 's', 128, Fn, ExprLoc);
11561 else if (CGM.getTarget().hasFeature("neon"))
11562 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11563 MangledName, 'n', 128, Fn, ExprLoc);
11564 }
11565 }
11566 FD = FD->getPreviousDecl();
11567 }
11568}
11569
11570namespace {
11571/// Cleanup action for doacross support.
11572class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11573public:
11574 static const int DoacrossFinArgs = 2;
11575
11576private:
11577 llvm::FunctionCallee RTLFn;
11578 llvm::Value *Args[DoacrossFinArgs];
11579
11580public:
11581 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11582 ArrayRef<llvm::Value *> CallArgs)
11583 : RTLFn(RTLFn) {
11584 assert(CallArgs.size() == DoacrossFinArgs);
11585 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
11586 }
11587 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11588 if (!CGF.HaveInsertPoint())
11589 return;
11590 CGF.EmitRuntimeCall(RTLFn, Args);
11591 }
11592};
11593} // namespace
11594
11595void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11596 const OMPLoopDirective &D,
11597 ArrayRef<Expr *> NumIterations) {
11598 if (!CGF.HaveInsertPoint())
11599 return;
11600
11601 ASTContext &C = CGM.getContext();
11602 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11603 RecordDecl *RD;
11604 if (KmpDimTy.isNull()) {
11605 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11606 // kmp_int64 lo; // lower
11607 // kmp_int64 up; // upper
11608 // kmp_int64 st; // stride
11609 // };
11610 RD = C.buildImplicitRecord("kmp_dim");
11611 RD->startDefinition();
11612 addFieldToRecordDecl(C, RD, Int64Ty);
11613 addFieldToRecordDecl(C, RD, Int64Ty);
11614 addFieldToRecordDecl(C, RD, Int64Ty);
11615 RD->completeDefinition();
11616 KmpDimTy = C.getRecordType(RD);
11617 } else {
11618 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11619 }
11620 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11621 QualType ArrayTy =
11622 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
11623
11624 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
11625 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
11626 enum { LowerFD = 0, UpperFD, StrideFD };
11627 // Fill dims with data.
11628 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11629 LValue DimsLVal = CGF.MakeAddrLValue(
11630 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11631 // dims.upper = num_iterations;
11632 LValue UpperLVal = CGF.EmitLValueForField(
11633 DimsLVal, *std::next(RD->field_begin(), UpperFD));
11634 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11635 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
11636 Int64Ty, NumIterations[I]->getExprLoc());
11637 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
11638 // dims.stride = 1;
11639 LValue StrideLVal = CGF.EmitLValueForField(
11640 DimsLVal, *std::next(RD->field_begin(), StrideFD));
11641 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
11642 StrideLVal);
11643 }
11644
11645 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11646 // kmp_int32 num_dims, struct kmp_dim * dims);
11647 llvm::Value *Args[] = {
11648 emitUpdateLocation(CGF, D.getBeginLoc()),
11649 getThreadID(CGF, D.getBeginLoc()),
11650 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
11651 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11652 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
11653 CGM.VoidPtrTy)};
11654
11655 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11656 CGM.getModule(), OMPRTL___kmpc_doacross_init);
11657 CGF.EmitRuntimeCall(RTLFn, Args);
11658 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11659 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
11660 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11661 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
11662 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
11663 llvm::ArrayRef(FiniArgs));
11664}
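// Example (illustrative): for a loop nest with `ordered(2)`, this builds a
// two-element `kmp_dim dims[2]` (each `up` set to the iteration count, each
// `st` set to 1, `lo` left zero-initialized) and emits
//   __kmpc_doacross_init(&loc, gtid, 2, dims);
// plus a cleanup that calls __kmpc_doacross_fini on region exit.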
11665
11666void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11667 const OMPDependClause *C) {
11668 QualType Int64Ty =
11669 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11670 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11671 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11672 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
11673 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
11674 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11675 const Expr *CounterVal = C->getLoopData(I);
11676 assert(CounterVal);
11677 llvm::Value *CntVal = CGF.EmitScalarConversion(
11678 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
11679 CounterVal->getExprLoc());
11680 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
11681 /*Volatile=*/false, Int64Ty);
11682 }
11683 llvm::Value *Args[] = {
11684 emitUpdateLocation(CGF, C->getBeginLoc()),
11685 getThreadID(CGF, C->getBeginLoc()),
11686 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
11687 llvm::FunctionCallee RTLFn;
11688 if (C->getDependencyKind() == OMPC_DEPEND_source) {
11689 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11690 OMPRTL___kmpc_doacross_post);
11691 } else {
11692 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
11693 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
11694 OMPRTL___kmpc_doacross_wait);
11695 }
11696 CGF.EmitRuntimeCall(RTLFn, Args);
11697}
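// Example (illustrative): within the ordered loop,
//   #pragma omp ordered depend(sink: i - 1)  ->  __kmpc_doacross_wait(...)
//   #pragma omp ordered depend(source)       ->  __kmpc_doacross_post(...)
// with the dependence vector stored in the ".cnt.addr" array built above.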
11698
11699void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11700 llvm::FunctionCallee Callee,
11701 ArrayRef<llvm::Value *> Args) const {
11702 assert(Loc.isValid() && "Outlined function call location must be valid.");
11703 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
11704
11705 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
11706 if (Fn->doesNotThrow()) {
11707 CGF.EmitNounwindRuntimeCall(Fn, Args);
11708 return;
11709 }
11710 }
11711 CGF.EmitRuntimeCall(Callee, Args);
11712}
11713
11714void CGOpenMPRuntime::emitOutlinedFunctionCall(
11715 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11716 ArrayRef<llvm::Value *> Args) const {
11717 emitCall(CGF, Loc, OutlinedFn, Args);
11718}
11719
11720void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11721 if (const auto *FD = dyn_cast<FunctionDecl>(D))
11722 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11723 HasEmittedDeclareTargetRegion = true;
11724}
11725
11726Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11727 const VarDecl *NativeParam,
11728 const VarDecl *TargetParam) const {
11729 return CGF.GetAddrOfLocalVar(NativeParam);
11730}
11731
11732/// Return allocator value from expression, or return a null allocator (default
11733/// when no allocator specified).
11734static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11735 const Expr *Allocator) {
11736 llvm::Value *AllocVal;
11737 if (Allocator) {
11738 AllocVal = CGF.EmitScalarExpr(Allocator);
11739 // According to the standard, the original allocator type is an enum
11740 // (integer). Convert to a pointer type, if required.
11741 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
11742 CGF.getContext().VoidPtrTy,
11743 Allocator->getExprLoc());
11744 } else {
11745 // If no allocator specified, it defaults to the null allocator.
11746 AllocVal = llvm::Constant::getNullValue(
11747 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
11748 }
11749 return AllocVal;
11750}
11751
11752/// Return the alignment from an allocate directive if present.
11753static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11754 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11755
11756 if (!AllocateAlignment)
11757 return nullptr;
11758
11759 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
11760}
11761
11762Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11763 const VarDecl *VD) {
11764 if (!VD)
11765 return Address::invalid();
11766 Address UntiedAddr = Address::invalid();
11767 Address UntiedRealAddr = Address::invalid();
11768 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11769 if (It != FunctionToUntiedTaskStackMap.end()) {
11770 const UntiedLocalVarsAddressesMap &UntiedData =
11771 UntiedLocalVarsStack[It->second];
11772 auto I = UntiedData.find(VD);
11773 if (I != UntiedData.end()) {
11774 UntiedAddr = I->second.first;
11775 UntiedRealAddr = I->second.second;
11776 }
11777 }
11778 const VarDecl *CVD = VD->getCanonicalDecl();
11779 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11780 // Use the default allocation.
11781 if (!isAllocatableDecl(VD))
11782 return UntiedAddr;
11783 llvm::Value *Size;
11784 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11785 if (CVD->getType()->isVariablyModifiedType()) {
11786 Size = CGF.getTypeSize(CVD->getType());
11787 // Align the size: ((size + align - 1) / align) * align
11788 Size = CGF.Builder.CreateNUWAdd(
11789 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
11790 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
11791 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
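// Editorial worked example (not in the original source): with size = 10 and
// align = 8, the three instructions above compute ((10 + 7) / 8) * 8 == 16,
// the smallest multiple of 8 that can hold 10 bytes.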
11792 } else {
11793 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11794 Size = CGM.getSize(Sz.alignTo(Align));
11795 }
11796 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
11797 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11798 const Expr *Allocator = AA->getAllocator();
11799 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11800 llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
11801 SmallVector<llvm::Value *, 4> Args;
11802 Args.push_back(ThreadID);
11803 if (Alignment)
11804 Args.push_back(Alignment);
11805 Args.push_back(Size);
11806 Args.push_back(AllocVal);
11807 llvm::omp::RuntimeFunction FnID =
11808 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
11809 llvm::Value *Addr = CGF.EmitRuntimeCall(
11810 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
11811 getName({CVD->getName(), ".void.addr"}));
11812 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11813 CGM.getModule(), OMPRTL___kmpc_free);
11814 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11815 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11816 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
11817 if (UntiedAddr.isValid())
11818 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
11819
11820 // Cleanup action for allocate support.
11821 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11822 llvm::FunctionCallee RTLFn;
11823 SourceLocation::UIntTy LocEncoding;
11824 Address Addr;
11825 const Expr *AllocExpr;
11826
11827 public:
11828 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11829 SourceLocation::UIntTy LocEncoding, Address Addr,
11830 const Expr *AllocExpr)
11831 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11832 AllocExpr(AllocExpr) {}
11833 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11834 if (!CGF.HaveInsertPoint())
11835 return;
11836 llvm::Value *Args[3];
11837 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11838 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
11839 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11840 Addr.getPointer(), CGF.VoidPtrTy);
11841 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
11842 Args[2] = AllocVal;
11843 CGF.EmitRuntimeCall(RTLFn, Args);
11844 }
11845 };
11846 Address VDAddr =
11847 UntiedRealAddr.isValid()
11848 ? UntiedRealAddr
11849 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
11850 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11851 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11852 VDAddr, Allocator);
11853 if (UntiedRealAddr.isValid())
11854 if (auto *Region =
11855 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
11856 Region->emitUntiedSwitch(CGF);
11857 return VDAddr;
11858 }
11859 return UntiedAddr;
11860}
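// Editorial sketch of the emitted pattern (editor's illustration; the
// ".void.addr"/".addr" names are the temporaries created above, T the
// variable's type):
//
//   void *a.void.addr = __kmpc_alloc(tid, aligned_size, allocator);
//   T    *a.addr      = (T *)a.void.addr;
//   ...                                         // variable lifetime
//   __kmpc_free(tid, a.void.addr, allocator);   // via the pushed EH cleanup
//
// When an explicit alignment is available, __kmpc_aligned_alloc(tid, align,
// size, allocator) is called instead, matching the Args built above.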
11861
11862bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11863 const VarDecl *VD) const {
11864 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
11865 if (It == FunctionToUntiedTaskStackMap.end())
11866 return false;
11867 return UntiedLocalVarsStack[It->second].count(VD) > 0;
11868}
11869
11870CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11871 CodeGenModule &CGM, const OMPLoopDirective &S)
11872 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11873 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11874 if (!NeedToPush)
11875 return;
11876 NontemporalDeclsSet &DS =
11877 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11878 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11879 for (const Stmt *Ref : C->private_refs()) {
11880 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11881 const ValueDecl *VD;
11882 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11883 VD = DRE->getDecl();
11884 } else {
11885 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11886 assert((ME->isImplicitCXXThis() ||
11887 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11888 "Expected member of current class.");
11889 VD = ME->getMemberDecl();
11890 }
11891 DS.insert(VD);
11892 }
11893 }
11894}
11895
11896CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11897 if (!NeedToPush)
11898 return;
11899 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11900}
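// Editorial sketch of the intended RAII usage (caller code illustrative):
//
//   {
//     CGOpenMPRuntime::NontemporalDeclsRAII NTRAII(CGM, LoopDirective);
//     // ... emit the loop; isNontemporalDecl(VD) now reflects the
//     // nontemporal clauses of LoopDirective ...
//   } // the destructor pops the pushed set here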
11901
11902CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11903 CodeGenFunction &CGF,
11904 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11905 std::pair<Address, Address>> &LocalVars)
11906 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11907 if (!NeedToPush)
11908 return;
11909 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11910 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11911 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
11912}
11913
11914CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11915 if (!NeedToPush)
11916 return;
11917 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11918}
11919
11920bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11921 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11922
11923 return llvm::any_of(
11924 CGM.getOpenMPRuntime().NontemporalDeclsStack,
11925 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11926}
11927
11928void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11929 const OMPExecutableDirective &S,
11930 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11931 const {
11932 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11933 // Vars in target/task regions must be excluded completely.
11934 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11935 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11936 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11937 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11938 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11939 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11940 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11941 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11942 }
11943 }
11944 // Exclude vars in private clauses.
11945 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11946 for (const Expr *Ref : C->varlists()) {
11947 if (!Ref->getType()->isScalarType())
11948 continue;
11949 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11950 if (!DRE)
11951 continue;
11952 NeedToCheckForLPCs.insert(DRE->getDecl());
11953 }
11954 }
11955 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11956 for (const Expr *Ref : C->varlists()) {
11957 if (!Ref->getType()->isScalarType())
11958 continue;
11959 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11960 if (!DRE)
11961 continue;
11962 NeedToCheckForLPCs.insert(DRE->getDecl());
11963 }
11964 }
11965 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11966 for (const Expr *Ref : C->varlists()) {
11967 if (!Ref->getType()->isScalarType())
11968 continue;
11969 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11970 if (!DRE)
11971 continue;
11972 NeedToCheckForLPCs.insert(DRE->getDecl());
11973 }
11974 }
11975 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11976 for (const Expr *Ref : C->varlists()) {
11977 if (!Ref->getType()->isScalarType())
11978 continue;
11979 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11980 if (!DRE)
11981 continue;
11982 NeedToCheckForLPCs.insert(DRE->getDecl());
11983 }
11984 }
11985 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11986 for (const Expr *Ref : C->varlists()) {
11987 if (!Ref->getType()->isScalarType())
11988 continue;
11989 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11990 if (!DRE)
11991 continue;
11992 NeedToCheckForLPCs.insert(DRE->getDecl());
11993 }
11994 }
11995 for (const Decl *VD : NeedToCheckForLPCs) {
11996 for (const LastprivateConditionalData &Data :
11997 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11998 if (Data.DeclToUniqueName.count(VD) > 0) {
11999 if (!Data.Disabled)
12000 NeedToAddForLPCsAsDisabled.insert(VD);
12001 break;
12002 }
12003 }
12004 }
12005}
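// Editorial sketch of a case handled above (directives illustrative): a
// variable tracked as lastprivate conditional in an outer region is added to
// the disabled set when a nested region privatizes or captures it:
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (...) {
//     #pragma omp task firstprivate(a)   // 'a' must not be tracked in here
//     { ... }
//   }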
12006
12007CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12008 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12009 : CGM(CGF.CGM),
12010 Action((CGM.getLangOpts().OpenMP >= 50 &&
12011 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12012 [](const OMPLastprivateClause *C) {
12013 return C->getKind() ==
12014 OMPC_LASTPRIVATE_conditional;
12015 }))
12016 ? ActionToDo::PushAsLastprivateConditional
12017 : ActionToDo::DoNotPush) {
12018 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12019 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12020 return;
12021 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12022 "Expected a push action.");
12023 LastprivateConditionalData &Data =
12024 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12025 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12026 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12027 continue;
12028
12029 for (const Expr *Ref : C->varlists()) {
12030 Data.DeclToUniqueName.insert(std::make_pair(
12031 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12032 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12033 }
12034 }
12035 Data.IVLVal = IVLVal;
12036 Data.Fn = CGF.CurFn;
12037}
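// Editorial sketch: for "#pragma omp for lastprivate(conditional: a)" the
// constructor above pushes one stack entry mapping 'a' to a unique global
// name generated with the "pl_cond" prefix, together with the loop IV lvalue
// and the current function, for emitLastprivateConditionalUpdate() below.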
12038
12039CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12040 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12041 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12042 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12043 if (CGM.getLangOpts().OpenMP < 50)
12044 return;
12045 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12046 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12047 if (!NeedToAddForLPCsAsDisabled.empty()) {
12048 Action = ActionToDo::DisableLastprivateConditional;
12049 LastprivateConditionalData &Data =
12050 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12051 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12052 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12053 Data.Fn = CGF.CurFn;
12054 Data.Disabled = true;
12055 }
12056}
12057
12058CGOpenMPRuntime::LastprivateConditionalRAII
12059CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12060 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12061 return LastprivateConditionalRAII(CGF, S);
12062}
12063
12064CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12065 if (CGM.getLangOpts().OpenMP < 50)
12066 return;
12067 if (Action == ActionToDo::DisableLastprivateConditional) {
12068 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12069 "Expected list of disabled private vars.");
12070 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12071 }
12072 if (Action == ActionToDo::PushAsLastprivateConditional) {
12073 assert(
12074 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12075 "Expected list of lastprivate conditional vars.");
12076 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12077 }
12078}
12079
12080Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12081 const VarDecl *VD) {
12082 ASTContext &C = CGM.getContext();
12083 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12084 if (I == LastprivateConditionalToTypes.end())
12085 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12086 QualType NewType;
12087 const FieldDecl *VDField;
12088 const FieldDecl *FiredField;
12089 LValue BaseLVal;
12090 auto VI = I->getSecond().find(VD);
12091 if (VI == I->getSecond().end()) {
12092 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12093 RD->startDefinition();
12094 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12095 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12096 RD->completeDefinition();
12097 NewType = C.getRecordType(RD);
12098 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12099 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12100 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12101 } else {
12102 NewType = std::get<0>(VI->getSecond());
12103 VDField = std::get<1>(VI->getSecond());
12104 FiredField = std::get<2>(VI->getSecond());
12105 BaseLVal = std::get<3>(VI->getSecond());
12106 }
12107 LValue FiredLVal =
12108 CGF.EmitLValueForField(BaseLVal, FiredField);
12109 CGF.EmitStoreOfScalar(
12110 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12111 FiredLVal);
12112 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12113}
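// Editorial sketch of the implicit record built above (T denotes the
// non-reference type of VD):
//
//   struct lastprivate.conditional {
//     T    value;  // VDField: the private copy returned to the caller
//     char Fired;  // FiredField: zeroed here, set to 1 once value is written
//   };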
12114
12115namespace {
12116/// Checks if the lastprivate conditional variable is referenced in LHS.
12117class LastprivateConditionalRefChecker final
12118 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12119 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12120 const Expr *FoundE = nullptr;
12121 const Decl *FoundD = nullptr;
12122 StringRef UniqueDeclName;
12123 LValue IVLVal;
12124 llvm::Function *FoundFn = nullptr;
12125 SourceLocation Loc;
12126
12127public:
12128 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12129 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12130 llvm::reverse(LPM)) {
12131 auto It = D.DeclToUniqueName.find(E->getDecl());
12132 if (It == D.DeclToUniqueName.end())
12133 continue;
12134 if (D.Disabled)
12135 return false;
12136 FoundE = E;
12137 FoundD = E->getDecl()->getCanonicalDecl();
12138 UniqueDeclName = It->second;
12139 IVLVal = D.IVLVal;
12140 FoundFn = D.Fn;
12141 break;
12142 }
12143 return FoundE == E;
12144 }
12145 bool VisitMemberExpr(const MemberExpr *E) {
12146 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12147 return false;
12148 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12149 llvm::reverse(LPM)) {
12150 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12151 if (It == D.DeclToUniqueName.end())
12152 continue;
12153 if (D.Disabled)
12154 return false;
12155 FoundE = E;
12156 FoundD = E->getMemberDecl()->getCanonicalDecl();
12157 UniqueDeclName = It->second;
12158 IVLVal = D.IVLVal;
12159 FoundFn = D.Fn;
12160 break;
12161 }
12162 return FoundE == E;
12163 }
12164 bool VisitStmt(const Stmt *S) {
12165 for (const Stmt *Child : S->children()) {
12166 if (!Child)
12167 continue;
12168 if (const auto *E = dyn_cast<Expr>(Child))
12169 if (!E->isGLValue())
12170 continue;
12171 if (Visit(Child))
12172 return true;
12173 }
12174 return false;
12175 }
12176 explicit LastprivateConditionalRefChecker(
12177 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12178 : LPM(LPM) {}
12179 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12180 getFoundData() const {
12181 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12182 }
12183};
12184} // namespace
12185
12186void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12187 LValue IVLVal,
12188 StringRef UniqueDeclName,
12189 LValue LVal,
12190 SourceLocation Loc) {
12191 // Last updated loop counter for the lastprivate conditional var.
12192 // int<xx> last_iv = 0;
12193 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12194 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
12195 LLIVTy, getName({UniqueDeclName, "iv"}));
12196 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12197 IVLVal.getAlignment().getAsAlign());
12198 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12199
12200 // Last value of the lastprivate conditional.
12201 // decltype(priv_a) last_a;
12202 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
12203 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12204 Last->setAlignment(LVal.getAlignment().getAsAlign());
12205 LValue LastLVal = CGF.MakeAddrLValue(
12206 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12207
12208 // Global loop counter. Required to handle inner parallel-for regions.
12209 // iv
12210 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12211
12212 // #pragma omp critical(a)
12213 // if (last_iv <= iv) {
12214 // last_iv = iv;
12215 // last_a = priv_a;
12216 // }
12217 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12218 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12219 Action.Enter(CGF);
12220 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12221 // (last_iv <= iv)? If the current iteration is not earlier than the last
12222 // recorded one, store the new value in the global var.
12223 llvm::Value *CmpRes;
12224 if (IVLVal.getType()->isSignedIntegerType()) {
12225 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12226 } else {
12227 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12228 "Loop iteration variable must be integer.");
12229 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12230 }
12231 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12232 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12233 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12234 // {
12235 CGF.EmitBlock(ThenBB);
12236
12237 // last_iv = iv;
12238 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12239
12240 // last_a = priv_a;
12241 switch (CGF.getEvaluationKind(LVal.getType())) {
12242 case TEK_Scalar: {
12243 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12244 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12245 break;
12246 }
12247 case TEK_Complex: {
12248 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12249 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12250 break;
12251 }
12252 case TEK_Aggregate:
12253 llvm_unreachable(
12254 "Aggregates are not supported in lastprivate conditional.");
12255 }
12256 // }
12257 CGF.EmitBranch(ExitBB);
12258 // There is no need to emit a line number for the unconditional branch.
12259 (void)ApplyDebugLocation::CreateEmpty(CGF);
12260 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12261 };
12262
12263 if (CGM.getLangOpts().OpenMPSimd) {
12264 // Do not emit as a critical region as no parallel region could be emitted.
12265 RegionCodeGenTy ThenRCG(CodeGen);
12266 ThenRCG(CGF);
12267 } else {
12268 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12269 }
12270}
12271
12272void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12273 const Expr *LHS) {
12274 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12275 return;
12276 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12277 if (!Checker.Visit(LHS))
12278 return;
12279 const Expr *FoundE;
12280 const Decl *FoundD;
12281 StringRef UniqueDeclName;
12282 LValue IVLVal;
12283 llvm::Function *FoundFn;
12284 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12285 Checker.getFoundData();
12286 if (FoundFn != CGF.CurFn) {
12287 // Special codegen for inner parallel regions.
12288 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12289 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12290 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12291 "Lastprivate conditional is not found in outer region.");
12292 QualType StructTy = std::get<0>(It->getSecond());
12293 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12294 LValue PrivLVal = CGF.EmitLValue(FoundE);
12295 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12296 PrivLVal.getAddress(CGF),
12297 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12298 CGF.ConvertTypeForMem(StructTy));
12299 LValue BaseLVal =
12300 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12301 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12302 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12303 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12304 FiredLVal, llvm::AtomicOrdering::Unordered,
12305 /*IsVolatile=*/true, /*isInit=*/false);
12306 return;
12307 }
12308
12309 // Private address of the lastprivate conditional in the current context.
12310 // priv_a
12311 LValue LVal = CGF.EmitLValue(FoundE);
12312 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12313 FoundE->getExprLoc());
12314}
12315
12316void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12317 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12318 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12319 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12320 return;
12321 auto Range = llvm::reverse(LastprivateConditionalStack);
12322 auto It = llvm::find_if(
12323 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12324 if (It == Range.end() || It->Fn != CGF.CurFn)
12325 return;
12326 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12327 assert(LPCI != LastprivateConditionalToTypes.end() &&
12328 "Lastprivates must be registered already.");
12329 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12330 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12331 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12332 for (const auto &Pair : It->DeclToUniqueName) {
12333 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12334 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12335 continue;
12336 auto I = LPCI->getSecond().find(Pair.first);
12337 assert(I != LPCI->getSecond().end() &&
12338 "Lastprivate must be registered already.");
12339 // bool Cmp = priv_a.Fired != 0;
12340 LValue BaseLVal = std::get<3>(I->getSecond());
12341 LValue FiredLVal =
12342 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12343 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12344 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12345 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12346 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12347 // if (Cmp) {
12348 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12349 CGF.EmitBlock(ThenBB);
12350 Address Addr = CGF.GetAddrOfLocalVar(VD);
12351 LValue LVal;
12352 if (VD->getType()->isReferenceType())
12353 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12354 AlignmentSource::Decl);
12355 else
12356 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12357 AlignmentSource::Decl);
12358 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12359 D.getBeginLoc());
12360 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12361 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
12362 // }
12363 }
12364}
12365
12366void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12367 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12368 SourceLocation Loc) {
12369 if (CGF.getLangOpts().OpenMP < 50)
12370 return;
12371 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12372 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12373 "Unknown lastprivate conditional variable.");
12374 StringRef UniqueName = It->second;
12375 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12376 // The variable was not updated in the region - exit.
12377 if (!GV)
12378 return;
12379 LValue LPLVal = CGF.MakeAddrLValue(
12380 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12381 PrivLVal.getType().getNonReferenceType());
12382 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12383 CGF.EmitStoreOfScalar(Res, PrivLVal);
12384}
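// Editorial summary of the lastprivate(conditional:) lowering implemented by
// the functions above (names illustrative):
//
//   decltype(priv_a) last_a;    // internal global <unique>     (last value)
//   int<xx>          last_iv;   // internal global <unique>.iv  (its iteration)
//
//   // on each conditional update of the private copy:
//   critical { if (last_iv <= iv) { last_iv = iv; last_a = priv_a; } }
//
//   // after the region (this function): copy out only if the variable was
//   // ever updated, i.e. only if the <unique> global was actually emitted:
//   a = last_a;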
12385
12386llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12387 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12388 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12389 llvm_unreachable("Not supported in SIMD-only mode");
12390}
12391
12392llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12393 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12394 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12395 llvm_unreachable("Not supported in SIMD-only mode");
12396}
12397
12398llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12399 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12400 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12401 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12402 bool Tied, unsigned &NumberOfParts) {
12403 llvm_unreachable("Not supported in SIMD-only mode");
12404}
12405
12406void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12407 SourceLocation Loc,
12408 llvm::Function *OutlinedFn,
12409 ArrayRef<llvm::Value *> CapturedVars,
12410 const Expr *IfCond,
12411 llvm::Value *NumThreads) {
12412 llvm_unreachable("Not supported in SIMD-only mode");
12413}
12414
12415void CGOpenMPSIMDRuntime::emitCriticalRegion(
12416 CodeGenFunction &CGF, StringRef CriticalName,
12417 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12418 const Expr *Hint) {
12419 llvm_unreachable("Not supported in SIMD-only mode");
12420}
12421
12422void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12423 const RegionCodeGenTy &MasterOpGen,
12424 SourceLocation Loc) {
12425 llvm_unreachable("Not supported in SIMD-only mode");
12426}
12427
12428void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12429 const RegionCodeGenTy &MasterOpGen,
12430 SourceLocation Loc,
12431 const Expr *Filter) {
12432 llvm_unreachable("Not supported in SIMD-only mode");
12433}
12434
12435void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12436 SourceLocation Loc) {
12437 llvm_unreachable("Not supported in SIMD-only mode");
12438}
12439
12440void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12441 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12442 SourceLocation Loc) {
12443 llvm_unreachable("Not supported in SIMD-only mode");
12444}
12445
12446void CGOpenMPSIMDRuntime::emitSingleRegion(
12447 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12448 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12449 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12450 ArrayRef<const Expr *> AssignmentOps) {
12451 llvm_unreachable("Not supported in SIMD-only mode");
12452}
12453
12454void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12455 const RegionCodeGenTy &OrderedOpGen,
12456 SourceLocation Loc,
12457 bool IsThreads) {
12458 llvm_unreachable("Not supported in SIMD-only mode");
12459}
12460
12461void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12462 SourceLocation Loc,
12463 OpenMPDirectiveKind Kind,
12464 bool EmitChecks,
12465 bool ForceSimpleCall) {
12466 llvm_unreachable("Not supported in SIMD-only mode");
12467}
12468
12469void CGOpenMPSIMDRuntime::emitForDispatchInit(
12470 CodeGenFunction &CGF, SourceLocation Loc,
12471 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12472 bool Ordered, const DispatchRTInput &DispatchValues) {
12473 llvm_unreachable("Not supported in SIMD-only mode");
12474}
12475
12476void CGOpenMPSIMDRuntime::emitForStaticInit(
12477 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12478 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12479 llvm_unreachable("Not supported in SIMD-only mode");
12480}
12481
12482void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12483 CodeGenFunction &CGF, SourceLocation Loc,
12484 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12485 llvm_unreachable("Not supported in SIMD-only mode");
12486}
12487
12488void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12489 SourceLocation Loc,
12490 unsigned IVSize,
12491 bool IVSigned) {
12492 llvm_unreachable("Not supported in SIMD-only mode");
12493}
12494
12495void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12496 SourceLocation Loc,
12497 OpenMPDirectiveKind DKind) {
12498 llvm_unreachable("Not supported in SIMD-only mode");
12499}
12500
12501llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12502 SourceLocation Loc,
12503 unsigned IVSize, bool IVSigned,
12504 Address IL, Address LB,
12505 Address UB, Address ST) {
12506 llvm_unreachable("Not supported in SIMD-only mode");
12507}
12508
12509void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12510 llvm::Value *NumThreads,
12511 SourceLocation Loc) {
12512 llvm_unreachable("Not supported in SIMD-only mode");
12513}
12514
12515void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12516 ProcBindKind ProcBind,
12517 SourceLocation Loc) {
12518 llvm_unreachable("Not supported in SIMD-only mode");
12519}
12520
12521Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12522 const VarDecl *VD,
12523 Address VDAddr,
12524 SourceLocation Loc) {
12525 llvm_unreachable("Not supported in SIMD-only mode");
12526}
12527
12528llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12529 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12530 CodeGenFunction *CGF) {
12531 llvm_unreachable("Not supported in SIMD-only mode");
12532}
12533
12534Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12535 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12536 llvm_unreachable("Not supported in SIMD-only mode");
12537}
12538
12539void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12540 ArrayRef<const Expr *> Vars,
12541 SourceLocation Loc,
12542 llvm::AtomicOrdering AO) {
12543 llvm_unreachable("Not supported in SIMD-only mode");
12544}
12545
12546void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12547 const OMPExecutableDirective &D,
12548 llvm::Function *TaskFunction,
12549 QualType SharedsTy, Address Shareds,
12550 const Expr *IfCond,
12551 const OMPTaskDataTy &Data) {
12552 llvm_unreachable("Not supported in SIMD-only mode");
12553}
12554
12555void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12556 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12557 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12558 const Expr *IfCond, const OMPTaskDataTy &Data) {
12559 llvm_unreachable("Not supported in SIMD-only mode");
12560}
12561
12562void CGOpenMPSIMDRuntime::emitReduction(
12563 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12564 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12565 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12566 assert(Options.SimpleReduction && "Only simple reduction is expected.");
12567 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12568 ReductionOps, Options);
12569}
12570
12571llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12572 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12573 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12574 llvm_unreachable("Not supported in SIMD-only mode");
12575}
12576
12577void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12578 SourceLocation Loc,
12579 bool IsWorksharingReduction) {
12580 llvm_unreachable("Not supported in SIMD-only mode");
12581}
12582
12583void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
12584 SourceLocation Loc,
12585 ReductionCodeGen &RCG,
12586 unsigned N) {
12587 llvm_unreachable("Not supported in SIMD-only mode");
12588}
12589
12590Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
12591 SourceLocation Loc,
12592 llvm::Value *ReductionsPtr,
12593 LValue SharedLVal) {
12594 llvm_unreachable("Not supported in SIMD-only mode");
12595}
12596
12597void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
12598 SourceLocation Loc,
12599 const OMPTaskDataTy &Data) {
12600 llvm_unreachable("Not supported in SIMD-only mode");
12601}
12602
12603void CGOpenMPSIMDRuntime::emitCancellationPointCall(
12604 CodeGenFunction &CGF, SourceLocation Loc,
12605 OpenMPDirectiveKind CancelRegion) {
12606 llvm_unreachable("Not supported in SIMD-only mode");
12607}
12608
12609void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
12610 SourceLocation Loc, const Expr *IfCond,
12611 OpenMPDirectiveKind CancelRegion) {
12612 llvm_unreachable("Not supported in SIMD-only mode");
12613}
12614
12615void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
12616 const OMPExecutableDirective &D, StringRef ParentName,
12617 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
12618 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
12619 llvm_unreachable("Not supported in SIMD-only mode");
12620}
12621
12622void CGOpenMPSIMDRuntime::emitTargetCall(
12623 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12624 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
12625 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
12626 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
12627 const OMPLoopDirective &D)>
12628 SizeEmitter) {
12629 llvm_unreachable("Not supported in SIMD-only mode");
12630}
12631
12632bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
12633 llvm_unreachable("Not supported in SIMD-only mode");
12634}
12635
12636bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
12637 llvm_unreachable("Not supported in SIMD-only mode");
12638}
12639
12640bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
12641 return false;
12642}
12643
12644void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
12645 const OMPExecutableDirective &D,
12646 SourceLocation Loc,
12647 llvm::Function *OutlinedFn,
12648 ArrayRef<llvm::Value *> CapturedVars) {
12649 llvm_unreachable("Not supported in SIMD-only mode");
12650}
12651
12652void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
12653 const Expr *NumTeams,
12654 const Expr *ThreadLimit,
12655 SourceLocation Loc) {
12656 llvm_unreachable("Not supported in SIMD-only mode");
12657}
12658
12659void CGOpenMPSIMDRuntime::emitTargetDataCalls(
12660 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12661 const Expr *Device, const RegionCodeGenTy &CodeGen,
12662 CGOpenMPRuntime::TargetDataInfo &Info) {
12663 llvm_unreachable("Not supported in SIMD-only mode");
12664}
12665
12666void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
12667 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
12668 const Expr *Device) {
12669 llvm_unreachable("Not supported in SIMD-only mode");
12670}
12671
12672void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12673 const OMPLoopDirective &D,
12674 ArrayRef<Expr *> NumIterations) {
12675 llvm_unreachable("Not supported in SIMD-only mode");
12676}
12677
12678void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12679 const OMPDependClause *C) {
12680 llvm_unreachable("Not supported in SIMD-only mode");
12681}
12682
12683const VarDecl *
12684CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
12685 const VarDecl *NativeParam) const {
12686 llvm_unreachable("Not supported in SIMD-only mode");
12687}
12688
12689Address
12690CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
12691 const VarDecl *NativeParam,
12692 const VarDecl *TargetParam) const {
12693 llvm_unreachable("Not supported in SIMD-only mode");
12694}