Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Warning: line 2271, column 3
Address of stack memory associated with local variable 'Action' is still referred to by a temporary object on the stack upon returning to the caller. This will be a dangling reference.

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CGOpenMPRuntime.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/clang/include -I tools/clang/include -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/clang/lib/CodeGen/CGOpenMPRuntime.cpp
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
22#include "clang/AST/OpenMPClause.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/AST/StmtVisitor.h"
25#include "clang/Basic/BitmaskEnum.h"
26#include "clang/Basic/FileManager.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/SourceManager.h"
29#include "clang/CodeGen/ConstantInitBuilder.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/SmallBitVector.h"
33#include "llvm/ADT/StringExtras.h"
34#include "llvm/Bitcode/BitcodeReader.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/GlobalValue.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Format.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <numeric>
45
46using namespace clang;
47using namespace CodeGen;
48using namespace llvm::omp;
49
50namespace {
51/// Base class for handling code generation inside OpenMP regions.
52class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53public:
54 /// Kinds of OpenMP regions used in codegen.
55 enum CGOpenMPRegionKind {
56 /// Region with outlined function for standalone 'parallel'
57 /// directive.
58 ParallelOutlinedRegion,
59 /// Region with outlined function for standalone 'task' directive.
60 TaskOutlinedRegion,
61 /// Region for constructs that do not require function outlining,
62 /// like 'for', 'sections', 'atomic' etc. directives.
63 InlinedRegion,
64 /// Region with outlined function for standalone 'target' directive.
65 TargetRegion,
66 };
67
68 CGOpenMPRegionInfo(const CapturedStmt &CS,
69 const CGOpenMPRegionKind RegionKind,
70 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71 bool HasCancel)
72 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74
75 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77 bool HasCancel)
78 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79 Kind(Kind), HasCancel(HasCancel) {}
80
81 /// Get a variable or parameter for storing global thread id
82 /// inside OpenMP construct.
83 virtual const VarDecl *getThreadIDVariable() const = 0;
84
85 /// Emit the captured statement body.
86 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87
88 /// Get an LValue for the current ThreadID variable.
89 /// \return LValue for thread id variable. This LValue always has type int32*.
90 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91
92 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93
94 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95
96 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97
98 bool hasCancel() const { return HasCancel; }
99
100 static bool classof(const CGCapturedStmtInfo *Info) {
101 return Info->getKind() == CR_OpenMP;
102 }
103
104 ~CGOpenMPRegionInfo() override = default;
105
106protected:
107 CGOpenMPRegionKind RegionKind;
108 RegionCodeGenTy CodeGen;
109 OpenMPDirectiveKind Kind;
110 bool HasCancel;
111};
112
113/// API for captured statement code generation in OpenMP constructs.
114class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115public:
116 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117 const RegionCodeGenTy &CodeGen,
118 OpenMPDirectiveKind Kind, bool HasCancel,
119 StringRef HelperName)
120 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121 HasCancel),
122 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")(static_cast <bool> (ThreadIDVar != nullptr && "No ThreadID in OpenMP region."
) ? void (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 123, __extension__
__PRETTY_FUNCTION__))
;
124 }
125
126 /// Get a variable or parameter for storing global thread id
127 /// inside OpenMP construct.
128 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129
130 /// Get the name of the capture helper.
131 StringRef getHelperName() const override { return HelperName; }
132
133 static bool classof(const CGCapturedStmtInfo *Info) {
134 return CGOpenMPRegionInfo::classof(Info) &&
135 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136 ParallelOutlinedRegion;
137 }
138
139private:
140 /// A variable or parameter storing global thread id for OpenMP
141 /// constructs.
142 const VarDecl *ThreadIDVar;
143 StringRef HelperName;
144};
145
146/// API for captured statement code generation in OpenMP constructs.
147class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148public:
149 class UntiedTaskActionTy final : public PrePostActionTy {
150 bool Untied;
151 const VarDecl *PartIDVar;
152 const RegionCodeGenTy UntiedCodeGen;
153 llvm::SwitchInst *UntiedSwitch = nullptr;
154
155 public:
156 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157 const RegionCodeGenTy &UntiedCodeGen)
158 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159 void Enter(CodeGenFunction &CGF) override {
160 if (Untied) {
161 // Emit task switching point.
162 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163 CGF.GetAddrOfLocalVar(PartIDVar),
164 PartIDVar->getType()->castAs<PointerType>());
165 llvm::Value *Res =
166 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169 CGF.EmitBlock(DoneBB);
170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173 CGF.Builder.GetInsertBlock());
174 emitUntiedSwitch(CGF);
175 }
176 }
177 void emitUntiedSwitch(CodeGenFunction &CGF) const {
178 if (Untied) {
179 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180 CGF.GetAddrOfLocalVar(PartIDVar),
181 PartIDVar->getType()->castAs<PointerType>());
182 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183 PartIdLVal);
184 UntiedCodeGen(CGF);
185 CodeGenFunction::JumpDest CurPoint =
186 CGF.getJumpDestInCurrentScope(".untied.next.");
187 CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190 CGF.Builder.GetInsertBlock());
191 CGF.EmitBranchThroughCleanup(CurPoint);
192 CGF.EmitBlock(CurPoint.getBlock());
193 }
194 }
195 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196 };
197 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198 const VarDecl *ThreadIDVar,
199 const RegionCodeGenTy &CodeGen,
200 OpenMPDirectiveKind Kind, bool HasCancel,
201 const UntiedTaskActionTy &Action)
202 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203 ThreadIDVar(ThreadIDVar), Action(Action) {
204 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")(static_cast <bool> (ThreadIDVar != nullptr && "No ThreadID in OpenMP region."
) ? void (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 204, __extension__
__PRETTY_FUNCTION__))
;
205 }
206
207 /// Get a variable or parameter for storing global thread id
208 /// inside OpenMP construct.
209 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210
211 /// Get an LValue for the current ThreadID variable.
212 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213
214 /// Get the name of the capture helper.
215 StringRef getHelperName() const override { return ".omp_outlined."; }
216
217 void emitUntiedSwitch(CodeGenFunction &CGF) override {
218 Action.emitUntiedSwitch(CGF);
219 }
220
221 static bool classof(const CGCapturedStmtInfo *Info) {
222 return CGOpenMPRegionInfo::classof(Info) &&
223 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224 TaskOutlinedRegion;
225 }
226
227private:
228 /// A variable or parameter storing global thread id for OpenMP
229 /// constructs.
230 const VarDecl *ThreadIDVar;
231 /// Action for emitting code for untied tasks.
232 const UntiedTaskActionTy &Action;
233};
234
235/// API for inlined captured statement code generation in OpenMP
236/// constructs.
237class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238public:
239 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240 const RegionCodeGenTy &CodeGen,
241 OpenMPDirectiveKind Kind, bool HasCancel)
242 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243 OldCSI(OldCSI),
244 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245
246 // Retrieve the value of the context parameter.
247 llvm::Value *getContextValue() const override {
248 if (OuterRegionInfo)
249 return OuterRegionInfo->getContextValue();
250 llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 250)
;
251 }
252
253 void setContextValue(llvm::Value *V) override {
254 if (OuterRegionInfo) {
255 OuterRegionInfo->setContextValue(V);
256 return;
257 }
258 llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 258)
;
259 }
260
261 /// Lookup the captured field decl for a variable.
262 const FieldDecl *lookup(const VarDecl *VD) const override {
263 if (OuterRegionInfo)
264 return OuterRegionInfo->lookup(VD);
265 // If there is no outer outlined region,no need to lookup in a list of
266 // captured variables, we can use the original one.
267 return nullptr;
268 }
269
270 FieldDecl *getThisFieldDecl() const override {
271 if (OuterRegionInfo)
272 return OuterRegionInfo->getThisFieldDecl();
273 return nullptr;
274 }
275
276 /// Get a variable or parameter for storing global thread id
277 /// inside OpenMP construct.
278 const VarDecl *getThreadIDVariable() const override {
279 if (OuterRegionInfo)
280 return OuterRegionInfo->getThreadIDVariable();
281 return nullptr;
282 }
283
284 /// Get an LValue for the current ThreadID variable.
285 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286 if (OuterRegionInfo)
287 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288 llvm_unreachable("No LValue for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No LValue for inlined OpenMP construct"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 288)
;
289 }
290
291 /// Get the name of the capture helper.
292 StringRef getHelperName() const override {
293 if (auto *OuterRegionInfo = getOldCSI())
294 return OuterRegionInfo->getHelperName();
295 llvm_unreachable("No helper name for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No helper name for inlined OpenMP construct"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 295)
;
296 }
297
298 void emitUntiedSwitch(CodeGenFunction &CGF) override {
299 if (OuterRegionInfo)
300 OuterRegionInfo->emitUntiedSwitch(CGF);
301 }
302
303 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304
305 static bool classof(const CGCapturedStmtInfo *Info) {
306 return CGOpenMPRegionInfo::classof(Info) &&
307 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308 }
309
310 ~CGOpenMPInlinedRegionInfo() override = default;
311
312private:
313 /// CodeGen info about outer OpenMP region.
314 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315 CGOpenMPRegionInfo *OuterRegionInfo;
316};
317
318/// API for captured statement code generation in OpenMP target
319/// constructs. For this captures, implicit parameters are used instead of the
320/// captured fields. The name of the target region has to be unique in a given
321/// application so it is provided by the client, because only the client has
322/// the information to generate that.
323class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324public:
325 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326 const RegionCodeGenTy &CodeGen, StringRef HelperName)
327 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328 /*HasCancel=*/false),
329 HelperName(HelperName) {}
330
331 /// This is unused for target regions because each starts executing
332 /// with a single thread.
333 const VarDecl *getThreadIDVariable() const override { return nullptr; }
334
335 /// Get the name of the capture helper.
336 StringRef getHelperName() const override { return HelperName; }
337
338 static bool classof(const CGCapturedStmtInfo *Info) {
339 return CGOpenMPRegionInfo::classof(Info) &&
340 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341 }
342
343private:
344 StringRef HelperName;
345};
346
347static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348 llvm_unreachable("No codegen for expressions")::llvm::llvm_unreachable_internal("No codegen for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 348)
;
349}
350/// API for generation of expressions captured in a innermost OpenMP
351/// region.
352class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353public:
354 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356 OMPD_unknown,
357 /*HasCancel=*/false),
358 PrivScope(CGF) {
359 // Make sure the globals captured in the provided statement are local by
360 // using the privatization logic. We assume the same variable is not
361 // captured more than once.
362 for (const auto &C : CS.captures()) {
363 if (!C.capturesVariable() && !C.capturesVariableByCopy())
364 continue;
365
366 const VarDecl *VD = C.getCapturedVar();
367 if (VD->isLocalVarDeclOrParm())
368 continue;
369
370 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371 /*RefersToEnclosingVariableOrCapture=*/false,
372 VD->getType().getNonReferenceType(), VK_LValue,
373 C.getLocation());
374 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375 }
376 (void)PrivScope.Privatize();
377 }
378
379 /// Lookup the captured field decl for a variable.
380 const FieldDecl *lookup(const VarDecl *VD) const override {
381 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382 return FD;
383 return nullptr;
384 }
385
386 /// Emit the captured statement body.
387 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388 llvm_unreachable("No body for expressions")::llvm::llvm_unreachable_internal("No body for expressions", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 388)
;
389 }
390
391 /// Get a variable or parameter for storing global thread id
392 /// inside OpenMP construct.
393 const VarDecl *getThreadIDVariable() const override {
394 llvm_unreachable("No thread id for expressions")::llvm::llvm_unreachable_internal("No thread id for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 394)
;
395 }
396
397 /// Get the name of the capture helper.
398 StringRef getHelperName() const override {
399 llvm_unreachable("No helper name for expressions")::llvm::llvm_unreachable_internal("No helper name for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 399)
;
400 }
401
402 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403
404private:
405 /// Private scope to capture global variables.
406 CodeGenFunction::OMPPrivateScope PrivScope;
407};
408
409/// RAII for emitting code of OpenMP constructs.
410class InlinedOpenMPRegionRAII {
411 CodeGenFunction &CGF;
412 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
413 FieldDecl *LambdaThisCaptureField = nullptr;
414 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415 bool NoInheritance = false;
416
417public:
418 /// Constructs region for combined constructs.
419 /// \param CodeGen Code generation sequence for combined directives. Includes
420 /// a list of functions used for code generation of implicitly inlined
421 /// regions.
422 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423 OpenMPDirectiveKind Kind, bool HasCancel,
424 bool NoInheritance = true)
425 : CGF(CGF), NoInheritance(NoInheritance) {
426 // Start emission for the construct.
427 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429 if (NoInheritance) {
430 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432 CGF.LambdaThisCaptureField = nullptr;
433 BlockInfo = CGF.BlockInfo;
434 CGF.BlockInfo = nullptr;
435 }
436 }
437
438 ~InlinedOpenMPRegionRAII() {
439 // Restore original CapturedStmtInfo only if we're done with code emission.
440 auto *OldCSI =
441 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442 delete CGF.CapturedStmtInfo;
443 CGF.CapturedStmtInfo = OldCSI;
444 if (NoInheritance) {
445 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447 CGF.BlockInfo = BlockInfo;
448 }
449 }
450};
451
452/// Values for bit flags used in the ident_t to describe the fields.
453/// All enumeric elements are named and described in accordance with the code
454/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455enum OpenMPLocationFlags : unsigned {
456 /// Use trampoline for internal microtask.
457 OMP_IDENT_IMD = 0x01,
458 /// Use c-style ident structure.
459 OMP_IDENT_KMPC = 0x02,
460 /// Atomic reduction option for kmpc_reduce.
461 OMP_ATOMIC_REDUCE = 0x10,
462 /// Explicit 'barrier' directive.
463 OMP_IDENT_BARRIER_EXPL = 0x20,
464 /// Implicit barrier in code.
465 OMP_IDENT_BARRIER_IMPL = 0x40,
466 /// Implicit barrier in 'for' directive.
467 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468 /// Implicit barrier in 'sections' directive.
469 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470 /// Implicit barrier in 'single' directive.
471 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472 /// Call of __kmp_for_static_init for static loop.
473 OMP_IDENT_WORK_LOOP = 0x200,
474 /// Call of __kmp_for_static_init for sections.
475 OMP_IDENT_WORK_SECTIONS = 0x400,
476 /// Call of __kmp_for_static_init for distribute.
477 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_IDENT_WORK_DISTRIBUTE
479};
480
481namespace {
482LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()using ::llvm::BitmaskEnumDetail::operator~; using ::llvm::BitmaskEnumDetail
::operator|; using ::llvm::BitmaskEnumDetail::operator&; using
::llvm::BitmaskEnumDetail::operator^; using ::llvm::BitmaskEnumDetail
::operator|=; using ::llvm::BitmaskEnumDetail::operator&=
; using ::llvm::BitmaskEnumDetail::operator^=
;
483/// Values for bit flags for marking which requires clauses have been used.
484enum OpenMPOffloadingRequiresDirFlags : int64_t {
485 /// flag undefined.
486 OMP_REQ_UNDEFINED = 0x000,
487 /// no requires clause present.
488 OMP_REQ_NONE = 0x001,
489 /// reverse_offload clause.
490 OMP_REQ_REVERSE_OFFLOAD = 0x002,
491 /// unified_address clause.
492 OMP_REQ_UNIFIED_ADDRESS = 0x004,
493 /// unified_shared_memory clause.
494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
495 /// dynamic_allocators clause.
496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
497 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_REQ_DYNAMIC_ALLOCATORS
498};
499
500enum OpenMPOffloadingReservedDeviceIDs {
501 /// Device ID if the device was not defined, runtime should get it
502 /// from environment variables in the spec.
503 OMP_DEVICEID_UNDEF = -1,
504};
505} // anonymous namespace
506
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon separated
///                                 fields which describe the source file,
///                                 the function and a pair of line numbers that
///                                 delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
547
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
579
580/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581/// region.
582class CleanupTy final : public EHScopeStack::Cleanup {
583 PrePostActionTy *Action;
584
585public:
586 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588 if (!CGF.HaveInsertPoint())
589 return;
590 Action->Exit(CGF);
591 }
592};
593
594} // anonymous namespace
595
596void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597 CodeGenFunction::RunCleanupsScope Scope(CGF);
598 if (PrePostAction) {
599 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600 Callback(CodeGen, CGF, *PrePostAction);
601 } else {
602 PrePostActionTy Action;
603 Callback(CodeGen, CGF, Action);
604 }
605}
606
607/// Check if the combiner is a call to UDR combiner and if it is so return the
608/// UDR decl used for reduction.
609static const OMPDeclareReductionDecl *
610getReductionInit(const Expr *ReductionOp) {
611 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613 if (const auto *DRE =
614 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616 return DRD;
617 return nullptr;
618}
619
620static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621 const OMPDeclareReductionDecl *DRD,
622 const Expr *InitOp,
623 Address Private, Address Original,
624 QualType Ty) {
625 if (DRD->getInitializer()) {
626 std::pair<llvm::Function *, llvm::Function *> Reduction =
627 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628 const auto *CE = cast<CallExpr>(InitOp);
629 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632 const auto *LHSDRE =
633 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634 const auto *RHSDRE =
635 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639 (void)PrivateScope.Privatize();
640 RValue Func = RValue::get(Reduction.second);
641 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642 CGF.EmitIgnoredExpr(InitOp);
643 } else {
644 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646 auto *GV = new llvm::GlobalVariable(
647 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648 llvm::GlobalValue::PrivateLinkage, Init, Name);
649 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650 RValue InitRVal;
651 switch (CGF.getEvaluationKind(Ty)) {
652 case TEK_Scalar:
653 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654 break;
655 case TEK_Complex:
656 InitRVal =
657 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658 break;
659 case TEK_Aggregate: {
660 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663 /*IsInitializer=*/false);
664 return;
665 }
666 }
667 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670 /*IsInitializer=*/false);
671 }
672}
673
674/// Emit initialization of arrays of complex types.
675/// \param DestAddr Address of the array.
676/// \param Type Type of array.
677/// \param Init Initial expression of array.
678/// \param SrcAddr Address of the original array.
/// Emits an element-by-element initialization loop over the destination
/// array. When \p DRD is non-null the per-element initializer may read the
/// corresponding element of the original (shared) array through \p SrcAddr.
679static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680 QualType Type, bool EmitDeclareReductionInit,
681 const Expr *Init,
682 const OMPDeclareReductionDecl *DRD,
683 Address SrcAddr = Address::invalid()) {
684 // Perform element-by-element initialization.
685 QualType ElementTy;
686
687 // Drill down to the base element type on both arrays.
688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
690 if (DRD)
691 SrcAddr =
692 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
693
694 llvm::Value *SrcBegin = nullptr;
695 if (DRD)
696 SrcBegin = SrcAddr.getPointer();
697 llvm::Value *DestBegin = DestAddr.getPointer();
698 // Cast from pointer to array type to pointer to single element.
699 llvm::Value *DestEnd =
700 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
701 // The basic structure here is a while-do loop.
702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
 // Skip the loop entirely for zero-length arrays (possible for VLAs).
704 llvm::Value *IsEmpty =
705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707
708 // Enter the loop body, making that address the current address.
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710 CGF.EmitBlock(BodyBB);
711
712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713
 // PHIs carry the current source/destination element pointer across
 // loop iterations; the source PHI exists only for declare-reduction init.
714 llvm::PHINode *SrcElementPHI = nullptr;
715 Address SrcElementCurrent = Address::invalid();
716 if (DRD) {
717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718 "omp.arraycpy.srcElementPast");
719 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720 SrcElementCurrent =
721 Address(SrcElementPHI, SrcAddr.getElementType(),
722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723 }
724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726 DestElementPHI->addIncoming(DestBegin, EntryBB);
727 Address DestElementCurrent =
728 Address(DestElementPHI, DestAddr.getElementType(),
729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730
731 // Emit copy.
732 {
 // Scope so that temporaries created by the initializer are destroyed
 // once per element, inside the loop body.
733 CodeGenFunction::RunCleanupsScope InitScope(CGF);
734 if (EmitDeclareReductionInit) {
735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736 SrcElementCurrent, ElementTy);
737 } else
738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739 /*IsInitializer=*/false);
740 }
741
742 if (DRD) {
743 // Shift the address forward by one element.
744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
746 "omp.arraycpy.dest.element");
 // Use the *current* insert block for the back-edge: the element init
 // above may have emitted additional basic blocks.
747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748 }
749
750 // Shift the address forward by one element.
751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
753 "omp.arraycpy.dest.element");
754 // Check whether we've reached the end.
755 llvm::Value *Done =
756 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
757 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
758 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
759
760 // Done.
761 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
762}
763
/// Emit the lvalue of the shared copy of a reduction item by delegating to
/// CodeGenFunction's generic OpenMP shared-lvalue emission.
764LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765 return CGF.EmitOMPSharedLValue(E);
766}
767
768LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769 const Expr *E) {
770 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772 return LValue();
773}
774
/// Initialize the private array copy of reduction item \p N, element by
/// element, using either the declare-reduction initializer or the private
/// variable's own default initializer.
775void ReductionCodeGen::emitAggregateInitialization(
776 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777 const OMPDeclareReductionDecl *DRD) {
778 // Emit VarDecl with copy init for arrays.
779 // Get the address of the original variable captured in current
780 // captured region.
781 const auto *PrivateVD =
782 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
 // Prefer the user-defined reduction initializer when one exists (or when
 // the private decl has no initializer of its own).
783 bool EmitDeclareReductionInit =
784 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786 EmitDeclareReductionInit,
787 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788 : PrivateVD->getInit(),
789 DRD, SharedAddr);
790}
791
792ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793 ArrayRef<const Expr *> Origs,
794 ArrayRef<const Expr *> Privates,
795 ArrayRef<const Expr *> ReductionOps) {
796 ClausesData.reserve(Shareds.size());
797 SharedAddresses.reserve(Shareds.size());
798 Sizes.reserve(Shareds.size());
799 BaseDecls.reserve(Shareds.size());
800 const auto *IOrig = Origs.begin();
801 const auto *IPriv = Privates.begin();
802 const auto *IRed = ReductionOps.begin();
803 for (const Expr *Ref : Shareds) {
804 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805 std::advance(IOrig, 1);
806 std::advance(IPriv, 1);
807 std::advance(IRed, 1);
808 }
809}
810
811void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&(static_cast <bool> (SharedAddresses.size() == N &&
OrigAddresses.size() == N && "Number of generated lvalues must be exactly N."
) ? void (0) : __assert_fail ("SharedAddresses.size() == N && OrigAddresses.size() == N && \"Number of generated lvalues must be exactly N.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 813, __extension__
__PRETTY_FUNCTION__))
813 "Number of generated lvalues must be exactly N.")(static_cast <bool> (SharedAddresses.size() == N &&
OrigAddresses.size() == N && "Number of generated lvalues must be exactly N."
) ? void (0) : __assert_fail ("SharedAddresses.size() == N && OrigAddresses.size() == N && \"Number of generated lvalues must be exactly N.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 813, __extension__
__PRETTY_FUNCTION__))
;
814 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816 SharedAddresses.emplace_back(First, Second);
817 if (ClausesData[N].Shared == ClausesData[N].Ref) {
818 OrigAddresses.emplace_back(First, Second);
819 } else {
820 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822 OrigAddresses.emplace_back(First, Second);
823 }
824}
825
/// Compute and record the size (in chars and in elements) of reduction item
/// \p N, then emit its variably-modified private type if needed.
826void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
827 QualType PrivateType = getPrivateType(N);
828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
 // Constant-size item: size is known from the type; element count unused.
829 if (!PrivateType->isVariablyModifiedType()) {
830 Sizes.emplace_back(
831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832 nullptr);
833 return;
834 }
835 llvm::Value *Size;
836 llvm::Value *SizeInChars;
837 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
838 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
839 if (AsArraySection) {
 // Array section: element count = (UB - LB) + 1, computed from the
 // recorded lower/upper bound pointers (bounds are inclusive).
840 Size = CGF.Builder.CreatePtrDiff(ElemType,
841 OrigAddresses[N].second.getPointer(CGF),
842 OrigAddresses[N].first.getPointer(CGF));
843 Size = CGF.Builder.CreateNUWAdd(
844 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
845 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
846 } else {
 // Whole VLA: byte size comes from the type; derive element count.
847 SizeInChars =
848 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
849 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
850 }
851 Sizes.emplace_back(SizeInChars, Size);
 // Bind the VLA size expression to the computed element count so that
 // emitting the variably-modified type below sees the right dimension.
852 CodeGenFunction::OpaqueValueMapping OpaqueMap(
853 CGF,
854 cast<OpaqueValueExpr>(
855 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
856 RValue::get(Size));
857 CGF.EmitVariablyModifiedType(PrivateType);
858}
859
860void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
861 llvm::Value *Size) {
862 QualType PrivateType = getPrivateType(N);
863 if (!PrivateType->isVariablyModifiedType()) {
864 assert(!Size && !Sizes[N].second &&(static_cast <bool> (!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
"items.") ? void (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 866, __extension__
__PRETTY_FUNCTION__))
865 "Size should be nullptr for non-variably modified reduction "(static_cast <bool> (!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
"items.") ? void (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 866, __extension__
__PRETTY_FUNCTION__))
866 "items.")(static_cast <bool> (!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
"items.") ? void (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 866, __extension__
__PRETTY_FUNCTION__))
;
867 return;
868 }
869 CodeGenFunction::OpaqueValueMapping OpaqueMap(
870 CGF,
871 cast<OpaqueValueExpr>(
872 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873 RValue::get(Size));
874 CGF.EmitVariablyModifiedType(PrivateType);
875}
876
/// Emit the initialization of the private copy of reduction item \p N.
/// Chooses between array element-wise init, a user-defined (declare
/// reduction) initializer, and the private decl's own initializer. The
/// \p DefaultInit callback runs first and returns true if it fully handled
/// the initialization.
877void ReductionCodeGen::emitInitialization(
878 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
879 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
880 assert(SharedAddresses.size() > N && "No variable was generated")(static_cast <bool> (SharedAddresses.size() > N &&
"No variable was generated") ? void (0) : __assert_fail ("SharedAddresses.size() > N && \"No variable was generated\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 880, __extension__
__PRETTY_FUNCTION__))
;
881 const auto *PrivateVD =
882 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883 const OMPDeclareReductionDecl *DRD =
884 getReductionInit(ClausesData[N].ReductionOp);
885 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
886 if (DRD && DRD->getInitializer())
887 (void)DefaultInit(CGF);
888 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
889 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
890 (void)DefaultInit(CGF);
891 QualType SharedType = SharedAddresses[N].first.getType();
892 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
893 PrivateAddr, SharedAddr, SharedType);
 // NOTE: DefaultInit is deliberately evaluated first in the condition
 // below; only if it did not handle init do we emit the decl's own
 // (non-trivial) initializer.
894 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897 PrivateVD->getType().getQualifiers(),
898 /*IsInitializer=*/false);
899 }
900}
901
902bool ReductionCodeGen::needCleanups(unsigned N) {
903 QualType PrivateType = getPrivateType(N);
904 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905 return DTorKind != QualType::DK_none;
906}
907
908void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909 Address PrivateAddr) {
910 QualType PrivateType = getPrivateType(N);
911 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912 if (needCleanups(N)) {
913 PrivateAddr = CGF.Builder.CreateElementBitCast(
914 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916 }
917}
918
/// Repeatedly load through pointer/reference levels of \p BaseTy until the
/// pointee type matches \p ElTy, then return an lvalue at that address with
/// its element type converted to \p ElTy (base info / TBAA preserved).
919static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
920 LValue BaseLV) {
921 BaseTy = BaseTy.getNonReferenceType();
922 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
923 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
924 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
925 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
926 } else {
 // Reference level: load through the reference.
927 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
928 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
929 }
930 BaseTy = BaseTy->getPointeeType();
931 }
932 return CGF.MakeAddrLValue(
933 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
934 CGF.ConvertTypeForMem(ElTy)),
935 BaseLV.getType(), BaseLV.getBaseInfo(),
936 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
937}
938
/// Inverse of loadToBegin: rebuild an address of type \p BaseTy that, after
/// the same chain of pointer/reference loads, yields \p Addr. For each
/// pointer/reference level a temporary is materialized and chained; if no
/// levels exist, \p Addr is simply cast to the original base pointer type.
939static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
940 Address OriginalBaseAddress, llvm::Value *Addr) {
941 Address Tmp = Address::invalid();
942 Address TopTmp = Address::invalid();
943 Address MostTopTmp = Address::invalid();
944 BaseTy = BaseTy.getNonReferenceType();
945 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
946 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
 // One temporary per indirection level; each stores the next one's
 // address, forming the load chain.
947 Tmp = CGF.CreateMemTemp(BaseTy);
948 if (TopTmp.isValid())
949 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
950 else
951 MostTopTmp = Tmp;
952 TopTmp = Tmp;
953 BaseTy = BaseTy->getPointeeType();
954 }
955
956 if (Tmp.isValid()) {
 // Store the adjusted pointer into the innermost temporary and return
 // the outermost one.
957 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
958 Addr, Tmp.getElementType());
959 CGF.Builder.CreateStore(Addr, Tmp);
960 return MostTopTmp;
961 }
962
963 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
964 Addr, OriginalBaseAddress.getType());
965 return OriginalBaseAddress.withPointer(Addr);
966}
967
968static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969 const VarDecl *OrigVD = nullptr;
970 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973 Base = TempOASE->getBase()->IgnoreParenImpCasts();
974 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975 Base = TempASE->getBase()->IgnoreParenImpCasts();
976 DE = cast<DeclRefExpr>(Base);
977 OrigVD = cast<VarDecl>(DE->getDecl());
978 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981 Base = TempASE->getBase()->IgnoreParenImpCasts();
982 DE = cast<DeclRefExpr>(Base);
983 OrigVD = cast<VarDecl>(DE->getDecl());
984 }
985 return OrigVD;
986}
987
/// Adjust the private address of reduction item \p N so that it corresponds
/// to the same offset within the base variable as the shared (original)
/// lvalue does — needed when the reduction item is an array section or
/// subscript into a larger object. Records the base VarDecl in BaseDecls.
988Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
989 Address PrivateAddr) {
990 const DeclRefExpr *DE;
991 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
992 BaseDecls.emplace_back(OrigVD);
993 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
994 LValue BaseLValue =
995 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
996 OriginalBaseLValue);
997 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
 // Offset (in elements) of the base relative to the shared lvalue;
 // applying it to the private pointer reproduces the same layout.
998 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
999 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1000 SharedAddr.getPointer());
1001 llvm::Value *PrivatePointer =
1002 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1003 PrivateAddr.getPointer(), SharedAddr.getType());
1004 llvm::Value *Ptr = CGF.Builder.CreateGEP(
1005 SharedAddr.getElementType(), PrivatePointer, Adjustment);
1006 return castToBase(CGF, OrigVD->getType(),
1007 SharedAddresses[N].first.getType(),
1008 OriginalBaseLValue.getAddress(CGF), Ptr);
1009 }
 // Plain variable reference: no adjustment needed.
1010 BaseDecls.emplace_back(
1011 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1012 return PrivateAddr;
1013}
1014
1015bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016 const OMPDeclareReductionDecl *DRD =
1017 getReductionInit(ClausesData[N].ReductionOp);
1018 return DRD && DRD->getInitializer();
1019}
1020
/// The thread-id variable in an outlined region is a pointer parameter;
/// load through it to get the lvalue of the actual kmp_int32 value.
1021LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022 return CGF.EmitLoadOfPointerLValue(
1023 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024 getThreadIDVariable()->getType()->castAs<PointerType>());
1025}
1026
/// Emit the body of an OpenMP structured block inside a terminate scope so
/// that exceptions cannot escape the region.
1027void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1028 if (!CGF.HaveInsertPoint())
1029 return;
1030 // 1.2.2 OpenMP Language Terminology
1031 // Structured block - An executable statement with a single entry at the
1032 // top and a single exit at the bottom.
1033 // The point of exit cannot be a branch out of the structured block.
1034 // longjmp() and throw() must not violate the entry/exit criteria.
1035 CGF.EHStack.pushTerminate();
1036 if (S)
1037 CGF.incrementProfileCounter(S);
1038 CodeGen(CGF);
 // Pop the terminate handler pushed above; must mirror pushTerminate().
1039 CGF.EHStack.popTerminate();
1040}
1041
/// For task outlined regions the thread-id variable is passed by value
/// (kmp_int32, not a pointer), so its lvalue is the local itself.
1042LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043 CodeGenFunction &CGF) {
1044 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045 getThreadIDVariable()->getType(),
1046 AlignmentSource::Decl);
1047}
1048
/// Create a public, unnamed, non-bitfield field of type \p FieldTy and
/// append it to the record \p DC. Returns the new FieldDecl.
1049static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050 QualType FieldTy) {
1051 auto *Field = FieldDecl::Create(
1052 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055 Field->setAccess(AS_public);
1056 DC->addDecl(Field);
1057 return Field;
1058}
1059
/// Construct the OpenMP runtime helper: sets up the kmp_critical_name type
/// (array of 8 i32), initializes the OpenMPIRBuilder, and loads any offload
/// metadata recorded by a previous (host/device) compilation.
1060CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061 StringRef Separator)
1062 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1064 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1065
1066 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1067 OMPBuilder.initialize();
1068 loadOffloadInfoMetadata();
1069}
1070
1071void CGOpenMPRuntime::clear() {
1072 InternalVars.clear();
1073 // Clean non-target variable declarations possibly used only in debug info.
1074 for (const auto &Data : EmittedNonTargetVariables) {
1075 if (!Data.getValue().pointsToAliveValue())
1076 continue;
1077 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078 if (!GV)
1079 continue;
1080 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081 continue;
1082 GV->eraseFromParent();
1083 }
1084}
1085
1086std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087 SmallString<128> Buffer;
1088 llvm::raw_svector_ostream OS(Buffer);
1089 StringRef Sep = FirstSeparator;
1090 for (StringRef Part : Parts) {
1091 OS << Sep << Part;
1092 Sep = Separator;
1093 }
1094 return std::string(OS.str());
1095}
1096
/// Emit the outlined combiner or initializer function for a user-defined
/// reduction: void fn(Ty *restrict out, Ty *restrict in). The \p In / \p Out
/// decls are privatized to the loaded parameter addresses so the UDR's
/// combiner/initializer expression can reference them directly.
1097static llvm::Function *
1098emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1099 const Expr *CombinerInitializer, const VarDecl *In,
1100 const VarDecl *Out, bool IsCombiner) {
1101 // void .omp_combiner.(Ty *in, Ty *out);
1102 ASTContext &C = CGM.getContext();
1103 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1104 FunctionArgList Args;
1105 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1106 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1107 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1109 Args.push_back(&OmpOutParm);
1110 Args.push_back(&OmpInParm);
1111 const CGFunctionInfo &FnInfo =
1112 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1113 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1114 std::string Name = CGM.getOpenMPRuntime().getName(
1115 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1116 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1117 Name, &CGM.getModule());
1118 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
 // These helpers are tiny; force inlining in optimized builds.
1119 if (CGM.getLangOpts().Optimize) {
1120 Fn->removeFnAttr(llvm::Attribute::NoInline);
1121 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1122 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1123 }
1124 CodeGenFunction CGF(CGM);
1125 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1126 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1127 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1128 Out->getLocation());
1129 CodeGenFunction::OMPPrivateScope Scope(CGF);
1130 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1131 Scope.addPrivate(
1132 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1133 .getAddress(CGF));
1134 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1135 Scope.addPrivate(
1136 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1137 .getAddress(CGF));
1138 (void)Scope.Privatize();
 // For the initializer function: if 'omp_priv' has a non-trivial default
 // init, emit it before (or instead of) the explicit initializer call.
1139 if (!IsCombiner && Out->hasInit() &&
1140 !CGF.isTrivialInitializer(Out->getInit())) {
1141 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1142 Out->getType().getQualifiers(),
1143 /*IsInitializer=*/true);
1144 }
1145 if (CombinerInitializer)
1146 CGF.EmitIgnoredExpr(CombinerInitializer);
1147 Scope.ForceCleanup();
1148 CGF.FinishFunction();
1149 return Fn;
1150}
1151
/// Emit (once) the combiner and, if present, initializer functions of a
/// user-defined reduction and cache them in UDRMap. When \p CGF is given,
/// the declaration is also associated with the current function so it can
/// be cleaned up with it.
1152void CGOpenMPRuntime::emitUserDefinedReduction(
1153 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1154 if (UDRMap.count(D) > 0)
1155 return;
1156 llvm::Function *Combiner = emitCombinerOrInitializer(
1157 CGM, D->getType(), D->getCombiner(),
1158 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1159 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1160 /*IsCombiner=*/true);
1161 llvm::Function *Initializer = nullptr;
1162 if (const Expr *Init = D->getInitializer()) {
 // Only direct-call initializers pass the init expression; others rely
 // on the 'omp_priv' declaration's own initializer inside the helper.
1163 Initializer = emitCombinerOrInitializer(
1164 CGM, D->getType(),
1165 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1166 : nullptr,
1167 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1168 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1169 /*IsCombiner=*/false);
1170 }
1171 UDRMap.try_emplace(D, Combiner, Initializer);
1172 if (CGF) {
1173 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1174 Decls.second.push_back(D);
1175 }
1176}
1177
/// Return the {combiner, initializer} function pair for \p D, emitting the
/// helpers on first request.
1178std::pair<llvm::Function *, llvm::Function *>
1179CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180 auto I = UDRMap.find(D);
1181 if (I != UDRMap.end())
1182 return I->second;
 // Not emitted yet; emitUserDefinedReduction populates UDRMap for D.
1183 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184 return UDRMap.lookup(D);
1185}
1186
1187namespace {
1188// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1189// Builder if one is present.
// Pushes a finalization callback on construction and pops it on destruction;
// a null OMPBuilder makes both operations no-ops.
1190struct PushAndPopStackRAII {
1191 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1192 bool HasCancel, llvm::omp::Directive Kind)
1193 : OMPBuilder(OMPBuilder) {
1194 if (!OMPBuilder)
1195 return;
1196
1197 // The following callback is the crucial part of clangs cleanup process.
1198 //
1199 // NOTE:
1200 // Once the OpenMPIRBuilder is used to create parallel regions (and
1201 // similar), the cancellation destination (Dest below) is determined via
1202 // IP. That means if we have variables to finalize we split the block at IP,
1203 // use the new block (=BB) as destination to build a JumpDest (via
1204 // getJumpDestInCurrentScope(BB)) which then is fed to
1205 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1206 // to push & pop an FinalizationInfo object.
1207 // The FiniCB will still be needed but at the point where the
1208 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
 // NOTE(review): FiniCB captures CGF by reference; the callback is only
 // valid while this RAII object (and CGF) are alive.
1209 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1210 assert(IP.getBlock()->end() == IP.getPoint() &&(static_cast <bool> (IP.getBlock()->end() == IP.getPoint
() && "Clang CG should cause non-terminated block!") ?
void (0) : __assert_fail ("IP.getBlock()->end() == IP.getPoint() && \"Clang CG should cause non-terminated block!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1211, __extension__
__PRETTY_FUNCTION__))
1211 "Clang CG should cause non-terminated block!")(static_cast <bool> (IP.getBlock()->end() == IP.getPoint
() && "Clang CG should cause non-terminated block!") ?
void (0) : __assert_fail ("IP.getBlock()->end() == IP.getPoint() && \"Clang CG should cause non-terminated block!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1211, __extension__
__PRETTY_FUNCTION__))
;
1212 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1213 CGF.Builder.restoreIP(IP);
1214 CodeGenFunction::JumpDest Dest =
1215 CGF.getOMPCancelDestination(OMPD_parallel);
1216 CGF.EmitBranchThroughCleanup(Dest);
1217 };
1218
1219 // TODO: Remove this once we emit parallel regions through the
1220 // OpenMPIRBuilder as it can do this setup internally.
1221 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1222 OMPBuilder->pushFinalizationCB(std::move(FI));
1223 }
 // Pop the callback pushed in the constructor (if a builder was given).
1224 ~PushAndPopStackRAII() {
1225 if (OMPBuilder)
1226 OMPBuilder->popFinalizationCB();
1227 }
1228 llvm::OpenMPIRBuilder *OMPBuilder;
1229};
1230} // namespace
1231
/// Shared implementation for outlining 'parallel' and 'teams' captured
/// regions into a function taking the thread-id pointer as first argument.
1232static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236 assert(ThreadIDVar->getType()->isPointerType() &&(static_cast <bool> (ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 *"
) ? void (0) : __assert_fail ("ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 *\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1237, __extension__
__PRETTY_FUNCTION__))
1237 "thread id variable must be of type kmp_int32 *")(static_cast <bool> (ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 *"
) ? void (0) : __assert_fail ("ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 *\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1237, __extension__
__PRETTY_FUNCTION__))
;
1238 CodeGenFunction CGF(CGM, true);
 // Determine whether the directive (in any of its parallel-containing
 // variants) may contain a 'cancel' construct.
1239 bool HasCancel = false;
1240 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241 HasCancel = OPD->hasCancel();
1242 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243 HasCancel = OPD->hasCancel();
1244 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245 HasCancel = OPSD->hasCancel();
1246 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247 HasCancel = OPFD->hasCancel();
1248 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249 HasCancel = OPFD->hasCancel();
1250 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251 HasCancel = OPFD->hasCancel();
1252 else if (const auto *OPFD =
1253 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254 HasCancel = OPFD->hasCancel();
1255 else if (const auto *OPFD =
1256 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257 HasCancel = OPFD->hasCancel();
1258
1259 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260 // parallel region to make cancellation barriers work properly.
1261 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264 HasCancel, OutlinedHelperName);
1265 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267}
1268
/// Outline the captured region of a 'parallel' directive via the common
/// parallel/teams outlining helper.
1269llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273 return emitParallelOrTeamsOutlinedFunction(
1274 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275}
1276
/// Outline the captured region of a 'teams' directive via the common
/// parallel/teams outlining helper.
1277llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281 return emitParallelOrTeamsOutlinedFunction(
1282 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283}
1284
/// Outline the captured region of a task/taskloop directive. For untied
/// tasks, installs an action that re-enqueues the task via __kmpc_omp_task
/// and reports the number of generated task parts through \p NumberOfParts.
1285llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1286 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1287 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1288 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1289 bool Tied, unsigned &NumberOfParts) {
 // Callback used between parts of an untied task: calls
 // __kmpc_omp_task(loc, tid, task_t*) to reschedule the task.
1290 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1291 PrePostActionTy &) {
1292 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1293 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1294 llvm::Value *TaskArgs[] = {
1295 UpLoc, ThreadID,
1296 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1297 TaskTVar->getType()->castAs<PointerType>())
1298 .getPointer(CGF)};
1299 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1300 CGM.getModule(), OMPRTL___kmpc_omp_task),
1301 TaskArgs);
1302 };
 // NOTE(review): setAction makes CodeGen refer to the local 'Action'
 // (the analyzer flags this pattern elsewhere in this file); here Action
 // outlives every use of CodeGen within this function, so it is safe.
1303 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1304 UntiedCodeGen);
1305 CodeGen.setAction(Action);
1306 assert(!ThreadIDVar->getType()->isPointerType() &&(static_cast <bool> (!ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 for tasks"
) ? void (0) : __assert_fail ("!ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 for tasks\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1307, __extension__
__PRETTY_FUNCTION__))
1307 "thread id variable must be of type kmp_int32 for tasks")(static_cast <bool> (!ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 for tasks"
) ? void (0) : __assert_fail ("!ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 for tasks\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1307, __extension__
__PRETTY_FUNCTION__))
;
1308 const OpenMPDirectiveKind Region =
1309 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1310 : OMPD_task;
1311 const CapturedStmt *CS = D.getCapturedStmt(Region);
1312 bool HasCancel = false;
1313 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1314 HasCancel = TD->hasCancel();
1315 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1316 HasCancel = TD->hasCancel();
1317 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1318 HasCancel = TD->hasCancel();
1319 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1320 HasCancel = TD->hasCancel();
1321
1322 CodeGenFunction CGF(CGM, true);
1323 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1324 InnermostKind, HasCancel, Action);
1325 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1326 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
 // Untied tasks may be split into multiple parts; report how many.
1327 if (!Tied)
1328 NumberOfParts = Action.getNumberOfParts();
1329 return Res;
1330}
1331
1332static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1333 const RecordDecl *RD, const CGRecordLayout &RL,
1334 ArrayRef<llvm::Constant *> Data) {
1335 llvm::StructType *StructTy = RL.getLLVMType();
1336 unsigned PrevIdx = 0;
1337 ConstantInitBuilder CIBuilder(CGM);
1338 const auto *DI = Data.begin();
1339 for (const FieldDecl *FD : RD->fields()) {
1340 unsigned Idx = RL.getLLVMFieldNo(FD);
1341 // Fill the alignment.
1342 for (unsigned I = PrevIdx; I < Idx; ++I)
1343 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1344 PrevIdx = Idx + 1;
1345 Fields.add(*DI);
1346 ++DI;
1347 }
1348}
1349
/// Create a global variable holding a constant struct of type \p Ty whose
/// fields are \p Data (padding filled with nulls). Extra arguments are
/// forwarded to ConstantStructBuilder::finishAndCreateGlobal (e.g. linkage).
1350template <class... As>
1351static llvm::GlobalVariable *
1352createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1353 ArrayRef<llvm::Constant *> Data, const Twine &Name,
1354 As &&... Args) {
1355 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1356 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1357 ConstantInitBuilder CIBuilder(CGM);
1358 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1359 buildStructValue(Fields, CGM, RD, RL, Data);
1360 return Fields.finishAndCreateGlobal(
1361 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1362 std::forward<As>(Args)...);
1363}
1364
/// Build a constant struct of type \p Ty from \p Data (padding filled with
/// nulls) and append it to the enclosing aggregate builder \p Parent.
1365template <typename T>
1366static void
1367createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1368 ArrayRef<llvm::Constant *> Data,
1369 T &Parent) {
1370 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1371 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1372 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1373 buildStructValue(Fields, CGM, RD, RL, Data);
1374 Fields.finishAndAddTo(Parent);
1375}
1376
1377void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1378 bool AtCurrentPoint) {
1379 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1380 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.")(static_cast <bool> (!Elem.second.ServiceInsertPt &&
"Insert point is set already.") ? void (0) : __assert_fail (
"!Elem.second.ServiceInsertPt && \"Insert point is set already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1380, __extension__
__PRETTY_FUNCTION__))
;
1381
1382 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1383 if (AtCurrentPoint) {
1384 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1385 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1386 } else {
1387 Elem.second.ServiceInsertPt =
1388 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1389 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1390 }
1391}
1392
1393void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1394 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1395 if (Elem.second.ServiceInsertPt) {
1396 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1397 Elem.second.ServiceInsertPt = nullptr;
1398 Ptr->eraseFromParent();
1399 }
1400}
1401
1402static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1403 SourceLocation Loc,
1404 SmallString<128> &Buffer) {
1405 llvm::raw_svector_ostream OS(Buffer);
1406 // Build debug location
1407 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1408 OS << ";" << PLoc.getFilename() << ";";
1409 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1410 OS << FD->getQualifiedNameAsString();
1411 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1412 return OS.str();
1413}
1414
1415llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1416 SourceLocation Loc,
1417 unsigned Flags) {
1418 uint32_t SrcLocStrSize;
1419 llvm::Constant *SrcLocStr;
1420 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1421 Loc.isInvalid()) {
1422 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1423 } else {
1424 std::string FunctionName;
1425 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1426 FunctionName = FD->getQualifiedNameAsString();
1427 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1428 const char *FileName = PLoc.getFilename();
1429 unsigned Line = PLoc.getLine();
1430 unsigned Column = PLoc.getColumn();
1431 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1432 Column, SrcLocStrSize);
1433 }
1434 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1435 return OMPBuilder.getOrCreateIdent(
1436 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1437}
1438
/// Return the OpenMP thread id value for the current function, reusing a
/// cached value, an outlined region's thread-id argument, or a call to
/// __kmpc_global_thread_num emitted at the service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load the thread-id argument when it is safe to do so: either no
      // EH landing pad is required, or the load would be emitted in (or its
      // pointer is defined in) the entry block / the current block, so it
      // dominates all uses.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the runtime call at the service insert point (near function entry)
  // so the result dominates all uses; the guard restores the builder after.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1507
1508void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1509 assert(CGF.CurFn && "No function in current CodeGenFunction.")(static_cast <bool> (CGF.CurFn && "No function in current CodeGenFunction."
) ? void (0) : __assert_fail ("CGF.CurFn && \"No function in current CodeGenFunction.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1509, __extension__
__PRETTY_FUNCTION__))
;
1510 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1511 clearLocThreadIdInsertPt(CGF);
1512 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1513 }
1514 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1515 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1516 UDRMap.erase(D);
1517 FunctionUDRMap.erase(CGF.CurFn);
1518 }
1519 auto I = FunctionUDMMap.find(CGF.CurFn);
1520 if (I != FunctionUDMMap.end()) {
1521 for(const auto *D : I->second)
1522 UDMMap.erase(D);
1523 FunctionUDMMap.erase(I);
1524 }
1525 LastprivateConditionalToTypes.erase(CGF.CurFn);
1526 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1527}
1528
/// Return the pointer-to-ident_t type, owned by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1532
1533llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1534 if (!Kmpc_MicroTy) {
1535 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1536 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1537 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1538 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1539 }
1540 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1541}
1542
1543llvm::FunctionCallee
1544CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1545 bool IsGPUDistribute) {
1546 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1547, __extension__
__PRETTY_FUNCTION__))
1547 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1547, __extension__
__PRETTY_FUNCTION__))
;
1548 StringRef Name;
1549 if (IsGPUDistribute)
1550 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1551 : "__kmpc_distribute_static_init_4u")
1552 : (IVSigned ? "__kmpc_distribute_static_init_8"
1553 : "__kmpc_distribute_static_init_8u");
1554 else
1555 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1556 : "__kmpc_for_static_init_4u")
1557 : (IVSigned ? "__kmpc_for_static_init_8"
1558 : "__kmpc_for_static_init_8u");
1559
1560 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1561 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1562 llvm::Type *TypeParams[] = {
1563 getIdentTyPointerTy(), // loc
1564 CGM.Int32Ty, // tid
1565 CGM.Int32Ty, // schedtype
1566 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1567 PtrTy, // p_lower
1568 PtrTy, // p_upper
1569 PtrTy, // p_stride
1570 ITy, // incr
1571 ITy // chunk
1572 };
1573 auto *FnTy =
1574 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1575 return CGM.CreateRuntimeFunction(FnTy, Name);
1576}
1577
1578llvm::FunctionCallee
1579CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1580 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1581, __extension__
__PRETTY_FUNCTION__))
1581 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1581, __extension__
__PRETTY_FUNCTION__))
;
1582 StringRef Name =
1583 IVSize == 32
1584 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1585 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1586 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1587 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1588 CGM.Int32Ty, // tid
1589 CGM.Int32Ty, // schedtype
1590 ITy, // lower
1591 ITy, // upper
1592 ITy, // stride
1593 ITy // chunk
1594 };
1595 auto *FnTy =
1596 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1597 return CGM.CreateRuntimeFunction(FnTy, Name);
1598}
1599
1600llvm::FunctionCallee
1601CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1602 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1603, __extension__
__PRETTY_FUNCTION__))
1603 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1603, __extension__
__PRETTY_FUNCTION__))
;
1604 StringRef Name =
1605 IVSize == 32
1606 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1607 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1608 llvm::Type *TypeParams[] = {
1609 getIdentTyPointerTy(), // loc
1610 CGM.Int32Ty, // tid
1611 };
1612 auto *FnTy =
1613 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1614 return CGM.CreateRuntimeFunction(FnTy, Name);
1615}
1616
1617llvm::FunctionCallee
1618CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1619 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1620, __extension__
__PRETTY_FUNCTION__))
1620 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1620, __extension__
__PRETTY_FUNCTION__))
;
1621 StringRef Name =
1622 IVSize == 32
1623 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1624 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1625 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1626 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1627 llvm::Type *TypeParams[] = {
1628 getIdentTyPointerTy(), // loc
1629 CGM.Int32Ty, // tid
1630 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1631 PtrTy, // p_lower
1632 PtrTy, // p_upper
1633 PtrTy // p_stride
1634 };
1635 auto *FnTy =
1636 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1637 return CGM.CreateRuntimeFunction(FnTy, Name);
1638}
1639
1640/// Obtain information that uniquely identifies a target entry. This
1641/// consists of the file and device IDs as well as line number associated with
1642/// the relevant entry source location.
1643static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1644 unsigned &DeviceID, unsigned &FileID,
1645 unsigned &LineNum) {
1646 SourceManager &SM = C.getSourceManager();
1647
1648 // The loc should be always valid and have a file ID (the user cannot use
1649 // #pragma directives in macros)
1650
1651 assert(Loc.isValid() && "Source location is expected to be always valid.")(static_cast <bool> (Loc.isValid() && "Source location is expected to be always valid."
) ? void (0) : __assert_fail ("Loc.isValid() && \"Source location is expected to be always valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1651, __extension__
__PRETTY_FUNCTION__))
;
1652
1653 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1654 assert(PLoc.isValid() && "Source location is expected to be always valid.")(static_cast <bool> (PLoc.isValid() && "Source location is expected to be always valid."
) ? void (0) : __assert_fail ("PLoc.isValid() && \"Source location is expected to be always valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1654, __extension__
__PRETTY_FUNCTION__))
;
1655
1656 llvm::sys::fs::UniqueID ID;
1657 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1658 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1659 assert(PLoc.isValid() && "Source location is expected to be always valid.")(static_cast <bool> (PLoc.isValid() && "Source location is expected to be always valid."
) ? void (0) : __assert_fail ("PLoc.isValid() && \"Source location is expected to be always valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1659, __extension__
__PRETTY_FUNCTION__))
;
1660 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1661 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1662 << PLoc.getFilename() << EC.message();
1663 }
1664
1665 DeviceID = ID.getDevice();
1666 FileID = ID.getFile();
1667 LineNum = PLoc.getLine();
1668}
1669
/// For a `declare target` variable that must be accessed indirectly (link
/// clause, or `to` with unified shared memory required), return the address
/// of its reference pointer global, creating it on first use; otherwise
/// return an invalid Address.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Simd-only mode emits no device code, so no indirection is needed.
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    // Name: <mangled-name>[_<fileid-hex>]_decl_tgt_ref_ptr; the file id is
    // mixed in for internal-linkage variables to keep names unique per TU.
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      // First use: create the pointer global and register the variable for
      // the offload entry tables.
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      // On the host the pointer is initialized to the variable's address;
      // on the device the runtime patches it during registration.
      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1708
1709llvm::Constant *
1710CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1711 assert(!CGM.getLangOpts().OpenMPUseTLS ||(static_cast <bool> (!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported()) ? void (
0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1712, __extension__
__PRETTY_FUNCTION__))
1712 !CGM.getContext().getTargetInfo().isTLSSupported())(static_cast <bool> (!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported()) ? void (
0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1712, __extension__
__PRETTY_FUNCTION__))
;
1713 // Lookup the entry, lazily creating it if necessary.
1714 std::string Suffix = getName({"cache", ""});
1715 return getOrCreateInternalVariable(
1716 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1717}
1718
1719Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1720 const VarDecl *VD,
1721 Address VDAddr,
1722 SourceLocation Loc) {
1723 if (CGM.getLangOpts().OpenMPUseTLS &&
1724 CGM.getContext().getTargetInfo().isTLSSupported())
1725 return VDAddr;
1726
1727 llvm::Type *VarTy = VDAddr.getElementType();
1728 llvm::Value *Args[] = {
1729 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1730 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1731 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1732 getOrCreateThreadPrivateCache(VD)};
1733 return Address(
1734 CGF.EmitRuntimeCall(
1735 OMPBuilder.getOrCreateRuntimeFunction(
1736 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1737 Args),
1738 CGF.Int8Ty, VDAddr.getAlignment());
1739}
1740
1741void CGOpenMPRuntime::emitThreadPrivateVarInit(
1742 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1743 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1744 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1745 // library.
1746 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1747 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1748 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1749 OMPLoc);
1750 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1751 // to register constructor/destructor for variable.
1752 llvm::Value *Args[] = {
1753 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1754 Ctor, CopyCtor, Dtor};
1755 CGF.EmitRuntimeCall(
1756 OMPBuilder.getOrCreateRuntimeFunction(
1757 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1758 Args);
1759}
1760
/// Emit (once per mangled name) the ctor/dtor helper functions for a
/// threadprivate variable and register them with the runtime. Returns the
/// synthesized "__omp_threadprivate_init_" function when no CodeGenFunction
/// is supplied (registration is emitted into it), nullptr otherwise.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // Native TLS handles thread privacy without runtime registration.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Emit helpers only once per variable definition.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // void *__kmpc_global_ctor_(void *dst): dst is the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      // Run the original initializer expression on the destination storage.
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime expects the helper to return the destination pointer.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // void __kmpc_global_dtor_(void *dst): dst is the thread's copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // Replace missing helpers with typed null pointers for the register call.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No enclosing function: synthesize a global init function that does
      // the registration and return it to the caller for scheduling.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1881
// Emit the offload-entry registration (and, when needed, the ctor/dtor
// stub functions) for a variable marked declare-target. Returns
// CGM.getLangOpts().OpenMPIsDevice on every exit path, so the caller can
// tell whether device-side emission is in effect.
1882bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1883 llvm::GlobalVariable *Addr,
1884 bool PerformInit) {
// Nothing to do when there are no target triples and we are not compiling
// for the device.
1885 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1886 !CGM.getLangOpts().OpenMPIsDevice)
1887 return false;
1888 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1889 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
// 'link' entries, and 'to' entries under unified shared memory, need no
// ctor/dtor registration here.
1890 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1891 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1892 HasRequiresUnifiedSharedMemory))
1893 return CGM.getLangOpts().OpenMPIsDevice;
1894 VD = VD->getDefinition(CGM.getContext());
// NOTE(review): the next five lines are the analyzer report's rendering of
// an expanded assert() macro (original source line 1895); preserved verbatim.
1895 assert(VD && "Unknown VarDecl")(static_cast <bool> (VD && "Unknown VarDecl") ?
void (0) : __assert_fail ("VD && \"Unknown VarDecl\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1895, __extension__
__PRETTY_FUNCTION__))
;
1896
// Only emit the registration once per mangled name.
1897 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1898 return CGM.getLangOpts().OpenMPIsDevice;
1899
1900 QualType ASTTy = VD->getType();
1901 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1902
1903 // Produce the unique prefix to identify the new target regions. We use
1904 // the source location of the variable declaration which we know to not
1905 // conflict with any target region.
1906 unsigned DeviceID;
1907 unsigned FileID;
1908 unsigned Line;
1909 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1910 SmallString<128> Buffer, Out;
1911 {
1912 llvm::raw_svector_ostream OS(Buffer);
1913 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1914 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1915 }
1916
1917 const Expr *Init = VD->getAnyInitializer();
// C++ dynamic initialization: emit a "<prefix>_ctor" entry. On the device
// it is a real function that runs the initializer; on the host it is a
// placeholder global used only as the entry's address/ID.
1918 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1919 llvm::Constant *Ctor;
1920 llvm::Constant *ID;
1921 if (CGM.getLangOpts().OpenMPIsDevice) {
1922 // Generate function that re-emits the declaration's initializer into
1923 // the threadprivate copy of the variable VD
1924 CodeGenFunction CtorCGF(CGM);
1925
1926 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1927 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1928 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1929 FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
1930 llvm::GlobalValue::WeakODRLinkage);
1931 if (CGM.getTriple().isAMDGCN())
1932 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1933 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1934 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1935 FunctionArgList(), Loc, Loc);
1936 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
// The initializer is emitted through an addrspace(0) view of the global
// when the global itself lives in a non-default address space.
1937 llvm::Constant *AddrInAS0 = Addr;
1938 if (Addr->getAddressSpace() != 0)
1939 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1940 Addr, llvm::PointerType::getWithSamePointeeType(
1941 cast<llvm::PointerType>(Addr->getType()), 0));
1942 CtorCGF.EmitAnyExprToMem(Init,
1943 Address(AddrInAS0, Addr->getValueType(),
1944 CGM.getContext().getDeclAlign(VD)),
1945 Init->getType().getQualifiers(),
1946 /*IsInitializer=*/true);
1947 CtorCGF.FinishFunction();
1948 Ctor = Fn;
1949 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1950 } else {
1951 Ctor = new llvm::GlobalVariable(
1952 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1953 llvm::GlobalValue::PrivateLinkage,
1954 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1955 ID = Ctor;
1956 }
1957
1958 // Register the information for the entry associated with the constructor.
1959 Out.clear();
1960 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1961 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1962 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1963 }
// Non-trivially-destructible variable: emit a matching "<prefix>_dtor"
// entry, mirroring the ctor logic above.
1964 if (VD->getType().isDestructedType() != QualType::DK_none) {
1965 llvm::Constant *Dtor;
1966 llvm::Constant *ID;
1967 if (CGM.getLangOpts().OpenMPIsDevice) {
1968 // Generate function that emits destructor call for the threadprivate
1969 // copy of the variable VD
1970 CodeGenFunction DtorCGF(CGM);
1971
1972 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1973 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1974 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1975 FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
1976 llvm::GlobalValue::WeakODRLinkage);
1977 if (CGM.getTriple().isAMDGCN())
1978 Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
1979 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1980 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1981 FunctionArgList(), Loc, Loc);
1982 // Create a scope with an artificial location for the body of this
1983 // function.
1984 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1985 llvm::Constant *AddrInAS0 = Addr;
1986 if (Addr->getAddressSpace() != 0)
1987 AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
1988 Addr, llvm::PointerType::getWithSamePointeeType(
1989 cast<llvm::PointerType>(Addr->getType()), 0));
1990 DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
1991 CGM.getContext().getDeclAlign(VD)),
1992 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1993 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1994 DtorCGF.FinishFunction();
1995 Dtor = Fn;
1996 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1997 } else {
1998 Dtor = new llvm::GlobalVariable(
1999 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2000 llvm::GlobalValue::PrivateLinkage,
2001 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2002 ID = Dtor;
2003 }
2004 // Register the information for the entry associated with the destructor.
2005 Out.clear();
2006 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2007 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2008 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2009 }
2010 return CGM.getLangOpts().OpenMPIsDevice;
2011}
2012
// Return the address of a compiler-generated ("artificial") threadprivate
// variable named Name of type VarType. Uses native TLS when the target and
// language options allow it; otherwise falls back to the
// __kmpc_threadprivate_cached runtime call with a per-variable cache global.
2013Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2014 QualType VarType,
2015 StringRef Name) {
2016 std::string Suffix = getName({"artificial", ""});
2017 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2018 llvm::GlobalVariable *GAddr =
2019 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
// Fast path: mark the global thread_local and hand it back directly.
2020 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2021 CGM.getTarget().isTLSSupported()) {
2022 GAddr->setThreadLocal(/*Val=*/true);
2023 return Address(GAddr, GAddr->getValueType(),
2024 CGM.getContext().getTypeAlignInChars(VarType));
2025 }
// Slow path: ask the OpenMP runtime for the per-thread copy, caching the
// lookup in a dedicated "<name><suffix><cache>" global.
2026 std::string CacheSuffix = getName({"cache", ""});
2027 llvm::Value *Args[] = {
2028 emitUpdateLocation(CGF, SourceLocation()),
2029 getThreadID(CGF, SourceLocation()),
2030 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2031 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2032 /*isSigned=*/false),
2033 getOrCreateInternalVariable(
2034 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2035 return Address(
2036 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2037 CGF.EmitRuntimeCall(
2038 OMPBuilder.getOrCreateRuntimeFunction(
2039 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2040 Args),
2041 VarLVType->getPointerTo(/*AddrSpace=*/0)),
2042 VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
2043}
2044
// Emit an if/else over Cond, running ThenGen in the true arm and ElseGen in
// the false arm. Constant-folds the condition when possible so only the
// live arm is emitted.
2045void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2046 const RegionCodeGenTy &ThenGen,
2047 const RegionCodeGenTy &ElseGen) {
2048 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2049
2050 // If the condition constant folds and can be elided, try to avoid emitting
2051 // the condition and the dead arm of the if/else.
2052 bool CondConstant;
2053 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2054 if (CondConstant)
2055 ThenGen(CGF);
2056 else
2057 ElseGen(CGF);
2058 return;
2059 }
2060
2061 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2062 // emit the conditional branch.
2063 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2064 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2065 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2066 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2067
2068 // Emit the 'then' code.
2069 CGF.EmitBlock(ThenBlock);
2070 ThenGen(CGF);
2071 CGF.EmitBranch(ContBlock);
2072 // Emit the 'else' code if present.
2073 // There is no need to emit line number for unconditional branch.
2074 (void)ApplyDebugLocation::CreateEmpty(CGF);
2075 CGF.EmitBlock(ElseBlock);
2076 ElseGen(CGF);
2077 // There is no need to emit line number for unconditional branch.
2078 (void)ApplyDebugLocation::CreateEmpty(CGF);
2079 CGF.EmitBranch(ContBlock);
2080 // Emit the continuation block for code after the if.
2081 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2082}
2083
// Emit code for a parallel region: either a __kmpc_fork_call of OutlinedFn
// (the normal path / 'then' arm of an if clause) or a serialized execution
// bracketed by __kmpc_serialized_parallel / __kmpc_end_serialized_parallel
// (the 'else' arm). NumThreads is accepted but not used in this body.
2084void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2085 llvm::Function *OutlinedFn,
2086 ArrayRef<llvm::Value *> CapturedVars,
2087 const Expr *IfCond,
2088 llvm::Value *NumThreads) {
2089 if (!CGF.HaveInsertPoint())
2090 return;
2091 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2092 auto &M = CGM.getModule();
// Lambdas capture by value/ref only what they need; both are consumed
// before this function returns (via emitIfClause or the direct call below).
2093 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2094 this](CodeGenFunction &CGF, PrePostActionTy &) {
2095 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2096 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2097 llvm::Value *Args[] = {
2098 RTLoc,
2099 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2100 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2101 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2102 RealArgs.append(std::begin(Args), std::end(Args));
2103 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2104
2105 llvm::FunctionCallee RTLFn =
2106 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2107 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2108 };
2109 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2110 this](CodeGenFunction &CGF, PrePostActionTy &) {
2111 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2112 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2113 // Build calls:
2114 // __kmpc_serialized_parallel(&Loc, GTid);
2115 llvm::Value *Args[] = {RTLoc, ThreadID};
2116 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2117 M, OMPRTL___kmpc_serialized_parallel),
2118 Args);
2119
2120 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2121 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2122 Address ZeroAddrBound =
2123 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2124 /*Name=*/".bound.zero.addr");
2125 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2126 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2127 // ThreadId for serialized parallels is 0.
2128 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2129 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2130 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2131
2132 // Ensure we do not inline the function. This is trivially true for the ones
2133 // passed to __kmpc_fork_call but the ones called in serialized regions
2134 // could be inlined. This is not a perfect but it is closer to the invariant
2135 // we want, namely, every data environment starts with a new function.
2136 // TODO: We should pass the if condition to the runtime function and do the
2137 // handling there. Much cleaner code.
2138 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2139 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2140 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2141
2142 // __kmpc_end_serialized_parallel(&Loc, GTid);
2143 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2144 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2145 M, OMPRTL___kmpc_end_serialized_parallel),
2146 EndArgs);
2147 };
// With an if clause, pick the arm at runtime (or at fold time); otherwise
// always fork.
2148 if (IfCond) {
2149 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2150 } else {
2151 RegionCodeGenTy ThenRCG(ThenGen);
2152 ThenRCG(CGF);
2153 }
2154}
2155
2156// If we're inside an (outlined) parallel region, use the region info's
2157// thread-ID variable (it is passed in a first argument of the outlined function
2158// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2159// regular serial code region, get thread ID by calling kmp_int32
2160// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2161// return the address of that temp.
2162Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2163 SourceLocation Loc) {
// Inside an outlined region: reuse the gtid parameter's address.
2164 if (auto *OMPRegionInfo =
2165 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2166 if (OMPRegionInfo->getThreadIDVariable())
2167 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2168
// Serial code: materialize the thread ID into a fresh i32 temporary.
2169 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2170 QualType Int32Ty =
2171 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2172 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2173 CGF.EmitStoreOfScalar(ThreadID,
2174 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2175
2176 return ThreadIDTemp;
2177}
2178
// Return the module-level internal global named Name of type Ty, creating a
// zero-initialized common-linkage global on first use. Results are memoized
// in the InternalVars map keyed by the rendered name.
2179llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2180 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2181 SmallString<256> Buffer;
2182 llvm::raw_svector_ostream Out(Buffer);
2183 Out << Name;
2184 StringRef RuntimeName = Out.str();
2185 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2186 if (Elem.second) {
// NOTE(review): the following lines are the analyzer report's rendering of
// an expanded assert() macro (original source line 2188); preserved verbatim.
2187 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&(static_cast <bool> (Elem.second->getType()->isOpaqueOrPointeeTypeMatches
(Ty) && "OMP internal variable has different type than requested"
) ? void (0) : __assert_fail ("Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && \"OMP internal variable has different type than requested\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2188, __extension__
__PRETTY_FUNCTION__))
2188 "OMP internal variable has different type than requested")(static_cast <bool> (Elem.second->getType()->isOpaqueOrPointeeTypeMatches
(Ty) && "OMP internal variable has different type than requested"
) ? void (0) : __assert_fail ("Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && \"OMP internal variable has different type than requested\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2188, __extension__
__PRETTY_FUNCTION__))
;
2189 return &*Elem.second;
2190 }
2191
// Cache miss: create the global and store it in the map entry.
2192 return Elem.second = new llvm::GlobalVariable(
2193 CGM.getModule(), Ty, /*IsConstant*/ false,
2194 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2195 Elem.first(), /*InsertBefore=*/nullptr,
2196 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2197}
2198
// Return the per-name lock variable ("gomp_critical_user_<name>.var") used
// by __kmpc_critical / __kmpc_end_critical for a named critical region.
2199llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2200 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2201 std::string Name = getName({Prefix, "var"});
2202 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2203}
2204
2205namespace {
2206/// Common pre(post)-action for different OpenMP constructs.
// Stores callees and argument lists BY REFERENCE (ArrayRef does not own its
// elements), so a CommonActionTy must not outlive the arrays it was built
// from. Enter() emits the entry runtime call (optionally guarded by its
// result), Exit() emits the exit call, and Done() closes the conditional
// region opened by Enter().
2207class CommonActionTy final : public PrePostActionTy {
2208 llvm::FunctionCallee EnterCallee;
2209 ArrayRef<llvm::Value *> EnterArgs;
2210 llvm::FunctionCallee ExitCallee;
2211 ArrayRef<llvm::Value *> ExitArgs;
// When true, the enter call's result gates emission of the region body.
2212 bool Conditional;
2213 llvm::BasicBlock *ContBlock = nullptr;
2214
2215public:
2216 CommonActionTy(llvm::FunctionCallee EnterCallee,
2217 ArrayRef<llvm::Value *> EnterArgs,
2218 llvm::FunctionCallee ExitCallee,
2219 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2220 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2221 ExitArgs(ExitArgs), Conditional(Conditional) {}
2222 void Enter(CodeGenFunction &CGF) override {
2223 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2224 if (Conditional) {
2225 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2226 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2227 ContBlock = CGF.createBasicBlock("omp_if.end");
2228 // Generate the branch (If-stmt)
2229 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2230 CGF.EmitBlock(ThenBlock);
2231 }
2232 }
// Must be called by the user of a Conditional action after the region body;
// it emits the branch into, and the placement of, the continuation block.
2233 void Done(CodeGenFunction &CGF) {
2234 // Emit the rest of blocks/branches
2235 CGF.EmitBranch(ContBlock);
2236 CGF.EmitBlock(ContBlock, true);
2237 }
2238 void Exit(CodeGenFunction &CGF) override {
2239 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2240 }
2241};
2242} // anonymous namespace
2243
// Emit a '#pragma omp critical' region: __kmpc_critical[_with_hint] on
// entry, the user region via CriticalOpGen, __kmpc_end_critical on exit.
// NOTE(review): this is the function flagged by the static analyzer (see the
// note before the closing brace). 'Action' is a stack local whose address is
// stored into CriticalOpGen by setAction(); the stored pointer dangles once
// this function returns. It appears the action is only consumed inside
// emitInlinedDirective before return — confirm that no caller re-invokes
// CriticalOpGen afterwards.
2244void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2245 StringRef CriticalName,
2246 const RegionCodeGenTy &CriticalOpGen,
2247 SourceLocation Loc, const Expr *Hint) {
2248 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2249 // CriticalOpGen();
2250 // __kmpc_end_critical(ident_t *, gtid, Lock);
2251 // Prepare arguments and build a call to __kmpc_critical
2252 if (!CGF.HaveInsertPoint())
12
Taking false branch
2253 return;
2254 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2255 getCriticalRegionLock(CriticalName)};
// EnterArgs starts as a copy of Args; the hint (if any) is appended so the
// enter call can use the _with_hint runtime entry.
2256 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2257 std::end(Args));
2258 if (Hint
12.1
'Hint' is null
) {
13
Taking false branch
2259 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2260 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2261 }
// Action references EnterArgs/Args by ArrayRef; all three share this
// function's stack frame, so lifetimes match up to the return below.
2262 CommonActionTy Action(
2263 OMPBuilder.getOrCreateRuntimeFunction(
2264 CGM.getModule(),
2265 Hint
13.1
'Hint' is null
? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
14
'?' condition is false
2266 EnterArgs,
2267 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2268 OMPRTL___kmpc_end_critical),
2269 Args);
2270 CriticalOpGen.setAction(Action);
2271 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
15
Address of stack memory associated with local variable 'Action' is still referred to by a temporary object on the stack upon returning to the caller. This will be a dangling reference
2272}
2273
// Emit a '#pragma omp master' region: the body runs only when
// __kmpc_master returns nonzero, and __kmpc_end_master closes it. The
// Conditional action requires the trailing Action.Done(CGF) to emit the
// continuation block.
2274void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2275 const RegionCodeGenTy &MasterOpGen,
2276 SourceLocation Loc) {
2277 if (!CGF.HaveInsertPoint())
2278 return;
2279 // if(__kmpc_master(ident_t *, gtid)) {
2280 // MasterOpGen();
2281 // __kmpc_end_master(ident_t *, gtid);
2282 // }
2283 // Prepare arguments and build a call to __kmpc_master
2284 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2285 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2286 CGM.getModule(), OMPRTL___kmpc_master),
2287 Args,
2288 OMPBuilder.getOrCreateRuntimeFunction(
2289 CGM.getModule(), OMPRTL___kmpc_end_master),
2290 Args,
2291 /*Conditional=*/true);
2292 MasterOpGen.setAction(Action);
2293 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2294 Action.Done(CGF);
2295}
2296
// Emit a '#pragma omp masked' region, the filtered generalization of
// 'master': the body runs when __kmpc_masked(loc, gtid, filter) returns
// nonzero. A missing filter clause defaults to thread 0.
2297void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2298 const RegionCodeGenTy &MaskedOpGen,
2299 SourceLocation Loc, const Expr *Filter) {
2300 if (!CGF.HaveInsertPoint())
2301 return;
2302 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2303 // MaskedOpGen();
2304 // __kmpc_end_masked(iden_t *, gtid);
2305 // }
2306 // Prepare arguments and build a call to __kmpc_masked
2307 llvm::Value *FilterVal = Filter
2308 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2309 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2310 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2311 FilterVal};
// End call takes no filter, hence the separate two-element array.
2312 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2313 getThreadID(CGF, Loc)};
2314 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2315 CGM.getModule(), OMPRTL___kmpc_masked),
2316 Args,
2317 OMPBuilder.getOrCreateRuntimeFunction(
2318 CGM.getModule(), OMPRTL___kmpc_end_masked),
2319 ArgsEnd,
2320 /*Conditional=*/true);
2321 MaskedOpGen.setAction(Action);
2322 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2323 Action.Done(CGF);
2324}
2325
// Emit a '#pragma omp taskyield' call, using the OpenMPIRBuilder when that
// codegen path is enabled and the __kmpc_omp_taskyield runtime call
// otherwise; afterwards lets an enclosing untied-task region emit its
// switch point.
2326void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2327 SourceLocation Loc) {
2328 if (!CGF.HaveInsertPoint())
2329 return;
2330 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2331 OMPBuilder.createTaskyield(CGF.Builder);
2332 } else {
2333 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2334 llvm::Value *Args[] = {
2335 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2336 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2337 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2338 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2339 Args);
2340 }
2341
2342 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2343 Region->emitUntiedSwitch(CGF);
2344}
2345
// Emit a '#pragma omp taskgroup' region bracketed by __kmpc_taskgroup and
// __kmpc_end_taskgroup. Unconditional (unlike master/masked), so no
// Action.Done() call is needed.
2346void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2347 const RegionCodeGenTy &TaskgroupOpGen,
2348 SourceLocation Loc) {
2349 if (!CGF.HaveInsertPoint())
2350 return;
2351 // __kmpc_taskgroup(ident_t *, gtid);
2352 // TaskgroupOpGen();
2353 // __kmpc_end_taskgroup(ident_t *, gtid);
2354 // Prepare arguments and build a call to __kmpc_taskgroup
2355 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2356 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2357 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2358 Args,
2359 OMPBuilder.getOrCreateRuntimeFunction(
2360 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2361 Args);
2362 TaskgroupOpGen.setAction(Action);
2363 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2364}
2365
2366/// Given an array of pointers to variables, project the address of a
2367/// given variable.
// Loads Array[Index] (a void*-style slot) and casts it to a pointer to
// Var's memory type, preserving the loaded pointer's address space.
2368static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2369 unsigned Index, const VarDecl *Var) {
2370 // Pull out the pointer to the variable.
2371 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2372 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2373
2374 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2375 return Address(
2376 CGF.Builder.CreateBitCast(
2377 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2378 ElemTy, CGF.getContext().getDeclAlign(Var));
2379}
2380
// Build the internal helper 'void omp.copyprivate.copy_func(void *LHSArg,
// void *RHSArg)' used by __kmpc_copyprivate: both arguments are arrays of
// void* (one slot per copyprivate variable), and the body performs an
// element-wise EmitOMPCopy from each RHS slot into the matching LHS slot
// using the provided assignment expressions.
2381static llvm::Value *emitCopyprivateCopyFunction(
2382 CodeGenModule &CGM, llvm::Type *ArgsElemType,
2383 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2384 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2385 SourceLocation Loc) {
2386 ASTContext &C = CGM.getContext();
2387 // void copy_func(void *LHSArg, void *RHSArg);
2388 FunctionArgList Args;
2389 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2390 ImplicitParamDecl::Other);
2391 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2392 ImplicitParamDecl::Other);
2393 Args.push_back(&LHSArg);
2394 Args.push_back(&RHSArg);
2395 const auto &CGFI =
2396 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2397 std::string Name =
2398 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2399 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2400 llvm::GlobalValue::InternalLinkage, Name,
2401 &CGM.getModule());
2402 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2403 Fn->setDoesNotRecurse();
2404 CodeGenFunction CGF(CGM);
2405 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2406 // Dest = (void*[n])(LHSArg);
2407 // Src = (void*[n])(RHSArg);
2408 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2409 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2410 ArgsElemType->getPointerTo()),
2411 ArgsElemType, CGF.getPointerAlign());
2412 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2413 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2414 ArgsElemType->getPointerTo()),
2415 ArgsElemType, CGF.getPointerAlign());
2416 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2417 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2418 // ...
2419 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2420 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2421 const auto *DestVar =
2422 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2423 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2424
2425 const auto *SrcVar =
2426 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2427 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2428
2429 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2430 QualType Type = VD->getType();
2431 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2432 }
2433 CGF.FinishFunction();
2434 return Fn;
2435}
2436
// Emit a '#pragma omp single' region: the body runs on the thread for which
// __kmpc_single returns nonzero; when copyprivate clauses are present, a
// did_it flag and a __kmpc_copyprivate call broadcast the single thread's
// values to the rest of the team.
2437void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2438 const RegionCodeGenTy &SingleOpGen,
2439 SourceLocation Loc,
2440 ArrayRef<const Expr *> CopyprivateVars,
2441 ArrayRef<const Expr *> SrcExprs,
2442 ArrayRef<const Expr *> DstExprs,
2443 ArrayRef<const Expr *> AssignmentOps) {
2444 if (!CGF.HaveInsertPoint())
2445 return;
// NOTE(review): the following lines are the analyzer report's rendering of
// an expanded multi-line assert() macro (original lines 2446-2448),
// checking the four copyprivate arrays have matching sizes; preserved verbatim.
2446 assert(CopyprivateVars.size() == SrcExprs.size() &&(static_cast <bool> (CopyprivateVars.size() == SrcExprs
.size() && CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size()) ? void (0) :
__assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2448, __extension__
__PRETTY_FUNCTION__))
2447 CopyprivateVars.size() == DstExprs.size() &&(static_cast <bool> (CopyprivateVars.size() == SrcExprs
.size() && CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size()) ? void (0) :
__assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2448, __extension__
__PRETTY_FUNCTION__))
2448 CopyprivateVars.size() == AssignmentOps.size())(static_cast <bool> (CopyprivateVars.size() == SrcExprs
.size() && CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size()) ? void (0) :
__assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2448, __extension__
__PRETTY_FUNCTION__))
;
2449 ASTContext &C = CGM.getContext();
2450 // int32 did_it = 0;
2451 // if(__kmpc_single(ident_t *, gtid)) {
2452 // SingleOpGen();
2453 // __kmpc_end_single(ident_t *, gtid);
2454 // did_it = 1;
2455 // }
2456 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2457 // <copy_func>, did_it);
2458
2459 Address DidIt = Address::invalid();
2460 if (!CopyprivateVars.empty()) {
2461 // int32 did_it = 0;
2462 QualType KmpInt32Ty =
2463 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2464 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2465 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2466 }
2467 // Prepare arguments and build a call to __kmpc_single
2468 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2469 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2470 CGM.getModule(), OMPRTL___kmpc_single),
2471 Args,
2472 OMPBuilder.getOrCreateRuntimeFunction(
2473 CGM.getModule(), OMPRTL___kmpc_end_single),
2474 Args,
2475 /*Conditional=*/true);
2476 SingleOpGen.setAction(Action);
2477 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
// did_it = 1 is stored inside the guarded region, before Done() emits the
// continuation block, so only the executing thread sets it.
2478 if (DidIt.isValid()) {
2479 // did_it = 1;
2480 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2481 }
2482 Action.Done(CGF);
2483 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2484 // <copy_func>, did_it);
2485 if (DidIt.isValid()) {
2486 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2487 QualType CopyprivateArrayTy = C.getConstantArrayType(
2488 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2489 /*IndexTypeQuals=*/0);
2490 // Create a list of all private variables for copyprivate.
2491 Address CopyprivateList =
2492 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2493 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2494 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2495 CGF.Builder.CreateStore(
2496 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2497 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2498 CGF.VoidPtrTy),
2499 Elem);
2500 }
2501 // Build function that copies private values from single region to all other
2502 // threads in the corresponding parallel region.
2503 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2504 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2505 SrcExprs, DstExprs, AssignmentOps, Loc);
2506 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2507 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2508 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2509 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2510 llvm::Value *Args[] = {
2511 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2512 getThreadID(CGF, Loc), // i32 <gtid>
2513 BufSize, // size_t <buf_size>
2514 CL.getPointer(), // void *<copyprivate list>
2515 CpyFn, // void (*) (void *, void *) <copy_func>
2516 DidItVal // i32 did_it
2517 };
2518 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2519 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2520 Args);
2521 }
2522}
2523
// Emit a '#pragma omp ordered' region. With IsThreads the body is bracketed
// by __kmpc_ordered / __kmpc_end_ordered; otherwise (e.g. 'ordered simd' —
// presumably, confirm against callers) the body is emitted inline with no
// runtime calls.
2524void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2525 const RegionCodeGenTy &OrderedOpGen,
2526 SourceLocation Loc, bool IsThreads) {
2527 if (!CGF.HaveInsertPoint())
2528 return;
2529 // __kmpc_ordered(ident_t *, gtid);
2530 // OrderedOpGen();
2531 // __kmpc_end_ordered(ident_t *, gtid);
2532 // Prepare arguments and build a call to __kmpc_ordered
2533 if (IsThreads) {
2534 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2535 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2536 CGM.getModule(), OMPRTL___kmpc_ordered),
2537 Args,
2538 OMPBuilder.getOrCreateRuntimeFunction(
2539 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2540 Args);
2541 OrderedOpGen.setAction(Action);
2542 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2543 return;
2544 }
2545 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2546}
2547
// Map a directive kind to the ident_t barrier flag passed in the location
// argument of barrier runtime calls: explicit for 'barrier', a
// construct-specific implicit flag for for/sections/single, generic
// implicit otherwise.
2548unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2549 unsigned Flags;
2550 if (Kind == OMPD_for)
2551 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2552 else if (Kind == OMPD_sections)
2553 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2554 else if (Kind == OMPD_single)
2555 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2556 else if (Kind == OMPD_barrier)
2557 Flags = OMP_IDENT_BARRIER_EXPL;
2558 else
2559 Flags = OMP_IDENT_BARRIER_IMPL;
2560 return Flags;
2561}
2562
// Choose the default schedule for a loop directive. Only overrides the
// output parameters when the loop is a doacross loop (an ordered clause
// with a loop count), in which case it forces schedule(static, 1).
2563void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2564 CodeGenFunction &CGF, const OMPLoopDirective &S,
2565 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2566 // Check if the loop directive is actually a doacross loop directive. In this
2567 // case choose static, 1 schedule.
2568 if (llvm::any_of(
2569 S.getClausesOfKind<OMPOrderedClause>(),
2570 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2571 ScheduleKind = OMPC_SCHEDULE_static;
2572 // Chunk size is 1 in this case.
2573 llvm::APInt ChunkSize(32, 1);
// Synthesize an unsigned 32-bit literal '1' as the chunk expression.
2574 ChunkExpr = IntegerLiteral::Create(
2575 CGF.getContext(), ChunkSize,
2576 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2577 SourceLocation());
2578 }
2579}
2580
// Emit a barrier: via OpenMPIRBuilder when enabled; otherwise
// __kmpc_cancel_barrier inside a cancellable region (optionally with the
// cancellation-exit check) or plain __kmpc_barrier.
2581void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2582 OpenMPDirectiveKind Kind, bool EmitChecks,
2583 bool ForceSimpleCall) {
2584 // Check if we should use the OMPBuilder
2585 auto *OMPRegionInfo =
2586 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2587 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2588 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2589 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2590 return;
2591 }
2592
2593 if (!CGF.HaveInsertPoint())
2594 return;
2595 // Build call __kmpc_cancel_barrier(loc, thread_id);
2596 // Build call __kmpc_barrier(loc, thread_id);
2597 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2598 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2599 // thread_id);
2600 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2601 getThreadID(CGF, Loc)};
2602 if (OMPRegionInfo) {
2603 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2604 llvm::Value *Result = CGF.EmitRuntimeCall(
2605 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2606 OMPRTL___kmpc_cancel_barrier),
2607 Args);
2608 if (EmitChecks) {
2609 // if (__kmpc_cancel_barrier()) {
2610 // exit from construct;
2611 // }
2612 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2613 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2614 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2615 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2616 CGF.EmitBlock(ExitBB);
2617 // exit from construct;
2618 CodeGenFunction::JumpDest CancelDestination =
2619 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2620 CGF.EmitBranchThroughCleanup(CancelDestination);
2621 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2622 }
2623 return;
2624 }
2625 }
2626 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2627 CGM.getModule(), OMPRTL___kmpc_barrier),
2628 Args);
2629}
2630
2631/// Map the OpenMP loop schedule to the runtime enumeration.
2632static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2633 bool Chunked, bool Ordered) {
2634 switch (ScheduleKind) {
2635 case OMPC_SCHEDULE_static:
2636 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2637 : (Ordered ? OMP_ord_static : OMP_sch_static);
2638 case OMPC_SCHEDULE_dynamic:
2639 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2640 case OMPC_SCHEDULE_guided:
2641 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2642 case OMPC_SCHEDULE_runtime:
2643 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2644 case OMPC_SCHEDULE_auto:
2645 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2646 case OMPC_SCHEDULE_unknown:
2647 assert(!Chunked && "chunk was specified but schedule kind not known")(static_cast <bool> (!Chunked && "chunk was specified but schedule kind not known"
) ? void (0) : __assert_fail ("!Chunked && \"chunk was specified but schedule kind not known\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2647, __extension__
__PRETTY_FUNCTION__))
;
2648 return Ordered ? OMP_ord_static : OMP_sch_static;
2649 }
2650 llvm_unreachable("Unexpected runtime schedule")::llvm::llvm_unreachable_internal("Unexpected runtime schedule"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2650)
;
2651}
2652
2653/// Map the OpenMP distribute schedule to the runtime enumeration.
2654static OpenMPSchedType
2655getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2656 // only static is allowed for dist_schedule
2657 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2658}
2659
2660bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2661 bool Chunked) const {
2662 OpenMPSchedType Schedule =
2663 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2664 return Schedule == OMP_sch_static;
2665}
2666
2667bool CGOpenMPRuntime::isStaticNonchunked(
2668 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2669 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2670 return Schedule == OMP_dist_sch_static;
2671}
2672
2673bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2674 bool Chunked) const {
2675 OpenMPSchedType Schedule =
2676 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2677 return Schedule == OMP_sch_static_chunked;
2678}
2679
2680bool CGOpenMPRuntime::isStaticChunked(
2681 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2682 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2683 return Schedule == OMP_dist_sch_static_chunked;
2684}
2685
2686bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2687 OpenMPSchedType Schedule =
2688 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2689 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here")(static_cast <bool> (Schedule != OMP_sch_static_chunked
&& "cannot be chunked here") ? void (0) : __assert_fail
("Schedule != OMP_sch_static_chunked && \"cannot be chunked here\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2689, __extension__
__PRETTY_FUNCTION__))
;
2690 return Schedule != OMP_sch_static;
2691}
2692
2693static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2694 OpenMPScheduleClauseModifier M1,
2695 OpenMPScheduleClauseModifier M2) {
2696 int Modifier = 0;
2697 switch (M1) {
2698 case OMPC_SCHEDULE_MODIFIER_monotonic:
2699 Modifier = OMP_sch_modifier_monotonic;
2700 break;
2701 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2702 Modifier = OMP_sch_modifier_nonmonotonic;
2703 break;
2704 case OMPC_SCHEDULE_MODIFIER_simd:
2705 if (Schedule == OMP_sch_static_chunked)
2706 Schedule = OMP_sch_static_balanced_chunked;
2707 break;
2708 case OMPC_SCHEDULE_MODIFIER_last:
2709 case OMPC_SCHEDULE_MODIFIER_unknown:
2710 break;
2711 }
2712 switch (M2) {
2713 case OMPC_SCHEDULE_MODIFIER_monotonic:
2714 Modifier = OMP_sch_modifier_monotonic;
2715 break;
2716 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2717 Modifier = OMP_sch_modifier_nonmonotonic;
2718 break;
2719 case OMPC_SCHEDULE_MODIFIER_simd:
2720 if (Schedule == OMP_sch_static_chunked)
2721 Schedule = OMP_sch_static_balanced_chunked;
2722 break;
2723 case OMPC_SCHEDULE_MODIFIER_last:
2724 case OMPC_SCHEDULE_MODIFIER_unknown:
2725 break;
2726 }
2727 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2728 // If the static schedule kind is specified or if the ordered clause is
2729 // specified, and if the nonmonotonic modifier is not specified, the effect is
2730 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2731 // modifier is specified, the effect is as if the nonmonotonic modifier is
2732 // specified.
2733 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2734 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2735 Schedule == OMP_sch_static_balanced_chunked ||
2736 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2737 Schedule == OMP_dist_sch_static_chunked ||
2738 Schedule == OMP_dist_sch_static))
2739 Modifier = OMP_sch_modifier_nonmonotonic;
2740 }
2741 return Schedule | Modifier;
2742}
2743
2744void CGOpenMPRuntime::emitForDispatchInit(
2745 CodeGenFunction &CGF, SourceLocation Loc,
2746 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2747 bool Ordered, const DispatchRTInput &DispatchValues) {
2748 if (!CGF.HaveInsertPoint())
2749 return;
2750 OpenMPSchedType Schedule = getRuntimeSchedule(
2751 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2752 assert(Ordered ||(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2755, __extension__
__PRETTY_FUNCTION__))
2753 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2755, __extension__
__PRETTY_FUNCTION__))
2754 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2755, __extension__
__PRETTY_FUNCTION__))
2755 Schedule != OMP_sch_static_balanced_chunked))(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2755, __extension__
__PRETTY_FUNCTION__))
;
2756 // Call __kmpc_dispatch_init(
2757 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2758 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2759 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2760
2761 // If the Chunk was not specified in the clause - use default value 1.
2762 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2763 : CGF.Builder.getIntN(IVSize, 1);
2764 llvm::Value *Args[] = {
2765 emitUpdateLocation(CGF, Loc),
2766 getThreadID(CGF, Loc),
2767 CGF.Builder.getInt32(addMonoNonMonoModifier(
2768 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2769 DispatchValues.LB, // Lower
2770 DispatchValues.UB, // Upper
2771 CGF.Builder.getIntN(IVSize, 1), // Stride
2772 Chunk // Chunk
2773 };
2774 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2775}
2776
2777static void emitForStaticInitCall(
2778 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2779 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2780 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2781 const CGOpenMPRuntime::StaticRTInput &Values) {
2782 if (!CGF.HaveInsertPoint())
2783 return;
2784
2785 assert(!Values.Ordered)(static_cast <bool> (!Values.Ordered) ? void (0) : __assert_fail
("!Values.Ordered", "clang/lib/CodeGen/CGOpenMPRuntime.cpp",
2785, __extension__ __PRETTY_FUNCTION__))
;
2786 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2790, __extension__
__PRETTY_FUNCTION__))
2787 Schedule == OMP_sch_static_balanced_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2790, __extension__
__PRETTY_FUNCTION__))
2788 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2790, __extension__
__PRETTY_FUNCTION__))
2789 Schedule == OMP_dist_sch_static ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2790, __extension__
__PRETTY_FUNCTION__))
2790 Schedule == OMP_dist_sch_static_chunked)(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2790, __extension__
__PRETTY_FUNCTION__))
;
2791
2792 // Call __kmpc_for_static_init(
2793 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2794 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2795 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2796 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2797 llvm::Value *Chunk = Values.Chunk;
2798 if (Chunk == nullptr) {
2799 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2801, __extension__
__PRETTY_FUNCTION__))
2800 Schedule == OMP_dist_sch_static) &&(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2801, __extension__
__PRETTY_FUNCTION__))
2801 "expected static non-chunked schedule")(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2801, __extension__
__PRETTY_FUNCTION__))
;
2802 // If the Chunk was not specified in the clause - use default value 1.
2803 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2804 } else {
2805 assert((Schedule == OMP_sch_static_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2809, __extension__
__PRETTY_FUNCTION__))
2806 Schedule == OMP_sch_static_balanced_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2809, __extension__
__PRETTY_FUNCTION__))
2807 Schedule == OMP_ord_static_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2809, __extension__
__PRETTY_FUNCTION__))
2808 Schedule == OMP_dist_sch_static_chunked) &&(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2809, __extension__
__PRETTY_FUNCTION__))
2809 "expected static chunked schedule")(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2809, __extension__
__PRETTY_FUNCTION__))
;
2810 }
2811 llvm::Value *Args[] = {
2812 UpdateLocation,
2813 ThreadId,
2814 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2815 M2)), // Schedule type
2816 Values.IL.getPointer(), // &isLastIter
2817 Values.LB.getPointer(), // &LB
2818 Values.UB.getPointer(), // &UB
2819 Values.ST.getPointer(), // &Stride
2820 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2821 Chunk // Chunk
2822 };
2823 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2824}
2825
2826void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2827 SourceLocation Loc,
2828 OpenMPDirectiveKind DKind,
2829 const OpenMPScheduleTy &ScheduleKind,
2830 const StaticRTInput &Values) {
2831 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2832 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2833 assert(isOpenMPWorksharingDirective(DKind) &&(static_cast <bool> (isOpenMPWorksharingDirective(DKind
) && "Expected loop-based or sections-based directive."
) ? void (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2834, __extension__
__PRETTY_FUNCTION__))
2834 "Expected loop-based or sections-based directive.")(static_cast <bool> (isOpenMPWorksharingDirective(DKind
) && "Expected loop-based or sections-based directive."
) ? void (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2834, __extension__
__PRETTY_FUNCTION__))
;
2835 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2836 isOpenMPLoopDirective(DKind)
2837 ? OMP_IDENT_WORK_LOOP
2838 : OMP_IDENT_WORK_SECTIONS);
2839 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2840 llvm::FunctionCallee StaticInitFunction =
2841 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2842 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2843 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2844 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2845}
2846
2847void CGOpenMPRuntime::emitDistributeStaticInit(
2848 CodeGenFunction &CGF, SourceLocation Loc,
2849 OpenMPDistScheduleClauseKind SchedKind,
2850 const CGOpenMPRuntime::StaticRTInput &Values) {
2851 OpenMPSchedType ScheduleNum =
2852 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2853 llvm::Value *UpdatedLocation =
2854 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2855 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2856 llvm::FunctionCallee StaticInitFunction;
2857 bool isGPUDistribute =
2858 CGM.getLangOpts().OpenMPIsDevice &&
2859 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2860 StaticInitFunction = createForStaticInitFunction(
2861 Values.IVSize, Values.IVSigned, isGPUDistribute);
2862
2863 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2864 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2865 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2866}
2867
2868void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2869 SourceLocation Loc,
2870 OpenMPDirectiveKind DKind) {
2871 if (!CGF.HaveInsertPoint())
2872 return;
2873 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2874 llvm::Value *Args[] = {
2875 emitUpdateLocation(CGF, Loc,
2876 isOpenMPDistributeDirective(DKind)
2877 ? OMP_IDENT_WORK_DISTRIBUTE
2878 : isOpenMPLoopDirective(DKind)
2879 ? OMP_IDENT_WORK_LOOP
2880 : OMP_IDENT_WORK_SECTIONS),
2881 getThreadID(CGF, Loc)};
2882 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2883 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2884 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2885 CGF.EmitRuntimeCall(
2886 OMPBuilder.getOrCreateRuntimeFunction(
2887 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2888 Args);
2889 else
2890 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2891 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2892 Args);
2893}
2894
2895void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2896 SourceLocation Loc,
2897 unsigned IVSize,
2898 bool IVSigned) {
2899 if (!CGF.HaveInsertPoint())
2900 return;
2901 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2902 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2903 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2904}
2905
2906llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2907 SourceLocation Loc, unsigned IVSize,
2908 bool IVSigned, Address IL,
2909 Address LB, Address UB,
2910 Address ST) {
2911 // Call __kmpc_dispatch_next(
2912 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2913 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2914 // kmp_int[32|64] *p_stride);
2915 llvm::Value *Args[] = {
2916 emitUpdateLocation(CGF, Loc),
2917 getThreadID(CGF, Loc),
2918 IL.getPointer(), // &isLastIter
2919 LB.getPointer(), // &Lower
2920 UB.getPointer(), // &Upper
2921 ST.getPointer() // &Stride
2922 };
2923 llvm::Value *Call =
2924 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2925 return CGF.EmitScalarConversion(
2926 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2927 CGF.getContext().BoolTy, Loc);
2928}
2929
2930void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2931 llvm::Value *NumThreads,
2932 SourceLocation Loc) {
2933 if (!CGF.HaveInsertPoint())
2934 return;
2935 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2936 llvm::Value *Args[] = {
2937 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2938 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2939 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2940 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2941 Args);
2942}
2943
2944void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2945 ProcBindKind ProcBind,
2946 SourceLocation Loc) {
2947 if (!CGF.HaveInsertPoint())
2948 return;
2949 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.")(static_cast <bool> (ProcBind != OMP_PROC_BIND_unknown &&
"Unsupported proc_bind value.") ? void (0) : __assert_fail (
"ProcBind != OMP_PROC_BIND_unknown && \"Unsupported proc_bind value.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2949, __extension__
__PRETTY_FUNCTION__))
;
2950 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2951 llvm::Value *Args[] = {
2952 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2953 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2954 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2955 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2956 Args);
2957}
2958
2959void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2960 SourceLocation Loc, llvm::AtomicOrdering AO) {
2961 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2962 OMPBuilder.createFlush(CGF.Builder);
2963 } else {
2964 if (!CGF.HaveInsertPoint())
2965 return;
2966 // Build call void __kmpc_flush(ident_t *loc)
2967 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2968 CGM.getModule(), OMPRTL___kmpc_flush),
2969 emitUpdateLocation(CGF, Loc));
2970 }
2971}
2972
namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2998
2999bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3000 return OffloadEntriesTargetRegion.empty() &&
3001 OffloadEntriesDeviceGlobalVar.empty();
3002}
3003
3004/// Initialize target region entry.
3005void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3006 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3007 StringRef ParentName, unsigned LineNum,
3008 unsigned Order) {
3009 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3011, __extension__
__PRETTY_FUNCTION__))
3010 "only required for the device "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3011, __extension__
__PRETTY_FUNCTION__))
3011 "code generation.")(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3011, __extension__
__PRETTY_FUNCTION__))
;
3012 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3013 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3014 OMPTargetRegionEntryTargetRegion);
3015 ++OffloadingEntriesNum;
3016}
3017
3018void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3019 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3020 StringRef ParentName, unsigned LineNum,
3021 llvm::Constant *Addr, llvm::Constant *ID,
3022 OMPTargetRegionEntryKind Flags) {
3023 // If we are emitting code for a target, the entry is already initialized,
3024 // only has to be registered.
3025 if (CGM.getLangOpts().OpenMPIsDevice) {
3026 // This could happen if the device compilation is invoked standalone.
3027 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3028 return;
3029 auto &Entry =
3030 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3031 Entry.setAddress(Addr);
3032 Entry.setID(ID);
3033 Entry.setFlags(Flags);
3034 } else {
3035 if (Flags ==
3036 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3037 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3038 /*IgnoreAddressId*/ true))
3039 return;
3040 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&(static_cast <bool> (!hasTargetRegionEntryInfo(DeviceID
, FileID, ParentName, LineNum) && "Target region entry already registered!"
) ? void (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3041, __extension__
__PRETTY_FUNCTION__))
3041 "Target region entry already registered!")(static_cast <bool> (!hasTargetRegionEntryInfo(DeviceID
, FileID, ParentName, LineNum) && "Target region entry already registered!"
) ? void (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3041, __extension__
__PRETTY_FUNCTION__))
;
3042 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3043 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3044 ++OffloadingEntriesNum;
3045 }
3046}
3047
3048bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3049 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3050 bool IgnoreAddressId) const {
3051 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3052 if (PerDevice == OffloadEntriesTargetRegion.end())
3053 return false;
3054 auto PerFile = PerDevice->second.find(FileID);
3055 if (PerFile == PerDevice->second.end())
3056 return false;
3057 auto PerParentName = PerFile->second.find(ParentName);
3058 if (PerParentName == PerFile->second.end())
3059 return false;
3060 auto PerLine = PerParentName->second.find(LineNum);
3061 if (PerLine == PerParentName->second.end())
3062 return false;
3063 // Fail if this entry is already registered.
3064 if (!IgnoreAddressId &&
3065 (PerLine->second.getAddress() || PerLine->second.getID()))
3066 return false;
3067 return true;
3068}
3069
3070void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3071 const OffloadTargetRegionEntryInfoActTy &Action) {
3072 // Scan all target region entries and perform the provided action.
3073 for (const auto &D : OffloadEntriesTargetRegion)
3074 for (const auto &F : D.second)
3075 for (const auto &P : F.second)
3076 for (const auto &L : P.second)
3077 Action(D.first, F.first, P.first(), L.first, L.second);
3078}
3079
3080void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3081 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3082 OMPTargetGlobalVarEntryKind Flags,
3083 unsigned Order) {
3084 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3086, __extension__
__PRETTY_FUNCTION__))
3085 "only required for the device "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3086, __extension__
__PRETTY_FUNCTION__))
3086 "code generation.")(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3086, __extension__
__PRETTY_FUNCTION__))
;
3087 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3088 ++OffloadingEntriesNum;
3089}
3090
3091void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3092 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3093 CharUnits VarSize,
3094 OMPTargetGlobalVarEntryKind Flags,
3095 llvm::GlobalValue::LinkageTypes Linkage) {
3096 if (CGM.getLangOpts().OpenMPIsDevice) {
3097 // This could happen if the device compilation is invoked standalone.
3098 if (!hasDeviceGlobalVarEntryInfo(VarName))
3099 return;
3100 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3101 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3102 if (Entry.getVarSize().isZero()) {
3103 Entry.setVarSize(VarSize);
3104 Entry.setLinkage(Linkage);
3105 }
3106 return;
3107 }
3108 Entry.setVarSize(VarSize);
3109 Entry.setLinkage(Linkage);
3110 Entry.setAddress(Addr);
3111 } else {
3112 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3113 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3114 assert(Entry.isValid() && Entry.getFlags() == Flags &&(static_cast <bool> (Entry.isValid() && Entry.getFlags
() == Flags && "Entry not initialized!") ? void (0) :
__assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3115, __extension__
__PRETTY_FUNCTION__))
3115 "Entry not initialized!")(static_cast <bool> (Entry.isValid() && Entry.getFlags
() == Flags && "Entry not initialized!") ? void (0) :
__assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3115, __extension__
__PRETTY_FUNCTION__))
;
3116 if (Entry.getVarSize().isZero()) {
3117 Entry.setVarSize(VarSize);
3118 Entry.setLinkage(Linkage);
3119 }
3120 return;
3121 }
3122 OffloadEntriesDeviceGlobalVar.try_emplace(
3123 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3124 ++OffloadingEntriesNum;
3125 }
3126}
3127
3128void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3129 actOnDeviceGlobalVarEntriesInfo(
3130 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3131 // Scan all target region entries and perform the provided action.
3132 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3133 Action(E.getKey(), E.getValue());
3134}
3135
3136void CGOpenMPRuntime::createOffloadEntry(
3137 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3138 llvm::GlobalValue::LinkageTypes Linkage) {
3139 StringRef Name = Addr->getName();
3140 llvm::Module &M = CGM.getModule();
3141 llvm::LLVMContext &C = M.getContext();
3142
3143 // Create constant string with the name.
3144 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3145
3146 std::string StringName = getName({"omp_offloading", "entry_name"});
3147 auto *Str = new llvm::GlobalVariable(
3148 M, StrPtrInit->getType(), /*isConstant=*/true,
3149 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3150 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3151
3152 llvm::Constant *Data[] = {
3153 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3154 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3155 llvm::ConstantInt::get(CGM.SizeTy, Size),
3156 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3157 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3158 std::string EntryName = getName({"omp_offloading", "entry", ""});
3159 llvm::GlobalVariable *Entry = createGlobalStruct(
3160 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3161 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3162
3163 // The entry has to be created in the section the linker expects it to be.
3164 Entry->setSection("omp_offloading_entries");
3165}
3166
3167void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3168 // Emit the offloading entries and metadata so that the device codegen side
3169 // can easily figure out what to emit. The produced metadata looks like
3170 // this:
3171 //
3172 // !omp_offload.info = !{!1, ...}
3173 //
3174 // Right now we only generate metadata for function that contain target
3175 // regions.
3176
3177 // If we are in simd mode or there are no entries, we don't need to do
3178 // anything.
3179 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3180 return;
3181
3182 llvm::Module &M = CGM.getModule();
3183 llvm::LLVMContext &C = M.getContext();
3184 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3185 SourceLocation, StringRef>,
3186 16>
3187 OrderedEntries(OffloadEntriesInfoManager.size());
3188 llvm::SmallVector<StringRef, 16> ParentFunctions(
3189 OffloadEntriesInfoManager.size());
3190
3191 // Auxiliary methods to create metadata values and strings.
3192 auto &&GetMDInt = [this](unsigned V) {
3193 return llvm::ConstantAsMetadata::get(
3194 llvm::ConstantInt::get(CGM.Int32Ty, V));
3195 };
3196
3197 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3198
3199 // Create the offloading info metadata node.
3200 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3201
3202 // Create function that emits metadata for each target region entry;
3203 auto &&TargetRegionMetadataEmitter =
3204 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3205 &GetMDString](
3206 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3207 unsigned Line,
3208 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3209 // Generate metadata for target regions. Each entry of this metadata
3210 // contains:
3211 // - Entry 0 -> Kind of this type of metadata (0).
3212 // - Entry 1 -> Device ID of the file where the entry was identified.
3213 // - Entry 2 -> File ID of the file where the entry was identified.
3214 // - Entry 3 -> Mangled name of the function where the entry was
3215 // identified.
3216 // - Entry 4 -> Line in the file where the entry was identified.
3217 // - Entry 5 -> Order the entry was created.
3218 // The first element of the metadata node is the kind.
3219 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3220 GetMDInt(FileID), GetMDString(ParentName),
3221 GetMDInt(Line), GetMDInt(E.getOrder())};
3222
3223 SourceLocation Loc;
3224 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3225 E = CGM.getContext().getSourceManager().fileinfo_end();
3226 I != E; ++I) {
3227 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3228 I->getFirst()->getUniqueID().getFile() == FileID) {
3229 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3230 I->getFirst(), Line, 1);
3231 break;
3232 }
3233 }
3234 // Save this entry in the right position of the ordered entries array.
3235 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3236 ParentFunctions[E.getOrder()] = ParentName;
3237
3238 // Add metadata to the named metadata node.
3239 MD->addOperand(llvm::MDNode::get(C, Ops));
3240 };
3241
3242 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3243 TargetRegionMetadataEmitter);
3244
3245 // Create function that emits metadata for each device global variable entry;
3246 auto &&DeviceGlobalVarMetadataEmitter =
3247 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3248 MD](StringRef MangledName,
3249 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3250 &E) {
3251 // Generate metadata for global variables. Each entry of this metadata
3252 // contains:
3253 // - Entry 0 -> Kind of this type of metadata (1).
3254 // - Entry 1 -> Mangled name of the variable.
3255 // - Entry 2 -> Declare target kind.
3256 // - Entry 3 -> Order the entry was created.
3257 // The first element of the metadata node is the kind.
3258 llvm::Metadata *Ops[] = {
3259 GetMDInt(E.getKind()), GetMDString(MangledName),
3260 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3261
3262 // Save this entry in the right position of the ordered entries array.
3263 OrderedEntries[E.getOrder()] =
3264 std::make_tuple(&E, SourceLocation(), MangledName);
3265
3266 // Add metadata to the named metadata node.
3267 MD->addOperand(llvm::MDNode::get(C, Ops));
3268 };
3269
3270 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3271 DeviceGlobalVarMetadataEmitter);
3272
3273 for (const auto &E : OrderedEntries) {
3274 assert(std::get<0>(E) && "All ordered entries must exist!")(static_cast <bool> (std::get<0>(E) && "All ordered entries must exist!"
) ? void (0) : __assert_fail ("std::get<0>(E) && \"All ordered entries must exist!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3274, __extension__
__PRETTY_FUNCTION__))
;
3275 if (const auto *CE =
3276 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3277 std::get<0>(E))) {
3278 if (!CE->getID() || !CE->getAddress()) {
3279 // Do not blame the entry if the parent funtion is not emitted.
3280 StringRef FnName = ParentFunctions[CE->getOrder()];
3281 if (!CGM.GetGlobalValue(FnName))
3282 continue;
3283 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3284 DiagnosticsEngine::Error,
3285 "Offloading entry for target region in %0 is incorrect: either the "
3286 "address or the ID is invalid.");
3287 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3288 continue;
3289 }
3290 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3291 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3292 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3293 OffloadEntryInfoDeviceGlobalVar>(
3294 std::get<0>(E))) {
3295 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3296 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3297 CE->getFlags());
3298 switch (Flags) {
3299 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3300 if (CGM.getLangOpts().OpenMPIsDevice &&
3301 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3302 continue;
3303 if (!CE->getAddress()) {
3304 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3305 DiagnosticsEngine::Error, "Offloading entry for declare target "
3306 "variable %0 is incorrect: the "
3307 "address is invalid.");
3308 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3309 continue;
3310 }
3311 // The vaiable has no definition - no need to add the entry.
3312 if (CE->getVarSize().isZero())
3313 continue;
3314 break;
3315 }
3316 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3317 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3319, __extension__
__PRETTY_FUNCTION__))
3318 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3319, __extension__
__PRETTY_FUNCTION__))
3319 "Declaret target link address is set.")(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3319, __extension__
__PRETTY_FUNCTION__))
;
3320 if (CGM.getLangOpts().OpenMPIsDevice)
3321 continue;
3322 if (!CE->getAddress()) {
3323 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3324 DiagnosticsEngine::Error,
3325 "Offloading entry for declare target variable is incorrect: the "
3326 "address is invalid.");
3327 CGM.getDiags().Report(DiagID);
3328 continue;
3329 }
3330 break;
3331 }
3332
3333 // Hidden or internal symbols on the device are not externally visible. We
3334 // should not attempt to register them by creating an offloading entry.
3335 if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
3336 if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
3337 continue;
3338
3339 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3340 CE->getVarSize().getQuantity(), Flags,
3341 CE->getLinkage());
3342 } else {
3343 llvm_unreachable("Unsupported entry kind.")::llvm::llvm_unreachable_internal("Unsupported entry kind.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 3343)
;
3344 }
3345 }
3346}
3347
3348/// Loads all the offload entries information from the host IR
3349/// metadata.
3350void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3351 // If we are in target mode, load the metadata from the host IR. This code has
3352 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3353
3354 if (!CGM.getLangOpts().OpenMPIsDevice)
3355 return;
3356
3357 if (CGM.getLangOpts().OMPHostIRFile.empty())
3358 return;
3359
3360 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3361 if (auto EC = Buf.getError()) {
3362 CGM.getDiags().Report(diag::err_cannot_open_file)
3363 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3364 return;
3365 }
3366
3367 llvm::LLVMContext C;
3368 auto ME = expectedToErrorOrAndEmitErrors(
3369 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3370
3371 if (auto EC = ME.getError()) {
3372 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3373 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3374 CGM.getDiags().Report(DiagID)
3375 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3376 return;
3377 }
3378
3379 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3380 if (!MD)
3381 return;
3382
3383 for (llvm::MDNode *MN : MD->operands()) {
3384 auto &&GetMDInt = [MN](unsigned Idx) {
3385 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3386 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3387 };
3388
3389 auto &&GetMDString = [MN](unsigned Idx) {
3390 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3391 return V->getString();
3392 };
3393
3394 switch (GetMDInt(0)) {
3395 default:
3396 llvm_unreachable("Unexpected metadata!")::llvm::llvm_unreachable_internal("Unexpected metadata!", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 3396)
;
3397 break;
3398 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3399 OffloadingEntryInfoTargetRegion:
3400 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3401 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3402 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3403 /*Order=*/GetMDInt(5));
3404 break;
3405 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3406 OffloadingEntryInfoDeviceGlobalVar:
3407 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3408 /*MangledName=*/GetMDString(1),
3409 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3410 /*Flags=*/GetMDInt(2)),
3411 /*Order=*/GetMDInt(3));
3412 break;
3413 }
3414 }
3415}
3416
3417void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3418 if (!KmpRoutineEntryPtrTy) {
3419 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3420 ASTContext &C = CGM.getContext();
3421 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3422 FunctionProtoType::ExtProtoInfo EPI;
3423 KmpRoutineEntryPtrQTy = C.getPointerType(
3424 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3425 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3426 }
3427}
3428
3429QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3430 // Make sure the type of the entry is already created. This is the type we
3431 // have to create:
3432 // struct __tgt_offload_entry{
3433 // void *addr; // Pointer to the offload entry info.
3434 // // (function or global)
3435 // char *name; // Name of the function or global.
3436 // size_t size; // Size of the entry info (0 if it a function).
3437 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3438 // int32_t reserved; // Reserved, to use by the runtime library.
3439 // };
3440 if (TgtOffloadEntryQTy.isNull()) {
3441 ASTContext &C = CGM.getContext();
3442 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3443 RD->startDefinition();
3444 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3445 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3446 addFieldToRecordDecl(C, RD, C.getSizeType());
3447 addFieldToRecordDecl(
3448 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3449 addFieldToRecordDecl(
3450 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3451 RD->completeDefinition();
3452 RD->addAttr(PackedAttr::CreateImplicit(C));
3453 TgtOffloadEntryQTy = C.getRecordType(RD);
3454 }
3455 return TgtOffloadEntryQTy;
3456}
3457
3458namespace {
3459struct PrivateHelpersTy {
3460 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3461 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3462 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3463 PrivateElemInit(PrivateElemInit) {}
3464 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3465 const Expr *OriginalRef = nullptr;
3466 const VarDecl *Original = nullptr;
3467 const VarDecl *PrivateCopy = nullptr;
3468 const VarDecl *PrivateElemInit = nullptr;
3469 bool isLocalPrivate() const {
3470 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3471 }
3472};
3473typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3474} // anonymous namespace
3475
3476static bool isAllocatableDecl(const VarDecl *VD) {
3477 const VarDecl *CVD = VD->getCanonicalDecl();
3478 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3479 return false;
3480 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3481 // Use the default allocation.
3482 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3483 !AA->getAllocator());
3484}
3485
3486static RecordDecl *
3487createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3488 if (!Privates.empty()) {
3489 ASTContext &C = CGM.getContext();
3490 // Build struct .kmp_privates_t. {
3491 // /* private vars */
3492 // };
3493 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3494 RD->startDefinition();
3495 for (const auto &Pair : Privates) {
3496 const VarDecl *VD = Pair.second.Original;
3497 QualType Type = VD->getType().getNonReferenceType();
3498 // If the private variable is a local variable with lvalue ref type,
3499 // allocate the pointer instead of the pointee type.
3500 if (Pair.second.isLocalPrivate()) {
3501 if (VD->getType()->isLValueReferenceType())
3502 Type = C.getPointerType(Type);
3503 if (isAllocatableDecl(VD))
3504 Type = C.getPointerType(Type);
3505 }
3506 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3507 if (VD->hasAttrs()) {
3508 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3509 E(VD->getAttrs().end());
3510 I != E; ++I)
3511 FD->addAttr(*I);
3512 }
3513 }
3514 RD->completeDefinition();
3515 return RD;
3516 }
3517 return nullptr;
3518}
3519
3520static RecordDecl *
3521createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3522 QualType KmpInt32Ty,
3523 QualType KmpRoutineEntryPointerQTy) {
3524 ASTContext &C = CGM.getContext();
3525 // Build struct kmp_task_t {
3526 // void * shareds;
3527 // kmp_routine_entry_t routine;
3528 // kmp_int32 part_id;
3529 // kmp_cmplrdata_t data1;
3530 // kmp_cmplrdata_t data2;
3531 // For taskloops additional fields:
3532 // kmp_uint64 lb;
3533 // kmp_uint64 ub;
3534 // kmp_int64 st;
3535 // kmp_int32 liter;
3536 // void * reductions;
3537 // };
3538 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3539 UD->startDefinition();
3540 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3541 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3542 UD->completeDefinition();
3543 QualType KmpCmplrdataTy = C.getRecordType(UD);
3544 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3545 RD->startDefinition();
3546 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3547 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3548 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3549 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3550 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3551 if (isOpenMPTaskLoopDirective(Kind)) {
3552 QualType KmpUInt64Ty =
3553 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3554 QualType KmpInt64Ty =
3555 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3556 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3557 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3558 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3559 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3560 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3561 }
3562 RD->completeDefinition();
3563 return RD;
3564}
3565
3566static RecordDecl *
3567createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3568 ArrayRef<PrivateDataTy> Privates) {
3569 ASTContext &C = CGM.getContext();
3570 // Build struct kmp_task_t_with_privates {
3571 // kmp_task_t task_data;
3572 // .kmp_privates_t. privates;
3573 // };
3574 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3575 RD->startDefinition();
3576 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3577 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3578 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3579 RD->completeDefinition();
3580 return RD;
3581}
3582
3583/// Emit a proxy function which accepts kmp_task_t as the second
3584/// argument.
3585/// \code
3586/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3587/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3588/// For taskloops:
3589/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3590/// tt->reductions, tt->shareds);
3591/// return 0;
3592/// }
3593/// \endcode
3594static llvm::Function *
3595emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3596 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3597 QualType KmpTaskTWithPrivatesPtrQTy,
3598 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3599 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3600 llvm::Value *TaskPrivatesMap) {
3601 ASTContext &C = CGM.getContext();
3602 FunctionArgList Args;
3603 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3604 ImplicitParamDecl::Other);
3605 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3606 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3607 ImplicitParamDecl::Other);
3608 Args.push_back(&GtidArg);
3609 Args.push_back(&TaskTypeArg);
3610 const auto &TaskEntryFnInfo =
3611 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3612 llvm::FunctionType *TaskEntryTy =
3613 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3614 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3615 auto *TaskEntry = llvm::Function::Create(
3616 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3617 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3618 TaskEntry->setDoesNotRecurse();
3619 CodeGenFunction CGF(CGM);
3620 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3621 Loc, Loc);
3622
3623 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3624 // tt,
3625 // For taskloops:
3626 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3627 // tt->task_data.shareds);
3628 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3629 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3630 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3631 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3632 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3633 const auto *KmpTaskTWithPrivatesQTyRD =
3634 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3635 LValue Base =
3636 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3637 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3638 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3639 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3640 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3641
3642 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3643 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3644 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3645 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3646 CGF.ConvertTypeForMem(SharedsPtrTy));
3647
3648 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3649 llvm::Value *PrivatesParam;
3650 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3651 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3652 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3653 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3654 } else {
3655 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3656 }
3657
3658 llvm::Value *CommonArgs[] = {
3659 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3660 CGF.Builder
3661 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3662 CGF.VoidPtrTy, CGF.Int8Ty)
3663 .getPointer()};
3664 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3665 std::end(CommonArgs));
3666 if (isOpenMPTaskLoopDirective(Kind)) {
3667 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3668 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3669 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3670 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3671 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3672 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3673 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3674 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3675 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3676 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3677 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3678 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3679 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3680 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3681 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3682 CallArgs.push_back(LBParam);
3683 CallArgs.push_back(UBParam);
3684 CallArgs.push_back(StParam);
3685 CallArgs.push_back(LIParam);
3686 CallArgs.push_back(RParam);
3687 }
3688 CallArgs.push_back(SharedsParam);
3689
3690 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3691 CallArgs);
3692 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3693 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3694 CGF.FinishFunction();
3695 return TaskEntry;
3696}
3697
3698static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3699 SourceLocation Loc,
3700 QualType KmpInt32Ty,
3701 QualType KmpTaskTWithPrivatesPtrQTy,
3702 QualType KmpTaskTWithPrivatesQTy) {
3703 ASTContext &C = CGM.getContext();
3704 FunctionArgList Args;
3705 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3706 ImplicitParamDecl::Other);
3707 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3708 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3709 ImplicitParamDecl::Other);
3710 Args.push_back(&GtidArg);
3711 Args.push_back(&TaskTypeArg);
3712 const auto &DestructorFnInfo =
3713 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3714 llvm::FunctionType *DestructorFnTy =
3715 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3716 std::string Name =
3717 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3718 auto *DestructorFn =
3719 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3720 Name, &CGM.getModule());
3721 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3722 DestructorFnInfo);
3723 DestructorFn->setDoesNotRecurse();
3724 CodeGenFunction CGF(CGM);
3725 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3726 Args, Loc, Loc);
3727
3728 LValue Base = CGF.EmitLoadOfPointerLValue(
3729 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3730 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3731 const auto *KmpTaskTWithPrivatesQTyRD =
3732 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3733 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3734 Base = CGF.EmitLValueForField(Base, *FI);
3735 for (const auto *Field :
3736 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3737 if (QualType::DestructionKind DtorKind =
3738 Field->getType().isDestructedType()) {
3739 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3740 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3741 }
3742 }
3743 CGF.FinishFunction();
3744 return DestructorFn;
3745}
3746
3747/// Emit a privates mapping function for correct handling of private and
3748/// firstprivate variables.
3749/// \code
3750/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3751/// **noalias priv1,..., <tyn> **noalias privn) {
3752/// *priv1 = &.privates.priv1;
3753/// ...;
3754/// *privn = &.privates.privn;
3755/// }
3756/// \endcode
/// The function receives the privates record plus one T** out-parameter per
/// private/firstprivate/lastprivate/local variable; it stores the address of
/// each field of the privates record through the matching out-parameter.
3757static llvm::Value *
3758emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3759 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3760 ArrayRef<PrivateDataTy> Privates) {
3761 ASTContext &C = CGM.getContext();
3762 FunctionArgList Args;
// Arg 0: const .privates. *noalias — pointer to the privates record.
3763 ImplicitParamDecl TaskPrivatesArg(
3764 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3765 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3766 ImplicitParamDecl::Other);
3767 Args.push_back(&TaskPrivatesArg);
// PrivateVarsPos records, for each original VarDecl, the index of its
// out-parameter in Args (Counter starts at 1 because Args[0] is the
// privates-record pointer).
3768 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3769 unsigned Counter = 1;
3770 for (const Expr *E : Data.PrivateVars) {
3771 Args.push_back(ImplicitParamDecl::Create(
3772 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3773 C.getPointerType(C.getPointerType(E->getType()))
3774 .withConst()
3775 .withRestrict(),
3776 ImplicitParamDecl::Other));
3777 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3778 PrivateVarsPos[VD] = Counter;
3779 ++Counter;
3780 }
3781 for (const Expr *E : Data.FirstprivateVars) {
3782 Args.push_back(ImplicitParamDecl::Create(
3783 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3784 C.getPointerType(C.getPointerType(E->getType()))
3785 .withConst()
3786 .withRestrict(),
3787 ImplicitParamDecl::Other));
3788 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3789 PrivateVarsPos[VD] = Counter;
3790 ++Counter;
3791 }
3792 for (const Expr *E : Data.LastprivateVars) {
3793 Args.push_back(ImplicitParamDecl::Create(
3794 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3795 C.getPointerType(C.getPointerType(E->getType()))
3796 .withConst()
3797 .withRestrict(),
3798 ImplicitParamDecl::Other));
3799 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3800 PrivateVarsPos[VD] = Counter;
3801 ++Counter;
3802 }
// Private locals: references are passed as pointers, and allocatable decls
// get an extra level of indirection.
3803 for (const VarDecl *VD : Data.PrivateLocals) {
3804 QualType Ty = VD->getType().getNonReferenceType();
3805 if (VD->getType()->isLValueReferenceType())
3806 Ty = C.getPointerType(Ty);
3807 if (isAllocatableDecl(VD))
3808 Ty = C.getPointerType(Ty);
3809 Args.push_back(ImplicitParamDecl::Create(
3810 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3811 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3812 ImplicitParamDecl::Other));
3813 PrivateVarsPos[VD] = Counter;
3814 ++Counter;
3815 }
3816 const auto &TaskPrivatesMapFnInfo =
3817 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3818 llvm::FunctionType *TaskPrivatesMapTy =
3819 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3820 std::string Name =
3821 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3822 auto *TaskPrivatesMap = llvm::Function::Create(
3823 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3824 &CGM.getModule());
3825 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3826 TaskPrivatesMapFnInfo);
// Under -O, force the mapping function to be inlined into the task entry.
3827 if (CGM.getLangOpts().Optimize) {
3828 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3829 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3830 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3831 }
3832 CodeGenFunction CGF(CGM);
3833 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3834 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3835
3836 // *privi = &.privates.privi;
3837 LValue Base = CGF.EmitLoadOfPointerLValue(
3838 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3839 TaskPrivatesArg.getType()->castAs<PointerType>());
3840 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3841 Counter = 0;
// NOTE(review): this relies on the field order of the privates record
// matching the order of the Privates array — confirm against
// createKmpTaskTWithPrivatesRecordDecl.
3842 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3843 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3844 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3845 LValue RefLVal =
3846 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3847 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3848 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3849 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3850 ++Counter;
3851 }
3852 CGF.FinishFunction();
3853 return TaskPrivatesMap;
3854}
3855
3856/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Pointer to the shareds block of the source task
///        (may be invalid when no firstprivates need copying).
/// \param TDBase LValue of the destination kmp_task_t_with_privates object.
/// \param ForDup True when called from the task_dup function (taskloop), in
///        which case only non-trivial constructor inits are re-emitted.
3857static void emitPrivatesInit(CodeGenFunction &CGF,
3858 const OMPExecutableDirective &D,
3859 Address KmpTaskSharedsPtr, LValue TDBase,
3860 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3861 QualType SharedsTy, QualType SharedsPtrTy,
3862 const OMPTaskDataTy &Data,
3863 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3864 ASTContext &C = CGF.getContext();
// Second field of kmp_task_t_with_privates is the privates record.
3865 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3866 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3867 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3868 ? OMPD_taskloop
3869 : OMPD_task;
3870 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3871 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3872 LValue SrcBase;
3873 bool IsTargetTask =
3874 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3875 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3876 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3877 // PointersArray, SizesArray, and MappersArray. The original variables for
3878 // these arrays are not captured and we get their addresses explicitly.
3879 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3880 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
// SrcBase views the source task's shareds block as the shareds record type.
3881 SrcBase = CGF.MakeAddrLValue(
3882 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3883 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3884 CGF.ConvertTypeForMem(SharedsTy)),
3885 SharedsTy);
3886 }
// Walk the fields of the privates record in lockstep with Privates.
3887 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3888 for (const PrivateDataTy &Pair : Privates) {
3889 // Do not initialize private locals.
3890 if (Pair.second.isLocalPrivate()) {
3891 ++FI;
3892 continue;
3893 }
3894 const VarDecl *VD = Pair.second.PrivateCopy;
3895 const Expr *Init = VD->getAnyInitializer();
// In the dup path, only re-run non-trivial constructor initializers.
3896 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3897 !CGF.isTrivialInitializer(Init)))) {
3898 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
// A non-null PrivateElemInit marks a firstprivate: copy/construct from the
// shared original.
3899 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3900 const VarDecl *OriginalVD = Pair.second.Original;
3901 // Check if the variable is the target-based BasePointersArray,
3902 // PointersArray, SizesArray, or MappersArray.
3903 LValue SharedRefLValue;
3904 QualType Type = PrivateLValue.getType();
3905 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3906 if (IsTargetTask && !SharedField) {
3907 assert(isa<ImplicitParamDecl>(OriginalVD) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
3908 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
3909 cast<CapturedDecl>(OriginalVD->getDeclContext())(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
3910 ->getNumParams() == 0 &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
3911 isa<TranslationUnitDecl>((static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
3912 cast<CapturedDecl>(OriginalVD->getDeclContext())(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
3913 ->getDeclContext()) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
3914 "Expected artificial target data variable.")(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3914, __extension__
__PRETTY_FUNCTION__))
;
3915 SharedRefLValue =
3916 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3917 } else if (ForDup) {
// In the dup path, read the original value from the source task's shareds.
3918 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3919 SharedRefLValue = CGF.MakeAddrLValue(
3920 SharedRefLValue.getAddress(CGF).withAlignment(
3921 C.getDeclAlign(OriginalVD)),
3922 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3923 SharedRefLValue.getTBAAInfo());
3924 } else if (CGF.LambdaCaptureFields.count(
3925 Pair.second.Original->getCanonicalDecl()) > 0 ||
3926 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3927 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3928 } else {
3929 // Processing for implicitly captured variables.
3930 InlinedOpenMPRegionRAII Region(
3931 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3932 /*HasCancel=*/false, /*NoInheritance=*/true);
3933 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3934 }
3935 if (Type->isArrayType()) {
3936 // Initialize firstprivate array.
3937 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3938 // Perform simple memcpy.
3939 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3940 } else {
3941 // Initialize firstprivate array using element-by-element
3942 // initialization.
3943 CGF.EmitOMPAggregateAssign(
3944 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3945 Type,
3946 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3947 Address SrcElement) {
3948 // Clean up any temporaries needed by the initialization.
3949 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3950 InitScope.addPrivate(Elem, SrcElement);
3951 (void)InitScope.Privatize();
3952 // Emit initialization for single element.
3953 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3954 CGF, &CapturesInfo);
3955 CGF.EmitAnyExprToMem(Init, DestElement,
3956 Init->getType().getQualifiers(),
3957 /*IsInitializer=*/false);
3958 });
3959 }
3960 } else {
// Scalar/class firstprivate: privatize Elem to the shared address, then run
// the initializer into the private field.
3961 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3962 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3963 (void)InitScope.Privatize();
3964 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3965 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3966 /*capturedByInit=*/false);
3967 }
3968 } else {
// Plain private/lastprivate: default-initialize the private copy.
3969 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3970 }
3971 }
3972 ++FI;
3973 }
3974}
3975
3976/// Check if duplication function is required for taskloops.
3977static bool checkInitIsRequired(CodeGenFunction &CGF,
3978 ArrayRef<PrivateDataTy> Privates) {
3979 bool InitRequired = false;
3980 for (const PrivateDataTy &Pair : Privates) {
3981 if (Pair.second.isLocalPrivate())
3982 continue;
3983 const VarDecl *VD = Pair.second.PrivateCopy;
3984 const Expr *Init = VD->getAnyInitializer();
3985 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3986 !CGF.isTrivialInitializer(Init));
3987 if (InitRequired)
3988 break;
3989 }
3990 return InitRequired;
3991}
3992
3993
3994/// Emit task_dup function (for initialization of
3995/// private/firstprivate/lastprivate vars and last_iter flag)
3996/// \code
3997/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3998/// lastpriv) {
3999/// // setup lastprivate flag
4000/// task_dst->last = lastpriv;
4001/// // could be constructor calls here...
4002/// }
4003/// \endcode
4004static llvm::Value *
4005emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
4006 const OMPExecutableDirective &D,
4007 QualType KmpTaskTWithPrivatesPtrQTy,
4008 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4009 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4010 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4011 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4012 ASTContext &C = CGM.getContext();
4013 FunctionArgList Args;
4014 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4015 KmpTaskTWithPrivatesPtrQTy,
4016 ImplicitParamDecl::Other);
4017 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4018 KmpTaskTWithPrivatesPtrQTy,
4019 ImplicitParamDecl::Other);
4020 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4021 ImplicitParamDecl::Other);
4022 Args.push_back(&DstArg);
4023 Args.push_back(&SrcArg);
4024 Args.push_back(&LastprivArg);
4025 const auto &TaskDupFnInfo =
4026 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4027 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4028 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4029 auto *TaskDup = llvm::Function::Create(
4030 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4031 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4032 TaskDup->setDoesNotRecurse();
4033 CodeGenFunction CGF(CGM);
4034 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4035 Loc);
4036
4037 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4038 CGF.GetAddrOfLocalVar(&DstArg),
4039 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4040 // task_dst->liter = lastpriv;
4041 if (WithLastIter) {
4042 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4043 LValue Base = CGF.EmitLValueForField(
4044 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4045 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4046 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4047 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4048 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4049 }
4050
4051 // Emit initial values for private copies (if any).
4052 assert(!Privates.empty())(static_cast <bool> (!Privates.empty()) ? void (0) : __assert_fail
("!Privates.empty()", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 4052, __extension__ __PRETTY_FUNCTION__))
;
4053 Address KmpTaskSharedsPtr = Address::invalid();
4054 if (!Data.FirstprivateVars.empty()) {
4055 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4056 CGF.GetAddrOfLocalVar(&SrcArg),
4057 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4058 LValue Base = CGF.EmitLValueForField(
4059 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4060 KmpTaskSharedsPtr = Address(
4061 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4062 Base, *std::next(KmpTaskTQTyRD->field_begin(),
4063 KmpTaskTShareds)),
4064 Loc),
4065 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4066 }
4067 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4068 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4069 CGF.FinishFunction();
4070 return TaskDup;
4071}
4072
4073/// Checks if destructor function is required to be generated.
4074/// \return true if cleanups are required, false otherwise.
4075static bool
4076checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4077 ArrayRef<PrivateDataTy> Privates) {
4078 for (const PrivateDataTy &P : Privates) {
4079 if (P.second.isLocalPrivate())
4080 continue;
4081 QualType Ty = P.second.Original->getType().getNonReferenceType();
4082 if (Ty.isDestructedType())
4083 return true;
4084 }
4085 return false;
4086}
4087
4088namespace {
4089/// Loop generator for OpenMP iterator expression.
///
/// RAII helper: the constructor privatizes the iterator and counter
/// variables and emits the loop headers (one nested loop per iterator); the
/// destructor emits the matching loop latches and exit blocks in reverse
/// order. Both are no-ops when the expression is null.
4090class OMPIteratorGeneratorScope final
4091 : public CodeGenFunction::OMPPrivateScope {
4092 CodeGenFunction &CGF;
4093 const OMPIteratorExpr *E = nullptr;
// Per-iterator continue/exit jump destinations, filled by the constructor
// and consumed by the destructor; indices correspond to iterator order.
4094 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4095 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4096 OMPIteratorGeneratorScope() = delete;
4097 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4098
4099public:
4100 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4101 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4102 if (!E)
4103 return;
// First pass: evaluate upper bounds and create private storage for each
// iterator variable and its helper counter.
4104 SmallVector<llvm::Value *, 4> Uppers;
4105 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4106 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4107 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4108 addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
4109 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4110 addPrivate(
4111 HelperData.CounterVD,
4112 CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
4113 }
4114 Privatize();
4115
// Second pass: emit the loop header for each iterator.
4116 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4117 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4118 LValue CLVal =
4119 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4120 HelperData.CounterVD->getType());
4121 // Counter = 0;
4122 CGF.EmitStoreOfScalar(
4123 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4124 CLVal);
4125 CodeGenFunction::JumpDest &ContDest =
4126 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4127 CodeGenFunction::JumpDest &ExitDest =
4128 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4129 // N = <number-of_iterations>;
4130 llvm::Value *N = Uppers[I];
4131 // cont:
4132 // if (Counter < N) goto body; else goto exit;
4133 CGF.EmitBlock(ContDest.getBlock());
4134 auto *CVal =
4135 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
// Pick signed vs. unsigned compare based on the counter's type.
4136 llvm::Value *Cmp =
4137 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4138 ? CGF.Builder.CreateICmpSLT(CVal, N)
4139 : CGF.Builder.CreateICmpULT(CVal, N);
4140 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4141 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4142 // body:
4143 CGF.EmitBlock(BodyBB);
4144 // Iteri = Begini + Counter * Stepi;
4145 CGF.EmitIgnoredExpr(HelperData.Update);
4146 }
4147 }
// Emit loop latches/exits innermost-first, mirroring the constructor.
4148 ~OMPIteratorGeneratorScope() {
4149 if (!E)
4150 return;
4151 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4152 // Counter = Counter + 1;
4153 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4154 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4155 // goto cont;
4156 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4157 // exit:
4158 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4159 }
4160 }
4161};
4162} // namespace
4163
/// Compute the base pointer and the size in bytes for the storage designated
/// by \p E. Handles three shapes of expression:
///  - OMPArrayShapingExpr: base pointer, size = pointee size * all dims;
///  - OMPArraySectionExpr: low pointer, size = (one-past-upper - low);
///  - anything else: lvalue pointer, size = sizeof(type).
4164static std::pair<llvm::Value *, llvm::Value *>
4165getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4166 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4167 llvm::Value *Addr;
4168 if (OASE) {
4169 const Expr *Base = OASE->getBase();
4170 Addr = CGF.EmitScalarExpr(Base);
4171 } else {
4172 Addr = CGF.EmitLValue(E).getPointer(CGF);
4173 }
4174 llvm::Value *SizeVal;
4175 QualType Ty = E->getType();
4176 if (OASE) {
// Size = sizeof(pointee) * dim0 * dim1 * ... (each dim widened to size_t).
4177 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4178 for (const Expr *SE : OASE->getDimensions()) {
4179 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4180 Sz = CGF.EmitScalarConversion(
4181 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4182 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4183 }
4184 } else if (const auto *ASE =
4185 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
// Size = ptrtoint(&upper + 1) - ptrtoint(lower), i.e. the byte distance
// from the section's first element to one past its last element.
4186 LValue UpAddrLVal =
4187 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4188 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4189 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4190 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4191 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4192 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4193 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4194 } else {
4195 SizeVal = CGF.getTypeSize(Ty);
4196 }
4197 return std::make_pair(Addr, SizeVal);
4198}
4199
4200/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4201static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4202 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4203 if (KmpTaskAffinityInfoTy.isNull()) {
4204 RecordDecl *KmpAffinityInfoRD =
4205 C.buildImplicitRecord("kmp_task_affinity_info_t");
4206 KmpAffinityInfoRD->startDefinition();
4207 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4208 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4209 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4210 KmpAffinityInfoRD->completeDefinition();
4211 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4212 }
4213}
4214
4215CGOpenMPRuntime::TaskResultTy
4216CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4217 const OMPExecutableDirective &D,
4218 llvm::Function *TaskFunction, QualType SharedsTy,
4219 Address Shareds, const OMPTaskDataTy &Data) {
4220 ASTContext &C = CGM.getContext();
4221 llvm::SmallVector<PrivateDataTy, 4> Privates;
4222 // Aggregate privates and sort them by the alignment.
4223 const auto *I = Data.PrivateCopies.begin();
4224 for (const Expr *E : Data.PrivateVars) {
4225 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4226 Privates.emplace_back(
4227 C.getDeclAlign(VD),
4228 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4229 /*PrivateElemInit=*/nullptr));
4230 ++I;
4231 }
4232 I = Data.FirstprivateCopies.begin();
4233 const auto *IElemInitRef = Data.FirstprivateInits.begin();
4234 for (const Expr *E : Data.FirstprivateVars) {
4235 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4236 Privates.emplace_back(
4237 C.getDeclAlign(VD),
4238 PrivateHelpersTy(
4239 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4240 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4241 ++I;
4242 ++IElemInitRef;
4243 }
4244 I = Data.LastprivateCopies.begin();
4245 for (const Expr *E : Data.LastprivateVars) {
4246 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4247 Privates.emplace_back(
4248 C.getDeclAlign(VD),
4249 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4250 /*PrivateElemInit=*/nullptr));
4251 ++I;
4252 }
4253 for (const VarDecl *VD : Data.PrivateLocals) {
4254 if (isAllocatableDecl(VD))
4255 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4256 else
4257 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4258 }
4259 llvm::stable_sort(Privates,
4260 [](const PrivateDataTy &L, const PrivateDataTy &R) {
4261 return L.first > R.first;
4262 });
4263 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4264 // Build type kmp_routine_entry_t (if not built yet).
4265 emitKmpRoutineEntryT(KmpInt32Ty);
4266 // Build type kmp_task_t (if not built yet).
4267 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4268 if (SavedKmpTaskloopTQTy.isNull()) {
4269 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4270 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4271 }
4272 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4273 } else {
4274 assert((D.getDirectiveKind() == OMPD_task ||(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4277, __extension__
__PRETTY_FUNCTION__))
4275 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4277, __extension__
__PRETTY_FUNCTION__))
4276 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4277, __extension__
__PRETTY_FUNCTION__))
4277 "Expected taskloop, task or target directive")(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4277, __extension__
__PRETTY_FUNCTION__))
;
4278 if (SavedKmpTaskTQTy.isNull()) {
4279 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4280 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4281 }
4282 KmpTaskTQTy = SavedKmpTaskTQTy;
4283 }
4284 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4285 // Build particular struct kmp_task_t for the given task.
4286 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4287 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4288 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4289 QualType KmpTaskTWithPrivatesPtrQTy =
4290 C.getPointerType(KmpTaskTWithPrivatesQTy);
4291 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4292 llvm::Type *KmpTaskTWithPrivatesPtrTy =
4293 KmpTaskTWithPrivatesTy->getPointerTo();
4294 llvm::Value *KmpTaskTWithPrivatesTySize =
4295 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4296 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4297
4298 // Emit initial values for private copies (if any).
4299 llvm::Value *TaskPrivatesMap = nullptr;
4300 llvm::Type *TaskPrivatesMapTy =
4301 std::next(TaskFunction->arg_begin(), 3)->getType();
4302 if (!Privates.empty()) {
4303 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4304 TaskPrivatesMap =
4305 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4306 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4307 TaskPrivatesMap, TaskPrivatesMapTy);
4308 } else {
4309 TaskPrivatesMap = llvm::ConstantPointerNull::get(
4310 cast<llvm::PointerType>(TaskPrivatesMapTy));
4311 }
4312 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4313 // kmp_task_t *tt);
4314 llvm::Function *TaskEntry = emitProxyTaskFunction(
4315 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4316 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4317 TaskPrivatesMap);
4318
4319 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4320 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4321 // kmp_routine_entry_t *task_entry);
4322 // Task flags. Format is taken from
4323 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4324 // description of kmp_tasking_flags struct.
4325 enum {
4326 TiedFlag = 0x1,
4327 FinalFlag = 0x2,
4328 DestructorsFlag = 0x8,
4329 PriorityFlag = 0x20,
4330 DetachableFlag = 0x40,
4331 };
4332 unsigned Flags = Data.Tied ? TiedFlag : 0;
4333 bool NeedsCleanup = false;
4334 if (!Privates.empty()) {
4335 NeedsCleanup =
4336 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4337 if (NeedsCleanup)
4338 Flags = Flags | DestructorsFlag;
4339 }
4340 if (Data.Priority.getInt())
4341 Flags = Flags | PriorityFlag;
4342 if (D.hasClausesOfKind<OMPDetachClause>())
4343 Flags = Flags | DetachableFlag;
4344 llvm::Value *TaskFlags =
4345 Data.Final.getPointer()
4346 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4347 CGF.Builder.getInt32(FinalFlag),
4348 CGF.Builder.getInt32(/*C=*/0))
4349 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4350 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4351 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4352 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4353 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4354 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4355 TaskEntry, KmpRoutineEntryPtrTy)};
4356 llvm::Value *NewTask;
4357 if (D.hasClausesOfKind<OMPNowaitClause>()) {
4358 // Check if we have any device clause associated with the directive.
4359 const Expr *Device = nullptr;
4360 if (auto *C = D.getSingleClause<OMPDeviceClause>())
4361 Device = C->getDevice();
4362 // Emit device ID if any otherwise use default value.
4363 llvm::Value *DeviceID;
4364 if (Device)
4365 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4366 CGF.Int64Ty, /*isSigned=*/true);
4367 else
4368 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4369 AllocArgs.push_back(DeviceID);
4370 NewTask = CGF.EmitRuntimeCall(
4371 OMPBuilder.getOrCreateRuntimeFunction(
4372 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4373 AllocArgs);
4374 } else {
4375 NewTask =
4376 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4377 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4378 AllocArgs);
4379 }
4380 // Emit detach clause initialization.
4381 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4382 // task_descriptor);
4383 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4384 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4385 LValue EvtLVal = CGF.EmitLValue(Evt);
4386
4387 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4388 // int gtid, kmp_task_t *task);
4389 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4390 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4391 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4392 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4393 OMPBuilder.getOrCreateRuntimeFunction(
4394 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4395 {Loc, Tid, NewTask});
4396 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4397 Evt->getExprLoc());
4398 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4399 }
4400 // Process affinity clauses.
4401 if (D.hasClausesOfKind<OMPAffinityClause>()) {
4402 // Process list of affinity data.
4403 ASTContext &C = CGM.getContext();
4404 Address AffinitiesArray = Address::invalid();
4405 // Calculate number of elements to form the array of affinity data.
4406 llvm::Value *NumOfElements = nullptr;
4407 unsigned NumAffinities = 0;
4408 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4409 if (const Expr *Modifier = C->getModifier()) {
4410 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4411 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4412 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4413 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4414 NumOfElements =
4415 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4416 }
4417 } else {
4418 NumAffinities += C->varlist_size();
4419 }
4420 }
4421 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4422 // Fields ids in kmp_task_affinity_info record.
4423 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4424
4425 QualType KmpTaskAffinityInfoArrayTy;
4426 if (NumOfElements) {
4427 NumOfElements = CGF.Builder.CreateNUWAdd(
4428 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4429 auto *OVE = new (C) OpaqueValueExpr(
4430 Loc,
4431 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4432 VK_PRValue);
4433 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4434 RValue::get(NumOfElements));
4435 KmpTaskAffinityInfoArrayTy =
4436 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4437 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4438 // Properly emit variable-sized array.
4439 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4440 ImplicitParamDecl::Other);
4441 CGF.EmitVarDecl(*PD);
4442 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4443 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4444 /*isSigned=*/false);
4445 } else {
4446 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4447 KmpTaskAffinityInfoTy,
4448 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4449 ArrayType::Normal, /*IndexTypeQuals=*/0);
4450 AffinitiesArray =
4451 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4452 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4453 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4454 /*isSigned=*/false);
4455 }
4456
4457 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4458 // Fill array by elements without iterators.
4459 unsigned Pos = 0;
4460 bool HasIterator = false;
4461 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4462 if (C->getModifier()) {
4463 HasIterator = true;
4464 continue;
4465 }
4466 for (const Expr *E : C->varlists()) {
4467 llvm::Value *Addr;
4468 llvm::Value *Size;
4469 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4470 LValue Base =
4471 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4472 KmpTaskAffinityInfoTy);
4473 // affs[i].base_addr = &<Affinities[i].second>;
4474 LValue BaseAddrLVal = CGF.EmitLValueForField(
4475 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4476 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4477 BaseAddrLVal);
4478 // affs[i].len = sizeof(<Affinities[i].second>);
4479 LValue LenLVal = CGF.EmitLValueForField(
4480 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4481 CGF.EmitStoreOfScalar(Size, LenLVal);
4482 ++Pos;
4483 }
4484 }
4485 LValue PosLVal;
4486 if (HasIterator) {
4487 PosLVal = CGF.MakeAddrLValue(
4488 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4489 C.getSizeType());
4490 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4491 }
4492 // Process elements with iterators.
4493 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4494 const Expr *Modifier = C->getModifier();
4495 if (!Modifier)
4496 continue;
4497 OMPIteratorGeneratorScope IteratorScope(
4498 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4499 for (const Expr *E : C->varlists()) {
4500 llvm::Value *Addr;
4501 llvm::Value *Size;
4502 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4503 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4504 LValue Base = CGF.MakeAddrLValue(
4505 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4506 // affs[i].base_addr = &<Affinities[i].second>;
4507 LValue BaseAddrLVal = CGF.EmitLValueForField(
4508 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4509 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4510 BaseAddrLVal);
4511 // affs[i].len = sizeof(<Affinities[i].second>);
4512 LValue LenLVal = CGF.EmitLValueForField(
4513 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4514 CGF.EmitStoreOfScalar(Size, LenLVal);
4515 Idx = CGF.Builder.CreateNUWAdd(
4516 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4517 CGF.EmitStoreOfScalar(Idx, PosLVal);
4518 }
4519 }
4520 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4521 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4522 // naffins, kmp_task_affinity_info_t *affin_list);
4523 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4524 llvm::Value *GTid = getThreadID(CGF, Loc);
4525 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4526 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4527 // FIXME: Emit the function and ignore its result for now unless the
4528 // runtime function is properly implemented.
4529 (void)CGF.EmitRuntimeCall(
4530 OMPBuilder.getOrCreateRuntimeFunction(
4531 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4532 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4533 }
4534 llvm::Value *NewTaskNewTaskTTy =
4535 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4536 NewTask, KmpTaskTWithPrivatesPtrTy);
4537 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4538 KmpTaskTWithPrivatesQTy);
4539 LValue TDBase =
4540 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4541 // Fill the data in the resulting kmp_task_t record.
4542 // Copy shareds if there are any.
4543 Address KmpTaskSharedsPtr = Address::invalid();
4544 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4545 KmpTaskSharedsPtr = Address(
4546 CGF.EmitLoadOfScalar(
4547 CGF.EmitLValueForField(
4548 TDBase,
4549 *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
4550 Loc),
4551 CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
4552 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4553 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4554 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4555 }
4556 // Emit initial values for private copies (if any).
4557 TaskResultTy Result;
4558 if (!Privates.empty()) {
4559 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4560 SharedsTy, SharedsPtrTy, Data, Privates,
4561 /*ForDup=*/false);
4562 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4563 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4564 Result.TaskDupFn = emitTaskDupFunction(
4565 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4566 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4567 /*WithLastIter=*/!Data.LastprivateVars.empty());
4568 }
4569 }
4570 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4571 enum { Priority = 0, Destructors = 1 };
4572 // Provide pointer to function with destructors for privates.
4573 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4574 const RecordDecl *KmpCmplrdataUD =
4575 (*FI)->getType()->getAsUnionType()->getDecl();
4576 if (NeedsCleanup) {
4577 llvm::Value *DestructorFn = emitDestructorsFunction(
4578 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4579 KmpTaskTWithPrivatesQTy);
4580 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4581 LValue DestructorsLV = CGF.EmitLValueForField(
4582 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4583 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4584 DestructorFn, KmpRoutineEntryPtrTy),
4585 DestructorsLV);
4586 }
4587 // Set priority.
4588 if (Data.Priority.getInt()) {
4589 LValue Data2LV = CGF.EmitLValueForField(
4590 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4591 LValue PriorityLV = CGF.EmitLValueForField(
4592 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4593 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4594 }
4595 Result.NewTask = NewTask;
4596 Result.TaskEntry = TaskEntry;
4597 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4598 Result.TDBase = TDBase;
4599 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4600 return Result;
4601}
4602
namespace {
/// Dependence kind for RTL.
/// NOTE(review): these values are stored into the kmp_depend_info 'flags'
/// field handed to the OpenMP runtime (see emitDependData), so they
/// presumably mirror the runtime's kmp.h encoding — do not renumber without
/// checking the runtime.
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
  DepInOutSet = 0x8
};
/// Fields ids in kmp_depend_info record.
/// Order must match the field order built in getDependTypes():
/// { intptr_t base_addr; size_t len; <bool-sized unsigned> flags; }.
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4614
4615/// Translates internal dependency kind into the runtime kind.
4616static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4617 RTLDependenceKindTy DepKind;
4618 switch (K) {
4619 case OMPC_DEPEND_in:
4620 DepKind = DepIn;
4621 break;
4622 // Out and InOut dependencies must use the same code.
4623 case OMPC_DEPEND_out:
4624 case OMPC_DEPEND_inout:
4625 DepKind = DepInOut;
4626 break;
4627 case OMPC_DEPEND_mutexinoutset:
4628 DepKind = DepMutexInOutSet;
4629 break;
4630 case OMPC_DEPEND_inoutset:
4631 DepKind = DepInOutSet;
4632 break;
4633 case OMPC_DEPEND_source:
4634 case OMPC_DEPEND_sink:
4635 case OMPC_DEPEND_depobj:
4636 case OMPC_DEPEND_unknown:
4637 llvm_unreachable("Unknown task dependence type")::llvm::llvm_unreachable_internal("Unknown task dependence type"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4637)
;
4638 }
4639 return DepKind;
4640}
4641
4642/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4643static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4644 QualType &FlagsTy) {
4645 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4646 if (KmpDependInfoTy.isNull()) {
4647 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4648 KmpDependInfoRD->startDefinition();
4649 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4650 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4651 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4652 KmpDependInfoRD->completeDefinition();
4653 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4654 }
4655}
4656
4657std::pair<llvm::Value *, LValue>
4658CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4659 SourceLocation Loc) {
4660 ASTContext &C = CGM.getContext();
4661 QualType FlagsTy;
4662 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4663 RecordDecl *KmpDependInfoRD =
4664 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4665 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4666 LValue Base = CGF.EmitLoadOfPointerLValue(
4667 CGF.Builder.CreateElementBitCast(
4668 DepobjLVal.getAddress(CGF),
4669 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4670 KmpDependInfoPtrTy->castAs<PointerType>());
4671 Address DepObjAddr = CGF.Builder.CreateGEP(
4672 Base.getAddress(CGF),
4673 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4674 LValue NumDepsBase = CGF.MakeAddrLValue(
4675 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4676 // NumDeps = deps[i].base_addr;
4677 LValue BaseAddrLVal = CGF.EmitLValueForField(
4678 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4679 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4680 return std::make_pair(NumDeps, Base);
4681}
4682
4683static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4684 llvm::PointerUnion<unsigned *, LValue *> Pos,
4685 const OMPTaskDataTy::DependData &Data,
4686 Address DependenciesArray) {
4687 CodeGenModule &CGM = CGF.CGM;
4688 ASTContext &C = CGM.getContext();
4689 QualType FlagsTy;
4690 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4691 RecordDecl *KmpDependInfoRD =
4692 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4693 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4694
4695 OMPIteratorGeneratorScope IteratorScope(
4696 CGF, cast_or_null<OMPIteratorExpr>(
4697 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4698 : nullptr));
4699 for (const Expr *E : Data.DepExprs) {
4700 llvm::Value *Addr;
4701 llvm::Value *Size;
4702 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4703 LValue Base;
4704 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4705 Base = CGF.MakeAddrLValue(
4706 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4707 } else {
4708 LValue &PosLVal = *Pos.get<LValue *>();
4709 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4710 Base = CGF.MakeAddrLValue(
4711 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4712 }
4713 // deps[i].base_addr = &<Dependencies[i].second>;
4714 LValue BaseAddrLVal = CGF.EmitLValueForField(
4715 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4716 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4717 BaseAddrLVal);
4718 // deps[i].len = sizeof(<Dependencies[i].second>);
4719 LValue LenLVal = CGF.EmitLValueForField(
4720 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4721 CGF.EmitStoreOfScalar(Size, LenLVal);
4722 // deps[i].flags = <Dependencies[i].first>;
4723 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4724 LValue FlagsLVal = CGF.EmitLValueForField(
4725 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4726 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4727 FlagsLVal);
4728 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4729 ++(*P);
4730 } else {
4731 LValue &PosLVal = *Pos.get<LValue *>();
4732 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4733 Idx = CGF.Builder.CreateNUWAdd(Idx,
4734 llvm::ConstantInt::get(Idx->getType(), 1));
4735 CGF.EmitStoreOfScalar(Idx, PosLVal);
4736 }
4737 }
4738}
4739
4740SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4741 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4742 const OMPTaskDataTy::DependData &Data) {
4743 assert(Data.DepKind == OMPC_DEPEND_depobj &&(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4744, __extension__
__PRETTY_FUNCTION__))
4744 "Expected depobj dependecy kind.")(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4744, __extension__
__PRETTY_FUNCTION__))
;
4745 SmallVector<llvm::Value *, 4> Sizes;
4746 SmallVector<LValue, 4> SizeLVals;
4747 ASTContext &C = CGF.getContext();
4748 {
4749 OMPIteratorGeneratorScope IteratorScope(
4750 CGF, cast_or_null<OMPIteratorExpr>(
4751 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4752 : nullptr));
4753 for (const Expr *E : Data.DepExprs) {
4754 llvm::Value *NumDeps;
4755 LValue Base;
4756 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4757 std::tie(NumDeps, Base) =
4758 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4759 LValue NumLVal = CGF.MakeAddrLValue(
4760 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4761 C.getUIntPtrType());
4762 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4763 NumLVal.getAddress(CGF));
4764 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4765 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4766 CGF.EmitStoreOfScalar(Add, NumLVal);
4767 SizeLVals.push_back(NumLVal);
4768 }
4769 }
4770 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4771 llvm::Value *Size =
4772 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4773 Sizes.push_back(Size);
4774 }
4775 return Sizes;
4776}
4777
4778void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4779 QualType &KmpDependInfoTy,
4780 LValue PosLVal,
4781 const OMPTaskDataTy::DependData &Data,
4782 Address DependenciesArray) {
4783 assert(Data.DepKind == OMPC_DEPEND_depobj &&(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4784, __extension__
__PRETTY_FUNCTION__))
4784 "Expected depobj dependecy kind.")(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4784, __extension__
__PRETTY_FUNCTION__))
;
4785 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4786 {
4787 OMPIteratorGeneratorScope IteratorScope(
4788 CGF, cast_or_null<OMPIteratorExpr>(
4789 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4790 : nullptr));
4791 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4792 const Expr *E = Data.DepExprs[I];
4793 llvm::Value *NumDeps;
4794 LValue Base;
4795 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4796 std::tie(NumDeps, Base) =
4797 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4798
4799 // memcopy dependency data.
4800 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4801 ElSize,
4802 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4803 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4804 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4805 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4806
4807 // Increase pos.
4808 // pos += size;
4809 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4810 CGF.EmitStoreOfScalar(Add, PosLVal);
4811 }
4812 }
4813}
4814
/// Emits the combined dependency array for all 'depend' clauses of a
/// construct and returns {number of elements, array base address} for the
/// runtime call; {nullptr, invalid} when there are no dependencies.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  // Nothing to emit if every clause has an empty dependency list.
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count: regular (non-depobj) deps without iterator modifiers.
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  // Runtime-only counts, accumulated as IR values.
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependecies and regular deps with the iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj contributes a count only known at runtime (stored in its
      // header slot).
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      // For each iterator: add <upper bound> * <deps per clause> elements.
      // NOTE(review): with several nested iterators this sums per-iterator
      // counts rather than multiplying trip counts — confirm intended.
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size only known at runtime: sum static and dynamic counts and
    // emit a variable-length array to hold the entries.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    // Bind the runtime count to an opaque expression so it can serve as the
    // size expression of a VLA type.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a constant-sized stack temporary suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Phase 1: regular deps without iterators; Pos tracks the next free slot
  // at compile time.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependecies with iterators.
  // Phase 2: switch to a runtime counter, seeded with the static position.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  // Phase 3: memcpy the contents of each depobj's array.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4938
/// Allocates (via __kmpc_alloc) and fills the dependency array backing a
/// 'depobj' construct. Slot 0 is a header whose base_addr field stores the
/// number of dependencies; the returned address points past it, at the first
/// real entry.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of all iterator upper bounds.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // Allocation size = (count + 1 header slot) * aligned sizeof(entry).
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Compile-time count: size of a constant array with one extra header slot.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill entries starting at slot 1: with an iterator modifier the position
  // is a runtime counter, otherwise a compile-time index.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a void* past the header, at the first dependency entry.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
5023
5024void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5025 SourceLocation Loc) {
5026 ASTContext &C = CGM.getContext();
5027 QualType FlagsTy;
5028 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5029 LValue Base = CGF.EmitLoadOfPointerLValue(
5030 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
5031 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5032 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5033 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
5034 CGF.ConvertTypeForMem(KmpDependInfoTy));
5035 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5036 Addr.getElementType(), Addr.getPointer(),
5037 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5038 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5039 CGF.VoidPtrTy);
5040 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5041 // Use default allocator.
5042 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5043 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5044
5045 // _kmpc_free(gtid, addr, nullptr);
5046 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5047 CGM.getModule(), OMPRTL___kmpc_free),
5048 Args);
5049}
5050
/// Emits an update of a depobj object's dependency kind: walks every
/// kmp_depend_info element stored in the depobj and rewrites its `flags`
/// field to the runtime encoding of \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI over the current element; the second incoming edge (from the loop
  // latch) is added after the body is emitted, below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5097
/// Emits code for an OpenMP task directive: allocates/initializes the task
/// via emitTaskInit, then either enqueues it through the runtime (then-path)
/// or, when the `if` clause evaluates to false, executes it serially wrapped
/// in __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0 (else-path).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is only initialized — and only read by the lambdas below —
  // when the dependence list is non-empty.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-path: hand the task to the runtime (with or without dependences).
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // As with DepTaskArgs, initialized and read only when dependences exist.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-path: serialized (if0) execution of the task body.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // NOTE: RCG keeps a reference to the stack-local Action; both are locals
    // of this lambda and RCG is invoked before returning, so the reference
    // does not escape this scope.
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5215
/// Emits code for an OpenMP taskloop directive: initializes the task object,
/// stores the loop bounds/stride and reductions pointer into the task record,
/// and issues the __kmpc_taskloop runtime call.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // Unlike emitTaskCall, the if-clause value is passed to the runtime rather
  // than generating two code paths.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field of the task record from the loop's
  // lower-bound variable initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Same for the upper bound...
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // ...and the stride.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Scheduling-kind encoding expected by __kmpc_taskloop's `sched` argument.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5301
5302/// Emit reduction operation for each element of array (required for
5303/// array sections) LHS op = RHS.
5304/// \param Type Type of array.
5305/// \param LHSVar Variable on the left side of the reduction operation
5306/// (references element of array in original variable).
5307/// \param RHSVar Variable on the right side of the reduction operation
5308/// (references element of array in original variable).
5309/// \param RedOpGen Generator of reduction operation with use of LHSVar and
5310/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Zero-length arrays skip the body entirely.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs track the current LHS/RHS element; the latch edges are added after
  // the body is emitted, below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy: privatize LHSVar/RHSVar to the current elements so RedOpGen
  // operates on a single element pair.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5384
5385/// Emit reduction combiner. If the combiner is a simple expression emit it as
5386/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5387/// UDR combiner function.
5388static void emitReductionCombiner(CodeGenFunction &CGF,
5389 const Expr *ReductionOp) {
5390 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5391 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5392 if (const auto *DRE =
5393 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5394 if (const auto *DRD =
5395 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5396 std::pair<llvm::Function *, llvm::Function *> Reduction =
5397 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5398 RValue Func = RValue::get(Reduction.first);
5399 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5400 CGF.EmitIgnoredExpr(ReductionOp);
5401 return;
5402 }
5403 CGF.EmitIgnoredExpr(ReductionOp);
5404}
5405
/// Builds the `reduce_func` helper passed to __kmpc_reduce{_nowait}: a
/// function taking two void* arrays (lhs/rhs reduction item pointers) that
/// applies each combiner element-wise, i.e.
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // The body is emitted into a fresh CodeGenFunction, independent of the
  // caller's insertion point.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable to its slot in the argument arrays. `Idx` can
  // run ahead of `I` because VLA items occupy an extra slot for their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5495
5496void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5497 const Expr *ReductionOp,
5498 const Expr *PrivateRef,
5499 const DeclRefExpr *LHS,
5500 const DeclRefExpr *RHS) {
5501 if (PrivateRef->getType()->isArrayType()) {
5502 // Emit reduction for array section.
5503 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5504 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5505 EmitOMPAggregateReduction(
5506 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5507 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5508 emitReductionCombiner(CGF, ReductionOp);
5509 });
5510 } else {
5511 // Emit reduction for array subscript or single variable.
5512 emitReductionCombiner(CGF, ReductionOp);
5513 }
5514}
5515
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime coordination needed: just apply each combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // NOTE: Action is a stack local referenced by RCG; RCG is invoked before
  // either goes out of scope, so the reference does not dangle.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // A combiner of the form `x = <update>` is a candidate for a simple
      // atomic update.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // The update callback receives the old value of X; store it
                // into a temporary privatized as VD so UpExpr reads it.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    // Same stack-local Action/RCG pattern as case 1; consumed before return.
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5819
5820/// Generates unique name for artificial threadprivate variables.
5821/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5822static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5823 const Expr *Ref) {
5824 SmallString<256> Buffer;
5825 llvm::raw_svector_ostream Out(Buffer);
5826 const clang::DeclRefExpr *DE;
5827 const VarDecl *D = ::getBaseDecl(Ref, DE);
5828 if (!D)
5829 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5830 D = D->getCanonicalDecl();
5831 std::string Name = CGM.getOpenMPRuntime().getName(
5832 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5833 Out << Prefix << Name << "_"
5834 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5835 return std::string(Out.str());
5836}
5837
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both arguments are restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  // The initializer is emitted as an internal helper function in this module.
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy to be initialized (first argument).
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5904
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // LHS/RHS helper variables from the reduction clause; remapped onto the
  // function arguments below via an OMPPrivateScope.
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamIn),
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5982
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups.
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No destructor/cleanup required for this item -> no finalizer function.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Address of the private copy to destroy (the single argument).
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
6030
// Emits the kmp_taskred_input_t descriptor array for all task reduction items
// and the call to __kmpc_taskred_init (or __kmpc_taskred_modifier_init when a
// task reduction modifier is present). Returns the taskgroup/reduction handle
// produced by the runtime, or nullptr if there is nothing to do.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  // void *reduce_shar; // shared reduction item
  // void *reduce_orig; // original reduction item used for initialization
  // size_t reduce_size; // size of data item
  // void *reduce_init; // data initialization routine
  // void *reduce_fini; // data finalization routine
  // void *reduce_comb; // data combiner routine
  // kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one descriptor per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini; (null if the item needs no cleanups)
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0; (1 signals delayed creation to the runtime)
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6159
6160void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6161 SourceLocation Loc,
6162 bool IsWorksharingReduction) {
6163 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6164 // is_ws, int num, void *data);
6165 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6166 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6167 CGM.IntTy, /*isSigned=*/true);
6168 llvm::Value *Args[] = {IdentTLoc, GTid,
6169 llvm::ConstantInt::get(CGM.IntTy,
6170 IsWorksharingReduction ? 1 : 0,
6171 /*isSigned=*/true)};
6172 (void)CGF.EmitRuntimeCall(
6173 OMPBuilder.getOrCreateRuntimeFunction(
6174 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6175 Args);
6176}
6177
6178void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6179 SourceLocation Loc,
6180 ReductionCodeGen &RCG,
6181 unsigned N) {
6182 auto Sizes = RCG.getSizes(N);
6183 // Emit threadprivate global variable if the type is non-constant
6184 // (Sizes.second = nullptr).
6185 if (Sizes.second) {
6186 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6187 /*isSigned=*/false);
6188 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6189 CGF, CGM.getContext().getSizeType(),
6190 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6191 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6192 }
6193}
6194
6195Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6196 SourceLocation Loc,
6197 llvm::Value *ReductionsPtr,
6198 LValue SharedLVal) {
6199 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6200 // *d);
6201 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6202 CGM.IntTy,
6203 /*isSigned=*/true),
6204 ReductionsPtr,
6205 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6206 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6207 return Address(
6208 CGF.EmitRuntimeCall(
6209 OMPBuilder.getOrCreateRuntimeFunction(
6210 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6211 Args),
6212 CGF.Int8Ty, SharedLVal.getAlignment());
6213}
6214
// Emits a taskwait. With dependences (or without OpenMPIRBuilder) this lowers
// to __kmpc_omp_wait_deps / __kmpc_omp_taskwait; otherwise the OpenMPIRBuilder
// emits the taskwait directly.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the dependence list (empty list yields an invalid array).
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Untied task regions need a switch point after the wait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6265
6266void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6267 OpenMPDirectiveKind InnerKind,
6268 const RegionCodeGenTy &CodeGen,
6269 bool HasCancel) {
6270 if (!CGF.HaveInsertPoint())
6271 return;
6272 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6273 InnerKind != OMPD_critical &&
6274 InnerKind != OMPD_master &&
6275 InnerKind != OMPD_masked);
6276 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6277}
6278
namespace {
/// Cancellation-kind codes passed to the __kmpc_cancel and
/// __kmpc_cancellationpoint runtime calls (see getCancellationKind below).
enum RTCancelKind {
  CancelNoreq = 0,     // No cancellation region.
  CancelParallel = 1,  // Maps from OMPD_parallel.
  CancelLoop = 2,      // Maps from OMPD_for.
  CancelSections = 3,  // Maps from OMPD_sections.
  CancelTaskgroup = 4  // Maps from OMPD_taskgroup.
};
} // anonymous namespace
6288
6289static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6290 RTCancelKind CancelKind = CancelNoreq;
6291 if (CancelRegion == OMPD_parallel)
6292 CancelKind = CancelParallel;
6293 else if (CancelRegion == OMPD_for)
6294 CancelKind = CancelLoop;
6295 else if (CancelRegion == OMPD_sections)
6296 CancelKind = CancelSections;
6297 else {
6298 assert(CancelRegion == OMPD_taskgroup)(static_cast <bool> (CancelRegion == OMPD_taskgroup) ? void
(0) : __assert_fail ("CancelRegion == OMPD_taskgroup", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 6298, __extension__ __PRETTY_FUNCTION__))
;
6299 CancelKind = CancelTaskgroup;
6300 }
6301 return CancelKind;
6302}
6303
// Emits a "#pragma omp cancellation point" as a call to
// __kmpc_cancellationpoint followed by a conditional exit from the construct.
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      // exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6343
// Emits a "#pragma omp cancel", optionally guarded by an if-clause condition,
// as a call to __kmpc_cancel followed by a conditional exit from the construct.
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // Generator for the cancel call and the conditional branch out of the
    // construct; run unconditionally or under the if-clause condition below.
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      // exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6389
6390namespace {
6391/// Cleanup action for uses_allocators support.
6392class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6393 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6394
6395public:
6396 OMPUsesAllocatorsActionTy(
6397 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6398 : Allocators(Allocators) {}
6399 void Enter(CodeGenFunction &CGF) override {
6400 if (!CGF.HaveInsertPoint())
6401 return;
6402 for (const auto &AllocatorData : Allocators) {
6403 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6404 CGF, AllocatorData.first, AllocatorData.second);
6405 }
6406 }
6407 void Exit(CodeGenFunction &CGF) override {
6408 if (!CGF.HaveInsertPoint())
6409 return;
6410 for (const auto &AllocatorData : Allocators) {
6411 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6412 AllocatorData.first);
6413 }
6414 }
6415};
6416} // namespace
6417
6418void CGOpenMPRuntime::emitTargetOutlinedFunction(
6419 const OMPExecutableDirective &D, StringRef ParentName,
6420 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6421 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6422 assert(!ParentName.empty() && "Invalid target region parent name!")(static_cast <bool> (!ParentName.empty() && "Invalid target region parent name!"
) ? void (0) : __assert_fail ("!ParentName.empty() && \"Invalid target region parent name!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6422, __extension__
__PRETTY_FUNCTION__))
;
6423 HasEmittedTargetRegion = true;
6424 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6425 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6426 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6427 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6428 if (!D.AllocatorTraits)
6429 continue;
6430 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6431 }
6432 }
6433 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6434 CodeGen.setAction(UsesAllocatorAction);
6435 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6436 IsOffloadEntry, CodeGen);
6437}
6438
// Emits __kmpc_init_allocator for one (allocator, traits) pair of a
// uses_allocators clause and stores the returned handle into the allocator
// variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Number of traits is the constant array bound of the traits expression.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  // Pass the traits array to the runtime as a void* value.
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  // Convert the returned void* handle to the declared allocator type.
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6473
// Emits __kmpc_destroy_allocator for an allocator previously created by
// emitUsesAllocatorsInit; the handle is loaded back from the allocator
// variable and converted to void* for the runtime.
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
6489
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // The host fallback is not built when offload is mandatory on the host side.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    // Host side: a dummy constant global serves as the unique region ID.
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&(static_cast <bool> (!CGM.getModule().getGlobalVariable
(EntryFnName, true) && "Named kernel already exists?"
) ? void (0) : __assert_fail ("!CGM.getModule().getGlobalVariable(EntryFnName, true) && \"Named kernel already exists?\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6560, __extension__
__PRETTY_FUNCTION__))
           "Named kernel already exists?")(static_cast <bool> (!CGM.getModule().getGlobalVariable
(EntryFnName, true) && "Named kernel already exists?"
) ? void (0) : __assert_fail ("!CGM.getModule().getGlobalVariable(EntryFnName, true) && \"Named kernel already exists?\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6560, __extension__
__PRETTY_FUNCTION__))
;
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6589
6590/// Checks if the expression is constant or does not have non-trivial function
6591/// calls.
6592static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6593 // We can skip constant expressions.
6594 // We can skip expressions with trivial calls or simple expressions.
6595 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6596 !E->hasNonTrivialCall(Ctx)) &&
6597 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6598}
6599
// Returns the single meaningful child statement of \p Body, descending
// through compound statements and skipping trivial expressions, ignorable
// statements and harmless declarations. Returns nullptr when more than one
// meaningful child is found.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      // Trivial expressions contribute nothing and can be skipped.
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // Declarations without runtime effect (types, pragmas, OpenMP
              // declarative directives, unused or global variables) are
              // ignorable.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Unwrap the surviving child and keep descending if it is compound again.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6641
6642const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6643 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6644 int32_t &DefaultVal) {
6645
6646 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6647 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6648, __extension__
__PRETTY_FUNCTION__))
6648 "Expected target-based executable directive.")(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6648, __extension__
__PRETTY_FUNCTION__))
;
6649 switch (DirectiveKind) {
6650 case OMPD_target: {
6651 const auto *CS = D.getInnermostCapturedStmt();
6652 const auto *Body =
6653 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6654 const Stmt *ChildStmt =
6655 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6656 if (const auto *NestedDir =
6657 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6658 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6659 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6660 const Expr *NumTeams =
6661 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6662 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6663 if (auto Constant =
6664 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6665 DefaultVal = Constant->getExtValue();
6666 return NumTeams;
6667 }
6668 DefaultVal = 0;
6669 return nullptr;
6670 }
6671 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6672 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6673 DefaultVal = 1;
6674 return nullptr;
6675 }
6676 DefaultVal = 1;
6677 return nullptr;
6678 }
6679 // A value of -1 is used to check if we need to emit no teams region
6680 DefaultVal = -1;
6681 return nullptr;
6682 }
6683 case OMPD_target_teams:
6684 case OMPD_target_teams_distribute:
6685 case OMPD_target_teams_distribute_simd:
6686 case OMPD_target_teams_distribute_parallel_for:
6687 case OMPD_target_teams_distribute_parallel_for_simd: {
6688 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6689 const Expr *NumTeams =
6690 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6691 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6692 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6693 DefaultVal = Constant->getExtValue();
6694 return NumTeams;
6695 }
6696 DefaultVal = 0;
6697 return nullptr;
6698 }
6699 case OMPD_target_parallel:
6700 case OMPD_target_parallel_for:
6701 case OMPD_target_parallel_for_simd:
6702 case OMPD_target_simd:
6703 DefaultVal = 1;
6704 return nullptr;
6705 case OMPD_parallel:
6706 case OMPD_for:
6707 case OMPD_parallel_for:
6708 case OMPD_parallel_master:
6709 case OMPD_parallel_sections:
6710 case OMPD_for_simd:
6711 case OMPD_parallel_for_simd:
6712 case OMPD_cancel:
6713 case OMPD_cancellation_point:
6714 case OMPD_ordered:
6715 case OMPD_threadprivate:
6716 case OMPD_allocate:
6717 case OMPD_task:
6718 case OMPD_simd:
6719 case OMPD_tile:
6720 case OMPD_unroll:
6721 case OMPD_sections:
6722 case OMPD_section:
6723 case OMPD_single:
6724 case OMPD_master:
6725 case OMPD_critical:
6726 case OMPD_taskyield:
6727 case OMPD_barrier:
6728 case OMPD_taskwait:
6729 case OMPD_taskgroup:
6730 case OMPD_atomic:
6731 case OMPD_flush:
6732 case OMPD_depobj:
6733 case OMPD_scan:
6734 case OMPD_teams:
6735 case OMPD_target_data:
6736 case OMPD_target_exit_data:
6737 case OMPD_target_enter_data:
6738 case OMPD_distribute:
6739 case OMPD_distribute_simd:
6740 case OMPD_distribute_parallel_for:
6741 case OMPD_distribute_parallel_for_simd:
6742 case OMPD_teams_distribute:
6743 case OMPD_teams_distribute_simd:
6744 case OMPD_teams_distribute_parallel_for:
6745 case OMPD_teams_distribute_parallel_for_simd:
6746 case OMPD_target_update:
6747 case OMPD_declare_simd:
6748 case OMPD_declare_variant:
6749 case OMPD_begin_declare_variant:
6750 case OMPD_end_declare_variant:
6751 case OMPD_declare_target:
6752 case OMPD_end_declare_target:
6753 case OMPD_declare_reduction:
6754 case OMPD_declare_mapper:
6755 case OMPD_taskloop:
6756 case OMPD_taskloop_simd:
6757 case OMPD_master_taskloop:
6758 case OMPD_master_taskloop_simd:
6759 case OMPD_parallel_master_taskloop:
6760 case OMPD_parallel_master_taskloop_simd:
6761 case OMPD_requires:
6762 case OMPD_metadirective:
6763 case OMPD_unknown:
6764 break;
6765 default:
6766 break;
6767 }
6768 llvm_unreachable("Unexpected directive kind.")::llvm::llvm_unreachable_internal("Unexpected directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6768)
;
6769}
6770
6771llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6772 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6773 assert(!CGF.getLangOpts().OpenMPIsDevice &&(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6775, __extension__
__PRETTY_FUNCTION__))
6774 "Clauses associated with the teams directive expected to be emitted "(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6775, __extension__
__PRETTY_FUNCTION__))
6775 "only for the host!")(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6775, __extension__
__PRETTY_FUNCTION__))
;
6776 CGBuilderTy &Bld = CGF.Builder;
6777 int32_t DefaultNT = -1;
6778 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6779 if (NumTeams != nullptr) {
6780 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6781
6782 switch (DirectiveKind) {
6783 case OMPD_target: {
6784 const auto *CS = D.getInnermostCapturedStmt();
6785 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6786 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6787 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6788 /*IgnoreResultAssign*/ true);
6789 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6790 /*isSigned=*/true);
6791 }
6792 case OMPD_target_teams:
6793 case OMPD_target_teams_distribute:
6794 case OMPD_target_teams_distribute_simd:
6795 case OMPD_target_teams_distribute_parallel_for:
6796 case OMPD_target_teams_distribute_parallel_for_simd: {
6797 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6798 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6799 /*IgnoreResultAssign*/ true);
6800 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6801 /*isSigned=*/true);
6802 }
6803 default:
6804 break;
6805 }
6806 } else if (DefaultNT == -1) {
6807 return nullptr;
6808 }
6809
6810 return Bld.getInt32(DefaultNT);
6811}
6812
6813static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6814 llvm::Value *DefaultThreadLimitVal) {
6815 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6816 CGF.getContext(), CS->getCapturedStmt());
6817 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6818 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6819 llvm::Value *NumThreads = nullptr;
6820 llvm::Value *CondVal = nullptr;
6821 // Handle if clause. If if clause present, the number of threads is
6822 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6823 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6824 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6825 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6826 const OMPIfClause *IfClause = nullptr;
6827 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6828 if (C->getNameModifier() == OMPD_unknown ||
6829 C->getNameModifier() == OMPD_parallel) {
6830 IfClause = C;
6831 break;
6832 }
6833 }
6834 if (IfClause) {
6835 const Expr *Cond = IfClause->getCondition();
6836 bool Result;
6837 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6838 if (!Result)
6839 return CGF.Builder.getInt32(1);
6840 } else {
6841 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6842 if (const auto *PreInit =
6843 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6844 for (const auto *I : PreInit->decls()) {
6845 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6846 CGF.EmitVarDecl(cast<VarDecl>(*I));
6847 } else {
6848 CodeGenFunction::AutoVarEmission Emission =
6849 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6850 CGF.EmitAutoVarCleanups(Emission);
6851 }
6852 }
6853 }
6854 CondVal = CGF.EvaluateExprAsBool(Cond);
6855 }
6856 }
6857 }
6858 // Check the value of num_threads clause iff if clause was not specified
6859 // or is not evaluated to false.
6860 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6861 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6862 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6863 const auto *NumThreadsClause =
6864 Dir->getSingleClause<OMPNumThreadsClause>();
6865 CodeGenFunction::LexicalScope Scope(
6866 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6867 if (const auto *PreInit =
6868 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6869 for (const auto *I : PreInit->decls()) {
6870 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6871 CGF.EmitVarDecl(cast<VarDecl>(*I));
6872 } else {
6873 CodeGenFunction::AutoVarEmission Emission =
6874 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6875 CGF.EmitAutoVarCleanups(Emission);
6876 }
6877 }
6878 }
6879 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6880 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6881 /*isSigned=*/false);
6882 if (DefaultThreadLimitVal)
6883 NumThreads = CGF.Builder.CreateSelect(
6884 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6885 DefaultThreadLimitVal, NumThreads);
6886 } else {
6887 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6888 : CGF.Builder.getInt32(0);
6889 }
6890 // Process condition of the if clause.
6891 if (CondVal) {
6892 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6893 CGF.Builder.getInt32(1));
6894 }
6895 return NumThreads;
6896 }
6897 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6898 return CGF.Builder.getInt32(1);
6899 return DefaultThreadLimitVal;
6900 }
6901 return DefaultThreadLimitVal ? DefaultThreadLimitVal
6902 : CGF.Builder.getInt32(0);
6903}
6904
6905const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6906 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6907 int32_t &DefaultVal) {
6908 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6909 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6910, __extension__
__PRETTY_FUNCTION__))
6910 "Expected target-based executable directive.")(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6910, __extension__
__PRETTY_FUNCTION__))
;
6911
6912 switch (DirectiveKind) {
6913 case OMPD_target:
6914 // Teams have no clause thread_limit
6915 return nullptr;
6916 case OMPD_target_teams:
6917 case OMPD_target_teams_distribute:
6918 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6919 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6920 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6921 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6922 if (auto Constant =
6923 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6924 DefaultVal = Constant->getExtValue();
6925 return ThreadLimit;
6926 }
6927 return nullptr;
6928 case OMPD_target_parallel:
6929 case OMPD_target_parallel_for:
6930 case OMPD_target_parallel_for_simd:
6931 case OMPD_target_teams_distribute_parallel_for:
6932 case OMPD_target_teams_distribute_parallel_for_simd: {
6933 Expr *ThreadLimit = nullptr;
6934 Expr *NumThreads = nullptr;
6935 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6936 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6937 ThreadLimit = ThreadLimitClause->getThreadLimit();
6938 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6939 if (auto Constant =
6940 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6941 DefaultVal = Constant->getExtValue();
6942 }
6943 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6944 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6945 NumThreads = NumThreadsClause->getNumThreads();
6946 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6947 if (auto Constant =
6948 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6949 if (Constant->getExtValue() < DefaultVal) {
6950 DefaultVal = Constant->getExtValue();
6951 ThreadLimit = NumThreads;
6952 }
6953 }
6954 }
6955 }
6956 return ThreadLimit;
6957 }
6958 case OMPD_target_teams_distribute_simd:
6959 case OMPD_target_simd:
6960 DefaultVal = 1;
6961 return nullptr;
6962 case OMPD_parallel:
6963 case OMPD_for:
6964 case OMPD_parallel_for:
6965 case OMPD_parallel_master:
6966 case OMPD_parallel_sections:
6967 case OMPD_for_simd:
6968 case OMPD_parallel_for_simd:
6969 case OMPD_cancel:
6970 case OMPD_cancellation_point:
6971 case OMPD_ordered:
6972 case OMPD_threadprivate:
6973 case OMPD_allocate:
6974 case OMPD_task:
6975 case OMPD_simd:
6976 case OMPD_tile:
6977 case OMPD_unroll:
6978 case OMPD_sections:
6979 case OMPD_section:
6980 case OMPD_single:
6981 case OMPD_master:
6982 case OMPD_critical:
6983 case OMPD_taskyield:
6984 case OMPD_barrier:
6985 case OMPD_taskwait:
6986 case OMPD_taskgroup:
6987 case OMPD_atomic:
6988 case OMPD_flush:
6989 case OMPD_depobj:
6990 case OMPD_scan:
6991 case OMPD_teams:
6992 case OMPD_target_data:
6993 case OMPD_target_exit_data:
6994 case OMPD_target_enter_data:
6995 case OMPD_distribute:
6996 case OMPD_distribute_simd:
6997 case OMPD_distribute_parallel_for:
6998 case OMPD_distribute_parallel_for_simd:
6999 case OMPD_teams_distribute:
7000 case OMPD_teams_distribute_simd:
7001 case OMPD_teams_distribute_parallel_for:
7002 case OMPD_teams_distribute_parallel_for_simd:
7003 case OMPD_target_update:
7004 case OMPD_declare_simd:
7005 case OMPD_declare_variant:
7006 case OMPD_begin_declare_variant:
7007 case OMPD_end_declare_variant:
7008 case OMPD_declare_target:
7009 case OMPD_end_declare_target:
7010 case OMPD_declare_reduction:
7011 case OMPD_declare_mapper:
7012 case OMPD_taskloop:
7013 case OMPD_taskloop_simd:
7014 case OMPD_master_taskloop:
7015 case OMPD_master_taskloop_simd:
7016 case OMPD_parallel_master_taskloop:
7017 case OMPD_parallel_master_taskloop_simd:
7018 case OMPD_requires:
7019 case OMPD_unknown:
7020 break;
7021 default:
7022 break;
7023 }
7024 llvm_unreachable("Unsupported directive kind.")::llvm::llvm_unreachable_internal("Unsupported directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7024)
;
7025}
7026
7027llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7028 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7029 assert(!CGF.getLangOpts().OpenMPIsDevice &&(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7031, __extension__
__PRETTY_FUNCTION__))
7030 "Clauses associated with the teams directive expected to be emitted "(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7031, __extension__
__PRETTY_FUNCTION__))
7031 "only for the host!")(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7031, __extension__
__PRETTY_FUNCTION__))
;
7032 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7033 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7034, __extension__
__PRETTY_FUNCTION__))
7034 "Expected target-based executable directive.")(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7034, __extension__
__PRETTY_FUNCTION__))
;
7035 CGBuilderTy &Bld = CGF.Builder;
7036 llvm::Value *ThreadLimitVal = nullptr;
7037 llvm::Value *NumThreadsVal = nullptr;
7038 switch (DirectiveKind) {
7039 case OMPD_target: {
7040 const CapturedStmt *CS = D.getInnermostCapturedStmt();
7041 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7042 return NumThreads;
7043 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7044 CGF.getContext(), CS->getCapturedStmt());
7045 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7046 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7047 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7048 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7049 const auto *ThreadLimitClause =
7050 Dir->getSingleClause<OMPThreadLimitClause>();
7051 CodeGenFunction::LexicalScope Scope(
7052 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7053 if (const auto *PreInit =
7054 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7055 for (const auto *I : PreInit->decls()) {
7056 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7057 CGF.EmitVarDecl(cast<VarDecl>(*I));
7058 } else {
7059 CodeGenFunction::AutoVarEmission Emission =
7060 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7061 CGF.EmitAutoVarCleanups(Emission);
7062 }
7063 }
7064 }
7065 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7066 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7067 ThreadLimitVal =
7068 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7069 }
7070 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7071 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7072 CS = Dir->getInnermostCapturedStmt();
7073 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7074 CGF.getContext(), CS->getCapturedStmt());
7075 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7076 }
7077 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7078 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7079 CS = Dir->getInnermostCapturedStmt();
7080 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7081 return NumThreads;
7082 }
7083 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7084 return Bld.getInt32(1);
7085 }
7086 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7087 }
7088 case OMPD_target_teams: {
7089 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7090 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7091 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7092 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7093 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7094 ThreadLimitVal =
7095 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7096 }
7097 const CapturedStmt *CS = D.getInnermostCapturedStmt();
7098 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7099 return NumThreads;
7100 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7101 CGF.getContext(), CS->getCapturedStmt());
7102 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7103 if (Dir->getDirectiveKind() == OMPD_distribute) {
7104 CS = Dir->getInnermostCapturedStmt();
7105 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7106 return NumThreads;
7107 }
7108 }
7109 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7110 }
7111 case OMPD_target_teams_distribute:
7112 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7113 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7114 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7115 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7116 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7117 ThreadLimitVal =
7118 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7119 }
7120 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7121 case OMPD_target_parallel:
7122 case OMPD_target_parallel_for:
7123 case OMPD_target_parallel_for_simd:
7124 case OMPD_target_teams_distribute_parallel_for:
7125 case OMPD_target_teams_distribute_parallel_for_simd: {
7126 llvm::Value *CondVal = nullptr;
7127 // Handle if clause. If if clause present, the number of threads is
7128 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7129 if (D.hasClausesOfKind<OMPIfClause>()) {
7130 const OMPIfClause *IfClause = nullptr;
7131 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7132 if (C->getNameModifier() == OMPD_unknown ||
7133 C->getNameModifier() == OMPD_parallel) {
7134 IfClause = C;
7135 break;
7136 }
7137 }
7138 if (IfClause) {
7139 const Expr *Cond = IfClause->getCondition();
7140 bool Result;
7141 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7142 if (!Result)
7143 return Bld.getInt32(1);
7144 } else {
7145 CodeGenFunction::RunCleanupsScope Scope(CGF);
7146 CondVal = CGF.EvaluateExprAsBool(Cond);
7147 }
7148 }
7149 }
7150 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7151 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7152 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7153 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7154 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7155 ThreadLimitVal =
7156 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7157 }
7158 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7159 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7160 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7161 llvm::Value *NumThreads = CGF.EmitScalarExpr(
7162 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7163 NumThreadsVal =
7164 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7165 ThreadLimitVal = ThreadLimitVal
7166 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7167 ThreadLimitVal),
7168 NumThreadsVal, ThreadLimitVal)
7169 : NumThreadsVal;
7170 }
7171 if (!ThreadLimitVal)
7172 ThreadLimitVal = Bld.getInt32(0);
7173 if (CondVal)
7174 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7175 return ThreadLimitVal;
7176 }
7177 case OMPD_target_teams_distribute_simd:
7178 case OMPD_target_simd:
7179 return Bld.getInt32(1);
7180 case OMPD_parallel:
7181 case OMPD_for:
7182 case OMPD_parallel_for:
7183 case OMPD_parallel_master:
7184 case OMPD_parallel_sections:
7185 case OMPD_for_simd:
7186 case OMPD_parallel_for_simd:
7187 case OMPD_cancel:
7188 case OMPD_cancellation_point:
7189 case OMPD_ordered:
7190 case OMPD_threadprivate:
7191 case OMPD_allocate:
7192 case OMPD_task:
7193 case OMPD_simd:
7194 case OMPD_tile:
7195 case OMPD_unroll:
7196 case OMPD_sections:
7197 case OMPD_section:
7198 case OMPD_single:
7199 case OMPD_master:
7200 case OMPD_critical:
7201 case OMPD_taskyield:
7202 case OMPD_barrier:
7203 case OMPD_taskwait:
7204 case OMPD_taskgroup:
7205 case OMPD_atomic:
7206 case OMPD_flush:
7207 case OMPD_depobj:
7208 case OMPD_scan:
7209 case OMPD_teams:
7210 case OMPD_target_data:
7211 case OMPD_target_exit_data:
7212 case OMPD_target_enter_data:
7213 case OMPD_distribute:
7214 case OMPD_distribute_simd:
7215 case OMPD_distribute_parallel_for:
7216 case OMPD_distribute_parallel_for_simd:
7217 case OMPD_teams_distribute:
7218 case OMPD_teams_distribute_simd:
7219 case OMPD_teams_distribute_parallel_for:
7220 case OMPD_teams_distribute_parallel_for_simd:
7221 case OMPD_target_update:
7222 case OMPD_declare_simd:
7223 case OMPD_declare_variant:
7224 case OMPD_begin_declare_variant:
7225 case OMPD_end_declare_variant:
7226 case OMPD_declare_target:
7227 case OMPD_end_declare_target:
7228 case OMPD_declare_reduction:
7229 case OMPD_declare_mapper:
7230 case OMPD_taskloop:
7231 case OMPD_taskloop_simd:
7232 case OMPD_master_taskloop:
7233 case OMPD_master_taskloop_simd:
7234 case OMPD_parallel_master_taskloop:
7235 case OMPD_parallel_master_taskloop_simd:
7236 case OMPD_requires:
7237 case OMPD_metadirective:
7238 case OMPD_unknown:
7239 break;
7240 default:
7241 break;
7242 }
7243 llvm_unreachable("Unsupported directive kind.")::llvm::llvm_unreachable_internal("Unsupported directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7243)
;
7244}
7245
7246namespace {
7247LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()using ::llvm::BitmaskEnumDetail::operator~; using ::llvm::BitmaskEnumDetail
::operator|; using ::llvm::BitmaskEnumDetail::operator&; using
::llvm::BitmaskEnumDetail::operator^; using ::llvm::BitmaskEnumDetail
::operator|=; using ::llvm::BitmaskEnumDetail::operator&=
; using ::llvm::BitmaskEnumDetail::operator^=
;
7248
7249// Utility to handle information from clauses associated with a given
7250// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7251// It provides a convenient interface to obtain the information and generate
7252// code for that information.
7253class MappableExprsHandler {
7254public:
7255 /// Values for bit flags used to specify the mapping type for
7256 /// offloading.
7257 enum OpenMPOffloadMappingFlags : uint64_t {
7258 /// No flags
7259 OMP_MAP_NONE = 0x0,
7260 /// Allocate memory on the device and move data from host to device.
7261 OMP_MAP_TO = 0x01,
7262 /// Allocate memory on the device and move data from device to host.
7263 OMP_MAP_FROM = 0x02,
7264 /// Always perform the requested mapping action on the element, even
7265 /// if it was already mapped before.
7266 OMP_MAP_ALWAYS = 0x04,
7267 /// Delete the element from the device environment, ignoring the
7268 /// current reference count associated with the element.
7269 OMP_MAP_DELETE = 0x08,
7270 /// The element being mapped is a pointer-pointee pair; both the
7271 /// pointer and the pointee should be mapped.
7272 OMP_MAP_PTR_AND_OBJ = 0x10,
7273 /// This flags signals that the base address of an entry should be
7274 /// passed to the target kernel as an argument.
7275 OMP_MAP_TARGET_PARAM = 0x20,
7276 /// Signal that the runtime library has to return the device pointer
7277 /// in the current position for the data being mapped. Used when we have the
7278 /// use_device_ptr or use_device_addr clause.
7279 OMP_MAP_RETURN_PARAM = 0x40,
7280 /// This flag signals that the reference being passed is a pointer to
7281 /// private data.
7282 OMP_MAP_PRIVATE = 0x80,
7283 /// Pass the element to the device by value.
7284 OMP_MAP_LITERAL = 0x100,
7285 /// Implicit map
7286 OMP_MAP_IMPLICIT = 0x200,
7287 /// Close is a hint to the runtime to allocate memory close to
7288 /// the target device.
7289 OMP_MAP_CLOSE = 0x400,
7290 /// 0x800 is reserved for compatibility with XLC.
7291 /// Produce a runtime error if the data is not already allocated.
7292 OMP_MAP_PRESENT = 0x1000,
7293 // Increment and decrement a separate reference counter so that the data
7294 // cannot be unmapped within the associated region. Thus, this flag is
7295 // intended to be used on 'target' and 'target data' directives because they
7296 // are inherently structured. It is not intended to be used on 'target
7297 // enter data' and 'target exit data' directives because they are inherently
7298 // dynamic.
7299 // This is an OpenMP extension for the sake of OpenACC support.
7300 OMP_MAP_OMPX_HOLD = 0x2000,
7301 /// Signal that the runtime library should use args as an array of
7302 /// descriptor_dim pointers and use args_size as dims. Used when we have
7303 /// non-contiguous list items in target update directive
7304 OMP_MAP_NON_CONTIG = 0x100000000000,
7305 /// The 16 MSBs of the flags indicate whether the entry is member of some
7306 /// struct/class.
7307 OMP_MAP_MEMBER_OF = 0xffff000000000000,
7308 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_MAP_MEMBER_OF,
7309 };
7310
7311 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7312 static unsigned getFlagMemberOffset() {
7313 unsigned Offset = 0;
7314 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7315 Remain = Remain >> 1)
7316 Offset++;
7317 return Offset;
7318 }
7319
7320 /// Class that holds debugging information for a data mapping to be passed to
7321 /// the runtime library.
7322 class MappingExprInfo {
7323 /// The variable declaration used for the data mapping.
7324 const ValueDecl *MapDecl = nullptr;
7325 /// The original expression used in the map clause, or null if there is
7326 /// none.
7327 const Expr *MapExpr = nullptr;
7328
7329 public:
7330 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7331 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7332
7333 const ValueDecl *getMapDecl() const { return MapDecl; }
7334 const Expr *getMapExpr() const { return MapExpr; }
7335 };
7336
7337 /// Class that associates information with a base pointer to be passed to the
7338 /// runtime library.
7339 class BasePointerInfo {
7340 /// The base pointer.
7341 llvm::Value *Ptr = nullptr;
7342 /// The base declaration that refers to this device pointer, or null if
7343 /// there is none.
7344 const ValueDecl *DevPtrDecl = nullptr;
7345
7346 public:
7347 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7348 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7349 llvm::Value *operator*() const { return Ptr; }
7350 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7351 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7352 };
7353
7354 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7355 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7356 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7357 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7358 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7359 using MapDimArrayTy = SmallVector<uint64_t, 4>;
7360 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7361
7362 /// This structure contains combined information generated for mappable
7363 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7364 /// mappers, and non-contiguous information.
7365 struct MapCombinedInfoTy {
7366 struct StructNonContiguousInfo {
7367 bool IsNonContiguous = false;
7368 MapDimArrayTy Dims;
7369 MapNonContiguousArrayTy Offsets;
7370 MapNonContiguousArrayTy Counts;
7371 MapNonContiguousArrayTy Strides;
7372 };
7373 MapExprsArrayTy Exprs;
7374 MapBaseValuesArrayTy BasePointers;
7375 MapValuesArrayTy Pointers;
7376 MapValuesArrayTy Sizes;
7377 MapFlagsArrayTy Types;
7378 MapMappersArrayTy Mappers;
7379 StructNonContiguousInfo NonContigInfo;
7380
7381 /// Append arrays in \a CurInfo.
7382 void append(MapCombinedInfoTy &CurInfo) {
7383 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7384 BasePointers.append(CurInfo.BasePointers.begin(),
7385 CurInfo.BasePointers.end());
7386 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7387 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7388 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7389 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7390 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7391 CurInfo.NonContigInfo.Dims.end());
7392 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7393 CurInfo.NonContigInfo.Offsets.end());
7394 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7395 CurInfo.NonContigInfo.Counts.end());
7396 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7397 CurInfo.NonContigInfo.Strides.end());
7398 }
7399 };
7400
7401 /// Map between a struct and the its lowest & highest elements which have been
7402 /// mapped.
7403 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7404 /// HE(FieldIndex, Pointer)}
7405 struct StructRangeInfoTy {
7406 MapCombinedInfoTy PreliminaryMapData;
7407 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7408 0, Address::invalid()};
7409 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7410 0, Address::invalid()};
7411 Address Base = Address::invalid();
7412 Address LB = Address::invalid();
7413 bool IsArraySection = false;
7414 bool HasCompleteRecord = false;
7415 };
7416
7417private:
7418 /// Kind that defines how a device pointer has to be returned.
7419 struct MapInfo {
7420 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7421 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7422 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7423 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7424 bool ReturnDevicePointer = false;
7425 bool IsImplicit = false;
7426 const ValueDecl *Mapper = nullptr;
7427 const Expr *VarRef = nullptr;
7428 bool ForDeviceAddr = false;
7429
7430 MapInfo() = default;
7431 MapInfo(
7432 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7433 OpenMPMapClauseKind MapType,
7434 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7435 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7436 bool ReturnDevicePointer, bool IsImplicit,
7437 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7438 bool ForDeviceAddr = false)
7439 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7440 MotionModifiers(MotionModifiers),
7441 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7442 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7443 };
7444
7445 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7446 /// member and there is no map information about it, then emission of that
7447 /// entry is deferred until the whole struct has been processed.
7448 struct DeferredDevicePtrEntryTy {
7449 const Expr *IE = nullptr;
7450 const ValueDecl *VD = nullptr;
7451 bool ForDeviceAddr = false;
7452
7453 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7454 bool ForDeviceAddr)
7455 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7456 };
7457
7458 /// The target directive from where the mappable clauses were extracted. It
7459 /// is either a executable directive or a user-defined mapper directive.
7460 llvm::PointerUnion<const OMPExecutableDirective *,
7461 const OMPDeclareMapperDecl *>
7462 CurDir;
7463
7464 /// Function the directive is being generated for.
7465 CodeGenFunction &CGF;
7466
7467 /// Set of all first private variables in the current directive.
7468 /// bool data is set to true if the variable is implicitly marked as
7469 /// firstprivate, false otherwise.
7470 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7471
7472 /// Map between device pointer declarations and their expression components.
7473 /// The key value for declarations in 'this' is null.
7474 llvm::DenseMap<
7475 const ValueDecl *,
7476 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7477 DevPointersMap;
7478
7479 /// Map between lambda declarations and their map type.
7480 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7481
7482 llvm::Value *getExprTypeSize(const Expr *E) const {
7483 QualType ExprTy = E->getType().getCanonicalType();
7484
7485 // Calculate the size for array shaping expression.
7486 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7487 llvm::Value *Size =
7488 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7489 for (const Expr *SE : OAE->getDimensions()) {
7490 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7491 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7492 CGF.getContext().getSizeType(),
7493 SE->getExprLoc());
7494 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7495 }
7496 return Size;
7497 }
7498
7499 // Reference types are ignored for mapping purposes.
7500 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7501 ExprTy = RefTy->getPointeeType().getCanonicalType();
7502
7503 // Given that an array section is considered a built-in type, we need to
7504 // do the calculation based on the length of the section instead of relying
7505 // on CGF.getTypeSize(E->getType()).
7506 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7507 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7508 OAE->getBase()->IgnoreParenImpCasts())
7509 .getCanonicalType();
7510
7511 // If there is no length associated with the expression and lower bound is
7512 // not specified too, that means we are using the whole length of the
7513 // base.
7514 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7515 !OAE->getLowerBound())
7516 return CGF.getTypeSize(BaseTy);
7517
7518 llvm::Value *ElemSize;
7519 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7520 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7521 } else {
7522 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7523 assert(ATy && "Expecting array type if not a pointer type.")(static_cast <bool> (ATy && "Expecting array type if not a pointer type."
) ? void (0) : __assert_fail ("ATy && \"Expecting array type if not a pointer type.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7523, __extension__
__PRETTY_FUNCTION__))
;
7524 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7525 }
7526
7527 // If we don't have a length at this point, that is because we have an
7528 // array section with a single element.
7529 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7530 return ElemSize;
7531
7532 if (const Expr *LenExpr = OAE->getLength()) {
7533 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7534 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7535 CGF.getContext().getSizeType(),
7536 LenExpr->getExprLoc());
7537 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7538 }
7539 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&(static_cast <bool> (!OAE->getLength() && OAE
->getColonLocFirst().isValid() && OAE->getLowerBound
() && "expected array_section[lb:].") ? void (0) : __assert_fail
("!OAE->getLength() && OAE->getColonLocFirst().isValid() && OAE->getLowerBound() && \"expected array_section[lb:].\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7540, __extension__
__PRETTY_FUNCTION__))
7540 OAE->getLowerBound() && "expected array_section[lb:].")(static_cast <bool> (!OAE->getLength() && OAE
->getColonLocFirst().isValid() && OAE->getLowerBound
() && "expected array_section[lb:].") ? void (0) : __assert_fail
("!OAE->getLength() && OAE->getColonLocFirst().isValid() && OAE->getLowerBound() && \"expected array_section[lb:].\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7540, __extension__
__PRETTY_FUNCTION__))
;
7541 // Size = sizetype - lb * elemtype;
7542 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7543 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7544 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7545 CGF.getContext().getSizeType(),
7546 OAE->getLowerBound()->getExprLoc());
7547 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7548 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7549 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7550 LengthVal = CGF.Builder.CreateSelect(
7551 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7552 return LengthVal;
7553 }
7554 return CGF.getTypeSize(ExprTy);
7555 }
7556
7557 /// Return the corresponding bits for a given map clause modifier. Add
7558 /// a flag marking the map as a pointer if requested. Add a flag marking the
7559 /// map as the first one of a series of maps that relate to the same map
7560 /// expression.
7561 OpenMPOffloadMappingFlags getMapTypeBits(
7562 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7563 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7564 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7565 OpenMPOffloadMappingFlags Bits =
7566 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7567 switch (MapType) {
7568 case OMPC_MAP_alloc:
7569 case OMPC_MAP_release:
7570 // alloc and release is the default behavior in the runtime library, i.e.
7571 // if we don't pass any bits alloc/release that is what the runtime is
7572 // going to do. Therefore, we don't need to signal anything for these two
7573 // type modifiers.
7574 break;
7575 case OMPC_MAP_to:
7576 Bits |= OMP_MAP_TO;
7577 break;
7578 case OMPC_MAP_from:
7579 Bits |= OMP_MAP_FROM;
7580 break;
7581 case OMPC_MAP_tofrom:
7582 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7583 break;
7584 case OMPC_MAP_delete:
7585 Bits |= OMP_MAP_DELETE;
7586 break;
7587 case OMPC_MAP_unknown:
7588 llvm_unreachable("Unexpected map type!")::llvm::llvm_unreachable_internal("Unexpected map type!", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 7588)
;
7589 }
7590 if (AddPtrFlag)
7591 Bits |= OMP_MAP_PTR_AND_OBJ;
7592 if (AddIsTargetParamFlag)
7593 Bits |= OMP_MAP_TARGET_PARAM;
7594 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7595 Bits |= OMP_MAP_ALWAYS;
7596 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7597 Bits |= OMP_MAP_CLOSE;
7598 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7599 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7600 Bits |= OMP_MAP_PRESENT;
7601 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7602 Bits |= OMP_MAP_OMPX_HOLD;
7603 if (IsNonContiguous)
7604 Bits |= OMP_MAP_NON_CONTIG;
7605 return Bits;
7606 }
7607
7608 /// Return true if the provided expression is a final array section. A
7609 /// final array section, is one whose length can't be proved to be one.
7610 bool isFinalArraySectionExpression(const Expr *E) const {
7611 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7612
7613 // It is not an array section and therefore not a unity-size one.
7614 if (!OASE)
7615 return false;
7616
7617 // An array section with no colon always refer to a single element.
7618 if (OASE->getColonLocFirst().isInvalid())
7619 return false;
7620
7621 const Expr *Length = OASE->getLength();
7622
7623 // If we don't have a length we have to check if the array has size 1
7624 // for this dimension. Also, we should always expect a length if the
7625 // base type is pointer.
7626 if (!Length) {
7627 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7628 OASE->getBase()->IgnoreParenImpCasts())
7629 .getCanonicalType();
7630 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7631 return ATy->getSize().getSExtValue() != 1;
7632 // If we don't have a constant dimension length, we have to consider
7633 // the current section as having any size, so it is not necessarily
7634 // unitary. If it happen to be unity size, that's user fault.
7635 return true;
7636 }
7637
7638 // Check if the length evaluates to 1.
7639 Expr::EvalResult Result;
7640 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7641 return true; // Can have more that size 1.
7642
7643 llvm::APSInt ConstLength = Result.Val.getInt();
7644 return ConstLength.getSExtValue() != 1;
7645 }
7646
7647 /// Generate the base pointers, section pointers, sizes, map type bits, and
7648 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7649 /// map type, map or motion modifiers, and expression components.
7650 /// \a IsFirstComponent should be set to true if the provided set of
7651 /// components is the first associated with a capture.
7652 void generateInfoForComponentList(
7653 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7654 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7655 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7656 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7657 bool IsFirstComponentList, bool IsImplicit,
7658 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7659 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7660 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7661 OverlappedElements = llvm::None) const {
7662 // The following summarizes what has to be generated for each map and the
7663 // types below. The generated information is expressed in this order:
7664 // base pointer, section pointer, size, flags
7665 // (to add to the ones that come from the map type and modifier).
7666 //
7667 // double d;
7668 // int i[100];
7669 // float *p;
7670 //
7671 // struct S1 {
7672 // int i;
7673 // float f[50];
7674 // }
7675 // struct S2 {
7676 // int i;
7677 // float f[50];
7678 // S1 s;
7679 // double *p;
7680 // struct S2 *ps;
7681 // int &ref;
7682 // }
7683 // S2 s;
7684 // S2 *ps;
7685 //
7686 // map(d)
7687 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7688 //
7689 // map(i)
7690 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7691 //
7692 // map(i[1:23])
7693 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7694 //
7695 // map(p)
7696 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7697 //
7698 // map(p[1:24])
7699 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7700 // in unified shared memory mode or for local pointers
7701 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7702 //
7703 // map(s)
7704 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7705 //
7706 // map(s.i)
7707 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7708 //
7709 // map(s.s.f)
7710 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7711 //
7712 // map(s.p)
7713 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7714 //
7715 // map(to: s.p[:22])
7716 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7717 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7718 // &(s.p), &(s.p[0]), 22*sizeof(double),
7719 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7720 // (*) alloc space for struct members, only this is a target parameter
7721 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7722 // optimizes this entry out, same in the examples below)
7723 // (***) map the pointee (map: to)
7724 //
7725 // map(to: s.ref)
7726 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7727 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7728 // (*) alloc space for struct members, only this is a target parameter
7729 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7730 // optimizes this entry out, same in the examples below)
7731 // (***) map the pointee (map: to)
7732 //
7733 // map(s.ps)
7734 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7735 //
7736 // map(from: s.ps->s.i)
7737 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7738 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7739 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7740 //
7741 // map(to: s.ps->ps)
7742 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7743 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7744 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7745 //
7746 // map(s.ps->ps->ps)
7747 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7748 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7749 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7750 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7751 //
7752 // map(to: s.ps->ps->s.f[:22])
7753 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7754 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7755 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7756 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7757 //
7758 // map(ps)
7759 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7760 //
7761 // map(ps->i)
7762 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7763 //
7764 // map(ps->s.f)
7765 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7766 //
7767 // map(from: ps->p)
7768 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7769 //
7770 // map(to: ps->p[:22])
7771 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7772 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7773 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7774 //
7775 // map(ps->ps)
7776 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7777 //
7778 // map(from: ps->ps->s.i)
7779 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7780 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7781 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7782 //
7783 // map(from: ps->ps->ps)
7784 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7785 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7786 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7787 //
7788 // map(ps->ps->ps->ps)
7789 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7790 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7791 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7792 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7793 //
7794 // map(to: ps->ps->ps->s.f[:22])
7795 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7796 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7797 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7798 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7799 //
7800 // map(to: s.f[:22]) map(from: s.p[:33])
7801 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7802 // sizeof(double*) (**), TARGET_PARAM
7803 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7804 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7805 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7806 // (*) allocate contiguous space needed to fit all mapped members even if
7807 // we allocate space for members not mapped (in this example,
7808 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7809 // them as well because they fall between &s.f[0] and &s.p)
7810 //
7811 // map(from: s.f[:22]) map(to: ps->p[:33])
7812 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7813 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7814 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7815 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7816 // (*) the struct this entry pertains to is the 2nd element in the list of
7817 // arguments, hence MEMBER_OF(2)
7818 //
7819 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7820 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7821 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7822 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7823 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7824 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7825 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7826 // (*) the struct this entry pertains to is the 4th element in the list
7827 // of arguments, hence MEMBER_OF(4)
7828
7829 // Track if the map information being generated is the first for a capture.
7830 bool IsCaptureFirstInfo = IsFirstComponentList;
7831 // When the variable is on a declare target link or in a to clause with
7832 // unified memory, a reference is needed to hold the host/device address
7833 // of the variable.
7834 bool RequiresReference = false;
7835
7836 // Scan the components from the base to the complete expression.
7837 auto CI = Components.rbegin();
7838 auto CE = Components.rend();
7839 auto I = CI;
7840
7841 // Track if the map information being generated is the first for a list of
7842 // components.
7843 bool IsExpressionFirstInfo = true;
7844 bool FirstPointerInComplexData = false;
7845 Address BP = Address::invalid();
7846 const Expr *AssocExpr = I->getAssociatedExpression();
7847 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7848 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7849 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7850
7851 if (isa<MemberExpr>(AssocExpr)) {
7852 // The base is the 'this' pointer. The content of the pointer is going
7853 // to be the base of the field being mapped.
7854 BP = CGF.LoadCXXThisAddress();
7855 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7856 (OASE &&
7857 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7858 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7859 } else if (OAShE &&
7860 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7861 BP = Address(
7862 CGF.EmitScalarExpr(OAShE->getBase()),
7863 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7864 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7865 } else {
7866 // The base is the reference to the variable.
7867 // BP = &Var.
7868 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7869 if (const auto *VD =
7870 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7871 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7872 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7873 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7874 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7875 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7876 RequiresReference = true;
7877 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7878 }
7879 }
7880 }
7881
7882 // If the variable is a pointer and is being dereferenced (i.e. is not
7883 // the last component), the base has to be the pointer itself, not its
7884 // reference. References are ignored for mapping purposes.
7885 QualType Ty =
7886 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7887 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7888 // No need to generate individual map information for the pointer, it
7889 // can be associated with the combined storage if shared memory mode is
7890 // active or the base declaration is not global variable.
7891 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7892 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7893 !VD || VD->hasLocalStorage())
7894 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7895 else
7896 FirstPointerInComplexData = true;
7897 ++I;
7898 }
7899 }
7900
7901 // Track whether a component of the list should be marked as MEMBER_OF some
7902 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7903 // in a component list should be marked as MEMBER_OF, all subsequent entries
7904 // do not belong to the base struct. E.g.
7905 // struct S2 s;
7906 // s.ps->ps->ps->f[:]
7907 // (1) (2) (3) (4)
7908 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7909 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7910 // is the pointee of ps(2) which is not member of struct s, so it should not
7911 // be marked as such (it is still PTR_AND_OBJ).
7912 // The variable is initialized to false so that PTR_AND_OBJ entries which
7913 // are not struct members are not considered (e.g. array of pointers to
7914 // data).
7915 bool ShouldBeMemberOf = false;
7916
7917 // Variable keeping track of whether or not we have encountered a component
7918 // in the component list which is a member expression. Useful when we have a
7919 // pointer or a final array section, in which case it is the previous
7920 // component in the list which tells us whether we have a member expression.
7921 // E.g. X.f[:]
7922 // While processing the final array section "[:]" it is "f" which tells us
7923 // whether we are dealing with a member of a declared struct.
7924 const MemberExpr *EncounteredME = nullptr;
7925
7926 // Track for the total number of dimension. Start from one for the dummy
7927 // dimension.
7928 uint64_t DimSize = 1;
7929
7930 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7931 bool IsPrevMemberReference = false;
7932
7933 for (; I != CE; ++I) {
7934 // If the current component is member of a struct (parent struct) mark it.
7935 if (!EncounteredME) {
7936 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7937 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7938 // as MEMBER_OF the parent struct.
7939 if (EncounteredME) {
7940 ShouldBeMemberOf = true;
7941 // Do not emit as complex pointer if this is actually not array-like
7942 // expression.
7943 if (FirstPointerInComplexData) {
7944 QualType Ty = std::prev(I)
7945 ->getAssociatedDeclaration()
7946 ->getType()
7947 .getNonReferenceType();
7948 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7949 FirstPointerInComplexData = false;
7950 }
7951 }
7952 }
7953
7954 auto Next = std::next(I);
7955
7956 // We need to generate the addresses and sizes if this is the last
7957 // component, if the component is a pointer or if it is an array section
7958 // whose length can't be proved to be one. If this is a pointer, it
7959 // becomes the base address for the following components.
7960
7961 // A final array section, is one whose length can't be proved to be one.
7962 // If the map item is non-contiguous then we don't treat any array section
7963 // as final array section.
7964 bool IsFinalArraySection =
7965 !IsNonContiguous &&
7966 isFinalArraySectionExpression(I->getAssociatedExpression());
7967
7968 // If we have a declaration for the mapping use that, otherwise use
7969 // the base declaration of the map clause.
7970 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7971 ? I->getAssociatedDeclaration()
7972 : BaseDecl;
7973 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7974 : MapExpr;
7975
7976 // Get information on whether the element is a pointer. Have to do a
7977 // special treatment for array sections given that they are built-in
7978 // types.
7979 const auto *OASE =
7980 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7981 const auto *OAShE =
7982 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7983 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7984 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7985 bool IsPointer =
7986 OAShE ||
7987 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7988 .getCanonicalType()
7989 ->isAnyPointerType()) ||
7990 I->getAssociatedExpression()->getType()->isAnyPointerType();
7991 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7992 MapDecl &&
7993 MapDecl->getType()->isLValueReferenceType();
7994 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7995
7996 if (OASE)
7997 ++DimSize;
7998
7999 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8000 IsFinalArraySection) {
8001 // If this is not the last component, we expect the pointer to be
8002 // associated with an array expression or member expression.
8003 assert((Next == CE ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
8004 isa<MemberExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
8005 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
8006 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
8007 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
8008 isa<UnaryOperator>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
8009 isa<BinaryOperator>(Next->getAssociatedExpression())) &&(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
8010 "Unexpected expression")(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8010, __extension__
__PRETTY_FUNCTION__))
;
8011
8012 Address LB = Address::invalid();
8013 Address LowestElem = Address::invalid();
8014 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8015 const MemberExpr *E) {
8016 const Expr *BaseExpr = E->getBase();
8017 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8018 // scalar.
8019 LValue BaseLV;
8020 if (E->isArrow()) {
8021 LValueBaseInfo BaseInfo;
8022 TBAAAccessInfo TBAAInfo;
8023 Address Addr =
8024 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8025 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8026 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8027 } else {
8028 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8029 }
8030 return BaseLV;
8031 };
8032 if (OAShE) {
8033 LowestElem = LB =
8034 Address(CGF.EmitScalarExpr(OAShE->getBase()),
8035 CGF.ConvertTypeForMem(
8036 OAShE->getBase()->getType()->getPointeeType()),
8037 CGF.getContext().getTypeAlignInChars(
8038 OAShE->getBase()->getType()));
8039 } else if (IsMemberReference) {
8040 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8041 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8042 LowestElem = CGF.EmitLValueForFieldInitialization(
8043 BaseLVal, cast<FieldDecl>(MapDecl))
8044 .getAddress(CGF);
8045 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8046 .getAddress(CGF);
8047 } else {
8048 LowestElem = LB =
8049 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8050 .getAddress(CGF);
8051 }
8052
8053 // If this component is a pointer inside the base struct then we don't
8054 // need to create any entry for it - it will be combined with the object
8055 // it is pointing to into a single PTR_AND_OBJ entry.
8056 bool IsMemberPointerOrAddr =
8057 EncounteredME &&
8058 (((IsPointer || ForDeviceAddr) &&
8059 I->getAssociatedExpression() == EncounteredME) ||
8060 (IsPrevMemberReference && !IsPointer) ||
8061 (IsMemberReference && Next != CE &&
8062 !Next->getAssociatedExpression()->getType()->isPointerType()));
8063 if (!OverlappedElements.empty() && Next == CE) {
8064 // Handle base element with the info for overlapped elements.
8065 assert(!PartialStruct.Base.isValid() && "The base element is set.")(static_cast <bool> (!PartialStruct.Base.isValid() &&
"The base element is set.") ? void (0) : __assert_fail ("!PartialStruct.Base.isValid() && \"The base element is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8065, __extension__
__PRETTY_FUNCTION__))
;
8066 assert(!IsPointer &&(static_cast <bool> (!IsPointer && "Unexpected base element with the pointer type."
) ? void (0) : __assert_fail ("!IsPointer && \"Unexpected base element with the pointer type.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8067, __extension__
__PRETTY_FUNCTION__))
8067 "Unexpected base element with the pointer type.")(static_cast <bool> (!IsPointer && "Unexpected base element with the pointer type."
) ? void (0) : __assert_fail ("!IsPointer && \"Unexpected base element with the pointer type.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8067, __extension__
__PRETTY_FUNCTION__))
;
8068 // Mark the whole struct as the struct that requires allocation on the
8069 // device.
8070 PartialStruct.LowestElem = {0, LowestElem};
8071 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8072 I->getAssociatedExpression()->getType());
8073 Address HB = CGF.Builder.CreateConstGEP(
8074 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
8075 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
8076 TypeSize.getQuantity() - 1);
8077 PartialStruct.HighestElem = {
8078 std::numeric_limits<decltype(
8079 PartialStruct.HighestElem.first)>::max(),
8080 HB};
8081 PartialStruct.Base = BP;
8082 PartialStruct.LB = LB;
8083 assert((static_cast <bool> (PartialStruct.PreliminaryMapData.BasePointers
.empty() && "Overlapped elements must be used only once for the variable."
) ? void (0) : __assert_fail ("PartialStruct.PreliminaryMapData.BasePointers.empty() && \"Overlapped elements must be used only once for the variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8085, __extension__
__PRETTY_FUNCTION__))
8084 PartialStruct.PreliminaryMapData.BasePointers.empty() &&(static_cast <bool> (PartialStruct.PreliminaryMapData.BasePointers
.empty() && "Overlapped elements must be used only once for the variable."
) ? void (0) : __assert_fail ("PartialStruct.PreliminaryMapData.BasePointers.empty() && \"Overlapped elements must be used only once for the variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8085, __extension__
__PRETTY_FUNCTION__))
8085 "Overlapped elements must be used only once for the variable.")(static_cast <bool> (PartialStruct.PreliminaryMapData.BasePointers
.empty() && "Overlapped elements must be used only once for the variable."
) ? void (0) : __assert_fail ("PartialStruct.PreliminaryMapData.BasePointers.empty() && \"Overlapped elements must be used only once for the variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8085, __extension__
__PRETTY_FUNCTION__))
;
8086 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8087 // Emit data for non-overlapped data.
8088 OpenMPOffloadMappingFlags Flags =
8089 OMP_MAP_MEMBER_OF |
8090 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8091 /*AddPtrFlag=*/false,
8092 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8093 llvm::Value *Size = nullptr;
8094 // Do bitcopy of all non-overlapped structure elements.
8095 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8096 Component : OverlappedElements) {
8097 Address ComponentLB = Address::invalid();
8098 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8099 Component) {
8100 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8101 const auto *FD = dyn_cast<FieldDecl>(VD);
8102 if (FD && FD->getType()->isLValueReferenceType()) {
8103 const auto *ME =
8104 cast<MemberExpr>(MC.getAssociatedExpression());
8105 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8106 ComponentLB =
8107 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8108 .getAddress(CGF);
8109 } else {
8110 ComponentLB =
8111 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8112 .getAddress(CGF);
8113 }
8114 Size = CGF.Builder.CreatePtrDiff(
8115 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8116 CGF.EmitCastToVoidPtr(LB.getPointer()));
8117 break;
8118 }
8119 }
8120 assert(Size && "Failed to determine structure size")(static_cast <bool> (Size && "Failed to determine structure size"
) ? void (0) : __assert_fail ("Size && \"Failed to determine structure size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8120, __extension__
__PRETTY_FUNCTION__))
;
8121 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8122 CombinedInfo.BasePointers.push_back(BP.getPointer());
8123 CombinedInfo.Pointers.push_back(LB.getPointer());
8124 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8125 Size, CGF.Int64Ty, /*isSigned=*/true));
8126 CombinedInfo.Types.push_back(Flags);
8127 CombinedInfo.Mappers.push_back(nullptr);
8128 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8129 : 1);
8130 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8131 }
8132 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8133 CombinedInfo.BasePointers.push_back(BP.getPointer());
8134 CombinedInfo.Pointers.push_back(LB.getPointer());
8135 Size = CGF.Builder.CreatePtrDiff(
8136 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8137 CGF.EmitCastToVoidPtr(LB.getPointer()));
8138 CombinedInfo.Sizes.push_back(
8139 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8140 CombinedInfo.Types.push_back(Flags);
8141 CombinedInfo.Mappers.push_back(nullptr);
8142 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8143 : 1);
8144 break;
8145 }
8146 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8147 if (!IsMemberPointerOrAddr ||
8148 (Next == CE && MapType != OMPC_MAP_unknown)) {
8149 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8150 CombinedInfo.BasePointers.push_back(BP.getPointer());
8151 CombinedInfo.Pointers.push_back(LB.getPointer());
8152 CombinedInfo.Sizes.push_back(
8153 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8154 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8155 : 1);
8156
8157 // If Mapper is valid, the last component inherits the mapper.
8158 bool HasMapper = Mapper && Next == CE;
8159 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8160
8161 // We need to add a pointer flag for each map that comes from the
8162 // same expression except for the first one. We also need to signal
8163 // this map is the first one that relates with the current capture
8164 // (there is a set of entries for each capture).
8165 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8166 MapType, MapModifiers, MotionModifiers, IsImplicit,
8167 !IsExpressionFirstInfo || RequiresReference ||
8168 FirstPointerInComplexData || IsMemberReference,
8169 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8170
8171 if (!IsExpressionFirstInfo || IsMemberReference) {
8172 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8173 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8174 if (IsPointer || (IsMemberReference && Next != CE))
8175 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8176 OMP_MAP_DELETE | OMP_MAP_CLOSE);
8177
8178 if (ShouldBeMemberOf) {
8179 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8180 // should be later updated with the correct value of MEMBER_OF.
8181 Flags |= OMP_MAP_MEMBER_OF;
8182 // From now on, all subsequent PTR_AND_OBJ entries should not be
8183 // marked as MEMBER_OF.
8184 ShouldBeMemberOf = false;
8185 }
8186 }
8187
8188 CombinedInfo.Types.push_back(Flags);
8189 }
8190
8191 // If we have encountered a member expression so far, keep track of the
8192 // mapped member. If the parent is "*this", then the value declaration
8193 // is nullptr.
8194 if (EncounteredME) {
8195 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8196 unsigned FieldIndex = FD->getFieldIndex();
8197
8198 // Update info about the lowest and highest elements for this struct
8199 if (!PartialStruct.Base.isValid()) {
8200 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8201 if (IsFinalArraySection) {
8202 Address HB =
8203 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8204 .getAddress(CGF);
8205 PartialStruct.HighestElem = {FieldIndex, HB};
8206 } else {
8207 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8208 }
8209 PartialStruct.Base = BP;
8210 PartialStruct.LB = BP;
8211 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8212 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8213 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8214 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8215 }
8216 }
8217
8218 // Need to emit combined struct for array sections.
8219 if (IsFinalArraySection || IsNonContiguous)
8220 PartialStruct.IsArraySection = true;
8221
8222 // If we have a final array section, we are done with this expression.
8223 if (IsFinalArraySection)
8224 break;
8225
8226 // The pointer becomes the base for the next element.
8227 if (Next != CE)
8228 BP = IsMemberReference ? LowestElem : LB;
8229
8230 IsExpressionFirstInfo = false;
8231 IsCaptureFirstInfo = false;
8232 FirstPointerInComplexData = false;
8233 IsPrevMemberReference = IsMemberReference;
8234 } else if (FirstPointerInComplexData) {
8235 QualType Ty = Components.rbegin()
8236 ->getAssociatedDeclaration()
8237 ->getType()
8238 .getNonReferenceType();
8239 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8240 FirstPointerInComplexData = false;
8241 }
8242 }
8243 // If ran into the whole component - allocate the space for the whole
8244 // record.
8245 if (!EncounteredME)
8246 PartialStruct.HasCompleteRecord = true;
8247
8248 if (!IsNonContiguous)
8249 return;
8250
8251 const ASTContext &Context = CGF.getContext();
8252
8253 // For supporting stride in array section, we need to initialize the first
8254 // dimension size as 1, first offset as 0, and first count as 1
8255 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8256 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8257 MapValuesArrayTy CurStrides;
8258 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8259 uint64_t ElementTypeSize;
8260
8261 // Collect Size information for each dimension and get the element size as
8262 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8263 // should be [10, 10] and the first stride is 4 btyes.
8264 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8265 Components) {
8266 const Expr *AssocExpr = Component.getAssociatedExpression();
8267 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8268
8269 if (!OASE)
8270 continue;
8271
8272 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8273 auto *CAT = Context.getAsConstantArrayType(Ty);
8274 auto *VAT = Context.getAsVariableArrayType(Ty);
8275
8276 // We need all the dimension size except for the last dimension.
8277 assert((VAT || CAT || &Component == &*Components.begin()) &&(static_cast <bool> ((VAT || CAT || &Component == &
*Components.begin()) && "Should be either ConstantArray or VariableArray if not the "
"first Component") ? void (0) : __assert_fail ("(VAT || CAT || &Component == &*Components.begin()) && \"Should be either ConstantArray or VariableArray if not the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8279, __extension__
__PRETTY_FUNCTION__))
8278 "Should be either ConstantArray or VariableArray if not the "(static_cast <bool> ((VAT || CAT || &Component == &
*Components.begin()) && "Should be either ConstantArray or VariableArray if not the "
"first Component") ? void (0) : __assert_fail ("(VAT || CAT || &Component == &*Components.begin()) && \"Should be either ConstantArray or VariableArray if not the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8279, __extension__
__PRETTY_FUNCTION__))
8279 "first Component")(static_cast <bool> ((VAT || CAT || &Component == &
*Components.begin()) && "Should be either ConstantArray or VariableArray if not the "
"first Component") ? void (0) : __assert_fail ("(VAT || CAT || &Component == &*Components.begin()) && \"Should be either ConstantArray or VariableArray if not the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8279, __extension__
__PRETTY_FUNCTION__))
;
8280
8281 // Get element size if CurStrides is empty.
8282 if (CurStrides.empty()) {
8283 const Type *ElementType = nullptr;
8284 if (CAT)
8285 ElementType = CAT->getElementType().getTypePtr();
8286 else if (VAT)
8287 ElementType = VAT->getElementType().getTypePtr();
8288 else
8289 assert(&Component == &*Components.begin() &&(static_cast <bool> (&Component == &*Components
.begin() && "Only expect pointer (non CAT or VAT) when this is the "
"first Component") ? void (0) : __assert_fail ("&Component == &*Components.begin() && \"Only expect pointer (non CAT or VAT) when this is the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8291, __extension__
__PRETTY_FUNCTION__))
8290 "Only expect pointer (non CAT or VAT) when this is the "(static_cast <bool> (&Component == &*Components
.begin() && "Only expect pointer (non CAT or VAT) when this is the "
"first Component") ? void (0) : __assert_fail ("&Component == &*Components.begin() && \"Only expect pointer (non CAT or VAT) when this is the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8291, __extension__
__PRETTY_FUNCTION__))
8291 "first Component")(static_cast <bool> (&Component == &*Components
.begin() && "Only expect pointer (non CAT or VAT) when this is the "
"first Component") ? void (0) : __assert_fail ("&Component == &*Components.begin() && \"Only expect pointer (non CAT or VAT) when this is the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8291, __extension__
__PRETTY_FUNCTION__))
;
8292 // If ElementType is null, then it means the base is a pointer
8293 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8294 // for next iteration.
8295 if (ElementType) {
8296 // For the case that having pointer as base, we need to remove one
8297 // level of indirection.
8298 if (&Component != &*Components.begin())
8299 ElementType = ElementType->getPointeeOrArrayElementType();
8300 ElementTypeSize =
8301 Context.getTypeSizeInChars(ElementType).getQuantity();
8302 CurStrides.push_back(
8303 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8304 }
8305 }
8306 // Get dimension value except for the last dimension since we don't need
8307 // it.
8308 if (DimSizes.size() < Components.size() - 1) {
8309 if (CAT)
8310 DimSizes.push_back(llvm::ConstantInt::get(
8311 CGF.Int64Ty, CAT->getSize().getZExtValue()));
8312 else if (VAT)
8313 DimSizes.push_back(CGF.Builder.CreateIntCast(
8314 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8315 /*IsSigned=*/false));
8316 }
8317 }
8318
8319 // Skip the dummy dimension since we have already have its information.
8320 auto *DI = DimSizes.begin() + 1;
8321 // Product of dimension.
8322 llvm::Value *DimProd =
8323 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8324
8325 // Collect info for non-contiguous. Notice that offset, count, and stride
8326 // are only meaningful for array-section, so we insert a null for anything
8327 // other than array-section.
8328 // Also, the size of offset, count, and stride are not the same as
8329 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8330 // count, and stride are the same as the number of non-contiguous
8331 // declaration in target update to/from clause.
8332 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8333 Components) {
8334 const Expr *AssocExpr = Component.getAssociatedExpression();
8335
8336 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8337 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8338 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8339 /*isSigned=*/false);
8340 CurOffsets.push_back(Offset);
8341 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8342 CurStrides.push_back(CurStrides.back());
8343 continue;
8344 }
8345
8346 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8347
8348 if (!OASE)
8349 continue;
8350
8351 // Offset
8352 const Expr *OffsetExpr = OASE->getLowerBound();
8353 llvm::Value *Offset = nullptr;
8354 if (!OffsetExpr) {
8355 // If offset is absent, then we just set it to zero.
8356 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8357 } else {
8358 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8359 CGF.Int64Ty,
8360 /*isSigned=*/false);
8361 }
8362 CurOffsets.push_back(Offset);
8363
8364 // Count
8365 const Expr *CountExpr = OASE->getLength();
8366 llvm::Value *Count = nullptr;
8367 if (!CountExpr) {
8368 // In Clang, once a high dimension is an array section, we construct all
8369 // the lower dimension as array section, however, for case like
8370 // arr[0:2][2], Clang construct the inner dimension as an array section
8371 // but it actually is not in an array section form according to spec.
8372 if (!OASE->getColonLocFirst().isValid() &&
8373 !OASE->getColonLocSecond().isValid()) {
8374 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8375 } else {
8376 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8377 // When the length is absent it defaults to ⌈(size −
8378 // lower-bound)/stride⌉, where size is the size of the array
8379 // dimension.
8380 const Expr *StrideExpr = OASE->getStride();
8381 llvm::Value *Stride =
8382 StrideExpr
8383 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8384 CGF.Int64Ty, /*isSigned=*/false)
8385 : nullptr;
8386 if (Stride)
8387 Count = CGF.Builder.CreateUDiv(
8388 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8389 else
8390 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8391 }
8392 } else {
8393 Count = CGF.EmitScalarExpr(CountExpr);
8394 }
8395 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8396 CurCounts.push_back(Count);
8397
8398 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8399 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8400 // Offset Count Stride
8401 // D0 0 1 4 (int) <- dummy dimension
8402 // D1 0 2 8 (2 * (1) * 4)
8403 // D2 1 2 20 (1 * (1 * 5) * 4)
8404 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8405 const Expr *StrideExpr = OASE->getStride();
8406 llvm::Value *Stride =
8407 StrideExpr
8408 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8409 CGF.Int64Ty, /*isSigned=*/false)
8410 : nullptr;
8411 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8412 if (Stride)
8413 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8414 else
8415 CurStrides.push_back(DimProd);
8416 if (DI != DimSizes.end())
8417 ++DI;
8418 }
8419
8420 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8421 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8422 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8423 }
8424
8425 /// Return the adjusted map modifiers if the declaration a capture refers to
8426 /// appears in a first-private clause. This is expected to be used only with
8427 /// directives that start with 'target'.
8428 MappableExprsHandler::OpenMPOffloadMappingFlags
8429 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8430 assert(Cap.capturesVariable() && "Expected capture by reference only!")(static_cast <bool> (Cap.capturesVariable() && "Expected capture by reference only!"
) ? void (0) : __assert_fail ("Cap.capturesVariable() && \"Expected capture by reference only!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8430, __extension__
__PRETTY_FUNCTION__))
;
8431
8432 // A first private variable captured by reference will use only the
8433 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8434 // declaration is known as first-private in this handler.
8435 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8436 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8437 return MappableExprsHandler::OMP_MAP_TO |
8438 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8439 return MappableExprsHandler::OMP_MAP_PRIVATE |
8440 MappableExprsHandler::OMP_MAP_TO;
8441 }
8442 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8443 if (I != LambdasMap.end())
8444 // for map(to: lambda): using user specified map type.
8445 return getMapTypeBits(
8446 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8447 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8448 /*AddPtrFlag=*/false,
8449 /*AddIsTargetParamFlag=*/false,
8450 /*isNonContiguous=*/false);
8451 return MappableExprsHandler::OMP_MAP_TO |
8452 MappableExprsHandler::OMP_MAP_FROM;
8453 }
8454
8455 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8456 // Rotate by getFlagMemberOffset() bits.
8457 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8458 << getFlagMemberOffset());
8459 }
8460
8461 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8462 OpenMPOffloadMappingFlags MemberOfFlag) {
8463 // If the entry is PTR_AND_OBJ but has not been marked with the special
8464 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8465 // marked as MEMBER_OF.
8466 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8467 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8468 return;
8469
8470 // Reset the placeholder value to prepare the flag for the assignment of the
8471 // proper MEMBER_OF value.
8472 Flags &= ~OMP_MAP_MEMBER_OF;
8473 Flags |= MemberOfFlag;
8474 }
8475
8476 void getPlainLayout(const CXXRecordDecl *RD,
8477 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8478 bool AsBase) const {
8479 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8480
8481 llvm::StructType *St =
8482 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8483
8484 unsigned NumElements = St->getNumElements();
8485 llvm::SmallVector<
8486 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8487 RecordLayout(NumElements);
8488
8489 // Fill bases.
8490 for (const auto &I : RD->bases()) {
8491 if (I.isVirtual())
8492 continue;
8493 const auto *Base = I.getType()->getAsCXXRecordDecl();
8494 // Ignore empty bases.
8495 if (Base->isEmpty() || CGF.getContext()
8496 .getASTRecordLayout(Base)
8497 .getNonVirtualSize()
8498 .isZero())
8499 continue;
8500
8501 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8502 RecordLayout[FieldIndex] = Base;
8503 }
8504 // Fill in virtual bases.
8505 for (const auto &I : RD->vbases()) {
8506 const auto *Base = I.getType()->getAsCXXRecordDecl();
8507 // Ignore empty bases.
8508 if (Base->isEmpty())
8509 continue;
8510 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8511 if (RecordLayout[FieldIndex])
8512 continue;
8513 RecordLayout[FieldIndex] = Base;
8514 }
8515 // Fill in all the fields.
8516 assert(!RD->isUnion() && "Unexpected union.")(static_cast <bool> (!RD->isUnion() && "Unexpected union."
) ? void (0) : __assert_fail ("!RD->isUnion() && \"Unexpected union.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8516, __extension__
__PRETTY_FUNCTION__))
;
8517 for (const auto *Field : RD->fields()) {
8518 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8519 // will fill in later.)
8520 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8521 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8522 RecordLayout[FieldIndex] = Field;
8523 }
8524 }
8525 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8526 &Data : RecordLayout) {
8527 if (Data.isNull())
8528 continue;
8529 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8530 getPlainLayout(Base, Layout, /*AsBase=*/true);
8531 else
8532 Layout.push_back(Data.get<const FieldDecl *>());
8533 }
8534 }
8535
8536 /// Generate all the base pointers, section pointers, sizes, map types, and
8537 /// mappers for the extracted mappable expressions (all included in \a
8538 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8539 /// pair of the relevant declaration and index where it occurs is appended to
8540 /// the device pointers info array.
8541 void generateAllInfoForClauses(
8542 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8543 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8544 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8545 // We have to process the component lists that relate with the same
8546 // declaration in a single chunk so that we can generate the map flags
8547 // correctly. Therefore, we organize all lists in a map.
8548 enum MapKind { Present, Allocs, Other, Total };
8549 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8550 SmallVector<SmallVector<MapInfo, 8>, 4>>
8551 Info;
8552
8553 // Helper function to fill the information map for the different supported
8554 // clauses.
8555 auto &&InfoGen =
8556 [&Info, &SkipVarSet](
8557 const ValueDecl *D, MapKind Kind,
8558 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8559 OpenMPMapClauseKind MapType,
8560 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8561 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8562 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8563 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8564 if (SkipVarSet.contains(D))
8565 return;
8566 auto It = Info.find(D);
8567 if (It == Info.end())
8568 It = Info
8569 .insert(std::make_pair(
8570 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8571 .first;
8572 It->second[Kind].emplace_back(
8573 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8574 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8575 };
8576
8577 for (const auto *Cl : Clauses) {
8578 const auto *C = dyn_cast<OMPMapClause>(Cl);
8579 if (!C)
8580 continue;
8581 MapKind Kind = Other;
8582 if (llvm::is_contained(C->getMapTypeModifiers(),
8583 OMPC_MAP_MODIFIER_present))
8584 Kind = Present;
8585 else if (C->getMapType() == OMPC_MAP_alloc)
8586 Kind = Allocs;
8587 const auto *EI = C->getVarRefs().begin();
8588 for (const auto L : C->component_lists()) {
8589 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8590 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8591 C->getMapTypeModifiers(), llvm::None,
8592 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8593 E);
8594 ++EI;
8595 }
8596 }
8597 for (const auto *Cl : Clauses) {
8598 const auto *C = dyn_cast<OMPToClause>(Cl);
8599 if (!C)
8600 continue;
8601 MapKind Kind = Other;
8602 if (llvm::is_contained(C->getMotionModifiers(),
8603 OMPC_MOTION_MODIFIER_present))
8604 Kind = Present;
8605 const auto *EI = C->getVarRefs().begin();
8606 for (const auto L : C->component_lists()) {
8607 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8608 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8609 C->isImplicit(), std::get<2>(L), *EI);
8610 ++EI;
8611 }
8612 }
8613 for (const auto *Cl : Clauses) {
8614 const auto *C = dyn_cast<OMPFromClause>(Cl);
8615 if (!C)
8616 continue;
8617 MapKind Kind = Other;
8618 if (llvm::is_contained(C->getMotionModifiers(),
8619 OMPC_MOTION_MODIFIER_present))
8620 Kind = Present;
8621 const auto *EI = C->getVarRefs().begin();
8622 for (const auto L : C->component_lists()) {
8623 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8624 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8625 C->isImplicit(), std::get<2>(L), *EI);
8626 ++EI;
8627 }
8628 }
8629
8630 // Look at the use_device_ptr clause information and mark the existing map
8631 // entries as such. If there is no map information for an entry in the
8632 // use_device_ptr list, we create one with map type 'alloc' and zero size
8633 // section. It is the user fault if that was not mapped before. If there is
8634 // no map information and the pointer is a struct member, then we defer the
8635 // emission of that entry until the whole struct has been processed.
8636 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8637 SmallVector<DeferredDevicePtrEntryTy, 4>>
8638 DeferredInfo;
8639 MapCombinedInfoTy UseDevicePtrCombinedInfo;
8640
8641 for (const auto *Cl : Clauses) {
8642 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8643 if (!C)
8644 continue;
8645 for (const auto L : C->component_lists()) {
8646 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8647 std::get<1>(L);
8648 assert(!Components.empty() &&(static_cast <bool> (!Components.empty() && "Not expecting empty list of components!"
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8649, __extension__
__PRETTY_FUNCTION__))
8649 "Not expecting empty list of components!")(static_cast <bool> (!Components.empty() && "Not expecting empty list of components!"
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8649, __extension__
__PRETTY_FUNCTION__))
;
8650 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8651 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8652 const Expr *IE = Components.back().getAssociatedExpression();
8653 // If the first component is a member expression, we have to look into
8654 // 'this', which maps to null in the map of map information. Otherwise
8655 // look directly for the information.
8656 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8657
8658 // We potentially have map information for this declaration already.
8659 // Look for the first set of components that refer to it.
8660 if (It != Info.end()) {
8661 bool Found = false;
8662 for (auto &Data : It->second) {
8663 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8664 return MI.Components.back().getAssociatedDeclaration() == VD;
8665 });
8666 // If we found a map entry, signal that the pointer has to be
8667 // returned and move on to the next declaration. Exclude cases where
8668 // the base pointer is mapped as array subscript, array section or
8669 // array shaping. The base address is passed as a pointer to base in
8670 // this case and cannot be used as a base for use_device_ptr list
8671 // item.
8672 if (CI != Data.end()) {
8673 auto PrevCI = std::next(CI->Components.rbegin());
8674 const auto *VarD = dyn_cast<VarDecl>(VD);
8675 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8676 isa<MemberExpr>(IE) ||
8677 !VD->getType().getNonReferenceType()->isPointerType() ||
8678 PrevCI == CI->Components.rend() ||
8679 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8680 VarD->hasLocalStorage()) {
8681 CI->ReturnDevicePointer = true;
8682 Found = true;
8683 break;
8684 }
8685 }
8686 }
8687 if (Found)
8688 continue;
8689 }
8690
8691 // We didn't find any match in our map information - generate a zero
8692 // size array section - if the pointer is a struct member we defer this
8693 // action until the whole struct has been processed.
8694 if (isa<MemberExpr>(IE)) {
8695 // Insert the pointer into Info to be processed by
8696 // generateInfoForComponentList. Because it is a member pointer
8697 // without a pointee, no entry will be generated for it, therefore
8698 // we need to generate one after the whole struct has been processed.
8699 // Nonetheless, generateInfoForComponentList must be called to take
8700 // the pointer into account for the calculation of the range of the
8701 // partial struct.
8702 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8703 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8704 nullptr);
8705 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8706 } else {
8707 llvm::Value *Ptr =
8708 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8709 UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8710 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8711 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8712 UseDevicePtrCombinedInfo.Sizes.push_back(
8713 llvm::Constant::getNullValue(CGF.Int64Ty));
8714 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8715 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8716 }
8717 }
8718 }
8719
8720 // Look at the use_device_addr clause information and mark the existing map
8721 // entries as such. If there is no map information for an entry in the
8722 // use_device_addr list, we create one with map type 'alloc' and zero size
8723 // section. It is the user fault if that was not mapped before. If there is
8724 // no map information and the pointer is a struct member, then we defer the
8725 // emission of that entry until the whole struct has been processed.
8726 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8727 for (const auto *Cl : Clauses) {
8728 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8729 if (!C)
8730 continue;
8731 for (const auto L : C->component_lists()) {
8732 assert(!std::get<1>(L).empty() &&(static_cast <bool> (!std::get<1>(L).empty() &&
"Not expecting empty list of components!") ? void (0) : __assert_fail
("!std::get<1>(L).empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8733, __extension__
__PRETTY_FUNCTION__))
8733 "Not expecting empty list of components!")(static_cast <bool> (!std::get<1>(L).empty() &&
"Not expecting empty list of components!") ? void (0) : __assert_fail
("!std::get<1>(L).empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8733, __extension__
__PRETTY_FUNCTION__))
;
8734 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8735 if (!Processed.insert(VD).second)
8736 continue;
8737 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8738 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8739 // If the first component is a member expression, we have to look into
8740 // 'this', which maps to null in the map of map information. Otherwise
8741 // look directly for the information.
8742 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8743
8744 // We potentially have map information for this declaration already.
8745 // Look for the first set of components that refer to it.
8746 if (It != Info.end()) {
8747 bool Found = false;
8748 for (auto &Data : It->second) {
8749 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8750 return MI.Components.back().getAssociatedDeclaration() == VD;
8751 });
8752 // If we found a map entry, signal that the pointer has to be
8753 // returned and move on to the next declaration.
8754 if (CI != Data.end()) {
8755 CI->ReturnDevicePointer = true;
8756 Found = true;
8757 break;
8758 }
8759 }
8760 if (Found)
8761 continue;
8762 }
8763
8764 // We didn't find any match in our map information - generate a zero
8765 // size array section - if the pointer is a struct member we defer this
8766 // action until the whole struct has been processed.
8767 if (isa<MemberExpr>(IE)) {
8768 // Insert the pointer into Info to be processed by
8769 // generateInfoForComponentList. Because it is a member pointer
8770 // without a pointee, no entry will be generated for it, therefore
8771 // we need to generate one after the whole struct has been processed.
8772 // Nonetheless, generateInfoForComponentList must be called to take
8773 // the pointer into account for the calculation of the range of the
8774 // partial struct.
8775 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8776 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8777 nullptr, nullptr, /*ForDeviceAddr=*/true);
8778 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8779 } else {
8780 llvm::Value *Ptr;
8781 if (IE->isGLValue())
8782 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8783 else
8784 Ptr = CGF.EmitScalarExpr(IE);
8785 CombinedInfo.Exprs.push_back(VD);
8786 CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8787 CombinedInfo.Pointers.push_back(Ptr);
8788 CombinedInfo.Sizes.push_back(
8789 llvm::Constant::getNullValue(CGF.Int64Ty));
8790 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8791 CombinedInfo.Mappers.push_back(nullptr);
8792 }
8793 }
8794 }
8795
8796 for (const auto &Data : Info) {
8797 StructRangeInfoTy PartialStruct;
8798 // Temporary generated information.
8799 MapCombinedInfoTy CurInfo;
8800 const Decl *D = Data.first;
8801 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8802 for (const auto &M : Data.second) {
8803 for (const MapInfo &L : M) {
8804 assert(!L.Components.empty() &&(static_cast <bool> (!L.Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!L.Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8805, __extension__
__PRETTY_FUNCTION__))
8805 "Not expecting declaration with no component lists.")(static_cast <bool> (!L.Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!L.Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8805, __extension__
__PRETTY_FUNCTION__))
;
8806
8807 // Remember the current base pointer index.
8808 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8809 CurInfo.NonContigInfo.IsNonContiguous =
8810 L.Components.back().isNonContiguous();
8811 generateInfoForComponentList(
8812 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8813 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8814 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8815
8816 // If this entry relates with a device pointer, set the relevant
8817 // declaration and add the 'return pointer' flag.
8818 if (L.ReturnDevicePointer) {
8819 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&(static_cast <bool> (CurInfo.BasePointers.size() > CurrentBasePointersIdx
&& "Unexpected number of mapped base pointers.") ? void
(0) : __assert_fail ("CurInfo.BasePointers.size() > CurrentBasePointersIdx && \"Unexpected number of mapped base pointers.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8820, __extension__
__PRETTY_FUNCTION__))
8820 "Unexpected number of mapped base pointers.")(static_cast <bool> (CurInfo.BasePointers.size() > CurrentBasePointersIdx
&& "Unexpected number of mapped base pointers.") ? void
(0) : __assert_fail ("CurInfo.BasePointers.size() > CurrentBasePointersIdx && \"Unexpected number of mapped base pointers.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8820, __extension__
__PRETTY_FUNCTION__))
;
8821
8822 const ValueDecl *RelevantVD =
8823 L.Components.back().getAssociatedDeclaration();
8824 assert(RelevantVD &&(static_cast <bool> (RelevantVD && "No relevant declaration related with device pointer??"
) ? void (0) : __assert_fail ("RelevantVD && \"No relevant declaration related with device pointer??\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8825, __extension__
__PRETTY_FUNCTION__))
8825 "No relevant declaration related with device pointer??")(static_cast <bool> (RelevantVD && "No relevant declaration related with device pointer??"
) ? void (0) : __assert_fail ("RelevantVD && \"No relevant declaration related with device pointer??\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8825, __extension__
__PRETTY_FUNCTION__))
;
8826
8827 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8828 RelevantVD);
8829 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8830 }
8831 }
8832 }
8833
8834 // Append any pending zero-length pointers which are struct members and
8835 // used with use_device_ptr or use_device_addr.
8836 auto CI = DeferredInfo.find(Data.first);
8837 if (CI != DeferredInfo.end()) {
8838 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8839 llvm::Value *BasePtr;
8840 llvm::Value *Ptr;
8841 if (L.ForDeviceAddr) {
8842 if (L.IE->isGLValue())
8843 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8844 else
8845 Ptr = this->CGF.EmitScalarExpr(L.IE);
8846 BasePtr = Ptr;
8847 // Entry is RETURN_PARAM. Also, set the placeholder value
8848 // MEMBER_OF=FFFF so that the entry is later updated with the
8849 // correct value of MEMBER_OF.
8850 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8851 } else {
8852 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8853 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8854 L.IE->getExprLoc());
8855 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8856 // placeholder value MEMBER_OF=FFFF so that the entry is later
8857 // updated with the correct value of MEMBER_OF.
8858 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8859 OMP_MAP_MEMBER_OF);
8860 }
8861 CurInfo.Exprs.push_back(L.VD);
8862 CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8863 CurInfo.Pointers.push_back(Ptr);
8864 CurInfo.Sizes.push_back(
8865 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8866 CurInfo.Mappers.push_back(nullptr);
8867 }
8868 }
8869 // If there is an entry in PartialStruct it means we have a struct with
8870 // individual members mapped. Emit an extra combined entry.
8871 if (PartialStruct.Base.isValid()) {
8872 CurInfo.NonContigInfo.Dims.push_back(0);
8873 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8874 }
8875
8876 // We need to append the results of this capture to what we already
8877 // have.
8878 CombinedInfo.append(CurInfo);
8879 }
8880 // Append data for use_device_ptr clauses.
8881 CombinedInfo.append(UseDevicePtrCombinedInfo);
8882 }
8883
8884public:
8885 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8886 : CurDir(&Dir), CGF(CGF) {
8887 // Extract firstprivate clause information.
8888 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8889 for (const auto *D : C->varlists())
8890 FirstPrivateDecls.try_emplace(
8891 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8892 // Extract implicit firstprivates from uses_allocators clauses.
8893 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8894 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8895 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8896 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8897 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8898 /*Implicit=*/true);
8899 else if (const auto *VD = dyn_cast<VarDecl>(
8900 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8901 ->getDecl()))
8902 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8903 }
8904 }
8905 // Extract device pointer clause information.
8906 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8907 for (auto L : C->component_lists())
8908 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8909 // Extract map information.
8910 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8911 if (C->getMapType() != OMPC_MAP_to)
8912 continue;
8913 for (auto L : C->component_lists()) {
8914 const ValueDecl *VD = std::get<0>(L);
8915 const auto *RD = VD ? VD->getType()
8916 .getCanonicalType()
8917 .getNonReferenceType()
8918 ->getAsCXXRecordDecl()
8919 : nullptr;
8920 if (RD && RD->isLambda())
8921 LambdasMap.try_emplace(std::get<0>(L), C);
8922 }
8923 }
8924 }
8925
8926 /// Constructor for the declare mapper directive.
8927 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8928 : CurDir(&Dir), CGF(CGF) {}
8929
8930 /// Generate code for the combined entry if we have a partially mapped struct
8931 /// and take care of the mapping flags of the arguments corresponding to
8932 /// individual struct members.
8933 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8934 MapFlagsArrayTy &CurTypes,
8935 const StructRangeInfoTy &PartialStruct,
8936 const ValueDecl *VD = nullptr,
8937 bool NotTargetParams = true) const {
8938 if (CurTypes.size() == 1 &&
8939 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8940 !PartialStruct.IsArraySection)
8941 return;
8942 Address LBAddr = PartialStruct.LowestElem.second;
8943 Address HBAddr = PartialStruct.HighestElem.second;
8944 if (PartialStruct.HasCompleteRecord) {
8945 LBAddr = PartialStruct.LB;
8946 HBAddr = PartialStruct.LB;
8947 }
8948 CombinedInfo.Exprs.push_back(VD);
8949 // Base is the base of the struct
8950 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8951 // Pointer is the address of the lowest element
8952 llvm::Value *LB = LBAddr.getPointer();
8953 CombinedInfo.Pointers.push_back(LB);
8954 // There should not be a mapper for a combined entry.
8955 CombinedInfo.Mappers.push_back(nullptr);
8956 // Size is (addr of {highest+1} element) - (addr of lowest element)
8957 llvm::Value *HB = HBAddr.getPointer();
8958 llvm::Value *HAddr =
8959 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8960 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8961 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8962 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8963 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8964 /*isSigned=*/false);
8965 CombinedInfo.Sizes.push_back(Size);
8966 // Map type is always TARGET_PARAM, if generate info for captures.
8967 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8968 : OMP_MAP_TARGET_PARAM);
8969 // If any element has the present modifier, then make sure the runtime
8970 // doesn't attempt to allocate the struct.
8971 if (CurTypes.end() !=
8972 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8973 return Type & OMP_MAP_PRESENT;
8974 }))
8975 CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8976 // Remove TARGET_PARAM flag from the first element
8977 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8978 // If any element has the ompx_hold modifier, then make sure the runtime
8979 // uses the hold reference count for the struct as a whole so that it won't
8980 // be unmapped by an extra dynamic reference count decrement. Add it to all
8981 // elements as well so the runtime knows which reference count to check
8982 // when determining whether it's time for device-to-host transfers of
8983 // individual elements.
8984 if (CurTypes.end() !=
8985 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8986 return Type & OMP_MAP_OMPX_HOLD;
8987 })) {
8988 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
8989 for (auto &M : CurTypes)
8990 M |= OMP_MAP_OMPX_HOLD;
8991 }
8992
8993 // All other current entries will be MEMBER_OF the combined entry
8994 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8995 // 0xFFFF in the MEMBER_OF field).
8996 OpenMPOffloadMappingFlags MemberOfFlag =
8997 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
8998 for (auto &M : CurTypes)
8999 setCorrectMemberOfFlag(M, MemberOfFlag);
9000 }
9001
9002 /// Generate all the base pointers, section pointers, sizes, map types, and
9003 /// mappers for the extracted mappable expressions (all included in \a
9004 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9005 /// pair of the relevant declaration and index where it occurs is appended to
9006 /// the device pointers info array.
9007 void generateAllInfo(
9008 MapCombinedInfoTy &CombinedInfo,
9009 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9010 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9011 assert(CurDir.is<const OMPExecutableDirective *>() &&(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9012, __extension__
__PRETTY_FUNCTION__))
9012 "Expect a executable directive")(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9012, __extension__
__PRETTY_FUNCTION__))
;
9013 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9014 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9015 }
9016
9017 /// Generate all the base pointers, section pointers, sizes, map types, and
9018 /// mappers for the extracted map clauses of user-defined mapper (all included
9019 /// in \a CombinedInfo).
9020 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9021 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&(static_cast <bool> (CurDir.is<const OMPDeclareMapperDecl
*>() && "Expect a declare mapper directive") ? void
(0) : __assert_fail ("CurDir.is<const OMPDeclareMapperDecl *>() && \"Expect a declare mapper directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9022, __extension__
__PRETTY_FUNCTION__))
9022 "Expect a declare mapper directive")(static_cast <bool> (CurDir.is<const OMPDeclareMapperDecl
*>() && "Expect a declare mapper directive") ? void
(0) : __assert_fail ("CurDir.is<const OMPDeclareMapperDecl *>() && \"Expect a declare mapper directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9022, __extension__
__PRETTY_FUNCTION__))
;
9023 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9024 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9025 }
9026
9027 /// Emit capture info for lambdas for variables captured by reference.
9028 void generateInfoForLambdaCaptures(
9029 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9030 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9031 QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
9032 const auto *RD = VDType->getAsCXXRecordDecl();
9033 if (!RD || !RD->isLambda())
9034 return;
9035 Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
9036 CGF.getContext().getDeclAlign(VD));
9037 LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
9038 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9039 FieldDecl *ThisCapture = nullptr;
9040 RD->getCaptureFields(Captures, ThisCapture);
9041 if (ThisCapture) {
9042 LValue ThisLVal =
9043 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9044 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9045 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9046 VDLVal.getPointer(CGF));
9047 CombinedInfo.Exprs.push_back(VD);
9048 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9049 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9050 CombinedInfo.Sizes.push_back(
9051 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9052 CGF.Int64Ty, /*isSigned=*/true));
9053 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9054 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9055 CombinedInfo.Mappers.push_back(nullptr);
9056 }
9057 for (const LambdaCapture &LC : RD->captures()) {
9058 if (!LC.capturesVariable())
9059 continue;
9060 const VarDecl *VD = LC.getCapturedVar();
9061 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9062 continue;
9063 auto It = Captures.find(VD);
9064 assert(It != Captures.end() && "Found lambda capture without field.")(static_cast <bool> (It != Captures.end() && "Found lambda capture without field."
) ? void (0) : __assert_fail ("It != Captures.end() && \"Found lambda capture without field.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9064, __extension__
__PRETTY_FUNCTION__))
;
9065 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9066 if (LC.getCaptureKind() == LCK_ByRef) {
9067 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9068 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9069 VDLVal.getPointer(CGF));
9070 CombinedInfo.Exprs.push_back(VD);
9071 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9072 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9073 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9074 CGF.getTypeSize(
9075 VD->getType().getCanonicalType().getNonReferenceType()),
9076 CGF.Int64Ty, /*isSigned=*/true));
9077 } else {
9078 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9079 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9080 VDLVal.getPointer(CGF));
9081 CombinedInfo.Exprs.push_back(VD);
9082 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9083 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9084 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9085 }
9086 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9087 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9088 CombinedInfo.Mappers.push_back(nullptr);
9089 }
9090 }
9091
9092 /// Set correct indices for lambdas captures.
9093 void adjustMemberOfForLambdaCaptures(
9094 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9095 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9096 MapFlagsArrayTy &Types) const {
9097 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9098 // Set correct member_of idx for all implicit lambda captures.
9099 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9100 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9101 continue;
9102 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9103 assert(BasePtr && "Unable to find base lambda address.")(static_cast <bool> (BasePtr && "Unable to find base lambda address."
) ? void (0) : __assert_fail ("BasePtr && \"Unable to find base lambda address.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9103, __extension__
__PRETTY_FUNCTION__))
;
9104 int TgtIdx = -1;
9105 for (unsigned J = I; J > 0; --J) {
9106 unsigned Idx = J - 1;
9107 if (Pointers[Idx] != BasePtr)
9108 continue;
9109 TgtIdx = Idx;
9110 break;
9111 }
9112 assert(TgtIdx != -1 && "Unable to find parent lambda.")(static_cast <bool> (TgtIdx != -1 && "Unable to find parent lambda."
) ? void (0) : __assert_fail ("TgtIdx != -1 && \"Unable to find parent lambda.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9112, __extension__
__PRETTY_FUNCTION__))
;
9113 // All other current entries will be MEMBER_OF the combined entry
9114 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9115 // 0xFFFF in the MEMBER_OF field).
9116 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9117 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9118 }
9119 }
9120
9121 /// Generate the base pointers, section pointers, sizes, map types, and
9122 /// mappers associated to a given capture (all included in \a CombinedInfo).
9123 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9124 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9125 StructRangeInfoTy &PartialStruct) const {
9126 assert(!Cap->capturesVariableArrayType() &&(static_cast <bool> (!Cap->capturesVariableArrayType
() && "Not expecting to generate map info for a variable array type!"
) ? void (0) : __assert_fail ("!Cap->capturesVariableArrayType() && \"Not expecting to generate map info for a variable array type!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9127, __extension__
__PRETTY_FUNCTION__))
9127 "Not expecting to generate map info for a variable array type!")(static_cast <bool> (!Cap->capturesVariableArrayType
() && "Not expecting to generate map info for a variable array type!"
) ? void (0) : __assert_fail ("!Cap->capturesVariableArrayType() && \"Not expecting to generate map info for a variable array type!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9127, __extension__
__PRETTY_FUNCTION__))
;
9128
9129 // We need to know when we generating information for the first component
9130 const ValueDecl *VD = Cap->capturesThis()
9131 ? nullptr
9132 : Cap->getCapturedVar()->getCanonicalDecl();
9133
9134 // for map(to: lambda): skip here, processing it in
9135 // generateDefaultMapInfo
9136 if (LambdasMap.count(VD))
9137 return;
9138
9139 // If this declaration appears in a is_device_ptr clause we just have to
9140 // pass the pointer by value. If it is a reference to a declaration, we just
9141 // pass its value.
9142 if (DevPointersMap.count(VD)) {
9143 CombinedInfo.Exprs.push_back(VD);
9144 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9145 CombinedInfo.Pointers.push_back(Arg);
9146 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9147 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9148 /*isSigned=*/true));
9149 CombinedInfo.Types.push_back(
9150 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9151 OMP_MAP_TARGET_PARAM);
9152 CombinedInfo.Mappers.push_back(nullptr);
9153 return;
9154 }
9155
9156 using MapData =
9157 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9158 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9159 const ValueDecl *, const Expr *>;
9160 SmallVector<MapData, 4> DeclComponentLists;
9161 assert(CurDir.is<const OMPExecutableDirective *>() &&(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9162, __extension__
__PRETTY_FUNCTION__))
9162 "Expect a executable directive")(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9162, __extension__
__PRETTY_FUNCTION__))
;
9163 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9164 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9165 const auto *EI = C->getVarRefs().begin();
9166 for (const auto L : C->decl_component_lists(VD)) {
9167 const ValueDecl *VDecl, *Mapper;
9168 // The Expression is not correct if the mapping is implicit
9169 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9170 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9171 std::tie(VDecl, Components, Mapper) = L;
9172 assert(VDecl == VD && "We got information for the wrong declaration??")(static_cast <bool> (VDecl == VD && "We got information for the wrong declaration??"
) ? void (0) : __assert_fail ("VDecl == VD && \"We got information for the wrong declaration??\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9172, __extension__
__PRETTY_FUNCTION__))
;
9173 assert(!Components.empty() &&(static_cast <bool> (!Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9174, __extension__
__PRETTY_FUNCTION__))
9174 "Not expecting declaration with no component lists.")(static_cast <bool> (!Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9174, __extension__
__PRETTY_FUNCTION__))
;
9175 DeclComponentLists.emplace_back(Components, C->getMapType(),
9176 C->getMapTypeModifiers(),
9177 C->isImplicit(), Mapper, E);
9178 ++EI;
9179 }
9180 }
9181 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9182 const MapData &RHS) {
9183 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9184 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9185 bool HasPresent =
9186 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9187 bool HasAllocs = MapType == OMPC_MAP_alloc;
9188 MapModifiers = std::get<2>(RHS);
9189 MapType = std::get<1>(LHS);
9190 bool HasPresentR =
9191 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9192 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9193 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9194 });
9195
9196 // Find overlapping elements (including the offset from the base element).
9197 llvm::SmallDenseMap<
9198 const MapData *,
9199 llvm::SmallVector<
9200 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9201 4>
9202 OverlappedData;
9203 size_t Count = 0;
9204 for (const MapData &L : DeclComponentLists) {
9205 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9206 OpenMPMapClauseKind MapType;
9207 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9208 bool IsImplicit;
9209 const ValueDecl *Mapper;
9210 const Expr *VarRef;
9211 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9212 L;
9213 ++Count;
9214 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9215 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9216 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9217 VarRef) = L1;
9218 auto CI = Components.rbegin();
9219 auto CE = Components.rend();
9220 auto SI = Components1.rbegin();
9221 auto SE = Components1.rend();
9222 for (; CI != CE && SI != SE; ++CI, ++SI) {
9223 if (CI->getAssociatedExpression()->getStmtClass() !=
9224 SI->getAssociatedExpression()->getStmtClass())
9225 break;
9226 // Are we dealing with different variables/fields?
9227 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9228 break;
9229 }
9230 // Found overlapping if, at least for one component, reached the head
9231 // of the components list.
9232 if (CI == CE || SI == SE) {
9233 // Ignore it if it is the same component.
9234 if (CI == CE && SI == SE)
9235 continue;
9236 const auto It = (SI == SE) ? CI : SI;
9237 // If one component is a pointer and another one is a kind of
9238 // dereference of this pointer (array subscript, section, dereference,
9239 // etc.), it is not an overlapping.
9240 // Same, if one component is a base and another component is a
9241 // dereferenced pointer memberexpr with the same base.
9242 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9243 (std::prev(It)->getAssociatedDeclaration() &&
9244 std::prev(It)
9245 ->getAssociatedDeclaration()
9246 ->getType()
9247 ->isPointerType()) ||
9248 (It->getAssociatedDeclaration() &&
9249 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9250 std::next(It) != CE && std::next(It) != SE))
9251 continue;
9252 const MapData &BaseData = CI == CE ? L : L1;
9253 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9254 SI == SE ? Components : Components1;
9255 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9256 OverlappedElements.getSecond().push_back(SubData);
9257 }
9258 }
9259 }
9260 // Sort the overlapped elements for each item.
9261 llvm::SmallVector<const FieldDecl *, 4> Layout;
9262 if (!OverlappedData.empty()) {
9263 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9264 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9265 while (BaseType != OrigType) {
9266 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9267 OrigType = BaseType->getPointeeOrArrayElementType();
9268 }
9269
9270 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9271 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9272 else {
9273 const auto *RD = BaseType->getAsRecordDecl();
9274 Layout.append(RD->field_begin(), RD->field_end());
9275 }
9276 }
9277 for (auto &Pair : OverlappedData) {
9278 llvm::stable_sort(
9279 Pair.getSecond(),
9280 [&Layout](
9281 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9282 OMPClauseMappableExprCommon::MappableExprComponentListRef
9283 Second) {
9284 auto CI = First.rbegin();
9285 auto CE = First.rend();
9286 auto SI = Second.rbegin();
9287 auto SE = Second.rend();
9288 for (; CI != CE && SI != SE; ++CI, ++SI) {
9289 if (CI->getAssociatedExpression()->getStmtClass() !=
9290 SI->getAssociatedExpression()->getStmtClass())
9291 break;
9292 // Are we dealing with different variables/fields?
9293 if (CI->getAssociatedDeclaration() !=
9294 SI->getAssociatedDeclaration())
9295 break;
9296 }
9297
9298 // Lists contain the same elements.
9299 if (CI == CE && SI == SE)
9300 return false;
9301
9302 // List with less elements is less than list with more elements.
9303 if (CI == CE || SI == SE)
9304 return CI == CE;
9305
9306 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9307 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9308 if (FD1->getParent() == FD2->getParent())
9309 return FD1->getFieldIndex() < FD2->getFieldIndex();
9310 const auto *It =
9311 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9312 return FD == FD1 || FD == FD2;
9313 });
9314 return *It == FD1;
9315 });
9316 }
9317
9318 // Associated with a capture, because the mapping flags depend on it.
9319 // Go through all of the elements with the overlapped elements.
9320 bool IsFirstComponentList = true;
9321 for (const auto &Pair : OverlappedData) {
9322 const MapData &L = *Pair.getFirst();
9323 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9324 OpenMPMapClauseKind MapType;
9325 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9326 bool IsImplicit;
9327 const ValueDecl *Mapper;
9328 const Expr *VarRef;
9329 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9330 L;
9331 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9332 OverlappedComponents = Pair.getSecond();
9333 generateInfoForComponentList(
9334 MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9335 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9336 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9337 IsFirstComponentList = false;
9338 }
9339 // Go through other elements without overlapped elements.
9340 for (const MapData &L : DeclComponentLists) {
9341 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9342 OpenMPMapClauseKind MapType;
9343 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9344 bool IsImplicit;
9345 const ValueDecl *Mapper;
9346 const Expr *VarRef;
9347 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9348 L;
9349 auto It = OverlappedData.find(&L);
9350 if (It == OverlappedData.end())
9351 generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9352 Components, CombinedInfo, PartialStruct,
9353 IsFirstComponentList, IsImplicit, Mapper,
9354 /*ForDeviceAddr=*/false, VD, VarRef);
9355 IsFirstComponentList = false;
9356 }
9357 }
9358
9359 /// Generate the default map information for a given capture \a CI,
9360 /// record field declaration \a RI and captured value \a CV.
9361 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9362 const FieldDecl &RI, llvm::Value *CV,
9363 MapCombinedInfoTy &CombinedInfo) const {
9364 bool IsImplicit = true;
9365 // Do the default mapping.
9366 if (CI.capturesThis()) {
9367 CombinedInfo.Exprs.push_back(nullptr);
9368 CombinedInfo.BasePointers.push_back(CV);
9369 CombinedInfo.Pointers.push_back(CV);
9370 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9371 CombinedInfo.Sizes.push_back(
9372 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9373 CGF.Int64Ty, /*isSigned=*/true));
9374 // Default map type.
9375 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9376 } else if (CI.capturesVariableByCopy()) {
9377 const VarDecl *VD = CI.getCapturedVar();
9378 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9379 CombinedInfo.BasePointers.push_back(CV);
9380 CombinedInfo.Pointers.push_back(CV);
9381 if (!RI.getType()->isAnyPointerType()) {
9382 // We have to signal to the runtime captures passed by value that are
9383 // not pointers.
9384 CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9385 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9386 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9387 } else {
9388 // Pointers are implicitly mapped with a zero size and no flags
9389 // (other than first map that is added for all implicit maps).
9390 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9391 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9392 }
9393 auto I = FirstPrivateDecls.find(VD);
9394 if (I != FirstPrivateDecls.end())
9395 IsImplicit = I->getSecond();
9396 } else {
9397 assert(CI.capturesVariable() && "Expected captured reference.")(static_cast <bool> (CI.capturesVariable() && "Expected captured reference."
) ? void (0) : __assert_fail ("CI.capturesVariable() && \"Expected captured reference.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9397, __extension__
__PRETTY_FUNCTION__))
;
9398 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9399 QualType ElementType = PtrTy->getPointeeType();
9400 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9401 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9402 // The default map type for a scalar/complex type is 'to' because by
9403 // default the value doesn't have to be retrieved. For an aggregate
9404 // type, the default is 'tofrom'.
9405 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9406 const VarDecl *VD = CI.getCapturedVar();
9407 auto I = FirstPrivateDecls.find(VD);
9408 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9409 CombinedInfo.BasePointers.push_back(CV);
9410 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9411 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9412 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9413 AlignmentSource::Decl));
9414 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9415 } else {
9416 CombinedInfo.Pointers.push_back(CV);
9417 }
9418 if (I != FirstPrivateDecls.end())
9419 IsImplicit = I->getSecond();
9420 }
9421 // Every default map produces a single argument which is a target parameter.
9422 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9423
9424 // Add flag stating this is an implicit map.
9425 if (IsImplicit)
9426 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9427
9428 // No user-defined mapper for default mapping.
9429 CombinedInfo.Mappers.push_back(nullptr);
9430 }
9431};
9432} // anonymous namespace
9433
9434static void emitNonContiguousDescriptor(
9435 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9436 CGOpenMPRuntime::TargetDataInfo &Info) {
9437 CodeGenModule &CGM = CGF.CGM;
9438 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9439 &NonContigInfo = CombinedInfo.NonContigInfo;
9440
9441 // Build an array of struct descriptor_dim and then assign it to
9442 // offload_args.
9443 //
9444 // struct descriptor_dim {
9445 // uint64_t offset;
9446 // uint64_t count;
9447 // uint64_t stride
9448 // };
9449 ASTContext &C = CGF.getContext();
9450 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9451 RecordDecl *RD;
9452 RD = C.buildImplicitRecord("descriptor_dim");
9453 RD->startDefinition();
9454 addFieldToRecordDecl(C, RD, Int64Ty);
9455 addFieldToRecordDecl(C, RD, Int64Ty);
9456 addFieldToRecordDecl(C, RD, Int64Ty);
9457 RD->completeDefinition();
9458 QualType DimTy = C.getRecordType(RD);
9459
9460 enum { OffsetFD = 0, CountFD, StrideFD };
9461 // We need two index variable here since the size of "Dims" is the same as the
9462 // size of Components, however, the size of offset, count, and stride is equal
9463 // to the size of base declaration that is non-contiguous.
9464 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9465 // Skip emitting ir if dimension size is 1 since it cannot be
9466 // non-contiguous.
9467 if (NonContigInfo.Dims[I] == 1)
9468 continue;
9469 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9470 QualType ArrayTy =
9471 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9472 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9473 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9474 unsigned RevIdx = EE - II - 1;
9475 LValue DimsLVal = CGF.MakeAddrLValue(
9476 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9477 // Offset
9478 LValue OffsetLVal = CGF.EmitLValueForField(
9479 DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9480 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9481 // Count
9482 LValue CountLVal = CGF.EmitLValueForField(
9483 DimsLVal, *std::next(RD->field_begin(), CountFD));
9484 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9485 // Stride
9486 LValue StrideLVal = CGF.EmitLValueForField(
9487 DimsLVal, *std::next(RD->field_begin(), StrideFD));
9488 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9489 }
9490 // args[I] = &dims
9491 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9492 DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
9493 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9494 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9495 Info.PointersArray, 0, I);
9496 Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
9497 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9498 ++L;
9499 }
9500}
9501
9502// Try to extract the base declaration from a `this->x` expression if possible.
9503static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9504 if (!E)
9505 return nullptr;
9506
9507 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9508 if (const MemberExpr *ME =
9509 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9510 return ME->getMemberDecl();
9511 return nullptr;
9512}
9513
9514/// Emit a string constant containing the names of the values mapped to the
9515/// offloading runtime library.
9516llvm::Constant *
9517emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9518 MappableExprsHandler::MappingExprInfo &MapExprs) {
9519
9520 uint32_t SrcLocStrSize;
9521 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9522 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9523
9524 SourceLocation Loc;
9525 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9526 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9527 Loc = VD->getLocation();
9528 else
9529 Loc = MapExprs.getMapExpr()->getExprLoc();
9530 } else {
9531 Loc = MapExprs.getMapDecl()->getLocation();
9532 }
9533
9534 std::string ExprName;
9535 if (MapExprs.getMapExpr()) {
9536 PrintingPolicy P(CGF.getContext().getLangOpts());
9537 llvm::raw_string_ostream OS(ExprName);
9538 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9539 OS.flush();
9540 } else {
9541 ExprName = MapExprs.getMapDecl()->getNameAsString();
9542 }
9543
9544 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9545 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9546 PLoc.getLine(), PLoc.getColumn(),
9547 SrcLocStrSize);
9548}
9549
9550/// Emit the arrays used to pass the captures and map information to the
9551/// offloading runtime library. If there is no map or capture information,
9552/// return nullptr by reference.
9553static void emitOffloadingArrays(
9554 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9555 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9556 bool IsNonContiguous = false) {
9557 CodeGenModule &CGM = CGF.CGM;
9558 ASTContext &Ctx = CGF.getContext();
9559
9560 // Reset the array information.
9561 Info.clearArrayInfo();
9562 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9563
9564 if (Info.NumberOfPtrs) {
9565 // Detect if we have any capture size requiring runtime evaluation of the
9566 // size so that a constant array could be eventually used.
9567
9568 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9569 QualType PointerArrayType = Ctx.getConstantArrayType(
9570 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9571 /*IndexTypeQuals=*/0);
9572
9573 Info.BasePointersArray =
9574 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9575 Info.PointersArray =
9576 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9577 Address MappersArray =
9578 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9579 Info.MappersArray = MappersArray.getPointer();
9580
9581 // If we don't have any VLA types or other types that require runtime
9582 // evaluation, we can use a constant array for the map sizes, otherwise we
9583 // need to fill up the arrays as we do for the pointers.
9584 QualType Int64Ty =
9585 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9586 SmallVector<llvm::Constant *> ConstSizes(
9587 CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
9588 llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
9589 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9590 if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
9591 if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
9592 if (IsNonContiguous && (CombinedInfo.Types[I] &
9593 MappableExprsHandler::OMP_MAP_NON_CONTIG))
9594 ConstSizes[I] = llvm::ConstantInt::get(
9595 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
9596 else
9597 ConstSizes[I] = CI;
9598 continue;
9599 }
9600 }
9601 RuntimeSizes.set(I);
9602 }
9603
9604 if (RuntimeSizes.all()) {
9605 QualType SizeArrayType = Ctx.getConstantArrayType(
9606 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9607 /*IndexTypeQuals=*/0);
9608 Info.SizesArray =
9609 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9610 } else {
9611 auto *SizesArrayInit = llvm::ConstantArray::get(
9612 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9613 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9614 auto *SizesArrayGbl = new llvm::GlobalVariable(
9615 CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
9616 llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
9617 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9618 if (RuntimeSizes.any()) {
9619 QualType SizeArrayType = Ctx.getConstantArrayType(
9620 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9621 /*IndexTypeQuals=*/0);
9622 Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
9623 llvm::Value *GblConstPtr =
9624 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9625 SizesArrayGbl, CGM.Int64Ty->getPointerTo());
9626 CGF.Builder.CreateMemCpy(
9627 Buffer,
9628 Address(GblConstPtr, CGM.Int64Ty,
9629 CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
9630 /*DestWidth=*/64, /*Signed=*/false))),
9631 CGF.getTypeSize(SizeArrayType));
9632 Info.SizesArray = Buffer.getPointer();
9633 } else {
9634 Info.SizesArray = SizesArrayGbl;
9635 }
9636 }
9637
9638 // The map types are always constant so we don't need to generate code to
9639 // fill arrays. Instead, we create an array constant.
9640 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9641 llvm::copy(CombinedInfo.Types, Mapping.begin());
9642 std::string MaptypesName =
9643 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9644 auto *MapTypesArrayGbl =
9645 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9646 Info.MapTypesArray = MapTypesArrayGbl;
9647
9648 // The information types are only built if there is debug information
9649 // requested.
9650 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9651 Info.MapNamesArray = llvm::Constant::getNullValue(
9652 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9653 } else {
9654 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9655 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9656 };
9657 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9658 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9659 std::string MapnamesName =
9660 CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9661 auto *MapNamesArrayGbl =
9662 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9663 Info.MapNamesArray = MapNamesArrayGbl;
9664 }
9665
9666 // If there's a present map type modifier, it must not be applied to the end
9667 // of a region, so generate a separate map type array in that case.
9668 if (Info.separateBeginEndCalls()) {
9669 bool EndMapTypesDiffer = false;
9670 for (uint64_t &Type : Mapping) {
9671 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9672 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9673 EndMapTypesDiffer = true;
9674 }
9675 }
9676 if (EndMapTypesDiffer) {
9677 MapTypesArrayGbl =
9678 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9679 Info.MapTypesArrayEnd = MapTypesArrayGbl;
9680 }
9681 }
9682
9683 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9684 llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9685 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9686 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9687 Info.BasePointersArray, 0, I);
9688 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9689 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9690 Address BPAddr(BP, BPVal->getType(),
9691 Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9692 CGF.Builder.CreateStore(BPVal, BPAddr);
9693
9694 if (Info.requiresDevicePointerInfo())
9695 if (const ValueDecl *DevVD =
9696 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9697 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9698
9699 llvm::Value *PVal = CombinedInfo.Pointers[I];
9700 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9701 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9702 Info.PointersArray, 0, I);
9703 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9704 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9705 Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9706 CGF.Builder.CreateStore(PVal, PAddr);
9707
9708 if (RuntimeSizes.test(I)) {
9709 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9710 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9711 Info.SizesArray,
9712 /*Idx0=*/0,
9713 /*Idx1=*/I);
9714 Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
9715 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9716 CGM.Int64Ty,
9717 /*isSigned=*/true),
9718 SAddr);
9719 }
9720
9721 // Fill up the mapper array.
9722 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9723 if (CombinedInfo.Mappers[I]) {
9724 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9725 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9726 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9727 Info.HasMapper = true;
9728 }
9729 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9730 CGF.Builder.CreateStore(MFunc, MAddr);
9731 }
9732 }
9733
9734 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9735 Info.NumberOfPtrs == 0)
9736 return;
9737
9738 emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9739}
9740
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the map-type array intended for the end of the region
  /// (with the 'present' modifier stripped) instead of the begin array.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  // Intentionally non-explicit: callers rely on brace-init conversion.
  ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
};
} // namespace
9749
9750/// Emit the arguments to be passed to the runtime library based on the
9751/// arrays of base pointers, pointers, sizes, map types, and mappers. If
9752/// ForEndCall, emit map types to be passed for the end of the region instead of
9753/// the beginning.
9754static void emitOffloadingArraysArgument(
9755 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9756 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9757 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9758 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9759 const ArgumentsOptions &Options = ArgumentsOptions()) {
9760 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&(static_cast <bool> ((!Options.ForEndCall || Info.separateBeginEndCalls
()) && "expected region end call to runtime only when end call is separate"
) ? void (0) : __assert_fail ("(!Options.ForEndCall || Info.separateBeginEndCalls()) && \"expected region end call to runtime only when end call is separate\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9761, __extension__
__PRETTY_FUNCTION__))
9761 "expected region end call to runtime only when end call is separate")(static_cast <bool> ((!Options.ForEndCall || Info.separateBeginEndCalls
()) && "expected region end call to runtime only when end call is separate"
) ? void (0) : __assert_fail ("(!Options.ForEndCall || Info.separateBeginEndCalls()) && \"expected region end call to runtime only when end call is separate\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9761, __extension__
__PRETTY_FUNCTION__))
;
9762 CodeGenModule &CGM = CGF.CGM;
9763 if (Info.NumberOfPtrs) {
9764 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9765 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9766 Info.BasePointersArray,
9767 /*Idx0=*/0, /*Idx1=*/0);
9768 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9769 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9770 Info.PointersArray,
9771 /*Idx0=*/0,
9772 /*Idx1=*/0);
9773 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9774 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9775 /*Idx0=*/0, /*Idx1=*/0);
9776 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9777 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9778 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9779 : Info.MapTypesArray,
9780 /*Idx0=*/0,
9781 /*Idx1=*/0);
9782
9783 // Only emit the mapper information arrays if debug information is
9784 // requested.
9785 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9786 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9787 else
9788 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9789 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9790 Info.MapNamesArray,
9791 /*Idx0=*/0,
9792 /*Idx1=*/0);
9793 // If there is no user-defined mapper, set the mapper array to nullptr to
9794 // avoid an unnecessary data privatization
9795 if (!Info.HasMapper)
9796 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9797 else
9798 MappersArrayArg =
9799 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9800 } else {
9801 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9802 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9803 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9804 MapTypesArrayArg =
9805 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9806 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9807 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9808 }
9809}
9810
9811/// Check for inner distribute directive.
9812static const OMPExecutableDirective *
9813getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9814 const auto *CS = D.getInnermostCapturedStmt();
9815 const auto *Body =
9816 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9817 const Stmt *ChildStmt =
9818 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9819
9820 if (const auto *NestedDir =
9821 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9822 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9823 switch (D.getDirectiveKind()) {
9824 case OMPD_target:
9825 if (isOpenMPDistributeDirective(DKind))
9826 return NestedDir;
9827 if (DKind == OMPD_teams) {
9828 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9829 /*IgnoreCaptured=*/true);
9830 if (!Body)
9831 return nullptr;
9832 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9833 if (const auto *NND =
9834 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9835 DKind = NND->getDirectiveKind();
9836 if (isOpenMPDistributeDirective(DKind))
9837 return NND;
9838 }
9839 }
9840 return nullptr;
9841 case OMPD_target_teams:
9842 if (isOpenMPDistributeDirective(DKind))
9843 return NestedDir;
9844 return nullptr;
9845 case OMPD_target_parallel:
9846 case OMPD_target_simd:
9847 case OMPD_target_parallel_for:
9848 case OMPD_target_parallel_for_simd:
9849 return nullptr;
9850 case OMPD_target_teams_distribute:
9851 case OMPD_target_teams_distribute_simd:
9852 case OMPD_target_teams_distribute_parallel_for:
9853 case OMPD_target_teams_distribute_parallel_for_simd:
9854 case OMPD_parallel:
9855 case OMPD_for:
9856 case OMPD_parallel_for:
9857 case OMPD_parallel_master:
9858 case OMPD_parallel_sections:
9859 case OMPD_for_simd:
9860 case OMPD_parallel_for_simd:
9861 case OMPD_cancel:
9862 case OMPD_cancellation_point:
9863 case OMPD_ordered:
9864 case OMPD_threadprivate:
9865 case OMPD_allocate:
9866 case OMPD_task:
9867 case OMPD_simd:
9868 case OMPD_tile:
9869 case OMPD_unroll:
9870 case OMPD_sections:
9871 case OMPD_section:
9872 case OMPD_single:
9873 case OMPD_master:
9874 case OMPD_critical:
9875 case OMPD_taskyield:
9876 case OMPD_barrier:
9877 case OMPD_taskwait:
9878 case OMPD_taskgroup:
9879 case OMPD_atomic:
9880 case OMPD_flush:
9881 case OMPD_depobj:
9882 case OMPD_scan:
9883 case OMPD_teams:
9884 case OMPD_target_data:
9885 case OMPD_target_exit_data:
9886 case OMPD_target_enter_data:
9887 case OMPD_distribute:
9888 case OMPD_distribute_simd:
9889 case OMPD_distribute_parallel_for:
9890 case OMPD_distribute_parallel_for_simd:
9891 case OMPD_teams_distribute:
9892 case OMPD_teams_distribute_simd:
9893 case OMPD_teams_distribute_parallel_for:
9894 case OMPD_teams_distribute_parallel_for_simd:
9895 case OMPD_target_update:
9896 case OMPD_declare_simd:
9897 case OMPD_declare_variant:
9898 case OMPD_begin_declare_variant:
9899 case OMPD_end_declare_variant:
9900 case OMPD_declare_target:
9901 case OMPD_end_declare_target:
9902 case OMPD_declare_reduction:
9903 case OMPD_declare_mapper:
9904 case OMPD_taskloop:
9905 case OMPD_taskloop_simd:
9906 case OMPD_master_taskloop:
9907 case OMPD_master_taskloop_simd:
9908 case OMPD_parallel_master_taskloop:
9909 case OMPD_parallel_master_taskloop_simd:
9910 case OMPD_requires:
9911 case OMPD_metadirective:
9912 case OMPD_unknown:
9913 default:
9914 llvm_unreachable("Unexpected directive.")::llvm::llvm_unreachable_internal("Unexpected directive.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 9914)
;
9915 }
9916 }
9917
9918 return nullptr;
9919}
9920
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
///
/// \param D   The 'omp declare mapper' declaration to lower into a function.
/// \param CGF If non-null, \p D is additionally recorded in FunctionUDMMap
///            under CGF->CurFn -- presumably so the mappers emitted while
///            codegen'ing the current function can be found later; confirm
///            against FunctionUDMMap's consumers.
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // UDMMap doubles as the "already emitted" guard and the decl->function
  // cache, so each mapper declaration is lowered at most once.
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  // The variable declared by the 'declare mapper' directive itself; it is
  // privatized below so that the mapper's map clauses are evaluated against
  // the current array element on each loop iteration.
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes. The parameter list
  // matches the signature shown in the \code example above:
  // (handle, base, begin, size, type, name).
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  // Build the function name ".omp_mapper.<mangled type>.<mapper name>".
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  // PHI over the current element pointer. Only the entry edge is wired up
  // here; the back-edge incoming value (PtrNext) is added after the loop body
  // has been emitted, since the body may create new blocks (LastBB tracks the
  // block the back edge actually comes from).
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  // Shift the component count into the MEMBER_OF bit-field position so it can
  // be added onto each component's map type below.
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    // Mapping names are only materialized when debug info is requested;
    // otherwise pass a null name pointer.
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    // Merge the four decayed map types; the tofrom case flows in unchanged
    // from ToElseBB (the fall-through edge of the IsFrom branch).
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  // Cache the finished function so later requests (including recursive ones
  // via getOrCreateUserDefinedMapperFunc) reuse it.
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
10200
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
///
/// \param MapperCGF  The CodeGenFunction emitting the mapper body.
/// \param Handle     The runtime mapper handle (first mapper argument).
/// \param Base       Base pointer argument of the mapper function.
/// \param Begin      Begin pointer argument of the mapper function.
/// \param Size       Number of array elements (already divided by the
///                   element size by the caller).
/// \param MapType    The map-type bits passed into the mapper function.
/// \param MapName    The mapping-name pointer (may be a null constant).
/// \param ElementSize Size in chars of one mapped element.
/// \param ExitBB     Block to branch to when no init/delete is needed; also
///                   the fall-through target after the runtime call.
/// \param IsInit     True for the allocation prologue, false for the
///                   deletion epilogue.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // Init also fires for a pointer-with-object mapping where base != begin,
    // even when there is only a single element (see the \code example on
    // emitUserDefinedMapper).
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    // Initialization only when the delete bit is NOT set.
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    // Deletion only when the delete bit IS set.
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
10268
10269llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10270 const OMPDeclareMapperDecl *D) {
10271 auto I = UDMMap.find(D);
10272 if (I != UDMMap.end())
10273 return I->second;
10274 emitUserDefinedMapper(D);
10275 return UDMMap.lookup(D);
10276}
10277
10278void CGOpenMPRuntime::emitTargetNumIterationsCall(
10279 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10280 llvm::Value *DeviceID,
10281 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10282 const OMPLoopDirective &D)>
10283 SizeEmitter) {
10284 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10285 const OMPExecutableDirective *TD = &D;
10286 // Get nested teams distribute kind directive, if any.
10287 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10288 TD = getNestedDistributeDirective(CGM.getContext(), D);
10289 if (!TD)
10290 return;
10291 const auto *LD = cast<OMPLoopDirective>(TD);
10292 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10293 PrePostActionTy &) {
10294 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10295 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10296 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10297 CGF.EmitRuntimeCall(
10298 OMPBuilder.getOrCreateRuntimeFunction(
10299 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10300 Args);
10301 }
10302 };
10303 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10304}
10305
10306void CGOpenMPRuntime::emitTargetCall(
10307 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10308 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10309 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10310 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10311 const OMPLoopDirective &D)>
10312 SizeEmitter) {
10313 if (!CGF.HaveInsertPoint())
10314 return;
10315
10316 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
10317 CGM.getLangOpts().OpenMPOffloadMandatory;
10318
10319 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!")(static_cast <bool> ((OffloadingMandatory || OutlinedFn
) && "Invalid outlined function!") ? void (0) : __assert_fail
("(OffloadingMandatory || OutlinedFn) && \"Invalid outlined function!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10319, __extension__
__PRETTY_FUNCTION__))
;
10320
10321 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10322 D.hasClausesOfKind<OMPNowaitClause>();
10323 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10324 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10325 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10326 PrePostActionTy &) {
10327 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10328 };
10329 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10330
10331 CodeGenFunction::OMPTargetDataInfo InputInfo;
10332 llvm::Value *MapTypesArray = nullptr;
10333 llvm::Value *MapNamesArray = nullptr;
10334 // Generate code for the host fallback function.
10335 auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
10336 &CS, OffloadingMandatory](CodeGenFunction &CGF) {
10337 if (OffloadingMandatory) {
10338 CGF.Builder.CreateUnreachable();
10339 } else {
10340 if (RequiresOuterTask) {
10341 CapturedVars.clear();
10342 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10343 }
10344 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10345 }
10346 };
10347 // Fill up the pointer arrays and transfer execution to the device.
10348 auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
10349 &MapNamesArray, SizeEmitter,
10350 FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10351 if (Device.getInt() == OMPC_DEVICE_ancestor) {
10352 // Reverse offloading is not supported, so just execute on the host.
10353 FallbackGen(CGF);
10354 return;
10355 }
10356
10357 // On top of the arrays that were filled up, the target offloading call
10358 // takes as arguments the device id as well as the host pointer. The host
10359 // pointer is used by the runtime library to identify the current target
10360 // region, so it only has to be unique and not necessarily point to
10361 // anything. It could be the pointer to the outlined function that
10362 // implements the target region, but we aren't using that so that the
10363 // compiler doesn't need to keep that, and could therefore inline the host
10364 // function if proven worthwhile during optimization.
10365
10366 // From this point on, we need to have an ID of the target region defined.
10367 assert(OutlinedFnID && "Invalid outlined function ID!")(static_cast <bool> (OutlinedFnID && "Invalid outlined function ID!"
) ? void (0) : __assert_fail ("OutlinedFnID && \"Invalid outlined function ID!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10367, __extension__
__PRETTY_FUNCTION__))
;
10368 (void)OutlinedFnID;
10369
10370 // Emit device ID if any.
10371 llvm::Value *DeviceID;
10372 if (Device.getPointer()) {
10373 assert((Device.getInt() == OMPC_DEVICE_unknown ||(static_cast <bool> ((Device.getInt() == OMPC_DEVICE_unknown
|| Device.getInt() == OMPC_DEVICE_device_num) && "Expected device_num modifier."
) ? void (0) : __assert_fail ("(Device.getInt() == OMPC_DEVICE_unknown || Device.getInt() == OMPC_DEVICE_device_num) && \"Expected device_num modifier.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10375, __extension__
__PRETTY_FUNCTION__))
10374 Device.getInt() == OMPC_DEVICE_device_num) &&(static_cast <bool> ((Device.getInt() == OMPC_DEVICE_unknown
|| Device.getInt() == OMPC_DEVICE_device_num) && "Expected device_num modifier."
) ? void (0) : __assert_fail ("(Device.getInt() == OMPC_DEVICE_unknown || Device.getInt() == OMPC_DEVICE_device_num) && \"Expected device_num modifier.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10375, __extension__
__PRETTY_FUNCTION__))
10375 "Expected device_num modifier.")(static_cast <bool> ((Device.getInt() == OMPC_DEVICE_unknown
|| Device.getInt() == OMPC_DEVICE_device_num) && "Expected device_num modifier."
) ? void (0) : __assert_fail ("(Device.getInt() == OMPC_DEVICE_unknown || Device.getInt() == OMPC_DEVICE_device_num) && \"Expected device_num modifier.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10375, __extension__
__PRETTY_FUNCTION__))
;
10376 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10377 DeviceID =
10378 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10379 } else {
10380 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10381 }
10382
10383 // Emit the number of elements in the offloading arrays.
10384 llvm::Value *PointerNum =
10385 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10386
10387 // Return value of the runtime offloading call.
10388 llvm::Value *Return;
10389
10390 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10391 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10392
10393 // Source location for the ident struct
10394 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10395
10396 // Emit tripcount for the target loop-based directive.
10397 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10398
10399 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10400 // The target region is an outlined function launched by the runtime
10401 // via calls __tgt_target() or __tgt_target_teams().
10402 //
10403 // __tgt_target() launches a target region with one team and one thread,
10404 // executing a serial region. This master thread may in turn launch
10405 // more threads within its team upon encountering a parallel region,
10406 // however, no additional teams can be launched on the device.
10407 //
10408 // __tgt_target_teams() launches a target region with one or more teams,
10409 // each with one or more threads. This call is required for target
10410 // constructs such as:
10411 // 'target teams'
10412 // 'target' / 'teams'
10413 // 'target teams distribute parallel for'
10414 // 'target parallel'
10415 // and so on.
10416 //
10417 // Note that on the host and CPU targets, the runtime implementation of
10418 // these calls simply call the outlined function without forking threads.
10419 // The outlined functions themselves have runtime calls to
10420 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10421 // the compiler in emitTeamsCall() and emitParallelCall().
10422 //
10423 // In contrast, on the NVPTX target, the implementation of
10424 // __tgt_target_teams() launches a GPU kernel with the requested number
10425 // of teams and threads so no additional calls to the runtime are required.
10426 if (NumTeams) {
10427 // If we have NumTeams defined this means that we have an enclosed teams
10428 // region. Therefore we also expect to have NumThreads defined. These two
10429 // values should be defined in the presence of a teams directive,
10430 // regardless of having any clauses associated. If the user is using teams
10431 // but no clauses, these two values will be the default that should be
10432 // passed to the runtime library - a 32-bit integer with the value zero.
10433 assert(NumThreads && "Thread limit expression should be available along "(static_cast <bool> (NumThreads && "Thread limit expression should be available along "
"with number of teams.") ? void (0) : __assert_fail ("NumThreads && \"Thread limit expression should be available along \" \"with number of teams.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10434, __extension__
__PRETTY_FUNCTION__))
10434 "with number of teams.")(static_cast <bool> (NumThreads && "Thread limit expression should be available along "
"with number of teams.") ? void (0) : __assert_fail ("NumThreads && \"Thread limit expression should be available along \" \"with number of teams.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10434, __extension__
__PRETTY_FUNCTION__))
;
10435 SmallVector<llvm::Value *> OffloadingArgs = {
10436 RTLoc,
10437 DeviceID,
10438 OutlinedFnID,
10439 PointerNum,
10440 InputInfo.BasePointersArray.getPointer(),
10441 InputInfo.PointersArray.getPointer(),
10442 InputInfo.SizesArray.getPointer(),
10443 MapTypesArray,
10444 MapNamesArray,
10445 InputInfo.MappersArray.getPointer(),
10446 NumTeams,
10447 NumThreads};
10448 if (HasNowait) {
10449 // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10450 // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10451 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10452 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10453 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10454 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10455 }
10456 Return = CGF.EmitRuntimeCall(
10457 OMPBuilder.getOrCreateRuntimeFunction(
10458 CGM.getModule(), HasNowait
10459 ? OMPRTL___tgt_target_teams_nowait_mapper
10460 : OMPRTL___tgt_target_teams_mapper),
10461 OffloadingArgs);
10462 } else {
10463 SmallVector<llvm::Value *> OffloadingArgs = {
10464 RTLoc,
10465 DeviceID,
10466 OutlinedFnID,
10467 PointerNum,
10468 InputInfo.BasePointersArray.getPointer(),
10469 InputInfo.PointersArray.getPointer(),
10470 InputInfo.SizesArray.getPointer(),
10471 MapTypesArray,
10472 MapNamesArray,
10473 InputInfo.MappersArray.getPointer()};
10474 if (HasNowait) {
10475 // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10476 // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10477 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10478 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10479 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10480 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10481 }
10482 Return = CGF.EmitRuntimeCall(
10483 OMPBuilder.getOrCreateRuntimeFunction(
10484 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10485 : OMPRTL___tgt_target_mapper),
10486 OffloadingArgs);
10487 }
10488
10489 // Check the error code and execute the host version if required.
10490 llvm::BasicBlock *OffloadFailedBlock =
10491 CGF.createBasicBlock("omp_offload.failed");
10492 llvm::BasicBlock *OffloadContBlock =
10493 CGF.createBasicBlock("omp_offload.cont");
10494 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10495 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10496
10497 CGF.EmitBlock(OffloadFailedBlock);
10498 FallbackGen(CGF);
10499
10500 CGF.EmitBranch(OffloadContBlock);
10501
10502 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10503 };
10504
10505 // Notify that the host version must be executed.
10506 auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
10507 FallbackGen(CGF);
10508 };
10509
10510 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10511 &MapNamesArray, &CapturedVars, RequiresOuterTask,
10512 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10513 // Fill up the arrays with all the captured variables.
10514 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10515
10516 // Get mappable expression information.
10517 MappableExprsHandler MEHandler(D, CGF);
10518 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10519 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10520
10521 auto RI = CS.getCapturedRecordDecl()->field_begin();
10522 auto *CV = CapturedVars.begin();
10523 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10524 CE = CS.capture_end();
10525 CI != CE; ++CI, ++RI, ++CV) {
10526 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10527 MappableExprsHandler::StructRangeInfoTy PartialStruct;
10528
10529 // VLA sizes are passed to the outlined region by copy and do not have map
10530 // information associated.
10531 if (CI->capturesVariableArrayType()) {
10532 CurInfo.Exprs.push_back(nullptr);
10533 CurInfo.BasePointers.push_back(*CV);
10534 CurInfo.Pointers.push_back(*CV);
10535 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10536 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10537 // Copy to the device as an argument. No need to retrieve it.
10538 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10539 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10540 MappableExprsHandler::OMP_MAP_IMPLICIT);
10541 CurInfo.Mappers.push_back(nullptr);
10542 } else {
10543 // If we have any information in the map clause, we use it, otherwise we
10544 // just do a default mapping.
10545 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10546 if (!CI->capturesThis())
10547 MappedVarSet.insert(CI->getCapturedVar());
10548 else
10549 MappedVarSet.insert(nullptr);
10550 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10551 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10552 // Generate correct mapping for variables captured by reference in
10553 // lambdas.
10554 if (CI->capturesVariable())
10555 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10556 CurInfo, LambdaPointers);
10557 }
10558 // We expect to have at least an element of information for this capture.
10559 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&(static_cast <bool> ((!CurInfo.BasePointers.empty() || PartialStruct
.Base.isValid()) && "Non-existing map pointer for capture!"
) ? void (0) : __assert_fail ("(!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && \"Non-existing map pointer for capture!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10560, __extension__
__PRETTY_FUNCTION__))
10560 "Non-existing map pointer for capture!")(static_cast <bool> ((!CurInfo.BasePointers.empty() || PartialStruct
.Base.isValid()) && "Non-existing map pointer for capture!"
) ? void (0) : __assert_fail ("(!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) && \"Non-existing map pointer for capture!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10560, __extension__
__PRETTY_FUNCTION__))
;
10561 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&(static_cast <bool> (CurInfo.BasePointers.size() == CurInfo
.Pointers.size() && CurInfo.BasePointers.size() == CurInfo
.Sizes.size() && CurInfo.BasePointers.size() == CurInfo
.Types.size() && CurInfo.BasePointers.size() == CurInfo
.Mappers.size() && "Inconsistent map information sizes!"
) ? void (0) : __assert_fail ("CurInfo.BasePointers.size() == CurInfo.Pointers.size() && CurInfo.BasePointers.size() == CurInfo.Sizes.size() && CurInfo.BasePointers.size() == CurInfo.Types.size() && CurInfo.BasePointers.size() == CurInfo.Mappers.size() && \"Inconsistent map information sizes!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10565, __extension__
__PRETTY_FUNCTION__))
10562 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&(static_cast <bool> (CurInfo.BasePointers.size() == CurInfo
.Pointers.size() && CurInfo.BasePointers.size() == CurInfo
.Sizes.size() && CurInfo.BasePointers.size() == CurInfo
.Types.size() && CurInfo.BasePointers.size() == CurInfo
.Mappers.size() && "Inconsistent map information sizes!"
) ? void (0) : __assert_fail ("CurInfo.BasePointers.size() == CurInfo.Pointers.size() && CurInfo.BasePointers.size() == CurInfo.Sizes.size() && CurInfo.BasePointers.size() == CurInfo.Types.size() && CurInfo.BasePointers.size() == CurInfo.Mappers.size() && \"Inconsistent map information sizes!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10565, __extension__
__PRETTY_FUNCTION__))
10563 CurInfo.BasePointers.size() == CurInfo.Types.size() &&(static_cast <bool> (CurInfo.BasePointers.size() == CurInfo
.Pointers.size() && CurInfo.BasePointers.size() == CurInfo
.Sizes.size() && CurInfo.BasePointers.size() == CurInfo
.Types.size() && CurInfo.BasePointers.size() == CurInfo
.Mappers.size() && "Inconsistent map information sizes!"
) ? void (0) : __assert_fail ("CurInfo.BasePointers.size() == CurInfo.Pointers.size() && CurInfo.BasePointers.size() == CurInfo.Sizes.size() && CurInfo.BasePointers.size() == CurInfo.Types.size() && CurInfo.BasePointers.size() == CurInfo.Mappers.size() && \"Inconsistent map information sizes!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10565, __extension__
__PRETTY_FUNCTION__))
10564 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&(static_cast <bool> (CurInfo.BasePointers.size() == CurInfo
.Pointers.size() && CurInfo.BasePointers.size() == CurInfo
.Sizes.size() && CurInfo.BasePointers.size() == CurInfo
.Types.size() && CurInfo.BasePointers.size() == CurInfo
.Mappers.size() && "Inconsistent map information sizes!"
) ? void (0) : __assert_fail ("CurInfo.BasePointers.size() == CurInfo.Pointers.size() && CurInfo.BasePointers.size() == CurInfo.Sizes.size() && CurInfo.BasePointers.size() == CurInfo.Types.size() && CurInfo.BasePointers.size() == CurInfo.Mappers.size() && \"Inconsistent map information sizes!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10565, __extension__
__PRETTY_FUNCTION__))
10565 "Inconsistent map information sizes!")(static_cast <bool> (CurInfo.BasePointers.size() == CurInfo
.Pointers.size() && CurInfo.BasePointers.size() == CurInfo
.Sizes.size() && CurInfo.BasePointers.size() == CurInfo
.Types.size() && CurInfo.BasePointers.size() == CurInfo
.Mappers.size() && "Inconsistent map information sizes!"
) ? void (0) : __assert_fail ("CurInfo.BasePointers.size() == CurInfo.Pointers.size() && CurInfo.BasePointers.size() == CurInfo.Sizes.size() && CurInfo.BasePointers.size() == CurInfo.Types.size() && CurInfo.BasePointers.size() == CurInfo.Mappers.size() && \"Inconsistent map information sizes!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10565, __extension__
__PRETTY_FUNCTION__))
;
10566
10567 // If there is an entry in PartialStruct it means we have a struct with
10568 // individual members mapped. Emit an extra combined entry.
10569 if (PartialStruct.Base.isValid()) {
10570 CombinedInfo.append(PartialStruct.PreliminaryMapData);
10571 MEHandler.emitCombinedEntry(
10572 CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10573 !PartialStruct.PreliminaryMapData.BasePointers.empty());
10574 }
10575
10576 // We need to append the results of this capture to what we already have.
10577 CombinedInfo.append(CurInfo);
10578 }
10579 // Adjust MEMBER_OF flags for the lambdas captures.
10580 MEHandler.adjustMemberOfForLambdaCaptures(
10581 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10582 CombinedInfo.Types);
10583 // Map any list items in a map clause that were not captures because they
10584 // weren't referenced within the construct.
10585 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10586
10587 TargetDataInfo Info;
10588 // Fill up the arrays and create the arguments.
10589 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10590 emitOffloadingArraysArgument(
10591 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10592 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10593 {/*ForEndCall=*/false});
10594
10595 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10596 InputInfo.BasePointersArray =
10597 Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10598 InputInfo.PointersArray =
10599 Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10600 InputInfo.SizesArray =
10601 Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10602 InputInfo.MappersArray =
10603 Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10604 MapTypesArray = Info.MapTypesArray;
10605 MapNamesArray = Info.MapNamesArray;
10606 if (RequiresOuterTask)
10607 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10608 else
10609 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10610 };
10611
10612 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10613 CodeGenFunction &CGF, PrePostActionTy &) {
10614 if (RequiresOuterTask) {
10615 CodeGenFunction::OMPTargetDataInfo InputInfo;
10616 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10617 } else {
10618 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10619 }
10620 };
10621
10622 // If we have a target function ID it means that we need to support
10623 // offloading, otherwise, just execute on the host. We need to execute on host
10624 // regardless of the conditional in the if clause if, e.g., the user do not
10625 // specify target triples.
10626 if (OutlinedFnID) {
10627 if (IfCond) {
10628 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10629 } else {
10630 RegionCodeGenTy ThenRCG(TargetThenGen);
10631 ThenRCG(CGF);
10632 }
10633 } else {
10634 RegionCodeGenTy ElseRCG(TargetElseGen);
10635 ElseRCG(CGF);
10636 }
10637}
10638
10639void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10640 StringRef ParentName) {
10641 if (!S)
10642 return;
10643
10644 // Codegen OMP target directives that offload compute to the device.
10645 bool RequiresDeviceCodegen =
10646 isa<OMPExecutableDirective>(S) &&
10647 isOpenMPTargetExecutionDirective(
10648 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10649
10650 if (RequiresDeviceCodegen) {
10651 const auto &E = *cast<OMPExecutableDirective>(S);
10652 unsigned DeviceID;
10653 unsigned FileID;
10654 unsigned Line;
10655 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10656 FileID, Line);
10657
10658 // Is this a target region that should not be emitted as an entry point? If
10659 // so just signal we are done with this target region.
10660 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10661 ParentName, Line))
10662 return;
10663
10664 switch (E.getDirectiveKind()) {
10665 case OMPD_target:
10666 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10667 cast<OMPTargetDirective>(E));
10668 break;
10669 case OMPD_target_parallel:
10670 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10671 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10672 break;
10673 case OMPD_target_teams:
10674 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10675 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10676 break;
10677 case OMPD_target_teams_distribute:
10678 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10679 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10680 break;
10681 case OMPD_target_teams_distribute_simd:
10682 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10683 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10684 break;
10685 case OMPD_target_parallel_for:
10686 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10687 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10688 break;
10689 case OMPD_target_parallel_for_simd:
10690 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10691 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10692 break;
10693 case OMPD_target_simd:
10694 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10695 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10696 break;
10697 case OMPD_target_teams_distribute_parallel_for:
10698 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10699 CGM, ParentName,
10700 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10701 break;
10702 case OMPD_target_teams_distribute_parallel_for_simd:
10703 CodeGenFunction::
10704 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10705 CGM, ParentName,
10706 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10707 break;
10708 case OMPD_parallel:
10709 case OMPD_for:
10710 case OMPD_parallel_for:
10711 case OMPD_parallel_master:
10712 case OMPD_parallel_sections:
10713 case OMPD_for_simd:
10714 case OMPD_parallel_for_simd:
10715 case OMPD_cancel:
10716 case OMPD_cancellation_point:
10717 case OMPD_ordered:
10718 case OMPD_threadprivate:
10719 case OMPD_allocate:
10720 case OMPD_task:
10721 case OMPD_simd:
10722 case OMPD_tile:
10723 case OMPD_unroll:
10724 case OMPD_sections:
10725 case OMPD_section:
10726 case OMPD_single:
10727 case OMPD_master:
10728 case OMPD_critical:
10729 case OMPD_taskyield:
10730 case OMPD_barrier:
10731 case OMPD_taskwait:
10732 case OMPD_taskgroup:
10733 case OMPD_atomic:
10734 case OMPD_flush:
10735 case OMPD_depobj:
10736 case OMPD_scan:
10737 case OMPD_teams:
10738 case OMPD_target_data:
10739 case OMPD_target_exit_data:
10740 case OMPD_target_enter_data:
10741 case OMPD_distribute:
10742 case OMPD_distribute_simd:
10743 case OMPD_distribute_parallel_for:
10744 case OMPD_distribute_parallel_for_simd:
10745 case OMPD_teams_distribute:
10746 case OMPD_teams_distribute_simd:
10747 case OMPD_teams_distribute_parallel_for:
10748 case OMPD_teams_distribute_parallel_for_simd:
10749 case OMPD_target_update:
10750 case OMPD_declare_simd:
10751 case OMPD_declare_variant:
10752 case OMPD_begin_declare_variant:
10753 case OMPD_end_declare_variant:
10754 case OMPD_declare_target:
10755 case OMPD_end_declare_target:
10756 case OMPD_declare_reduction:
10757 case OMPD_declare_mapper:
10758 case OMPD_taskloop:
10759 case OMPD_taskloop_simd:
10760 case OMPD_master_taskloop:
10761 case OMPD_master_taskloop_simd:
10762 case OMPD_parallel_master_taskloop:
10763 case OMPD_parallel_master_taskloop_simd:
10764 case OMPD_requires:
10765 case OMPD_metadirective:
10766 case OMPD_unknown:
10767 default:
10768 llvm_unreachable("Unknown target directive for OpenMP device codegen.")::llvm::llvm_unreachable_internal("Unknown target directive for OpenMP device codegen."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10768)
;
10769 }
10770 return;
10771 }
10772
10773 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10774 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10775 return;
10776
10777 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10778 return;
10779 }
10780
10781 // If this is a lambda function, look into its body.
10782 if (const auto *L = dyn_cast<LambdaExpr>(S))
10783 S = L->getBody();
10784
10785 // Keep looking for target regions recursively.
10786 for (const Stmt *II : S->children())
10787 scanForTargetRegionsFunctions(II, ParentName);
10788}
10789
10790static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10791 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10792 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10793 if (!DevTy)
10794 return false;
10795 // Do not emit device_type(nohost) functions for the host.
10796 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10797 return true;
10798 // Do not emit device_type(host) functions for the device.
10799 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10800 return true;
10801 return false;
10802}
10803
10804bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10805 // If emitting code for the host, we do not process FD here. Instead we do
10806 // the normal code generation.
10807 if (!CGM.getLangOpts().OpenMPIsDevice) {
10808 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10809 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10810 CGM.getLangOpts().OpenMPIsDevice))
10811 return true;
10812 return false;
10813 }
10814
10815 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10816 // Try to detect target regions in the function.
10817 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10818 StringRef Name = CGM.getMangledName(GD);
10819 scanForTargetRegionsFunctions(FD->getBody(), Name);
10820 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10821 CGM.getLangOpts().OpenMPIsDevice))
10822 return true;
10823 }
10824
10825 // Do not to emit function if it is not marked as declare target.
10826 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10827 AlreadyEmittedTargetDecls.count(VD) == 0;
10828}
10829
10830bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10831 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10832 CGM.getLangOpts().OpenMPIsDevice))
10833 return true;
10834
10835 if (!CGM.getLangOpts().OpenMPIsDevice)
10836 return false;
10837
10838 // Check if there are Ctors/Dtors in this declaration and look for target
10839 // regions in it. We use the complete variant to produce the kernel name
10840 // mangling.
10841 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10842 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10843 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10844 StringRef ParentName =
10845 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10846 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10847 }
10848 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10849 StringRef ParentName =
10850 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10851 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10852 }
10853 }
10854
10855 // Do not to emit variable if it is not marked as declare target.
10856 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10857 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10858 cast<VarDecl>(GD.getDecl()));
10859 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10860 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10861 HasRequiresUnifiedSharedMemory)) {
10862 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10863 return true;
10864 }
10865 return false;
10866}
10867
10868void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10869 llvm::Constant *Addr) {
10870 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10871 !CGM.getLangOpts().OpenMPIsDevice)
10872 return;
10873
10874 // If we have host/nohost variables, they do not need to be registered.
10875 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10876 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10877 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10878 return;
10879
10880 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10881 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10882 if (!Res) {
10883 if (CGM.getLangOpts().OpenMPIsDevice) {
10884 // Register non-target variables being emitted in device code (debug info
10885 // may cause this).
10886 StringRef VarName = CGM.getMangledName(VD);
10887 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10888 }
10889 return;
10890 }
10891 // Register declare target variables.
10892 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10893 StringRef VarName;
10894 CharUnits VarSize;
10895 llvm::GlobalValue::LinkageTypes Linkage;
10896
10897 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10898 !HasRequiresUnifiedSharedMemory) {
10899 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10900 VarName = CGM.getMangledName(VD);
10901 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10902 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10903 assert(!VarSize.isZero() && "Expected non-zero size of the variable")(static_cast <bool> (!VarSize.isZero() && "Expected non-zero size of the variable"
) ? void (0) : __assert_fail ("!VarSize.isZero() && \"Expected non-zero size of the variable\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10903, __extension__
__PRETTY_FUNCTION__))
;
10904 } else {
10905 VarSize = CharUnits::Zero();
10906 }
10907 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10908 // Temp solution to prevent optimizations of the internal variables.
10909 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10910 // Do not create a "ref-variable" if the original is not also available
10911 // on the host.
10912 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10913 return;
10914 std::string RefName = getName({VarName, "ref"});
10915 if (!CGM.GetGlobalValue(RefName)) {
10916 llvm::Constant *AddrRef =
10917 getOrCreateInternalVariable(Addr->getType(), RefName);
10918 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10919 GVAddrRef->setConstant(/*Val=*/true);
10920 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10921 GVAddrRef->setInitializer(Addr);
10922 CGM.addCompilerUsedGlobal(GVAddrRef);
10923 }
10924 }
10925 } else {
10926 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10929, __extension__
__PRETTY_FUNCTION__))
10927 (*Res == OMPDeclareTargetDeclAttr::MT_To &&(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10929, __extension__
__PRETTY_FUNCTION__))
10928 HasRequiresUnifiedSharedMemory)) &&(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10929, __extension__
__PRETTY_FUNCTION__))
10929 "Declare target attribute must link or to with unified memory.")(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10929, __extension__
__PRETTY_FUNCTION__))
;
10930 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10931 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10932 else
10933 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10934
10935 if (CGM.getLangOpts().OpenMPIsDevice) {
10936 VarName = Addr->getName();
10937 Addr = nullptr;
10938 } else {
10939 VarName = getAddrOfDeclareTargetVar(VD).getName();
10940 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10941 }
10942 VarSize = CGM.getPointerSize();
10943 Linkage = llvm::GlobalValue::WeakAnyLinkage;
10944 }
10945
10946 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10947 VarName, Addr, VarSize, Flags, Linkage);
10948}
10949
10950bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10951 if (isa<FunctionDecl>(GD.getDecl()) ||
10952 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10953 return emitTargetFunctions(GD);
10954
10955 return emitTargetGlobalVariable(GD);
10956}
10957
10958void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10959 for (const VarDecl *VD : DeferredGlobalVariables) {
10960 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10961 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10962 if (!Res)
10963 continue;
10964 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10965 !HasRequiresUnifiedSharedMemory) {
10966 CGM.EmitGlobal(VD);
10967 } else {
10968 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10971, __extension__
__PRETTY_FUNCTION__))
10969 (*Res == OMPDeclareTargetDeclAttr::MT_To &&(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10971, __extension__
__PRETTY_FUNCTION__))
10970 HasRequiresUnifiedSharedMemory)) &&(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10971, __extension__
__PRETTY_FUNCTION__))
10971 "Expected link clause or to clause with unified memory.")(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10971, __extension__
__PRETTY_FUNCTION__))
;
10972 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10973 }
10974 }
10975}
10976
10977void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10978 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10979 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&(static_cast <bool> (isOpenMPTargetExecutionDirective(D
.getDirectiveKind()) && " Expected target-based directive."
) ? void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && \" Expected target-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10980, __extension__
__PRETTY_FUNCTION__))
10980 " Expected target-based directive.")(static_cast <bool> (isOpenMPTargetExecutionDirective(D
.getDirectiveKind()) && " Expected target-based directive."
) ? void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && \" Expected target-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10980, __extension__
__PRETTY_FUNCTION__))
;
10981}
10982
10983void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10984 for (const OMPClause *Clause : D->clauselists()) {
10985 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10986 HasRequiresUnifiedSharedMemory = true;
10987 } else if (const auto *AC =
10988 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10989 switch (AC->getAtomicDefaultMemOrderKind()) {
10990 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10991 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10992 break;
10993 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10994 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10995 break;
10996 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10997 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10998 break;
10999 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
11000 break;
11001 }
11002 }
11003 }
11004}
11005
11006llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11007 return RequiresAtomicOrdering;
11008}
11009
11010bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11011 LangAS &AS) {
11012 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11013 return false;
11014 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11015 switch(A->getAllocatorType()) {
11016 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11017 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11018 // Not supported, fallback to the default mem space.
11019 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11020 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11021 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11022 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11023 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11024 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11025 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11026 AS = LangAS::Default;
11027 return true;
11028 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11029 llvm_unreachable("Expected predefined allocator for the variables with the "::llvm::llvm_unreachable_internal("Expected predefined allocator for the variables with the "
"static storage.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11030
)
11030 "static storage.")::llvm::llvm_unreachable_internal("Expected predefined allocator for the variables with the "
"static storage.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11030
)
;
11031 }
11032 return false;
11033}
11034
11035bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11036 return HasRequiresUnifiedSharedMemory;
11037}
11038
11039CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11040 CodeGenModule &CGM)
11041 : CGM(CGM) {
11042 if (CGM.getLangOpts().OpenMPIsDevice) {
11043 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11044 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11045 }
11046}
11047
11048CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11049 if (CGM.getLangOpts().OpenMPIsDevice)
11050 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11051}
11052
11053bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11054 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11055 return true;
11056
11057 const auto *D = cast<FunctionDecl>(GD.getDecl());
11058 // Do not to emit function if it is marked as declare target as it was already
11059 // emitted.
11060 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11061 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11062 if (auto *F = dyn_cast_or_null<llvm::Function>(
11063 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11064 return !F->isDeclaration();
11065 return false;
11066 }
11067 return true;
11068 }
11069
11070 return !AlreadyEmittedTargetDecls.insert(D).second;
11071}
11072
11073llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11074 // If we don't have entries or if we are emitting code for the device, we
11075 // don't need to do anything.
11076 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11077 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11078 (OffloadEntriesInfoManager.empty() &&
11079 !HasEmittedDeclareTargetRegion &&
11080 !HasEmittedTargetRegion))
11081 return nullptr;
11082
11083 // Create and register the function that handles the requires directives.
11084 ASTContext &C = CGM.getContext();
11085
11086 llvm::Function *RequiresRegFn;
11087 {
11088 CodeGenFunction CGF(CGM);
11089 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11090 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11091 std::string ReqName = getName({"omp_offloading", "requires_reg"});
11092 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11093 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11094 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11095 // TODO: check for other requires clauses.
11096 // The requires directive takes effect only when a target region is
11097 // present in the compilation unit. Otherwise it is ignored and not
11098 // passed to the runtime. This avoids the runtime from throwing an error
11099 // for mismatching requires clauses across compilation units that don't
11100 // contain at least 1 target region.
11101 assert((HasEmittedTargetRegion ||(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11104, __extension__
__PRETTY_FUNCTION__))
11102 HasEmittedDeclareTargetRegion ||(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11104, __extension__
__PRETTY_FUNCTION__))
11103 !OffloadEntriesInfoManager.empty()) &&(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11104, __extension__
__PRETTY_FUNCTION__))
11104 "Target or declare target region expected.")(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11104, __extension__
__PRETTY_FUNCTION__))
;
11105 if (HasRequiresUnifiedSharedMemory)
11106 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11107 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11108 CGM.getModule(), OMPRTL___tgt_register_requires),
11109 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11110 CGF.FinishFunction();
11111 }
11112 return RequiresRegFn;
11113}
11114
11115void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11116 const OMPExecutableDirective &D,
11117 SourceLocation Loc,
11118 llvm::Function *OutlinedFn,
11119 ArrayRef<llvm::Value *> CapturedVars) {
11120 if (!CGF.HaveInsertPoint())
11121 return;
11122
11123 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11124 CodeGenFunction::RunCleanupsScope Scope(CGF);
11125
11126 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11127 llvm::Value *Args[] = {
11128 RTLoc,
11129 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11130 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11131 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11132 RealArgs.append(std::begin(Args), std::end(Args));
11133 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11134
11135 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11136 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11137 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11138}
11139
11140void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11141 const Expr *NumTeams,
11142 const Expr *ThreadLimit,
11143 SourceLocation Loc) {
11144 if (!CGF.HaveInsertPoint())
11145 return;
11146
11147 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11148
11149 llvm::Value *NumTeamsVal =
11150 NumTeams
11151 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11152 CGF.CGM.Int32Ty, /* isSigned = */ true)
11153 : CGF.Builder.getInt32(0);
11154
11155 llvm::Value *ThreadLimitVal =
11156 ThreadLimit
11157 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11158 CGF.CGM.Int32Ty, /* isSigned = */ true)
11159 : CGF.Builder.getInt32(0);
11160
11161 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11162 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11163 ThreadLimitVal};
11164 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11165 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11166 PushNumTeamsArgs);
11167}
11168
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
  // Emits the paired __tgt_target_data_begin_mapper /
  // __tgt_target_data_end_mapper runtime calls that bracket a 'target data'
  // region, with the region body (CodeGen) emitted in between — or duplicated
  // inside the begin/else callbacks when device-pointer privatization is
  // required (Info.CaptureDeviceAddrMap non-empty).
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // NOTE(review): the static analyzer reports a temporary RegionCodeGenTy
  // that still refers to a stack-local action object when returning to the
  // caller. The callbacks built below appear to be invoked before this
  // function returns, so the reference presumably never dangles — confirm
  // against RegionCodeGenTy/emitIfClause lifetime rules.

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info);

    // Emit device ID if any; OMP_DEVICEID_UNDEF means "default device".
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    // The arrays filled in by BeginThenGen must exist by the time the close
    // is emitted.
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::Value *BasePointersArrayArg = nullptr;
    llvm::Value *PointersArrayArg = nullptr;
    llvm::Value *SizesArrayArg = nullptr;
    llvm::Value *MapTypesArrayArg = nullptr;
    llvm::Value *MapNamesArrayArg = nullptr;
    llvm::Value *MappersArrayArg = nullptr;
    emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
                                 SizesArrayArg, MapTypesArrayArg,
                                 MapNamesArrayArg, MappersArrayArg, Info,
                                 {/*ForEndCall=*/true});

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     BasePointersArrayArg,
                                     PointersArrayArg,
                                     SizesArrayArg,
                                     MapTypesArrayArg,
                                     MapNamesArrayArg,
                                     MappersArrayArg};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause evaluates
  // to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
11322
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Emits a single __tgt_target_data_{begin,end,update}[_nowait]_mapper call
  // for the standalone 'target enter data' / 'target exit data' /
  // 'target update' directives, honoring the 'if' and 'device' clauses and
  // wrapping the call in an outer task when 'depend' or 'nowait' is present.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // Filled in by TargetThenGen and read (by reference) by ThenGen, which may
  // run later as the body of an outer task.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any; OMP_DEVICEID_UNDEF means "default device".
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All other directive kinds are impossible here (checked by the assert at
    // function entry); the exhaustive list keeps -Wswitch coverage.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  // Builds the offloading arrays from the map clauses, then emits ThenGen
  // either inline or as the body of a task-based directive.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    // 'depend' or 'nowait' requires the runtime call to run inside a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});
    // Publish the array addresses for ThenGen via the by-reference captures.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    // Nothing is emitted when the 'if' clause evaluates to false.
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
11503
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
/// Attribute set of the parameter.
struct ParamAttrTy {
  /// Classification of the parameter; defaults to Vector when no clause
  /// names it.
  ParamKindTy Kind = Vector;
  /// For linear parameters this is emitted as the step in the mangled name;
  /// the name suggests it may also carry an argument/position value for the
  /// variable-stride case — confirm at the site that fills it in.
  llvm::APSInt StrideOrArg;
  /// Alignment value; emitted as the 'a<N>' suffix in the mangled name when
  /// non-zero.
  llvm::APSInt Alignment;
};
} // namespace
11514
11515static unsigned evaluateCDTSize(const FunctionDecl *FD,
11516 ArrayRef<ParamAttrTy> ParamAttrs) {
11517 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11518 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11519 // of that clause. The VLEN value must be power of 2.
11520 // In other case the notion of the function`s "characteristic data type" (CDT)
11521 // is used to compute the vector length.
11522 // CDT is defined in the following order:
11523 // a) For non-void function, the CDT is the return type.
11524 // b) If the function has any non-uniform, non-linear parameters, then the
11525 // CDT is the type of the first such parameter.
11526 // c) If the CDT determined by a) or b) above is struct, union, or class
11527 // type which is pass-by-value (except for the type that maps to the
11528 // built-in complex data type), the characteristic data type is int.
11529 // d) If none of the above three cases is applicable, the CDT is int.
11530 // The VLEN is then determined based on the CDT and the size of vector
11531 // register of that ISA for which current vector version is generated. The
11532 // VLEN is computed using the formula below:
11533 // VLEN = sizeof(vector_register) / sizeof(CDT),
11534 // where vector register size specified in section 3.2.1 Registers and the
11535 // Stack Frame of original AMD64 ABI document.
11536 QualType RetType = FD->getReturnType();
11537 if (RetType.isNull())
11538 return 0;
11539 ASTContext &C = FD->getASTContext();
11540 QualType CDT;
11541 if (!RetType.isNull() && !RetType->isVoidType()) {
11542 CDT = RetType;
11543 } else {
11544 unsigned Offset = 0;
11545 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11546 if (ParamAttrs[Offset].Kind == Vector)
11547 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11548 ++Offset;
11549 }
11550 if (CDT.isNull()) {
11551 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11552 if (ParamAttrs[I + Offset].Kind == Vector) {
11553 CDT = FD->getParamDecl(I)->getType();
11554 break;
11555 }
11556 }
11557 }
11558 }
11559 if (CDT.isNull())
11560 CDT = C.IntTy;
11561 CDT = CDT->getCanonicalTypeUnqualified();
11562 if (CDT->isRecordType() || CDT->isUnionType())
11563 CDT = C.IntTy;
11564 return C.getTypeSize(CDT);
11565}
11566
11567static void
11568emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11569 const llvm::APSInt &VLENVal,
11570 ArrayRef<ParamAttrTy> ParamAttrs,
11571 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11572 struct ISADataTy {
11573 char ISA;
11574 unsigned VecRegSize;
11575 };
11576 ISADataTy ISAData[] = {
11577 {
11578 'b', 128
11579 }, // SSE
11580 {
11581 'c', 256
11582 }, // AVX
11583 {
11584 'd', 256
11585 }, // AVX2
11586 {
11587 'e', 512
11588 }, // AVX512
11589 };
11590 llvm::SmallVector<char, 2> Masked;
11591 switch (State) {
11592 case OMPDeclareSimdDeclAttr::BS_Undefined:
11593 Masked.push_back('N');
11594 Masked.push_back('M');
11595 break;
11596 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11597 Masked.push_back('N');
11598 break;
11599 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11600 Masked.push_back('M');
11601 break;
11602 }
11603 for (char Mask : Masked) {
11604 for (const ISADataTy &Data : ISAData) {
11605 SmallString<256> Buffer;
11606 llvm::raw_svector_ostream Out(Buffer);
11607 Out << "_ZGV" << Data.ISA << Mask;
11608 if (!VLENVal) {
11609 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11610 assert(NumElts && "Non-zero simdlen/cdtsize expected")(static_cast <bool> (NumElts && "Non-zero simdlen/cdtsize expected"
) ? void (0) : __assert_fail ("NumElts && \"Non-zero simdlen/cdtsize expected\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11610, __extension__
__PRETTY_FUNCTION__))
;
11611 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11612 } else {
11613 Out << VLENVal;
11614 }
11615 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11616 switch (ParamAttr.Kind){
11617 case LinearWithVarStride:
11618 Out << 's' << ParamAttr.StrideOrArg;
11619 break;
11620 case Linear:
11621 Out << 'l';
11622 if (ParamAttr.StrideOrArg != 1)
11623 Out << ParamAttr.StrideOrArg;
11624 break;
11625 case Uniform:
11626 Out << 'u';
11627 break;
11628 case Vector:
11629 Out << 'v';
11630 break;
11631 }
11632 if (!!ParamAttr.Alignment)
11633 Out << 'a' << ParamAttr.Alignment;
11634 }
11635 Out << '_' << Fn->getName();
11636 Fn->addFnAttr(Out.str());
11637 }
11638 }
11639}
11640
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11646
11647/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11648///
11649/// TODO: Need to implement the behavior for reference marked with a
11650/// var or no linear modifiers (1.b in the section). For this, we
11651/// need to extend ParamKindTy to support the linear modifiers.
11652static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11653 QT = QT.getCanonicalType();
11654
11655 if (QT->isVoidType())
11656 return false;
11657
11658 if (Kind == ParamKindTy::Uniform)
11659 return false;
11660
11661 if (Kind == ParamKindTy::Linear)
11662 return false;
11663
11664 // TODO: Handle linear references with modifiers
11665
11666 if (Kind == ParamKindTy::LinearWithVarStride)
11667 return false;
11668
11669 return true;
11670}
11671
11672/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11673static bool getAArch64PBV(QualType QT, ASTContext &C) {
11674 QT = QT.getCanonicalType();
11675 unsigned Size = C.getTypeSize(QT);
11676
11677 // Only scalars and complex within 16 bytes wide set PVB to true.
11678 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11679 return false;
11680
11681 if (QT->isFloatingType())
11682 return true;
11683
11684 if (QT->isIntegerType())
11685 return true;
11686
11687 if (QT->isPointerType())
11688 return true;
11689
11690 // TODO: Add support for complex types (section 3.1.2, item 2).
11691
11692 return false;
11693}
11694
11695/// Computes the lane size (LS) of a return type or of an input parameter,
11696/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11697/// TODO: Add support for references, section 3.2.1, item 1.
11698static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11699 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11700 QualType PTy = QT.getCanonicalType()->getPointeeType();
11701 if (getAArch64PBV(PTy, C))
11702 return C.getTypeSize(PTy);
11703 }
11704 if (getAArch64PBV(QT, C))
11705 return C.getTypeSize(QT);
11706
11707 return C.getTypeSize(C.getUIntPtrType());
11708}
11709
11710// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11711// signature of the scalar function, as defined in 3.2.2 of the
11712// AAVFABI.
11713static std::tuple<unsigned, unsigned, bool>
11714getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11715 QualType RetType = FD->getReturnType().getCanonicalType();
11716
11717 ASTContext &C = FD->getASTContext();
11718
11719 bool OutputBecomesInput = false;
11720
11721 llvm::SmallVector<unsigned, 8> Sizes;
11722 if (!RetType->isVoidType()) {
11723 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11724 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11725 OutputBecomesInput = true;
11726 }
11727 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11728 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11729 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11730 }
11731
11732 assert(!Sizes.empty() && "Unable to determine NDS and WDS.")(static_cast <bool> (!Sizes.empty() && "Unable to determine NDS and WDS."
) ? void (0) : __assert_fail ("!Sizes.empty() && \"Unable to determine NDS and WDS.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11732, __extension__
__PRETTY_FUNCTION__))
;
11733 // The LS of a function parameter / return value can only be a power
11734 // of 2, starting from 8 bits, up to 128.
11735 assert(llvm::all_of(Sizes,(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
|| Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11740, __extension__
__PRETTY_FUNCTION__))
11736 [](unsigned Size) {(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
|| Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11740, __extension__
__PRETTY_FUNCTION__))
11737 return Size == 8 || Size == 16 || Size == 32 ||(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
|| Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11740, __extension__
__PRETTY_FUNCTION__))
11738 Size == 64 || Size == 128;(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
|| Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11740, __extension__
__PRETTY_FUNCTION__))
11739 }) &&(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
|| Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11740, __extension__
__PRETTY_FUNCTION__))
11740 "Invalid size")(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
|| Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11740, __extension__
__PRETTY_FUNCTION__))
;
11741
11742 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11743 *std::max_element(std::begin(Sizes), std::end(Sizes)),
11744 OutputBecomesInput);
11745}
11746
11747/// Mangle the parameter part of the vector function name according to
11748/// their OpenMP classification. The mangling function is defined in
11749/// section 3.5 of the AAVFABI.
11750static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11751 SmallString<256> Buffer;
11752 llvm::raw_svector_ostream Out(Buffer);
11753 for (const auto &ParamAttr : ParamAttrs) {
11754 switch (ParamAttr.Kind) {
11755 case LinearWithVarStride:
11756 Out << "ls" << ParamAttr.StrideOrArg;
11757 break;
11758 case Linear:
11759 Out << 'l';
11760 // Don't print the step value if it is not present or if it is
11761 // equal to 1.
11762 if (ParamAttr.StrideOrArg != 1)
11763 Out << ParamAttr.StrideOrArg;
11764 break;
11765 case Uniform:
11766 Out << 'u';
11767 break;
11768 case Vector:
11769 Out << 'v';
11770 break;
11771 }
11772
11773 if (!!ParamAttr.Alignment)
11774 Out << 'a' << ParamAttr.Alignment;
11775 }
11776
11777 return std::string(Out.str());
11778}
11779
11780// Function used to add the attribute. The parameter `VLEN` is
11781// templated to allow the use of "x" when targeting scalable functions
11782// for SVE.
11783template <typename T>
11784static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11785 char ISA, StringRef ParSeq,
11786 StringRef MangledName, bool OutputBecomesInput,
11787 llvm::Function *Fn) {
11788 SmallString<256> Buffer;
11789 llvm::raw_svector_ostream Out(Buffer);
11790 Out << Prefix << ISA << LMask << VLEN;
11791 if (OutputBecomesInput)
11792 Out << "v";
11793 Out << ParSeq << "_" << MangledName;
11794 Fn->addFnAttr(Out.str());
11795}
11796
11797// Helper function to generate the Advanced SIMD names depending on
11798// the value of the NDS when simdlen is not present.
11799static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11800 StringRef Prefix, char ISA,
11801 StringRef ParSeq, StringRef MangledName,
11802 bool OutputBecomesInput,
11803 llvm::Function *Fn) {
11804 switch (NDS) {
11805 case 8:
11806 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11807 OutputBecomesInput, Fn);
11808 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11809 OutputBecomesInput, Fn);
11810 break;
11811 case 16:
11812 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11813 OutputBecomesInput, Fn);
11814 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11815 OutputBecomesInput, Fn);
11816 break;
11817 case 32:
11818 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11819 OutputBecomesInput, Fn);
11820 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11821 OutputBecomesInput, Fn);
11822 break;
11823 case 64:
11824 case 128:
11825 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11826 OutputBecomesInput, Fn);
11827 break;
11828 default:
11829 llvm_unreachable("Scalar type is too wide.")::llvm::llvm_unreachable_internal("Scalar type is too wide.",
"clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11829)
;
11830 }
11831}
11832
11833/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11834static void emitAArch64DeclareSimdFunction(
11835 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11836 ArrayRef<ParamAttrTy> ParamAttrs,
11837 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11838 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11839
11840 // Get basic data for building the vector signature.
11841 const auto Data = getNDSWDS(FD, ParamAttrs);
11842 const unsigned NDS = std::get<0>(Data);
11843 const unsigned WDS = std::get<1>(Data);
11844 const bool OutputBecomesInput = std::get<2>(Data);
11845
11846 // Check the values provided via `simdlen` by the user.
11847 // 1. A `simdlen(1)` doesn't produce vector signatures,
11848 if (UserVLEN == 1) {
11849 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11850 DiagnosticsEngine::Warning,
11851 "The clause simdlen(1) has no effect when targeting aarch64.");
11852 CGM.getDiags().Report(SLoc, DiagID);
11853 return;
11854 }
11855
11856 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11857 // Advanced SIMD output.
11858 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11859 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11860 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11861 "power of 2 when targeting Advanced SIMD.");
11862 CGM.getDiags().Report(SLoc, DiagID);
11863 return;
11864 }
11865
11866 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11867 // limits.
11868 if (ISA == 's' && UserVLEN != 0) {
11869 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11870 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11871 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11872 "lanes in the architectural constraints "
11873 "for SVE (min is 128-bit, max is "
11874 "2048-bit, by steps of 128-bit)");
11875 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11876 return;
11877 }
11878 }
11879
11880 // Sort out parameter sequence.
11881 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11882 StringRef Prefix = "_ZGV";
11883 // Generate simdlen from user input (if any).
11884 if (UserVLEN) {
11885 if (ISA == 's') {
11886 // SVE generates only a masked function.
11887 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11888 OutputBecomesInput, Fn);
11889 } else {
11890 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.")(static_cast <bool> (ISA == 'n' && "Expected ISA either 's' or 'n'."
) ? void (0) : __assert_fail ("ISA == 'n' && \"Expected ISA either 's' or 'n'.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11890, __extension__
__PRETTY_FUNCTION__))
;
11891 // Advanced SIMD generates one or two functions, depending on
11892 // the `[not]inbranch` clause.
11893 switch (State) {
11894 case OMPDeclareSimdDeclAttr::BS_Undefined:
11895 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11896 OutputBecomesInput, Fn);
11897 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11898 OutputBecomesInput, Fn);
11899 break;
11900 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11901 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11902 OutputBecomesInput, Fn);
11903 break;
11904 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11905 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11906 OutputBecomesInput, Fn);
11907 break;
11908 }
11909 }
11910 } else {
11911 // If no user simdlen is provided, follow the AAVFABI rules for
11912 // generating the vector length.
11913 if (ISA == 's') {
11914 // SVE, section 3.4.1, item 1.
11915 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11916 OutputBecomesInput, Fn);
11917 } else {
11918 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.")(static_cast <bool> (ISA == 'n' && "Expected ISA either 's' or 'n'."
) ? void (0) : __assert_fail ("ISA == 'n' && \"Expected ISA either 's' or 'n'.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11918, __extension__
__PRETTY_FUNCTION__))
;
11919 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11920 // two vector names depending on the use of the clause
11921 // `[not]inbranch`.
11922 switch (State) {
11923 case OMPDeclareSimdDeclAttr::BS_Undefined:
11924 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11925 OutputBecomesInput, Fn);
11926 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11927 OutputBecomesInput, Fn);
11928 break;
11929 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11930 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11931 OutputBecomesInput, Fn);
11932 break;
11933 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11934 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11935 OutputBecomesInput, Fn);
11936 break;
11937 }
11938 }
11939 }
11940}
11941
11942void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11943 llvm::Function *Fn) {
11944 ASTContext &C = CGM.getContext();
11945 FD = FD->getMostRecentDecl();
11946 while (FD) {
11947 // Map params to their positions in function decl.
11948 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11949 if (isa<CXXMethodDecl>(FD))
11950 ParamPositions.try_emplace(FD, 0);
11951 unsigned ParamPos = ParamPositions.size();
11952 for (const ParmVarDecl *P : FD->parameters()) {
11953 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11954 ++ParamPos;
11955 }
11956 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11957 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11958 // Mark uniform parameters.
11959 for (const Expr *E : Attr->uniforms()) {
11960 E = E->IgnoreParenImpCasts();
11961 unsigned Pos;
11962 if (isa<CXXThisExpr>(E)) {
11963 Pos = ParamPositions[FD];
11964 } else {
11965 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11966 ->getCanonicalDecl();
11967 auto It = ParamPositions.find(PVD);
11968 assert(It != ParamPositions.end() && "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11968, __extension__
__PRETTY_FUNCTION__))
;
11969 Pos = It->second;
11970 }
11971 ParamAttrs[Pos].Kind = Uniform;
11972 }
11973 // Get alignment info.
11974 auto *NI = Attr->alignments_begin();
11975 for (const Expr *E : Attr->aligneds()) {
11976 E = E->IgnoreParenImpCasts();
11977 unsigned Pos;
11978 QualType ParmTy;
11979 if (isa<CXXThisExpr>(E)) {
11980 Pos = ParamPositions[FD];
11981 ParmTy = E->getType();
11982 } else {
11983 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11984 ->getCanonicalDecl();
11985 auto It = ParamPositions.find(PVD);
11986 assert(It != ParamPositions.end() && "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11986, __extension__
__PRETTY_FUNCTION__))
;
11987 Pos = It->second;
11988 ParmTy = PVD->getType();
11989 }
11990 ParamAttrs[Pos].Alignment =
11991 (*NI)
11992 ? (*NI)->EvaluateKnownConstInt(C)
11993 : llvm::APSInt::getUnsigned(
11994 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11995 .getQuantity());
11996 ++NI;
11997 }
11998 // Mark linear parameters.
11999 auto *SI = Attr->steps_begin();
12000 for (const Expr *E : Attr->linears()) {
12001 E = E->IgnoreParenImpCasts();
12002 unsigned Pos;
12003 // Rescaling factor needed to compute the linear parameter
12004 // value in the mangled name.
12005 unsigned PtrRescalingFactor = 1;
12006 if (isa<CXXThisExpr>(E)) {
12007 Pos = ParamPositions[FD];
12008 } else {
12009 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12010 ->getCanonicalDecl();
12011 auto It = ParamPositions.find(PVD);
12012 assert(It != ParamPositions.end() && "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12012, __extension__
__PRETTY_FUNCTION__))
;
12013 Pos = It->second;
12014 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12015 PtrRescalingFactor = CGM.getContext()
12016 .getTypeSizeInChars(P->getPointeeType())
12017 .getQuantity();
12018 }
12019 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12020 ParamAttr.Kind = Linear;
12021 // Assuming a stride of 1, for `linear` without modifiers.
12022 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12023 if (*SI) {
12024 Expr::EvalResult Result;
12025 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12026 if (const auto *DRE =
12027 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12028 if (const auto *StridePVD =
12029 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
12030 ParamAttr.Kind = LinearWithVarStride;
12031 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
12032 assert(It != ParamPositions.end() &&(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12033, __extension__
__PRETTY_FUNCTION__))
12033 "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12033, __extension__
__PRETTY_FUNCTION__))
;
12034 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
12035 }
12036 }
12037 } else {
12038 ParamAttr.StrideOrArg = Result.Val.getInt();
12039 }
12040 }
12041 // If we are using a linear clause on a pointer, we need to
12042 // rescale the value of linear_step with the byte size of the
12043 // pointee type.
12044 if (Linear == ParamAttr.Kind)
12045 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12046 ++SI;
12047 }
12048 llvm::APSInt VLENVal;
12049 SourceLocation ExprLoc;
12050 const Expr *VLENExpr = Attr->getSimdlen();
12051 if (VLENExpr) {
12052 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12053 ExprLoc = VLENExpr->getExprLoc();
12054 }
12055 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12056 if (CGM.getTriple().isX86()) {
12057 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12058 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12059 unsigned VLEN = VLENVal.getExtValue();
12060 StringRef MangledName = Fn->getName();
12061 if (CGM.getTarget().hasFeature("sve"))
12062 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12063 MangledName, 's', 128, Fn, ExprLoc);
12064 if (CGM.getTarget().hasFeature("neon"))
12065 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12066 MangledName, 'n', 128, Fn, ExprLoc);
12067 }
12068 }
12069 FD = FD->getPreviousDecl();
12070 }
12071}
12072
12073namespace {
12074/// Cleanup action for doacross support.
12075class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12076public:
12077 static const int DoacrossFinArgs = 2;
12078
12079private:
12080 llvm::FunctionCallee RTLFn;
12081 llvm::Value *Args[DoacrossFinArgs];
12082
12083public:
12084 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12085 ArrayRef<llvm::Value *> CallArgs)
12086 : RTLFn(RTLFn) {
12087 assert(CallArgs.size() == DoacrossFinArgs)(static_cast <bool> (CallArgs.size() == DoacrossFinArgs
) ? void (0) : __assert_fail ("CallArgs.size() == DoacrossFinArgs"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12087, __extension__
__PRETTY_FUNCTION__))
;
12088 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12089 }
12090 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12091 if (!CGF.HaveInsertPoint())
12092 return;
12093 CGF.EmitRuntimeCall(RTLFn, Args);
12094 }
12095};
12096} // namespace
12097
12098void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12099 const OMPLoopDirective &D,
12100 ArrayRef<Expr *> NumIterations) {
12101 if (!CGF.HaveInsertPoint())
12102 return;
12103
12104 ASTContext &C = CGM.getContext();
12105 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12106 RecordDecl *RD;
12107 if (KmpDimTy.isNull()) {
12108 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12109 // kmp_int64 lo; // lower
12110 // kmp_int64 up; // upper
12111 // kmp_int64 st; // stride
12112 // };
12113 RD = C.buildImplicitRecord("kmp_dim");
12114 RD->startDefinition();
12115 addFieldToRecordDecl(C, RD, Int64Ty);
12116 addFieldToRecordDecl(C, RD, Int64Ty);
12117 addFieldToRecordDecl(C, RD, Int64Ty);
12118 RD->completeDefinition();
12119 KmpDimTy = C.getRecordType(RD);
12120 } else {
12121 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12122 }
12123 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12124 QualType ArrayTy =
12125 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12126
12127 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12128 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12129 enum { LowerFD = 0, UpperFD, StrideFD };
12130 // Fill dims with data.
12131 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12132 LValue DimsLVal = CGF.MakeAddrLValue(
12133 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12134 // dims.upper = num_iterations;
12135 LValue UpperLVal = CGF.EmitLValueForField(
12136 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12137 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12138 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12139 Int64Ty, NumIterations[I]->getExprLoc());
12140 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12141 // dims.stride = 1;
12142 LValue StrideLVal = CGF.EmitLValueForField(
12143 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12144 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12145 StrideLVal);
12146 }
12147
12148 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12149 // kmp_int32 num_dims, struct kmp_dim * dims);
12150 llvm::Value *Args[] = {
12151 emitUpdateLocation(CGF, D.getBeginLoc()),
12152 getThreadID(CGF, D.getBeginLoc()),
12153 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12154 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12155 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12156 CGM.VoidPtrTy)};
12157
12158 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12159 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12160 CGF.EmitRuntimeCall(RTLFn, Args);
12161 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12162 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12163 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12164 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12165 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12166 llvm::makeArrayRef(FiniArgs));
12167}
12168
12169void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12170 const OMPDependClause *C) {
12171 QualType Int64Ty =
12172 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12173 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12174 QualType ArrayTy = CGM.getContext().getConstantArrayType(
12175 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12176 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12177 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12178 const Expr *CounterVal = C->getLoopData(I);
12179 assert(CounterVal)(static_cast <bool> (CounterVal) ? void (0) : __assert_fail
("CounterVal", "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12179
, __extension__ __PRETTY_FUNCTION__))
;
12180 llvm::Value *CntVal = CGF.EmitScalarConversion(
12181 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12182 CounterVal->getExprLoc());
12183 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12184 /*Volatile=*/false, Int64Ty);
12185 }
12186 llvm::Value *Args[] = {
12187 emitUpdateLocation(CGF, C->getBeginLoc()),
12188 getThreadID(CGF, C->getBeginLoc()),
12189 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12190 llvm::FunctionCallee RTLFn;
12191 if (C->getDependencyKind() == OMPC_DEPEND_source) {
12192 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12193 OMPRTL___kmpc_doacross_post);
12194 } else {
12195 assert(C->getDependencyKind() == OMPC_DEPEND_sink)(static_cast <bool> (C->getDependencyKind() == OMPC_DEPEND_sink
) ? void (0) : __assert_fail ("C->getDependencyKind() == OMPC_DEPEND_sink"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12195, __extension__
__PRETTY_FUNCTION__))
;
12196 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12197 OMPRTL___kmpc_doacross_wait);
12198 }
12199 CGF.EmitRuntimeCall(RTLFn, Args);
12200}
12201
12202void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12203 llvm::FunctionCallee Callee,
12204 ArrayRef<llvm::Value *> Args) const {
12205 assert(Loc.isValid() && "Outlined function call location must be valid.")(static_cast <bool> (Loc.isValid() && "Outlined function call location must be valid."
) ? void (0) : __assert_fail ("Loc.isValid() && \"Outlined function call location must be valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12205, __extension__
__PRETTY_FUNCTION__))
;
12206 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12207
12208 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12209 if (Fn->doesNotThrow()) {
12210 CGF.EmitNounwindRuntimeCall(Fn, Args);
12211 return;
12212 }
12213 }
12214 CGF.EmitRuntimeCall(Callee, Args);
12215}
12216
12217void CGOpenMPRuntime::emitOutlinedFunctionCall(
12218 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12219 ArrayRef<llvm::Value *> Args) const {
12220 emitCall(CGF, Loc, OutlinedFn, Args);
12221}
12222
12223void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12224 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12225 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12226 HasEmittedDeclareTargetRegion = true;
12227}
12228
12229Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12230 const VarDecl *NativeParam,
12231 const VarDecl *TargetParam) const {
12232 return CGF.GetAddrOfLocalVar(NativeParam);
12233}
12234
12235/// Return allocator value from expression, or return a null allocator (default
12236/// when no allocator specified).
12237static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12238 const Expr *Allocator) {
12239 llvm::Value *AllocVal;
12240 if (Allocator) {
12241 AllocVal = CGF.EmitScalarExpr(Allocator);
12242 // According to the standard, the original allocator type is a enum
12243 // (integer). Convert to pointer type, if required.
12244 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12245 CGF.getContext().VoidPtrTy,
12246 Allocator->getExprLoc());
12247 } else {
12248 // If no allocator specified, it defaults to the null allocator.
12249 AllocVal = llvm::Constant::getNullValue(
12250 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12251 }
12252 return AllocVal;
12253}
12254
12255Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12256 const VarDecl *VD) {
12257 if (!VD)
12258 return Address::invalid();
12259 Address UntiedAddr = Address::invalid();
12260 Address UntiedRealAddr = Address::invalid();
12261 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12262 if (It != FunctionToUntiedTaskStackMap.end()) {
12263 const UntiedLocalVarsAddressesMap &UntiedData =
12264 UntiedLocalVarsStack[It->second];
12265 auto I = UntiedData.find(VD);
12266 if (I != UntiedData.end()) {
12267 UntiedAddr = I->second.first;
12268 UntiedRealAddr = I->second.second;
12269 }
12270 }
12271 const VarDecl *CVD = VD->getCanonicalDecl();
12272 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12273 // Use the default allocation.
12274 if (!isAllocatableDecl(VD))
12275 return UntiedAddr;
12276 llvm::Value *Size;
12277 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12278 if (CVD->getType()->isVariablyModifiedType()) {
12279 Size = CGF.getTypeSize(CVD->getType());
12280 // Align the size: ((size + align - 1) / align) * align
12281 Size = CGF.Builder.CreateNUWAdd(
12282 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12283 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12284 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
12285 } else {
12286 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12287 Size = CGM.getSize(Sz.alignTo(Align));
12288 }
12289 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12290 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12291 const Expr *Allocator = AA->getAllocator();
12292 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12293 llvm::Value *Alignment =
12294 AA->getAlignment()
12295 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
12296 CGM.SizeTy, /*isSigned=*/false)
12297 : nullptr;
12298 SmallVector<llvm::Value *, 4> Args;
12299 Args.push_back(ThreadID);
12300 if (Alignment)
12301 Args.push_back(Alignment);
12302 Args.push_back(Size);
12303 Args.push_back(AllocVal);
12304 llvm::omp::RuntimeFunction FnID =
12305 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12306 llvm::Value *Addr = CGF.EmitRuntimeCall(
12307 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12308 getName({CVD->getName(), ".void.addr"}));
12309 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12310 CGM.getModule(), OMPRTL___kmpc_free);
12311 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12312 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12313 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12314 if (UntiedAddr.isValid())
12315 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12316
12317 // Cleanup action for allocate support.
12318 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12319 llvm::FunctionCallee RTLFn;
12320 SourceLocation::UIntTy LocEncoding;
12321 Address Addr;
12322 const Expr *AllocExpr;
12323
12324 public:
12325 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12326 SourceLocation::UIntTy LocEncoding, Address Addr,
12327 const Expr *AllocExpr)
12328 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12329 AllocExpr(AllocExpr) {}
12330 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12331 if (!CGF.HaveInsertPoint())
12332 return;
12333 llvm::Value *Args[3];
12334 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12335 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12336 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12337 Addr.getPointer(), CGF.VoidPtrTy);
12338 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12339 Args[2] = AllocVal;
12340 CGF.EmitRuntimeCall(RTLFn, Args);
12341 }
12342 };
12343 Address VDAddr =
12344 UntiedRealAddr.isValid()
12345 ? UntiedRealAddr
12346 : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
12347 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12348 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12349 VDAddr, Allocator);
12350 if (UntiedRealAddr.isValid())
12351 if (auto *Region =
12352 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12353 Region->emitUntiedSwitch(CGF);
12354 return VDAddr;
12355 }
12356 return UntiedAddr;
12357}
12358
12359bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12360 const VarDecl *VD) const {
12361 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12362 if (It == FunctionToUntiedTaskStackMap.end())
12363 return false;
12364 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12365}
12366
12367CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12368 CodeGenModule &CGM, const OMPLoopDirective &S)
12369 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12370 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12370, __extension__
__PRETTY_FUNCTION__))
;
12371 if (!NeedToPush)
12372 return;
12373 NontemporalDeclsSet &DS =
12374 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12375 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12376 for (const Stmt *Ref : C->private_refs()) {
12377 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12378 const ValueDecl *VD;
12379 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12380 VD = DRE->getDecl();
12381 } else {
12382 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12383 assert((ME->isImplicitCXXThis() ||(static_cast <bool> ((ME->isImplicitCXXThis() || isa
<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts(
))) && "Expected member of current class.") ? void (0
) : __assert_fail ("(ME->isImplicitCXXThis() || isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && \"Expected member of current class.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12385, __extension__
__PRETTY_FUNCTION__))
12384 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&(static_cast <bool> ((ME->isImplicitCXXThis() || isa
<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts(
))) && "Expected member of current class.") ? void (0
) : __assert_fail ("(ME->isImplicitCXXThis() || isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && \"Expected member of current class.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12385, __extension__
__PRETTY_FUNCTION__))
12385 "Expected member of current class.")(static_cast <bool> ((ME->isImplicitCXXThis() || isa
<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts(
))) && "Expected member of current class.") ? void (0
) : __assert_fail ("(ME->isImplicitCXXThis() || isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && \"Expected member of current class.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12385, __extension__
__PRETTY_FUNCTION__))
;
12386 VD = ME->getMemberDecl();
12387 }
12388 DS.insert(VD);
12389 }
12390 }
12391}
12392
12393CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12394 if (!NeedToPush)
12395 return;
12396 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12397}
12398
12399CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12400 CodeGenFunction &CGF,
12401 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12402 std::pair<Address, Address>> &LocalVars)
12403 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12404 if (!NeedToPush)
12405 return;
12406 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12407 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12408 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12409}
12410
12411CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12412 if (!NeedToPush)
12413 return;
12414 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12415}
12416
12417bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12418 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12418, __extension__
__PRETTY_FUNCTION__))
;
12419
12420 return llvm::any_of(
12421 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12422 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12423}
12424
12425void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12426 const OMPExecutableDirective &S,
12427 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12428 const {
12429 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12430 // Vars in target/task regions must be excluded completely.
12431 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12432 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12433 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12434 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12435 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12436 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12437 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12438 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12439 }
12440 }
12441 // Exclude vars in private clauses.
12442 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12443 for (const Expr *Ref : C->varlists()) {
12444 if (!Ref->getType()->isScalarType())
12445 continue;
12446 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12447 if (!DRE)
12448 continue;
12449 NeedToCheckForLPCs.insert(DRE->getDecl());
12450 }
12451 }
12452 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12453 for (const Expr *Ref : C->varlists()) {
12454 if (!Ref->getType()->isScalarType())
12455 continue;
12456 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12457 if (!DRE)
12458 continue;
12459 NeedToCheckForLPCs.insert(DRE->getDecl());
12460 }
12461 }
12462 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12463 for (const Expr *Ref : C->varlists()) {
12464 if (!Ref->getType()->isScalarType())
12465 continue;
12466 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12467 if (!DRE)
12468 continue;
12469 NeedToCheckForLPCs.insert(DRE->getDecl());
12470 }
12471 }
12472 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12473 for (const Expr *Ref : C->varlists()) {
12474 if (!Ref->getType()->isScalarType())
12475 continue;
12476 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12477 if (!DRE)
12478 continue;
12479 NeedToCheckForLPCs.insert(DRE->getDecl());
12480 }
12481 }
12482 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12483 for (const Expr *Ref : C->varlists()) {
12484 if (!Ref->getType()->isScalarType())
12485 continue;
12486 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12487 if (!DRE)
12488 continue;
12489 NeedToCheckForLPCs.insert(DRE->getDecl());
12490 }
12491 }
12492 for (const Decl *VD : NeedToCheckForLPCs) {
12493 for (const LastprivateConditionalData &Data :
12494 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12495 if (Data.DeclToUniqueName.count(VD) > 0) {
12496 if (!Data.Disabled)
12497 NeedToAddForLPCsAsDisabled.insert(VD);
12498 break;
12499 }
12500 }
12501 }
12502}
12503
12504CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12505 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12506 : CGM(CGF.CGM),
12507 Action((CGM.getLangOpts().OpenMP >= 50 &&
12508 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12509 [](const OMPLastprivateClause *C) {
12510 return C->getKind() ==
12511 OMPC_LASTPRIVATE_conditional;
12512 }))
12513 ? ActionToDo::PushAsLastprivateConditional
12514 : ActionToDo::DoNotPush) {
12515 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12515, __extension__
__PRETTY_FUNCTION__))
;
12516 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12517 return;
12518 assert(Action == ActionToDo::PushAsLastprivateConditional &&(static_cast <bool> (Action == ActionToDo::PushAsLastprivateConditional
&& "Expected a push action.") ? void (0) : __assert_fail
("Action == ActionToDo::PushAsLastprivateConditional && \"Expected a push action.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12519, __extension__
__PRETTY_FUNCTION__))
12519 "Expected a push action.")(static_cast <bool> (Action == ActionToDo::PushAsLastprivateConditional
&& "Expected a push action.") ? void (0) : __assert_fail
("Action == ActionToDo::PushAsLastprivateConditional && \"Expected a push action.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12519, __extension__
__PRETTY_FUNCTION__))
;
12520 LastprivateConditionalData &Data =
12521 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12522 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12523 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12524 continue;
12525
12526 for (const Expr *Ref : C->varlists()) {
12527 Data.DeclToUniqueName.insert(std::make_pair(
12528 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12529 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12530 }
12531 }
12532 Data.IVLVal = IVLVal;
12533 Data.Fn = CGF.CurFn;
12534}
12535
12536CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12537 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12538 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12539 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12539, __extension__
__PRETTY_FUNCTION__))
;
12540 if (CGM.getLangOpts().OpenMP < 50)
12541 return;
12542 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12543 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12544 if (!NeedToAddForLPCsAsDisabled.empty()) {
12545 Action = ActionToDo::DisableLastprivateConditional;
12546 LastprivateConditionalData &Data =
12547 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12548 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12549 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12550 Data.Fn = CGF.CurFn;
12551 Data.Disabled = true;
12552 }
12553}
12554
12555CGOpenMPRuntime::LastprivateConditionalRAII
12556CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12557 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12558 return LastprivateConditionalRAII(CGF, S);
12559}
12560
12561CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12562 if (CGM.getLangOpts().OpenMP < 50)
12563 return;
12564 if (Action == ActionToDo::DisableLastprivateConditional) {
12565 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&(static_cast <bool> (CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of disabled private vars."
) ? void (0) : __assert_fail ("CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of disabled private vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12566, __extension__
__PRETTY_FUNCTION__))
12566 "Expected list of disabled private vars.")(static_cast <bool> (CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of disabled private vars."
) ? void (0) : __assert_fail ("CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of disabled private vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12566, __extension__
__PRETTY_FUNCTION__))
;
12567 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12568 }
12569 if (Action == ActionToDo::PushAsLastprivateConditional) {
12570 assert((static_cast <bool> (!CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of lastprivate conditional vars."
) ? void (0) : __assert_fail ("!CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of lastprivate conditional vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12572, __extension__
__PRETTY_FUNCTION__))
12571 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&(static_cast <bool> (!CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of lastprivate conditional vars."
) ? void (0) : __assert_fail ("!CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of lastprivate conditional vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12572, __extension__
__PRETTY_FUNCTION__))
12572 "Expected list of lastprivate conditional vars.")(static_cast <bool> (!CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of lastprivate conditional vars."
) ? void (0) : __assert_fail ("!CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of lastprivate conditional vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12572, __extension__
__PRETTY_FUNCTION__))
;
12573 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12574 }
12575}
12576
12577Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12578 const VarDecl *VD) {
12579 ASTContext &C = CGM.getContext();
12580 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12581 if (I == LastprivateConditionalToTypes.end())
12582 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12583 QualType NewType;
12584 const FieldDecl *VDField;
12585 const FieldDecl *FiredField;
12586 LValue BaseLVal;
12587 auto VI = I->getSecond().find(VD);
12588 if (VI == I->getSecond().end()) {
12589 RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
12590 RD->startDefinition();
12591 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12592 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12593 RD->completeDefinition();
12594 NewType = C.getRecordType(RD);
12595 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12596 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12597 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12598 } else {
12599 NewType = std::get<0>(VI->getSecond());
12600 VDField = std::get<1>(VI->getSecond());
12601 FiredField = std::get<2>(VI->getSecond());
12602 BaseLVal = std::get<3>(VI->getSecond());
12603 }
12604 LValue FiredLVal =
12605 CGF.EmitLValueForField(BaseLVal, FiredField);
12606 CGF.EmitStoreOfScalar(
12607 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12608 FiredLVal);
12609 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12610}
12611
12612namespace {
12613/// Checks if the lastprivate conditional variable is referenced in LHS.
12614class LastprivateConditionalRefChecker final
12615 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12616 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12617 const Expr *FoundE = nullptr;
12618 const Decl *FoundD = nullptr;
12619 StringRef UniqueDeclName;
12620 LValue IVLVal;
12621 llvm::Function *FoundFn = nullptr;
12622 SourceLocation Loc;
12623
12624public:
12625 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12626 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12627 llvm::reverse(LPM)) {
12628 auto It = D.DeclToUniqueName.find(E->getDecl());
12629 if (It == D.DeclToUniqueName.end())
12630 continue;
12631 if (D.Disabled)
12632 return false;
12633 FoundE = E;
12634 FoundD = E->getDecl()->getCanonicalDecl();
12635 UniqueDeclName = It->second;
12636 IVLVal = D.IVLVal;
12637 FoundFn = D.Fn;
12638 break;
12639 }
12640 return FoundE == E;
12641 }
12642 bool VisitMemberExpr(const MemberExpr *E) {
12643 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12644 return false;
12645 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12646 llvm::reverse(LPM)) {
12647 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12648 if (It == D.DeclToUniqueName.end())
12649 continue;
12650 if (D.Disabled)
12651 return false;
12652 FoundE = E;
12653 FoundD = E->getMemberDecl()->getCanonicalDecl();
12654 UniqueDeclName = It->second;
12655 IVLVal = D.IVLVal;
12656 FoundFn = D.Fn;
12657 break;
12658 }
12659 return FoundE == E;
12660 }
12661 bool VisitStmt(const Stmt *S) {
12662 for (const Stmt *Child : S->children()) {
12663 if (!Child)
12664 continue;
12665 if (const auto *E = dyn_cast<Expr>(Child))
12666 if (!E->isGLValue())
12667 continue;
12668 if (Visit(Child))
12669 return true;
12670 }
12671 return false;
12672 }
12673 explicit LastprivateConditionalRefChecker(
12674 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12675 : LPM(LPM) {}
12676 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12677 getFoundData() const {
12678 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12679 }
12680};
12681} // namespace
12682
12683void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12684 LValue IVLVal,
12685 StringRef UniqueDeclName,
12686 LValue LVal,
12687 SourceLocation Loc) {
12688 // Last updated loop counter for the lastprivate conditional var.
12689 // int<xx> last_iv = 0;
12690 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12691 llvm::Constant *LastIV =
12692 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12693 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
8
'LastIV' is a 'GlobalVariable'
12694 IVLVal.getAlignment().getAsAlign());
12695 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12696
12697 // Last value of the lastprivate conditional.
12698 // decltype(priv_a) last_a;
12699 llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12700 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12701 Last->setAlignment(LVal.getAlignment().getAsAlign());
12702 LValue LastLVal = CGF.MakeAddrLValue(
12703 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12704
12705 // Global loop counter. Required to handle inner parallel-for regions.
12706 // iv
12707 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12708
12709 // #pragma omp critical(a)
12710 // if (last_iv <= iv) {
12711 // last_iv = iv;
12712 // last_a = priv_a;
12713 // }
12714 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12715 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12716 Action.Enter(CGF);
12717 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12718 // (last_iv <= iv) ? Check if the variable is updated and store new
12719 // value in global var.
12720 llvm::Value *CmpRes;
12721 if (IVLVal.getType()->isSignedIntegerType()) {
12722 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12723 } else {
12724 assert(IVLVal.getType()->isUnsignedIntegerType() &&(static_cast <bool> (IVLVal.getType()->isUnsignedIntegerType
() && "Loop iteration variable must be integer.") ? void
(0) : __assert_fail ("IVLVal.getType()->isUnsignedIntegerType() && \"Loop iteration variable must be integer.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12725, __extension__
__PRETTY_FUNCTION__))
12725 "Loop iteration variable must be integer.")(static_cast <bool> (IVLVal.getType()->isUnsignedIntegerType
() && "Loop iteration variable must be integer.") ? void
(0) : __assert_fail ("IVLVal.getType()->isUnsignedIntegerType() && \"Loop iteration variable must be integer.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12725, __extension__
__PRETTY_FUNCTION__))
;
12726 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12727 }
12728 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12729 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12730 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12731 // {
12732 CGF.EmitBlock(ThenBB);
12733
12734 // last_iv = iv;
12735 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12736
12737 // last_a = priv_a;
12738 switch (CGF.getEvaluationKind(LVal.getType())) {
12739 case TEK_Scalar: {
12740 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12741 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12742 break;
12743 }
12744 case TEK_Complex: {
12745 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12746 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12747 break;
12748 }
12749 case TEK_Aggregate:
12750 llvm_unreachable(::llvm::llvm_unreachable_internal("Aggregates are not supported in lastprivate conditional."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12751)
12751 "Aggregates are not supported in lastprivate conditional.")::llvm::llvm_unreachable_internal("Aggregates are not supported in lastprivate conditional."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12751)
;
12752 }
12753 // }
12754 CGF.EmitBranch(ExitBB);
12755 // There is no need to emit line number for unconditional branch.
12756 (void)ApplyDebugLocation::CreateEmpty(CGF);
12757 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12758 };
12759
12760 if (CGM.getLangOpts().OpenMPSimd) {
9
Assuming field 'OpenMPSimd' is 0
10
Taking false branch
12761 // Do not emit as a critical region as no parallel region could be emitted.
12762 RegionCodeGenTy ThenRCG(CodeGen);
12763 ThenRCG(CGF);
12764 } else {
12765 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
11
Calling 'CGOpenMPRuntime::emitCriticalRegion'
12766 }
12767}
12768
12769void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12770 const Expr *LHS) {
12771 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
1
Assuming field 'OpenMP' is >= 50
2
Taking false branch
12772 return;
12773 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12774 if (!Checker.Visit(LHS))
3
Assuming the condition is false
4
Taking false branch
12775 return;
12776 const Expr *FoundE;
12777 const Decl *FoundD;
12778 StringRef UniqueDeclName;
12779 LValue IVLVal;
12780 llvm::Function *FoundFn;
12781 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12782 Checker.getFoundData();
12783 if (FoundFn != CGF.CurFn) {
5
Assuming 'FoundFn' is equal to field 'CurFn'
6
Taking false branch
12784 // Special codegen for inner parallel regions.
12785 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12786 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12787 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&(static_cast <bool> (It != LastprivateConditionalToTypes
[FoundFn].end() && "Lastprivate conditional is not found in outer region."
) ? void (0) : __assert_fail ("It != LastprivateConditionalToTypes[FoundFn].end() && \"Lastprivate conditional is not found in outer region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12788, __extension__
__PRETTY_FUNCTION__))
12788 "Lastprivate conditional is not found in outer region.")(static_cast <bool> (It != LastprivateConditionalToTypes
[FoundFn].end() && "Lastprivate conditional is not found in outer region."
) ? void (0) : __assert_fail ("It != LastprivateConditionalToTypes[FoundFn].end() && \"Lastprivate conditional is not found in outer region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12788, __extension__
__PRETTY_FUNCTION__))
;
12789 QualType StructTy = std::get<0>(It->getSecond());
12790 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12791 LValue PrivLVal = CGF.EmitLValue(FoundE);
12792 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12793 PrivLVal.getAddress(CGF),
12794 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12795 CGF.ConvertTypeForMem(StructTy));
12796 LValue BaseLVal =
12797 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12798 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12799 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12800 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12801 FiredLVal, llvm::AtomicOrdering::Unordered,
12802 /*IsVolatile=*/true, /*isInit=*/false);
12803 return;
12804 }
12805
12806 // Private address of the lastprivate conditional in the current context.
12807 // priv_a
12808 LValue LVal = CGF.EmitLValue(FoundE);
12809 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
7
Calling 'CGOpenMPRuntime::emitLastprivateConditionalUpdate'
12810 FoundE->getExprLoc());
12811}
12812
12813void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12814 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12815 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12816 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12817 return;
12818 auto Range = llvm::reverse(LastprivateConditionalStack);
12819 auto It = llvm::find_if(
12820 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12821 if (It == Range.end() || It->Fn != CGF.CurFn)
12822 return;
12823 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12824 assert(LPCI != LastprivateConditionalToTypes.end() &&(static_cast <bool> (LPCI != LastprivateConditionalToTypes
.end() && "Lastprivates must be registered already.")
? void (0) : __assert_fail ("LPCI != LastprivateConditionalToTypes.end() && \"Lastprivates must be registered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12825, __extension__
__PRETTY_FUNCTION__))
12825 "Lastprivates must be registered already.")(static_cast <bool> (LPCI != LastprivateConditionalToTypes
.end() && "Lastprivates must be registered already.")
? void (0) : __assert_fail ("LPCI != LastprivateConditionalToTypes.end() && \"Lastprivates must be registered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12825, __extension__
__PRETTY_FUNCTION__))
;
12826 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12827 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12828 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12829 for (const auto &Pair : It->DeclToUniqueName) {
12830 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12831 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12832 continue;
12833 auto I = LPCI->getSecond().find(Pair.first);
12834 assert(I != LPCI->getSecond().end() &&(static_cast <bool> (I != LPCI->getSecond().end() &&
"Lastprivate must be rehistered already.") ? void (0) : __assert_fail
("I != LPCI->getSecond().end() && \"Lastprivate must be rehistered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12835, __extension__
__PRETTY_FUNCTION__))
12835 "Lastprivate must be rehistered already.")(static_cast <bool> (I != LPCI->getSecond().end() &&
"Lastprivate must be rehistered already.") ? void (0) : __assert_fail
("I != LPCI->getSecond().end() && \"Lastprivate must be rehistered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12835, __extension__
__PRETTY_FUNCTION__))
;
12836 // bool Cmp = priv_a.Fired != 0;
12837 LValue BaseLVal = std::get<3>(I->getSecond());
12838 LValue FiredLVal =
12839 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12840 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12841 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12842 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12843 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12844 // if (Cmp) {
12845 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12846 CGF.EmitBlock(ThenBB);
12847 Address Addr = CGF.GetAddrOfLocalVar(VD);
12848 LValue LVal;
12849 if (VD->getType()->isReferenceType())
12850 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12851 AlignmentSource::Decl);
12852 else
12853 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12854 AlignmentSource::Decl);
12855 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12856 D.getBeginLoc());
12857 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12858 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12859 // }
12860 }
12861}
12862
12863void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12864 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12865 SourceLocation Loc) {
12866 if (CGF.getLangOpts().OpenMP < 50)
12867 return;
12868 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12869 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&(static_cast <bool> (It != LastprivateConditionalStack.
back().DeclToUniqueName.end() && "Unknown lastprivate conditional variable."
) ? void (0) : __assert_fail ("It != LastprivateConditionalStack.back().DeclToUniqueName.end() && \"Unknown lastprivate conditional variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12870, __extension__
__PRETTY_FUNCTION__))
12870 "Unknown lastprivate conditional variable.")(static_cast <bool> (It != LastprivateConditionalStack.
back().DeclToUniqueName.end() && "Unknown lastprivate conditional variable."
) ? void (0) : __assert_fail ("It != LastprivateConditionalStack.back().DeclToUniqueName.end() && \"Unknown lastprivate conditional variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12870, __extension__
__PRETTY_FUNCTION__))
;
12871 StringRef UniqueName = It->second;
12872 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12873 // The variable was not updated in the region - exit.
12874 if (!GV)
12875 return;
12876 LValue LPLVal = CGF.MakeAddrLValue(
12877 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12878 PrivLVal.getType().getNonReferenceType());
12879 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12880 CGF.EmitStoreOfScalar(Res, PrivLVal);
12881}
12882
12883llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12884 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12885 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12886 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12886)
;
12887}
12888
12889llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12890 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12891 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12892 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12892)
;
12893}
12894
12895llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12896 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12897 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12898 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12899 bool Tied, unsigned &NumberOfParts) {
12900 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12900)
;
12901}
12902
12903void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12904 SourceLocation Loc,
12905 llvm::Function *OutlinedFn,
12906 ArrayRef<llvm::Value *> CapturedVars,
12907 const Expr *IfCond,
12908 llvm::Value *NumThreads) {
12909 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12909)
;
12910}
12911
12912void CGOpenMPSIMDRuntime::emitCriticalRegion(
12913 CodeGenFunction &CGF, StringRef CriticalName,
12914 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12915 const Expr *Hint) {
12916 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12916)
;
12917}
12918
12919void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12920 const RegionCodeGenTy &MasterOpGen,
12921 SourceLocation Loc) {
12922 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12922)
;
12923}
12924
12925void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12926 const RegionCodeGenTy &MasterOpGen,
12927 SourceLocation Loc,
12928 const Expr *Filter) {
12929 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12929)
;
12930}
12931
12932void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12933 SourceLocation Loc) {
12934 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12934)
;
12935}
12936
12937void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12938 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12939 SourceLocation Loc) {
12940 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12940)
;
12941}
12942
12943void CGOpenMPSIMDRuntime::emitSingleRegion(
12944 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12945 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12946 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12947 ArrayRef<const Expr *> AssignmentOps) {
12948 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12948)
;
12949}
12950
12951void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12952 const RegionCodeGenTy &OrderedOpGen,
12953 SourceLocation Loc,
12954 bool IsThreads) {
12955 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12955)
;
12956}
12957
12958void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12959 SourceLocation Loc,
12960 OpenMPDirectiveKind Kind,
12961 bool EmitChecks,
12962 bool ForceSimpleCall) {
12963 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12963)
;
12964}
12965
12966void CGOpenMPSIMDRuntime::emitForDispatchInit(
12967 CodeGenFunction &CGF, SourceLocation Loc,
12968 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12969 bool Ordered, const DispatchRTInput &DispatchValues) {
12970 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12970)
;
12971}
12972
12973void CGOpenMPSIMDRuntime::emitForStaticInit(
12974 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12975 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12976 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12976)
;
12977}
12978
12979void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12980 CodeGenFunction &CGF, SourceLocation Loc,
12981 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12982 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12982)
;
12983}
12984
12985void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12986 SourceLocation Loc,
12987 unsigned IVSize,
12988 bool IVSigned) {
12989 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12989)
;
12990}
12991
12992void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12993 SourceLocation Loc,
12994 OpenMPDirectiveKind DKind) {
12995 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12995)
;
12996}
12997
12998llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12999 SourceLocation Loc,
13000 unsigned IVSize, bool IVSigned,
13001 Address IL, Address LB,
13002 Address UB, Address ST) {
13003 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13003)
;
13004}
13005
13006void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
13007 llvm::Value *NumThreads,
13008 SourceLocation Loc) {
13009 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13009)
;
13010}
13011
13012void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
13013 ProcBindKind ProcBind,
13014 SourceLocation Loc) {
13015 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13015)
;
13016}
13017
13018Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
13019 const VarDecl *VD,
13020 Address VDAddr,
13021 SourceLocation Loc) {
13022 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13022)
;
13023}
13024
13025llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
13026 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13027 CodeGenFunction *CGF) {
13028 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13028)
;
13029}
13030
13031Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
13032 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13033 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13033)
;
13034}
13035
13036void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
13037 ArrayRef<const Expr *> Vars,
13038 SourceLocation Loc,
13039 llvm::AtomicOrdering AO) {
13040 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13040)
;
13041}
13042
13043void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
13044 const OMPExecutableDirective &D,
13045 llvm::Function *TaskFunction,
13046 QualType SharedsTy, Address Shareds,
13047 const Expr *IfCond,
13048 const OMPTaskDataTy &Data) {
13049 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13049)
;
13050}
13051
13052void CGOpenMPSIMDRuntime::emitTaskLoopCall(
13053 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
13054 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13055 const Expr *IfCond, const OMPTaskDataTy &Data) {
13056 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13056)
;
13057}
13058
13059void CGOpenMPSIMDRuntime::emitReduction(
13060 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13061 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13062 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13063 assert(Options.SimpleReduction && "Only simple reduction is expected.")(static_cast <bool> (Options.SimpleReduction &&
"Only simple reduction is expected.") ? void (0) : __assert_fail
("Options.SimpleReduction && \"Only simple reduction is expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13063, __extension__
__PRETTY_FUNCTION__))
;
13064 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13065 ReductionOps, Options);
13066}
13067
13068llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13069 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13070 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13071 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13071)
;
13072}
13073
13074void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13075 SourceLocation Loc,
13076 bool IsWorksharingReduction) {
13077 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13077)
;
13078}
13079
13080void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13081 SourceLocation Loc,
13082 ReductionCodeGen &RCG,
13083 unsigned N) {
13084 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13084)
;
13085}
13086
13087Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13088 SourceLocation Loc,
13089 llvm::Value *ReductionsPtr,
13090 LValue SharedLVal) {
13091 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13091)
;
13092}
13093
13094void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13095 SourceLocation Loc,
13096 const OMPTaskDataTy &Data) {
13097 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13097)
;
13098}
13099
13100void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13101 CodeGenFunction &CGF, SourceLocation Loc,
13102 OpenMPDirectiveKind CancelRegion) {
13103 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13103)
;
13104}
13105
13106void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13107 SourceLocation Loc, const Expr *IfCond,
13108 OpenMPDirectiveKind CancelRegion) {
13109 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13109)
;
13110}
13111
13112void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13113 const OMPExecutableDirective &D, StringRef ParentName,
13114 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13115 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13116 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13116)
;
13117}
13118
13119void CGOpenMPSIMDRuntime::emitTargetCall(
13120 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13121 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13122 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13123 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13124 const OMPLoopDirective &D)>
13125 SizeEmitter) {
13126 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13126)
;
13127}
13128
13129bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13130 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13130)
;
13131}
13132
13133bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13134 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13134)
;
13135}
13136
13137bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13138 return false;
13139}
13140
13141void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13142 const OMPExecutableDirective &D,
13143 SourceLocation Loc,
13144 llvm::Function *OutlinedFn,
13145 ArrayRef<llvm::Value *> CapturedVars) {
13146 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13146)
;
13147}
13148
13149void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13150 const Expr *NumTeams,
13151 const Expr *ThreadLimit,
13152 SourceLocation Loc) {
13153 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13153)
;
13154}
13155
13156void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13157 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13158 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13159 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13159)
;
13160}
13161
13162void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13163 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13164 const Expr *Device) {
13165 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13165)
;
13166}
13167
13168void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13169 const OMPLoopDirective &D,
13170 ArrayRef<Expr *> NumIterations) {
13171 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13171)
;
13172}
13173
13174void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13175 const OMPDependClause *C) {
13176 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13176)
;
13177}
13178
13179const VarDecl *
13180CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13181 const VarDecl *NativeParam) const {
13182 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13182)
;
13183}
13184
13185Address
13186CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13187 const VarDecl *NativeParam,
13188 const VarDecl *TargetParam) const {
13189 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13189)
;
13190}