Bug Summary

File: build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Warning: line 9075, column 11
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CGOpenMPRuntime.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/clang/include -I tools/clang/include -I include -I /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include 
-internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-10-03-140002-15933-1 -x c++ /build/llvm-toolchain-snapshot-16~++20221003111214+1fa2019828ca/clang/lib/CodeGen/CGOpenMPRuntime.cpp
1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
22#include "clang/AST/OpenMPClause.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/AST/StmtVisitor.h"
25#include "clang/Basic/BitmaskEnum.h"
26#include "clang/Basic/FileManager.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/SourceManager.h"
29#include "clang/CodeGen/ConstantInitBuilder.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/SmallBitVector.h"
33#include "llvm/ADT/StringExtras.h"
34#include "llvm/Bitcode/BitcodeReader.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/GlobalValue.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Value.h"
40#include "llvm/Support/AtomicOrdering.h"
41#include "llvm/Support/Format.h"
42#include "llvm/Support/raw_ostream.h"
43#include <cassert>
44#include <numeric>
45
46using namespace clang;
47using namespace CodeGen;
48using namespace llvm::omp;
49
50namespace {
51/// Base class for handling code generation inside OpenMP regions.
52class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
53public:
54 /// Kinds of OpenMP regions used in codegen.
55 enum CGOpenMPRegionKind {
56 /// Region with outlined function for standalone 'parallel'
57 /// directive.
58 ParallelOutlinedRegion,
59 /// Region with outlined function for standalone 'task' directive.
60 TaskOutlinedRegion,
61 /// Region for constructs that do not require function outlining,
62 /// like 'for', 'sections', 'atomic' etc. directives.
63 InlinedRegion,
64 /// Region with outlined function for standalone 'target' directive.
65 TargetRegion,
66 };
67
68 CGOpenMPRegionInfo(const CapturedStmt &CS,
69 const CGOpenMPRegionKind RegionKind,
70 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
71 bool HasCancel)
72 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
73 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
74
75 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
76 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
77 bool HasCancel)
78 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
79 Kind(Kind), HasCancel(HasCancel) {}
80
81 /// Get a variable or parameter for storing global thread id
82 /// inside OpenMP construct.
83 virtual const VarDecl *getThreadIDVariable() const = 0;
84
85 /// Emit the captured statement body.
86 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
87
88 /// Get an LValue for the current ThreadID variable.
89 /// \return LValue for thread id variable. This LValue always has type int32*.
90 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
91
92 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
93
94 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
95
96 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
97
98 bool hasCancel() const { return HasCancel; }
99
100 static bool classof(const CGCapturedStmtInfo *Info) {
101 return Info->getKind() == CR_OpenMP;
102 }
103
104 ~CGOpenMPRegionInfo() override = default;
105
106protected:
107 CGOpenMPRegionKind RegionKind;
108 RegionCodeGenTy CodeGen;
109 OpenMPDirectiveKind Kind;
110 bool HasCancel;
111};
112
113/// API for captured statement code generation in OpenMP constructs.
114class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
115public:
116 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
117 const RegionCodeGenTy &CodeGen,
118 OpenMPDirectiveKind Kind, bool HasCancel,
119 StringRef HelperName)
120 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
121 HasCancel),
122 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
123 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")(static_cast <bool> (ThreadIDVar != nullptr && "No ThreadID in OpenMP region."
) ? void (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 123, __extension__
__PRETTY_FUNCTION__))
;
124 }
125
126 /// Get a variable or parameter for storing global thread id
127 /// inside OpenMP construct.
128 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
129
130 /// Get the name of the capture helper.
131 StringRef getHelperName() const override { return HelperName; }
132
133 static bool classof(const CGCapturedStmtInfo *Info) {
134 return CGOpenMPRegionInfo::classof(Info) &&
135 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
136 ParallelOutlinedRegion;
137 }
138
139private:
140 /// A variable or parameter storing global thread id for OpenMP
141 /// constructs.
142 const VarDecl *ThreadIDVar;
143 StringRef HelperName;
144};
145
146/// API for captured statement code generation in OpenMP constructs.
147class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
148public:
149 class UntiedTaskActionTy final : public PrePostActionTy {
150 bool Untied;
151 const VarDecl *PartIDVar;
152 const RegionCodeGenTy UntiedCodeGen;
153 llvm::SwitchInst *UntiedSwitch = nullptr;
154
155 public:
156 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
157 const RegionCodeGenTy &UntiedCodeGen)
158 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
159 void Enter(CodeGenFunction &CGF) override {
160 if (Untied) {
161 // Emit task switching point.
162 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
163 CGF.GetAddrOfLocalVar(PartIDVar),
164 PartIDVar->getType()->castAs<PointerType>());
165 llvm::Value *Res =
166 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
167 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
168 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
169 CGF.EmitBlock(DoneBB);
170 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
173 CGF.Builder.GetInsertBlock());
174 emitUntiedSwitch(CGF);
175 }
176 }
177 void emitUntiedSwitch(CodeGenFunction &CGF) const {
178 if (Untied) {
179 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
180 CGF.GetAddrOfLocalVar(PartIDVar),
181 PartIDVar->getType()->castAs<PointerType>());
182 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
183 PartIdLVal);
184 UntiedCodeGen(CGF);
185 CodeGenFunction::JumpDest CurPoint =
186 CGF.getJumpDestInCurrentScope(".untied.next.");
187 CGF.EmitBranch(CGF.ReturnBlock.getBlock());
188 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
189 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
190 CGF.Builder.GetInsertBlock());
191 CGF.EmitBranchThroughCleanup(CurPoint);
192 CGF.EmitBlock(CurPoint.getBlock());
193 }
194 }
195 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
196 };
197 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
198 const VarDecl *ThreadIDVar,
199 const RegionCodeGenTy &CodeGen,
200 OpenMPDirectiveKind Kind, bool HasCancel,
201 const UntiedTaskActionTy &Action)
202 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
203 ThreadIDVar(ThreadIDVar), Action(Action) {
204 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")(static_cast <bool> (ThreadIDVar != nullptr && "No ThreadID in OpenMP region."
) ? void (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 204, __extension__
__PRETTY_FUNCTION__))
;
205 }
206
207 /// Get a variable or parameter for storing global thread id
208 /// inside OpenMP construct.
209 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
210
211 /// Get an LValue for the current ThreadID variable.
212 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
213
214 /// Get the name of the capture helper.
215 StringRef getHelperName() const override { return ".omp_outlined."; }
216
217 void emitUntiedSwitch(CodeGenFunction &CGF) override {
218 Action.emitUntiedSwitch(CGF);
219 }
220
221 static bool classof(const CGCapturedStmtInfo *Info) {
222 return CGOpenMPRegionInfo::classof(Info) &&
223 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
224 TaskOutlinedRegion;
225 }
226
227private:
228 /// A variable or parameter storing global thread id for OpenMP
229 /// constructs.
230 const VarDecl *ThreadIDVar;
231 /// Action for emitting code for untied tasks.
232 const UntiedTaskActionTy &Action;
233};
234
235/// API for inlined captured statement code generation in OpenMP
236/// constructs.
237class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
238public:
239 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
240 const RegionCodeGenTy &CodeGen,
241 OpenMPDirectiveKind Kind, bool HasCancel)
242 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
243 OldCSI(OldCSI),
244 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
245
246 // Retrieve the value of the context parameter.
247 llvm::Value *getContextValue() const override {
248 if (OuterRegionInfo)
249 return OuterRegionInfo->getContextValue();
250 llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 250)
;
251 }
252
253 void setContextValue(llvm::Value *V) override {
254 if (OuterRegionInfo) {
255 OuterRegionInfo->setContextValue(V);
256 return;
257 }
258 llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 258)
;
259 }
260
261 /// Lookup the captured field decl for a variable.
262 const FieldDecl *lookup(const VarDecl *VD) const override {
263 if (OuterRegionInfo)
264 return OuterRegionInfo->lookup(VD);
265 // If there is no outer outlined region,no need to lookup in a list of
266 // captured variables, we can use the original one.
267 return nullptr;
268 }
269
270 FieldDecl *getThisFieldDecl() const override {
271 if (OuterRegionInfo)
272 return OuterRegionInfo->getThisFieldDecl();
273 return nullptr;
274 }
275
276 /// Get a variable or parameter for storing global thread id
277 /// inside OpenMP construct.
278 const VarDecl *getThreadIDVariable() const override {
279 if (OuterRegionInfo)
280 return OuterRegionInfo->getThreadIDVariable();
281 return nullptr;
282 }
283
284 /// Get an LValue for the current ThreadID variable.
285 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
286 if (OuterRegionInfo)
287 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
288 llvm_unreachable("No LValue for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No LValue for inlined OpenMP construct"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 288)
;
289 }
290
291 /// Get the name of the capture helper.
292 StringRef getHelperName() const override {
293 if (auto *OuterRegionInfo = getOldCSI())
294 return OuterRegionInfo->getHelperName();
295 llvm_unreachable("No helper name for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No helper name for inlined OpenMP construct"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 295)
;
296 }
297
298 void emitUntiedSwitch(CodeGenFunction &CGF) override {
299 if (OuterRegionInfo)
300 OuterRegionInfo->emitUntiedSwitch(CGF);
301 }
302
303 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
304
305 static bool classof(const CGCapturedStmtInfo *Info) {
306 return CGOpenMPRegionInfo::classof(Info) &&
307 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
308 }
309
310 ~CGOpenMPInlinedRegionInfo() override = default;
311
312private:
313 /// CodeGen info about outer OpenMP region.
314 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
315 CGOpenMPRegionInfo *OuterRegionInfo;
316};
317
318/// API for captured statement code generation in OpenMP target
319/// constructs. For this captures, implicit parameters are used instead of the
320/// captured fields. The name of the target region has to be unique in a given
321/// application so it is provided by the client, because only the client has
322/// the information to generate that.
323class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
324public:
325 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
326 const RegionCodeGenTy &CodeGen, StringRef HelperName)
327 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
328 /*HasCancel=*/false),
329 HelperName(HelperName) {}
330
331 /// This is unused for target regions because each starts executing
332 /// with a single thread.
333 const VarDecl *getThreadIDVariable() const override { return nullptr; }
334
335 /// Get the name of the capture helper.
336 StringRef getHelperName() const override { return HelperName; }
337
338 static bool classof(const CGCapturedStmtInfo *Info) {
339 return CGOpenMPRegionInfo::classof(Info) &&
340 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
341 }
342
343private:
344 StringRef HelperName;
345};
346
347static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
348 llvm_unreachable("No codegen for expressions")::llvm::llvm_unreachable_internal("No codegen for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 348)
;
349}
350/// API for generation of expressions captured in a innermost OpenMP
351/// region.
352class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
353public:
354 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
355 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
356 OMPD_unknown,
357 /*HasCancel=*/false),
358 PrivScope(CGF) {
359 // Make sure the globals captured in the provided statement are local by
360 // using the privatization logic. We assume the same variable is not
361 // captured more than once.
362 for (const auto &C : CS.captures()) {
363 if (!C.capturesVariable() && !C.capturesVariableByCopy())
364 continue;
365
366 const VarDecl *VD = C.getCapturedVar();
367 if (VD->isLocalVarDeclOrParm())
368 continue;
369
370 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
371 /*RefersToEnclosingVariableOrCapture=*/false,
372 VD->getType().getNonReferenceType(), VK_LValue,
373 C.getLocation());
374 PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
375 }
376 (void)PrivScope.Privatize();
377 }
378
379 /// Lookup the captured field decl for a variable.
380 const FieldDecl *lookup(const VarDecl *VD) const override {
381 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
382 return FD;
383 return nullptr;
384 }
385
386 /// Emit the captured statement body.
387 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
388 llvm_unreachable("No body for expressions")::llvm::llvm_unreachable_internal("No body for expressions", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 388)
;
389 }
390
391 /// Get a variable or parameter for storing global thread id
392 /// inside OpenMP construct.
393 const VarDecl *getThreadIDVariable() const override {
394 llvm_unreachable("No thread id for expressions")::llvm::llvm_unreachable_internal("No thread id for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 394)
;
395 }
396
397 /// Get the name of the capture helper.
398 StringRef getHelperName() const override {
399 llvm_unreachable("No helper name for expressions")::llvm::llvm_unreachable_internal("No helper name for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 399)
;
400 }
401
402 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
403
404private:
405 /// Private scope to capture global variables.
406 CodeGenFunction::OMPPrivateScope PrivScope;
407};
408
409/// RAII for emitting code of OpenMP constructs.
410class InlinedOpenMPRegionRAII {
411 CodeGenFunction &CGF;
412 llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
413 FieldDecl *LambdaThisCaptureField = nullptr;
414 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
415 bool NoInheritance = false;
416
417public:
418 /// Constructs region for combined constructs.
419 /// \param CodeGen Code generation sequence for combined directives. Includes
420 /// a list of functions used for code generation of implicitly inlined
421 /// regions.
422 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
423 OpenMPDirectiveKind Kind, bool HasCancel,
424 bool NoInheritance = true)
425 : CGF(CGF), NoInheritance(NoInheritance) {
426 // Start emission for the construct.
427 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
428 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
429 if (NoInheritance) {
430 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
431 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
432 CGF.LambdaThisCaptureField = nullptr;
433 BlockInfo = CGF.BlockInfo;
434 CGF.BlockInfo = nullptr;
435 }
436 }
437
438 ~InlinedOpenMPRegionRAII() {
439 // Restore original CapturedStmtInfo only if we're done with code emission.
440 auto *OldCSI =
441 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
442 delete CGF.CapturedStmtInfo;
443 CGF.CapturedStmtInfo = OldCSI;
444 if (NoInheritance) {
445 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
446 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
447 CGF.BlockInfo = BlockInfo;
448 }
449 }
450};
451
452/// Values for bit flags used in the ident_t to describe the fields.
453/// All enumeric elements are named and described in accordance with the code
454/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
455enum OpenMPLocationFlags : unsigned {
456 /// Use trampoline for internal microtask.
457 OMP_IDENT_IMD = 0x01,
458 /// Use c-style ident structure.
459 OMP_IDENT_KMPC = 0x02,
460 /// Atomic reduction option for kmpc_reduce.
461 OMP_ATOMIC_REDUCE = 0x10,
462 /// Explicit 'barrier' directive.
463 OMP_IDENT_BARRIER_EXPL = 0x20,
464 /// Implicit barrier in code.
465 OMP_IDENT_BARRIER_IMPL = 0x40,
466 /// Implicit barrier in 'for' directive.
467 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
468 /// Implicit barrier in 'sections' directive.
469 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
470 /// Implicit barrier in 'single' directive.
471 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
472 /// Call of __kmp_for_static_init for static loop.
473 OMP_IDENT_WORK_LOOP = 0x200,
474 /// Call of __kmp_for_static_init for sections.
475 OMP_IDENT_WORK_SECTIONS = 0x400,
476 /// Call of __kmp_for_static_init for distribute.
477 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
478 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_IDENT_WORK_DISTRIBUTE
479};
480
481namespace {
482LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()using ::llvm::BitmaskEnumDetail::operator~; using ::llvm::BitmaskEnumDetail
::operator|; using ::llvm::BitmaskEnumDetail::operator&; using
::llvm::BitmaskEnumDetail::operator^; using ::llvm::BitmaskEnumDetail
::operator|=; using ::llvm::BitmaskEnumDetail::operator&=
; using ::llvm::BitmaskEnumDetail::operator^=
;
483/// Values for bit flags for marking which requires clauses have been used.
484enum OpenMPOffloadingRequiresDirFlags : int64_t {
485 /// flag undefined.
486 OMP_REQ_UNDEFINED = 0x000,
487 /// no requires clause present.
488 OMP_REQ_NONE = 0x001,
489 /// reverse_offload clause.
490 OMP_REQ_REVERSE_OFFLOAD = 0x002,
491 /// unified_address clause.
492 OMP_REQ_UNIFIED_ADDRESS = 0x004,
493 /// unified_shared_memory clause.
494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
495 /// dynamic_allocators clause.
496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
497 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_REQ_DYNAMIC_ALLOCATORS
498};
499
500enum OpenMPOffloadingReservedDeviceIDs {
501 /// Device ID if the device was not defined, runtime should get it
502 /// from environment variables in the spec.
503 OMP_DEVICEID_UNDEF = -1,
504};
505} // anonymous namespace
506
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
547
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
579
580/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
581/// region.
582class CleanupTy final : public EHScopeStack::Cleanup {
583 PrePostActionTy *Action;
584
585public:
586 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
587 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
588 if (!CGF.HaveInsertPoint())
589 return;
590 Action->Exit(CGF);
591 }
592};
593
594} // anonymous namespace
595
596void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
597 CodeGenFunction::RunCleanupsScope Scope(CGF);
598 if (PrePostAction) {
599 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
600 Callback(CodeGen, CGF, *PrePostAction);
601 } else {
602 PrePostActionTy Action;
603 Callback(CodeGen, CGF, Action);
604 }
605}
606
607/// Check if the combiner is a call to UDR combiner and if it is so return the
608/// UDR decl used for reduction.
609static const OMPDeclareReductionDecl *
610getReductionInit(const Expr *ReductionOp) {
611 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
612 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
613 if (const auto *DRE =
614 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
615 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
616 return DRD;
617 return nullptr;
618}
619
620static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
621 const OMPDeclareReductionDecl *DRD,
622 const Expr *InitOp,
623 Address Private, Address Original,
624 QualType Ty) {
625 if (DRD->getInitializer()) {
626 std::pair<llvm::Function *, llvm::Function *> Reduction =
627 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
628 const auto *CE = cast<CallExpr>(InitOp);
629 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
630 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
631 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
632 const auto *LHSDRE =
633 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
634 const auto *RHSDRE =
635 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
636 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
637 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
639 (void)PrivateScope.Privatize();
640 RValue Func = RValue::get(Reduction.second);
641 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
642 CGF.EmitIgnoredExpr(InitOp);
643 } else {
644 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
645 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
646 auto *GV = new llvm::GlobalVariable(
647 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
648 llvm::GlobalValue::PrivateLinkage, Init, Name);
649 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
650 RValue InitRVal;
651 switch (CGF.getEvaluationKind(Ty)) {
652 case TEK_Scalar:
653 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
654 break;
655 case TEK_Complex:
656 InitRVal =
657 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
658 break;
659 case TEK_Aggregate: {
660 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
661 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
662 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
663 /*IsInitializer=*/false);
664 return;
665 }
666 }
667 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
668 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
669 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
670 /*IsInitializer=*/false);
671 }
672}
673
674/// Emit initialization of arrays of complex types.
675/// \param DestAddr Address of the array.
676/// \param Type Type of array.
677/// \param Init Initial expression of array.
678/// \param SrcAddr Address of the original array.
// Emits an IR loop that initializes every element of the destination array.
// When DRD (a user-defined reduction) is given, each element is initialized
// from the corresponding source element via the UDR initializer; otherwise
// Init is emitted into each destination element.
679static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
680 QualType Type, bool EmitDeclareReductionInit,
681 const Expr *Init,
682 const OMPDeclareReductionDecl *DRD,
683 Address SrcAddr = Address::invalid()) {
684 // Perform element-by-element initialization.
685 QualType ElementTy;
686
687 // Drill down to the base element type on both arrays.
688 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
689 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
// SrcAddr is only meaningful (and only valid) when a UDR is involved.
690 if (DRD)
691 SrcAddr =
692 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
693
694 llvm::Value *SrcBegin = nullptr;
695 if (DRD)
696 SrcBegin = SrcAddr.getPointer();
697 llvm::Value *DestBegin = DestAddr.getPointer();
698 // Cast from pointer to array type to pointer to single element.
699 llvm::Value *DestEnd =
700 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
701 // The basic structure here is a while-do loop.
702 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
703 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
// Skip the loop entirely for zero-length arrays (begin == end).
704 llvm::Value *IsEmpty =
705 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
706 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
707
708 // Enter the loop body, making that address the current address.
709 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
710 CGF.EmitBlock(BodyBB);
711
712 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
713
// PHIs track the current source/destination element across loop iterations.
714 llvm::PHINode *SrcElementPHI = nullptr;
715 Address SrcElementCurrent = Address::invalid();
716 if (DRD) {
717 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
718 "omp.arraycpy.srcElementPast");
719 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
720 SrcElementCurrent =
721 Address(SrcElementPHI, SrcAddr.getElementType(),
722 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
723 }
724 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
725 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
726 DestElementPHI->addIncoming(DestBegin, EntryBB);
727 Address DestElementCurrent =
728 Address(DestElementPHI, DestAddr.getElementType(),
729 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
730
731 // Emit copy.
// The cleanup scope ensures temporaries created while initializing one
// element are destroyed before the next iteration.
732 {
733 CodeGenFunction::RunCleanupsScope InitScope(CGF);
734 if (EmitDeclareReductionInit) {
735 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
736 SrcElementCurrent, ElementTy);
737 } else
738 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
739 /*IsInitializer=*/false);
740 }
741
742 if (DRD) {
743 // Shift the address forward by one element.
744 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
745 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
746 "omp.arraycpy.dest.element");
747 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
748 }
749
750 // Shift the address forward by one element.
751 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
752 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
753 "omp.arraycpy.dest.element");
754 // Check whether we've reached the end.
755 llvm::Value *Done =
756 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
757 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
758 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
759
760 // Done.
761 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
762}
763
// Emit the lvalue for the shared (original) copy of a reduction item
// expression; thin forwarder to CodeGenFunction's shared-lvalue emission.
764LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
765 return CGF.EmitOMPSharedLValue(E);
766}
767
768LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
769 const Expr *E) {
770 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
771 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
772 return LValue();
773}
774
// Initialize an array-typed private reduction copy, delegating the per-element
// loop to EmitOMPAggregateInit.
775void ReductionCodeGen::emitAggregateInitialization(
776 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
777 const OMPDeclareReductionDecl *DRD) {
778 // Emit VarDecl with copy init for arrays.
779 // Get the address of the original variable captured in current
780 // captured region.
781 const auto *PrivateVD =
782 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
// Use the UDR initialization path when the UDR provides an initializer, or
// when the private copy has no default initializer of its own.
783 bool EmitDeclareReductionInit =
784 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
785 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
786 EmitDeclareReductionInit,
787 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
788 : PrivateVD->getInit(),
789 DRD, SharedAddr);
790}
791
792ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
793 ArrayRef<const Expr *> Origs,
794 ArrayRef<const Expr *> Privates,
795 ArrayRef<const Expr *> ReductionOps) {
796 ClausesData.reserve(Shareds.size());
797 SharedAddresses.reserve(Shareds.size());
798 Sizes.reserve(Shareds.size());
799 BaseDecls.reserve(Shareds.size());
800 const auto *IOrig = Origs.begin();
801 const auto *IPriv = Privates.begin();
802 const auto *IRed = ReductionOps.begin();
803 for (const Expr *Ref : Shareds) {
804 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
805 std::advance(IOrig, 1);
806 std::advance(IPriv, 1);
807 std::advance(IRed, 1);
808 }
809}
810
// Emit and cache the lvalues for item N's shared expression and (if distinct)
// its original 'ref' expression. Must be called with exactly N entries already
// emitted, so the new entries land at index N.
811void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
812 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&(static_cast <bool> (SharedAddresses.size() == N &&
OrigAddresses.size() == N && "Number of generated lvalues must be exactly N."
) ? void (0) : __assert_fail ("SharedAddresses.size() == N && OrigAddresses.size() == N && \"Number of generated lvalues must be exactly N.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 813, __extension__
__PRETTY_FUNCTION__))
813 "Number of generated lvalues must be exactly N.")(static_cast <bool> (SharedAddresses.size() == N &&
OrigAddresses.size() == N && "Number of generated lvalues must be exactly N."
) ? void (0) : __assert_fail ("SharedAddresses.size() == N && OrigAddresses.size() == N && \"Number of generated lvalues must be exactly N.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 813, __extension__
__PRETTY_FUNCTION__))
;
814 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
815 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
816 SharedAddresses.emplace_back(First, Second);
// When the shared and ref expressions coincide, reuse the lvalues instead of
// re-emitting them.
817 if (ClausesData[N].Shared == ClausesData[N].Ref) {
818 OrigAddresses.emplace_back(First, Second);
819 } else {
820 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
821 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
822 OrigAddresses.emplace_back(First, Second);
823 }
824}
825
// Compute and cache the size (in chars and in elements) of reduction item N,
// and, for variably-modified types, emit the VLA size expressions.
826void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
827 QualType PrivateType = getPrivateType(N);
828 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
// Fixed-size types: the size is a compile-time-derivable byte count; no
// element count is recorded.
829 if (!PrivateType->isVariablyModifiedType()) {
830 Sizes.emplace_back(
831 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
832 nullptr);
833 return;
834 }
835 llvm::Value *Size;
836 llvm::Value *SizeInChars;
837 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
838 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
// For an array section the element count is (upper - lower + 1), computed
// from the cached lower/upper-bound pointers.
839 if (AsArraySection) {
840 Size = CGF.Builder.CreatePtrDiff(ElemType,
841 OrigAddresses[N].second.getPointer(CGF),
842 OrigAddresses[N].first.getPointer(CGF));
843 Size = CGF.Builder.CreateNUWAdd(
844 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
845 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
846 } else {
847 SizeInChars =
848 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
849 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
850 }
851 Sizes.emplace_back(SizeInChars, Size);
// Bind the computed element count to the VLA's size expression so that
// EmitVariablyModifiedType can materialize the type.
852 CodeGenFunction::OpaqueValueMapping OpaqueMap(
853 CGF,
854 cast<OpaqueValueExpr>(
855 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
856 RValue::get(Size));
857 CGF.EmitVariablyModifiedType(PrivateType);
858}
859
// Variant of emitAggregateType that takes a precomputed element count. Size
// must be null for non-VLA items (checked by the assert below).
860void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
861 llvm::Value *Size) {
862 QualType PrivateType = getPrivateType(N);
863 if (!PrivateType->isVariablyModifiedType()) {
864 assert(!Size && !Sizes[N].second &&(static_cast <bool> (!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
"items.") ? void (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 866, __extension__
__PRETTY_FUNCTION__))
865 "Size should be nullptr for non-variably modified reduction "(static_cast <bool> (!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
"items.") ? void (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 866, __extension__
__PRETTY_FUNCTION__))
866 "items.")(static_cast <bool> (!Size && !Sizes[N].second &&
"Size should be nullptr for non-variably modified reduction "
"items.") ? void (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 866, __extension__
__PRETTY_FUNCTION__))
;
867 return;
868 }
// Bind the supplied element count to the VLA size expression, then emit the
// variably-modified type under that mapping.
869 CodeGenFunction::OpaqueValueMapping OpaqueMap(
870 CGF,
871 cast<OpaqueValueExpr>(
872 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
873 RValue::get(Size));
874 CGF.EmitVariablyModifiedType(PrivateType);
875}
876
// Emit the initializer for the private copy of reduction item N. DefaultInit
// is invoked to perform caller-provided default initialization; its return
// value signals whether it fully handled the init (note the deliberate
// short-circuit in the last branch).
877void ReductionCodeGen::emitInitialization(
878 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
879 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
880 assert(SharedAddresses.size() > N && "No variable was generated")(static_cast <bool> (SharedAddresses.size() > N &&
"No variable was generated") ? void (0) : __assert_fail ("SharedAddresses.size() > N && \"No variable was generated\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 880, __extension__
__PRETTY_FUNCTION__))
;
881 const auto *PrivateVD =
882 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
883 const OMPDeclareReductionDecl *DRD =
884 getReductionInit(ClausesData[N].ReductionOp);
// Arrays take the element-by-element path; scalars/records with a UDR
// initializer use it directly; otherwise fall back to the private VarDecl's
// own initializer.
885 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
886 if (DRD && DRD->getInitializer())
887 (void)DefaultInit(CGF);
888 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
889 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
890 (void)DefaultInit(CGF);
891 QualType SharedType = SharedAddresses[N].first.getType();
892 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
893 PrivateAddr, SharedAddr, SharedType);
894 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
895 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
896 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
897 PrivateVD->getType().getQualifiers(),
898 /*IsInitializer=*/false);
899 }
900}
901
902bool ReductionCodeGen::needCleanups(unsigned N) {
903 QualType PrivateType = getPrivateType(N);
904 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
905 return DTorKind != QualType::DK_none;
906}
907
908void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
909 Address PrivateAddr) {
910 QualType PrivateType = getPrivateType(N);
911 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
912 if (needCleanups(N)) {
913 PrivateAddr = CGF.Builder.CreateElementBitCast(
914 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
915 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
916 }
917}
918
// Repeatedly load through pointers/references in BaseLV until the pointee
// type matches ElTy, then retype the resulting address as ElTy's memory type.
919static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
920 LValue BaseLV) {
921 BaseTy = BaseTy.getNonReferenceType();
922 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
923 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
// Pointers and references are dereferenced through distinct lvalue paths.
924 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
925 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
926 } else {
927 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
928 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
929 }
930 BaseTy = BaseTy->getPointeeType();
931 }
// Preserve the base info and TBAA metadata of the final lvalue.
932 return CGF.MakeAddrLValue(
933 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
934 CGF.ConvertTypeForMem(ElTy)),
935 BaseLV.getType(), BaseLV.getBaseInfo(),
936 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
938
// Rebuild the chain of pointer/reference indirections of BaseTy around Addr:
// for each level a temporary is created and linked to the previous one, so the
// result can be loaded through the same number of indirections as the
// original base. If no indirection is needed, Addr is simply cast to the
// original base's pointer type.
939static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
940 Address OriginalBaseAddress, llvm::Value *Addr) {
941 Address Tmp = Address::invalid();
942 Address TopTmp = Address::invalid();
943 Address MostTopTmp = Address::invalid();
944 BaseTy = BaseTy.getNonReferenceType();
945 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
946 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
// Each new temporary is stored into the one above it; the first becomes the
// outermost result.
947 Tmp = CGF.CreateMemTemp(BaseTy);
948 if (TopTmp.isValid())
949 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
950 else
951 MostTopTmp = Tmp;
952 TopTmp = Tmp;
953 BaseTy = BaseTy->getPointeeType();
954 }
955
956 if (Tmp.isValid()) {
// Store the adjusted address into the innermost temporary.
957 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
958 Addr, Tmp.getElementType());
959 CGF.Builder.CreateStore(Addr, Tmp);
960 return MostTopTmp;
961 }
962
963 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
964 Addr, OriginalBaseAddress.getType());
965 return OriginalBaseAddress.withPointer(Addr);
966}
967
968static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
969 const VarDecl *OrigVD = nullptr;
970 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
971 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
972 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
973 Base = TempOASE->getBase()->IgnoreParenImpCasts();
974 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
975 Base = TempASE->getBase()->IgnoreParenImpCasts();
976 DE = cast<DeclRefExpr>(Base);
977 OrigVD = cast<VarDecl>(DE->getDecl());
978 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
979 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
980 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
981 Base = TempASE->getBase()->IgnoreParenImpCasts();
982 DE = cast<DeclRefExpr>(Base);
983 OrigVD = cast<VarDecl>(DE->getDecl());
984 }
985 return OrigVD;
986}
987
// Translate PrivateAddr so that the same offset the shared item has from its
// base variable is applied within the private copy. Records the base VarDecl
// of item N as a side effect.
988Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
989 Address PrivateAddr) {
990 const DeclRefExpr *DE;
// Only array sections/subscripts have a base distinct from the item itself.
991 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
992 BaseDecls.emplace_back(OrigVD);
993 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
994 LValue BaseLValue =
995 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
996 OriginalBaseLValue);
997 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
// Adjustment = distance (in elements) from the shared item back to the base.
998 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
999 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1000 SharedAddr.getPointer());
1001 llvm::Value *PrivatePointer =
1002 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1003 PrivateAddr.getPointer(), SharedAddr.getType());
1004 llvm::Value *Ptr = CGF.Builder.CreateGEP(
1005 SharedAddr.getElementType(), PrivatePointer, Adjustment);
1006 return castToBase(CGF, OrigVD->getType(),
1007 SharedAddresses[N].first.getType(),
1008 OriginalBaseLValue.getAddress(CGF), Ptr);
1009 }
1010 BaseDecls.emplace_back(
1011 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1012 return PrivateAddr;
1013}
1014
1015bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1016 const OMPDeclareReductionDecl *DRD =
1017 getReductionInit(ClausesData[N].ReductionOp);
1018 return DRD && DRD->getInitializer();
1019}
1020
1021LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1022 return CGF.EmitLoadOfPointerLValue(
1023 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1024 getThreadIDVariable()->getType()->castAs<PointerType>());
1025}
1026
// Emit the body of an OpenMP region under a terminate scope, so that any
// exception escaping the structured block terminates instead of unwinding.
1027void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1028 if (!CGF.HaveInsertPoint())
1029 return;
1030 // 1.2.2 OpenMP Language Terminology
1031 // Structured block - An executable statement with a single entry at the
1032 // top and a single exit at the bottom.
1033 // The point of exit cannot be a branch out of the structured block.
1034 // longjmp() and throw() must not violate the entry/exit criteria.
1035 CGF.EHStack.pushTerminate();
1036 if (S)
1037 CGF.incrementProfileCounter(S);
// CodeGen is the stored region-body callback; it must run strictly between
// the push/pop of the terminate scope.
1038 CodeGen(CGF);
1039 CGF.EHStack.popTerminate();
1040}
1041
1042LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1043 CodeGenFunction &CGF) {
1044 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1045 getThreadIDVariable()->getType(),
1046 AlignmentSource::Decl);
1047}
1048
1049static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1050 QualType FieldTy) {
1051 auto *Field = FieldDecl::Create(
1052 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1053 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1054 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1055 Field->setAccess(AS_public);
1056 DC->addDecl(Field);
1057 return Field;
1058}
1059
// Construct the OpenMP runtime support object for a module: set up the
// kmp_critical_name type, initialize the OpenMPIRBuilder, and load any
// offloading metadata from a host IR file.
1060CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1061 StringRef Separator)
1062 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1063 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
// kmp_critical_name is an array of 8 i32, matching the libomp declaration.
1064 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1065
1066 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1067 OMPBuilder.initialize();
1068 loadOffloadInfoMetadata();
1069}
1070
1071void CGOpenMPRuntime::clear() {
1072 InternalVars.clear();
1073 // Clean non-target variable declarations possibly used only in debug info.
1074 for (const auto &Data : EmittedNonTargetVariables) {
1075 if (!Data.getValue().pointsToAliveValue())
1076 continue;
1077 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1078 if (!GV)
1079 continue;
1080 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1081 continue;
1082 GV->eraseFromParent();
1083 }
1084}
1085
1086std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1087 SmallString<128> Buffer;
1088 llvm::raw_svector_ostream OS(Buffer);
1089 StringRef Sep = FirstSeparator;
1090 for (StringRef Part : Parts) {
1091 OS << Sep << Part;
1092 Sep = Separator;
1093 }
1094 return std::string(OS.str());
1095}
1096
// Emit the helper function for a user-defined reduction: either the combiner
// 'void .omp_combiner.(Ty *omp_out, Ty *omp_in)' or the initializer
// 'void .omp_initializer.(Ty *omp_priv, Ty *omp_orig)'. In/Out are the UDR's
// formal variables; CombinerInitializer is the expression to emit (may be
// null for an initializer UDR using direct-init of Out).
1097static llvm::Function *
1098emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1099 const Expr *CombinerInitializer, const VarDecl *In,
1100 const VarDecl *Out, bool IsCombiner) {
1101 // void .omp_combiner.(Ty *in, Ty *out);
1102 ASTContext &C = CGM.getContext();
1103 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1104 FunctionArgList Args;
1105 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1106 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1107 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1108 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1109 Args.push_back(&OmpOutParm);
1110 Args.push_back(&OmpInParm);
1111 const CGFunctionInfo &FnInfo =
1112 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1113 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1114 std::string Name = CGM.getOpenMPRuntime().getName(
1115 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1116 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1117 Name, &CGM.getModule());
1118 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
// These helpers are trivial forwarders; force inlining in optimized builds.
1119 if (CGM.getLangOpts().Optimize) {
1120 Fn->removeFnAttr(llvm::Attribute::NoInline);
1121 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1122 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1123 }
1124 CodeGenFunction CGF(CGM);
1125 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1126 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1127 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1128 Out->getLocation());
1129 CodeGenFunction::OMPPrivateScope Scope(CGF);
1130 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1131 Scope.addPrivate(
1132 In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1133 .getAddress(CGF));
1134 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1135 Scope.addPrivate(
1136 Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1137 .getAddress(CGF));
1138 (void)Scope.Privatize();
// For an initializer UDR, emit omp_priv's own (direct) initializer first.
1139 if (!IsCombiner && Out->hasInit() &&
1140 !CGF.isTrivialInitializer(Out->getInit())) {
1141 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1142 Out->getType().getQualifiers(),
1143 /*IsInitializer=*/true);
1144 }
1145 if (CombinerInitializer)
1146 CGF.EmitIgnoredExpr(CombinerInitializer);
1147 Scope.ForceCleanup();
1148 CGF.FinishFunction();
1149 return Fn;
1150}
1151
// Emit (once) the combiner and optional initializer functions for a
// '#pragma omp declare reduction' and cache them in UDRMap. When called from
// within a function, the UDR is also recorded against that function so its
// entry can be dropped when the function is discarded.
1152void CGOpenMPRuntime::emitUserDefinedReduction(
1153 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1154 if (UDRMap.count(D) > 0)
1155 return;
1156 llvm::Function *Combiner = emitCombinerOrInitializer(
1157 CGM, D->getType(), D->getCombiner(),
1158 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1159 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1160 /*IsCombiner=*/true);
1161 llvm::Function *Initializer = nullptr;
1162 if (const Expr *Init = D->getInitializer()) {
// Only call-form initializers pass the expression through; direct-init is
// handled inside emitCombinerOrInitializer via the priv variable's init.
1163 Initializer = emitCombinerOrInitializer(
1164 CGM, D->getType(),
1165 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1166 : nullptr,
1167 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1168 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1169 /*IsCombiner=*/false);
1170 }
1171 UDRMap.try_emplace(D, Combiner, Initializer);
1172 if (CGF) {
1173 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1174 Decls.second.push_back(D);
1175 }
1176}
1177
1178std::pair<llvm::Function *, llvm::Function *>
1179CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1180 auto I = UDRMap.find(D);
1181 if (I != UDRMap.end())
1182 return I->second;
1183 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1184 return UDRMap.lookup(D);
1185}
1186
1187namespace {
1188// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1189// Builder if one is present.
// While alive, registers a finalization callback with the OpenMPIRBuilder so
// cancellation inside an outlined parallel region branches through clang's
// cleanup machinery. No-ops entirely when no builder is supplied.
1190struct PushAndPopStackRAII {
1191 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1192 bool HasCancel, llvm::omp::Directive Kind)
1193 : OMPBuilder(OMPBuilder) {
1194 if (!OMPBuilder)
1195 return;
1196
1197 // The following callback is the crucial part of clangs cleanup process.
1198 //
1199 // NOTE:
1200 // Once the OpenMPIRBuilder is used to create parallel regions (and
1201 // similar), the cancellation destination (Dest below) is determined via
1202 // IP. That means if we have variables to finalize we split the block at IP,
1203 // use the new block (=BB) as destination to build a JumpDest (via
1204 // getJumpDestInCurrentScope(BB)) which then is fed to
1205 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1206 // to push & pop an FinalizationInfo object.
1207 // The FiniCB will still be needed but at the point where the
1208 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
// NOTE(review): the callback captures CGF by reference — it must not outlive
// this RAII object's enclosing codegen; the pop in the destructor enforces
// that for the finalization stack.
1209 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1210 assert(IP.getBlock()->end() == IP.getPoint() &&(static_cast <bool> (IP.getBlock()->end() == IP.getPoint
() && "Clang CG should cause non-terminated block!") ?
void (0) : __assert_fail ("IP.getBlock()->end() == IP.getPoint() && \"Clang CG should cause non-terminated block!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1211, __extension__
__PRETTY_FUNCTION__))
1211 "Clang CG should cause non-terminated block!")(static_cast <bool> (IP.getBlock()->end() == IP.getPoint
() && "Clang CG should cause non-terminated block!") ?
void (0) : __assert_fail ("IP.getBlock()->end() == IP.getPoint() && \"Clang CG should cause non-terminated block!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1211, __extension__
__PRETTY_FUNCTION__))
;
1212 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1213 CGF.Builder.restoreIP(IP);
1214 CodeGenFunction::JumpDest Dest =
1215 CGF.getOMPCancelDestination(OMPD_parallel);
1216 CGF.EmitBranchThroughCleanup(Dest);
1217 };
1218
1219 // TODO: Remove this once we emit parallel regions through the
1220 // OpenMPIRBuilder as it can do this setup internally.
1221 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1222 OMPBuilder->pushFinalizationCB(std::move(FI));
1223 }
1224 ~PushAndPopStackRAII() {
1225 if (OMPBuilder)
1226 OMPBuilder->popFinalizationCB();
1227 }
1228 llvm::OpenMPIRBuilder *OMPBuilder;
1229};
1230} // namespace
1231
// Outline the captured statement of a parallel/teams region into a function
// whose first parameter is the kmp_int32* thread id. Determines whether the
// region (of any of the parallel-containing directive kinds) has a cancel
// construct, so cancellation barriers are emitted correctly.
1232static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1233 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1234 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1235 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1236 assert(ThreadIDVar->getType()->isPointerType() &&(static_cast <bool> (ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 *"
) ? void (0) : __assert_fail ("ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 *\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1237, __extension__
__PRETTY_FUNCTION__))
1237 "thread id variable must be of type kmp_int32 *")(static_cast <bool> (ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 *"
) ? void (0) : __assert_fail ("ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 *\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1237, __extension__
__PRETTY_FUNCTION__))
;
1238 CodeGenFunction CGF(CGM, true);
// Each directive kind stores its cancel flag on its own class; probe them in
// turn.
1239 bool HasCancel = false;
1240 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1241 HasCancel = OPD->hasCancel();
1242 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1243 HasCancel = OPD->hasCancel();
1244 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1245 HasCancel = OPSD->hasCancel();
1246 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1247 HasCancel = OPFD->hasCancel();
1248 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1249 HasCancel = OPFD->hasCancel();
1250 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1251 HasCancel = OPFD->hasCancel();
1252 else if (const auto *OPFD =
1253 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1254 HasCancel = OPFD->hasCancel();
1255 else if (const auto *OPFD =
1256 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1257 HasCancel = OPFD->hasCancel();
1258
1259 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1260 // parallel region to make cancellation barriers work properly.
1261 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1262 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1263 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1264 HasCancel, OutlinedHelperName);
1265 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1266 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1267}
1268
1269llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1270 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1271 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1273 return emitParallelOrTeamsOutlinedFunction(
1274 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1275}
1276
1277llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1278 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1279 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1280 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1281 return emitParallelOrTeamsOutlinedFunction(
1282 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1283}
1284
// Outline a task/taskloop region. For untied tasks the body is split into
// parts and a resubmission call (__kmpc_omp_task) is emitted between them;
// NumberOfParts is set accordingly.
1285llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1286 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1287 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1288 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1289 bool Tied, unsigned &NumberOfParts) {
// Callback used between the parts of an untied task: re-enqueue the task via
// __kmpc_omp_task so it can be rescheduled.
1290 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1291 PrePostActionTy &) {
1292 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1293 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1294 llvm::Value *TaskArgs[] = {
1295 UpLoc, ThreadID,
1296 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1297 TaskTVar->getType()->castAs<PointerType>())
1298 .getPointer(CGF)};
1299 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1300 CGM.getModule(), OMPRTL___kmpc_omp_task),
1301 TaskArgs);
1302 };
1303 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1304 UntiedCodeGen);
1305 CodeGen.setAction(Action);
1306 assert(!ThreadIDVar->getType()->isPointerType() &&(static_cast <bool> (!ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 for tasks"
) ? void (0) : __assert_fail ("!ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 for tasks\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1307, __extension__
__PRETTY_FUNCTION__))
1307 "thread id variable must be of type kmp_int32 for tasks")(static_cast <bool> (!ThreadIDVar->getType()->isPointerType
() && "thread id variable must be of type kmp_int32 for tasks"
) ? void (0) : __assert_fail ("!ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 for tasks\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1307, __extension__
__PRETTY_FUNCTION__))
;
1308 const OpenMPDirectiveKind Region =
1309 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1310 : OMPD_task;
1311 const CapturedStmt *CS = D.getCapturedStmt(Region);
1312 bool HasCancel = false;
1313 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1314 HasCancel = TD->hasCancel();
1315 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1316 HasCancel = TD->hasCancel();
1317 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1318 HasCancel = TD->hasCancel();
1319 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1320 HasCancel = TD->hasCancel();
1321
1322 CodeGenFunction CGF(CGM, true);
1323 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1324 InnermostKind, HasCancel, Action);
1325 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1326 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
// The part count is only meaningful for untied tasks.
1327 if (!Tied)
1328 NumberOfParts = Action.getNumberOfParts();
1329 return Res;
1330}
1331
// Create the "service" insertion point for the current function: a dummy
// bitcast instruction before which location/thread-id runtime calls are
// emitted, either at the current builder position or right after the allocas.
1332void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1333 bool AtCurrentPoint) {
1334 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1335 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.")(static_cast <bool> (!Elem.second.ServiceInsertPt &&
"Insert point is set already.") ? void (0) : __assert_fail (
"!Elem.second.ServiceInsertPt && \"Insert point is set already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1335, __extension__
__PRETTY_FUNCTION__))
;
1336
// The bitcast of undef is a harmless placeholder; it is erased again by
// clearLocThreadIdInsertPt.
1337 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1338 if (AtCurrentPoint) {
1339 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1340 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1341 } else {
1342 Elem.second.ServiceInsertPt =
1343 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1344 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1345 }
1346}
1347
1348void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1349 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1350 if (Elem.second.ServiceInsertPt) {
1351 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1352 Elem.second.ServiceInsertPt = nullptr;
1353 Ptr->eraseFromParent();
1354 }
1355}
1356
// Render Loc into the ";file;function;line;column;;" ident string format used
// by the OpenMP runtime, writing into Buffer and returning a view of it.
1357static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1358 SourceLocation Loc,
1359 SmallString<128> &Buffer) {
1360 llvm::raw_svector_ostream OS(Buffer);
1361 // Build debug location
1362 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1363 OS << ";" << PLoc.getFilename() << ";";
// The function-name field is left empty when there is no current FunctionDecl.
1364 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1365 OS << FD->getQualifiedNameAsString();
1366 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1367 return OS.str();
1368}
1369
1370llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1371 SourceLocation Loc,
1372 unsigned Flags) {
1373 uint32_t SrcLocStrSize;
1374 llvm::Constant *SrcLocStr;
1375 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1376 Loc.isInvalid()) {
1377 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1378 } else {
1379 std::string FunctionName;
1380 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1381 FunctionName = FD->getQualifiedNameAsString();
1382 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1383 const char *FileName = PLoc.getFilename();
1384 unsigned Line = PLoc.getLine();
1385 unsigned Column = PLoc.getColumn();
1386 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1387 Column, SrcLocStrSize);
1388 }
1389 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1390 return OMPBuilder.getOrCreateIdent(
1391 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1392}
1393
/// Return an i32 OpenMP global thread id for use at \p Loc, preferring (in
/// order): the OpenMPIRBuilder's own mechanism, a value cached in
/// OpenMPLocThreadIDMap, a load of the outlined region's thread-id argument,
/// and finally an emitted __kmpc_global_thread_num() call whose result is
/// cached for the rest of the function.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // Only load from the argument when either EH cannot interfere (no
      // landing pad / exceptions disabled) or the pointer demonstrably lives
      // in the entry block or the current block; otherwise fall through to a
      // fresh runtime call below.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  // Emit the call at the service insertion point, then restore the builder's
  // previous position via the guard.
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
1462
1463void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464 assert(CGF.CurFn && "No function in current CodeGenFunction.")(static_cast <bool> (CGF.CurFn && "No function in current CodeGenFunction."
) ? void (0) : __assert_fail ("CGF.CurFn && \"No function in current CodeGenFunction.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1464, __extension__
__PRETTY_FUNCTION__))
;
1465 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1466 clearLocThreadIdInsertPt(CGF);
1467 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1468 }
1469 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1470 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1471 UDRMap.erase(D);
1472 FunctionUDRMap.erase(CGF.CurFn);
1473 }
1474 auto I = FunctionUDMMap.find(CGF.CurFn);
1475 if (I != FunctionUDMMap.end()) {
1476 for(const auto *D : I->second)
1477 UDMMap.erase(D);
1478 FunctionUDMMap.erase(I);
1479 }
1480 LastprivateConditionalToTypes.erase(CGF.CurFn);
1481 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1482}
1483
/// Return the ident_t* type used for location arguments of OpenMP runtime
/// calls; the type itself is owned and uniqued by the OpenMPIRBuilder.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1487
1488llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1489 if (!Kmpc_MicroTy) {
1490 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1491 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1492 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1493 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1494 }
1495 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1496}
1497
1498llvm::FunctionCallee
1499CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1500 bool IsGPUDistribute) {
1501 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1502, __extension__
__PRETTY_FUNCTION__))
1502 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1502, __extension__
__PRETTY_FUNCTION__))
;
1503 StringRef Name;
1504 if (IsGPUDistribute)
1505 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1506 : "__kmpc_distribute_static_init_4u")
1507 : (IVSigned ? "__kmpc_distribute_static_init_8"
1508 : "__kmpc_distribute_static_init_8u");
1509 else
1510 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1511 : "__kmpc_for_static_init_4u")
1512 : (IVSigned ? "__kmpc_for_static_init_8"
1513 : "__kmpc_for_static_init_8u");
1514
1515 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1516 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1517 llvm::Type *TypeParams[] = {
1518 getIdentTyPointerTy(), // loc
1519 CGM.Int32Ty, // tid
1520 CGM.Int32Ty, // schedtype
1521 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1522 PtrTy, // p_lower
1523 PtrTy, // p_upper
1524 PtrTy, // p_stride
1525 ITy, // incr
1526 ITy // chunk
1527 };
1528 auto *FnTy =
1529 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1530 return CGM.CreateRuntimeFunction(FnTy, Name);
1531}
1532
1533llvm::FunctionCallee
1534CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1535 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1536, __extension__
__PRETTY_FUNCTION__))
1536 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1536, __extension__
__PRETTY_FUNCTION__))
;
1537 StringRef Name =
1538 IVSize == 32
1539 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1540 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1541 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1542 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1543 CGM.Int32Ty, // tid
1544 CGM.Int32Ty, // schedtype
1545 ITy, // lower
1546 ITy, // upper
1547 ITy, // stride
1548 ITy // chunk
1549 };
1550 auto *FnTy =
1551 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1552 return CGM.CreateRuntimeFunction(FnTy, Name);
1553}
1554
1555llvm::FunctionCallee
1556CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1557 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1558, __extension__
__PRETTY_FUNCTION__))
1558 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1558, __extension__
__PRETTY_FUNCTION__))
;
1559 StringRef Name =
1560 IVSize == 32
1561 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1562 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1563 llvm::Type *TypeParams[] = {
1564 getIdentTyPointerTy(), // loc
1565 CGM.Int32Ty, // tid
1566 };
1567 auto *FnTy =
1568 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1569 return CGM.CreateRuntimeFunction(FnTy, Name);
1570}
1571
1572llvm::FunctionCallee
1573CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1574 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1575, __extension__
__PRETTY_FUNCTION__))
1575 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1575, __extension__
__PRETTY_FUNCTION__))
;
1576 StringRef Name =
1577 IVSize == 32
1578 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1579 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1580 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1581 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1582 llvm::Type *TypeParams[] = {
1583 getIdentTyPointerTy(), // loc
1584 CGM.Int32Ty, // tid
1585 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1586 PtrTy, // p_lower
1587 PtrTy, // p_upper
1588 PtrTy // p_stride
1589 };
1590 auto *FnTy =
1591 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1592 return CGM.CreateRuntimeFunction(FnTy, Name);
1593}
1594
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
///
/// On failure to stat the presumed filename (e.g. a #line-directive path that
/// does not exist on disk), the lookup is retried with line directives
/// ignored; if that also fails, a "cannot open file" diagnostic is reported
/// and \p DeviceID / \p FileID are taken from the default-constructed
/// UniqueID.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                                     unsigned &DeviceID, unsigned &FileID,
                                     unsigned &LineNum) {
  SourceManager &SM = C.getSourceManager();

  // The loc should be always valid and have a file ID (the user cannot use
  // #pragma directives in macros)

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    // Retry with #line directives disabled so the real on-disk file is used.
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  DeviceID = ID.getDevice();
  FileID = ID.getFile();
  LineNum = PLoc.getLine();
}
1624
/// Return the address of the "<mangled>[_<fileid>]_decl_tgt_ref_ptr"
/// indirection pointer for a declare-target 'link' variable (or a 'to'
/// variable under unified shared memory), creating and registering the
/// pointer on first use. Returns an invalid Address in simd-only mode or
/// when the variable needs no indirection.
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        // Internal-linkage variables get the file id mixed into the name so
        // that same-named statics from different TUs do not collide.
        unsigned DeviceID, FileID, Line;
        getTargetEntryUniqueInfo(CGM.getContext(),
                                 VD->getCanonicalDecl()->getBeginLoc(),
                                 DeviceID, FileID, Line);
        OS << llvm::format("_%x", FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      // First use: create the internal pointer variable, point it at the host
      // copy (host compile only) and register it for offload bookkeeping.
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
1663
1664llvm::Constant *
1665CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1666 assert(!CGM.getLangOpts().OpenMPUseTLS ||(static_cast <bool> (!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported()) ? void (
0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1667, __extension__
__PRETTY_FUNCTION__))
1667 !CGM.getContext().getTargetInfo().isTLSSupported())(static_cast <bool> (!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported()) ? void (
0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1667, __extension__
__PRETTY_FUNCTION__))
;
1668 // Lookup the entry, lazily creating it if necessary.
1669 std::string Suffix = getName({"cache", ""});
1670 return getOrCreateInternalVariable(
1671 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1672}
1673
1674Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1675 const VarDecl *VD,
1676 Address VDAddr,
1677 SourceLocation Loc) {
1678 if (CGM.getLangOpts().OpenMPUseTLS &&
1679 CGM.getContext().getTargetInfo().isTLSSupported())
1680 return VDAddr;
1681
1682 llvm::Type *VarTy = VDAddr.getElementType();
1683 llvm::Value *Args[] = {
1684 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1685 CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
1686 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1687 getOrCreateThreadPrivateCache(VD)};
1688 return Address(
1689 CGF.EmitRuntimeCall(
1690 OMPBuilder.getOrCreateRuntimeFunction(
1691 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1692 Args),
1693 CGF.Int8Ty, VDAddr.getAlignment());
1694}
1695
1696void CGOpenMPRuntime::emitThreadPrivateVarInit(
1697 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1698 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1699 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1700 // library.
1701 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1702 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1703 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1704 OMPLoc);
1705 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1706 // to register constructor/destructor for variable.
1707 llvm::Value *Args[] = {
1708 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1709 Ctor, CopyCtor, Dtor};
1710 CGF.EmitRuntimeCall(
1711 OMPBuilder.getOrCreateRuntimeFunction(
1712 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1713 Args);
1714}
1715
/// Emit (once per mangled name) the ctor/dtor machinery for a threadprivate
/// variable and register it with the runtime. Returns a standalone
/// "__omp_threadprivate_init_" function when \p CGF is null and registration
/// is needed, nullptr otherwise (TLS mode, no definition, already emitted,
/// or registration emitted inline into \p CGF).
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  // TLS-backed threadprivates need no runtime registration at all.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  // Only the first sighting of each definition emits anything.
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD. Signature: void *(void *dst).
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      // Load the destination pointer argument and initialize through it.
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // The runtime expects the destination pointer to be returned back.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD. Signature: void (void *obj).
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    // The registration call takes all three entries; missing ones are passed
    // as typed null function pointers.
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      // No surrounding function: synthesize a dedicated init function that
      // performs the registration and return it to the caller.
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Otherwise register inline in the caller-provided function.
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1836
/// Emit (once per mangled name) the offload-entry ctor/dtor stubs for a
/// declare-target variable \p Addr and register them with the offload
/// entries manager. On the device the stubs are real kernels that run the
/// initializer/destructor; on the host they are placeholder globals that
/// only anchor the entries. Returns true iff compiling for the device
/// (i.e. the caller should skip the default host emission).
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Nothing to do when no offloading is configured at all.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'link' variables (and 'to' under unified shared memory) are handled via
  // the indirection pointer instead; see getAddrOfDeclareTargetVar.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      // On AMDGCN the ctor stub must be launchable as a kernel.
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // Initialization must go through a generic (AS 0) pointer.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: a placeholder global stands in for the device ctor.
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      // Destruction likewise runs through a generic (AS 0) pointer.
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      // Host side: a placeholder global stands in for the device dtor.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
1967
/// Return a per-thread copy of an internally generated ("artificial")
/// variable named \p Name of type \p VarType. When TLS is usable the backing
/// global itself is made thread_local; otherwise the per-thread storage is
/// obtained from __kmpc_threadprivate_cached via a dedicated cache global.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  // The backing global is named "<Name><sep>artificial<sep>".
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    // TLS path: mark the global thread_local and hand it out directly.
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Runtime path: __kmpc_threadprivate_cached(loc, tid, &var, size, &cache),
  // with a per-variable cache global named "<Name><sep>artificial<sep>cache".
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime returns a void*; cast it back to the variable's own type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
1999
2000void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2001 const RegionCodeGenTy &ThenGen,
2002 const RegionCodeGenTy &ElseGen) {
2003 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2004
2005 // If the condition constant folds and can be elided, try to avoid emitting
2006 // the condition and the dead arm of the if/else.
2007 bool CondConstant;
2008 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2009 if (CondConstant)
2010 ThenGen(CGF);
2011 else
2012 ElseGen(CGF);
2013 return;
2014 }
2015
2016 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2017 // emit the conditional branch.
2018 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2019 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2020 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2021 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2022
2023 // Emit the 'then' code.
2024 CGF.EmitBlock(ThenBlock);
2025 ThenGen(CGF);
2026 CGF.EmitBranch(ContBlock);
2027 // Emit the 'else' code if present.
2028 // There is no need to emit line number for unconditional branch.
2029 (void)ApplyDebugLocation::CreateEmpty(CGF);
2030 CGF.EmitBlock(ElseBlock);
2031 ElseGen(CGF);
2032 // There is no need to emit line number for unconditional branch.
2033 (void)ApplyDebugLocation::CreateEmpty(CGF);
2034 CGF.EmitBranch(ContBlock);
2035 // Emit the continuation block for code after the if.
2036 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2037}
2038
// Emit code for a '#pragma omp parallel' region: either a __kmpc_fork_call
// of the outlined function, or (when the if-clause is false) a serialized
// direct call bracketed by __kmpc_serialized_parallel calls.
// NOTE(review): NumThreads is not referenced anywhere in this body; it is
// presumably handled by the caller or another overload — confirm.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function to the runtime for forking.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    // The captured variables are forwarded as trailing variadic arguments.
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path: run the outlined function on the current thread.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause, branch between the two code paths; otherwise always
  // fork.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2110
2111// If we're inside an (outlined) parallel region, use the region info's
2112// thread-ID variable (it is passed in a first argument of the outlined function
2113// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2114// regular serial code region, get thread ID by calling kmp_int32
2115// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2116// return the address of that temp.
2117Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2118 SourceLocation Loc) {
2119 if (auto *OMPRegionInfo =
2120 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2121 if (OMPRegionInfo->getThreadIDVariable())
2122 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2123
2124 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2125 QualType Int32Ty =
2126 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2127 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2128 CGF.EmitStoreOfScalar(ThreadID,
2129 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2130
2131 return ThreadIDTemp;
2132}
2133
2134llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2135 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2136 SmallString<256> Buffer;
2137 llvm::raw_svector_ostream Out(Buffer);
2138 Out << Name;
2139 StringRef RuntimeName = Out.str();
2140 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2141 if (Elem.second) {
2142 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&(static_cast <bool> (Elem.second->getType()->isOpaqueOrPointeeTypeMatches
(Ty) && "OMP internal variable has different type than requested"
) ? void (0) : __assert_fail ("Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && \"OMP internal variable has different type than requested\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2143, __extension__
__PRETTY_FUNCTION__))
2143 "OMP internal variable has different type than requested")(static_cast <bool> (Elem.second->getType()->isOpaqueOrPointeeTypeMatches
(Ty) && "OMP internal variable has different type than requested"
) ? void (0) : __assert_fail ("Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) && \"OMP internal variable has different type than requested\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2143, __extension__
__PRETTY_FUNCTION__))
;
2144 return &*Elem.second;
2145 }
2146
2147 return Elem.second = new llvm::GlobalVariable(
2148 CGM.getModule(), Ty, /*IsConstant*/ false,
2149 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2150 Elem.first(), /*InsertBefore=*/nullptr,
2151 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2152}
2153
2154llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2155 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2156 std::string Name = getName({Prefix, "var"});
2157 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2158}
2159
2160namespace {
2161/// Common pre(post)-action for different OpenMP constructs.
2162class CommonActionTy final : public PrePostActionTy {
2163 llvm::FunctionCallee EnterCallee;
2164 ArrayRef<llvm::Value *> EnterArgs;
2165 llvm::FunctionCallee ExitCallee;
2166 ArrayRef<llvm::Value *> ExitArgs;
2167 bool Conditional;
2168 llvm::BasicBlock *ContBlock = nullptr;
2169
2170public:
2171 CommonActionTy(llvm::FunctionCallee EnterCallee,
2172 ArrayRef<llvm::Value *> EnterArgs,
2173 llvm::FunctionCallee ExitCallee,
2174 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2175 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2176 ExitArgs(ExitArgs), Conditional(Conditional) {}
2177 void Enter(CodeGenFunction &CGF) override {
2178 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2179 if (Conditional) {
2180 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2181 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2182 ContBlock = CGF.createBasicBlock("omp_if.end");
2183 // Generate the branch (If-stmt)
2184 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2185 CGF.EmitBlock(ThenBlock);
2186 }
2187 }
2188 void Done(CodeGenFunction &CGF) {
2189 // Emit the rest of blocks/branches
2190 CGF.EmitBranch(ContBlock);
2191 CGF.EmitBlock(ContBlock, true);
2192 }
2193 void Exit(CodeGenFunction &CGF) override {
2194 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2195 }
2196};
2197} // anonymous namespace
2198
2199void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2200 StringRef CriticalName,
2201 const RegionCodeGenTy &CriticalOpGen,
2202 SourceLocation Loc, const Expr *Hint) {
2203 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2204 // CriticalOpGen();
2205 // __kmpc_end_critical(ident_t *, gtid, Lock);
2206 // Prepare arguments and build a call to __kmpc_critical
2207 if (!CGF.HaveInsertPoint())
2208 return;
2209 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2210 getCriticalRegionLock(CriticalName)};
2211 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2212 std::end(Args));
2213 if (Hint) {
2214 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2215 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2216 }
2217 CommonActionTy Action(
2218 OMPBuilder.getOrCreateRuntimeFunction(
2219 CGM.getModule(),
2220 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2221 EnterArgs,
2222 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2223 OMPRTL___kmpc_end_critical),
2224 Args);
2225 CriticalOpGen.setAction(Action);
2226 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2227}
2228
2229void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2230 const RegionCodeGenTy &MasterOpGen,
2231 SourceLocation Loc) {
2232 if (!CGF.HaveInsertPoint())
2233 return;
2234 // if(__kmpc_master(ident_t *, gtid)) {
2235 // MasterOpGen();
2236 // __kmpc_end_master(ident_t *, gtid);
2237 // }
2238 // Prepare arguments and build a call to __kmpc_master
2239 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2240 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2241 CGM.getModule(), OMPRTL___kmpc_master),
2242 Args,
2243 OMPBuilder.getOrCreateRuntimeFunction(
2244 CGM.getModule(), OMPRTL___kmpc_end_master),
2245 Args,
2246 /*Conditional=*/true);
2247 MasterOpGen.setAction(Action);
2248 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2249 Action.Done(CGF);
2250}
2251
2252void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2253 const RegionCodeGenTy &MaskedOpGen,
2254 SourceLocation Loc, const Expr *Filter) {
2255 if (!CGF.HaveInsertPoint())
2256 return;
2257 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2258 // MaskedOpGen();
2259 // __kmpc_end_masked(iden_t *, gtid);
2260 // }
2261 // Prepare arguments and build a call to __kmpc_masked
2262 llvm::Value *FilterVal = Filter
2263 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2264 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2265 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2266 FilterVal};
2267 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2268 getThreadID(CGF, Loc)};
2269 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2270 CGM.getModule(), OMPRTL___kmpc_masked),
2271 Args,
2272 OMPBuilder.getOrCreateRuntimeFunction(
2273 CGM.getModule(), OMPRTL___kmpc_end_masked),
2274 ArgsEnd,
2275 /*Conditional=*/true);
2276 MaskedOpGen.setAction(Action);
2277 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2278 Action.Done(CGF);
2279}
2280
2281void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2282 SourceLocation Loc) {
2283 if (!CGF.HaveInsertPoint())
2284 return;
2285 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2286 OMPBuilder.createTaskyield(CGF.Builder);
2287 } else {
2288 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2289 llvm::Value *Args[] = {
2290 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2291 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2292 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2293 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2294 Args);
2295 }
2296
2297 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2298 Region->emitUntiedSwitch(CGF);
2299}
2300
2301void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2302 const RegionCodeGenTy &TaskgroupOpGen,
2303 SourceLocation Loc) {
2304 if (!CGF.HaveInsertPoint())
2305 return;
2306 // __kmpc_taskgroup(ident_t *, gtid);
2307 // TaskgroupOpGen();
2308 // __kmpc_end_taskgroup(ident_t *, gtid);
2309 // Prepare arguments and build a call to __kmpc_taskgroup
2310 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2311 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2312 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2313 Args,
2314 OMPBuilder.getOrCreateRuntimeFunction(
2315 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2316 Args);
2317 TaskgroupOpGen.setAction(Action);
2318 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2319}
2320
2321/// Given an array of pointers to variables, project the address of a
2322/// given variable.
2323static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2324 unsigned Index, const VarDecl *Var) {
2325 // Pull out the pointer to the variable.
2326 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2327 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2328
2329 llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
2330 return Address(
2331 CGF.Builder.CreateBitCast(
2332 Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
2333 ElemTy, CGF.getContext().getDeclAlign(Var));
2334}
2335
// Build the helper function the runtime uses to broadcast copyprivate values:
//   void omp.copyprivate.copy_func(void *LHSArg, void *RHSArg)
// where both arguments are arrays of void* (of element type ArgsElemType) and
// each destination variable is assigned from the corresponding source using
// the pre-built AssignmentOps expressions.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  // Emit the helper's body with a fresh CodeGenFunction.
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // Emit the copy using the copyprivate variable's type and the pre-built
    // assignment expression (handles user-defined assignment operators).
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2391
2392void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2393 const RegionCodeGenTy &SingleOpGen,
2394 SourceLocation Loc,
2395 ArrayRef<const Expr *> CopyprivateVars,
2396 ArrayRef<const Expr *> SrcExprs,
2397 ArrayRef<const Expr *> DstExprs,
2398 ArrayRef<const Expr *> AssignmentOps) {
2399 if (!CGF.HaveInsertPoint())
2400 return;
2401 assert(CopyprivateVars.size() == SrcExprs.size() &&(static_cast <bool> (CopyprivateVars.size() == SrcExprs
.size() && CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size()) ? void (0) :
__assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2403, __extension__
__PRETTY_FUNCTION__))
2402 CopyprivateVars.size() == DstExprs.size() &&(static_cast <bool> (CopyprivateVars.size() == SrcExprs
.size() && CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size()) ? void (0) :
__assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2403, __extension__
__PRETTY_FUNCTION__))
2403 CopyprivateVars.size() == AssignmentOps.size())(static_cast <bool> (CopyprivateVars.size() == SrcExprs
.size() && CopyprivateVars.size() == DstExprs.size() &&
CopyprivateVars.size() == AssignmentOps.size()) ? void (0) :
__assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2403, __extension__
__PRETTY_FUNCTION__))
;
2404 ASTContext &C = CGM.getContext();
2405 // int32 did_it = 0;
2406 // if(__kmpc_single(ident_t *, gtid)) {
2407 // SingleOpGen();
2408 // __kmpc_end_single(ident_t *, gtid);
2409 // did_it = 1;
2410 // }
2411 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2412 // <copy_func>, did_it);
2413
2414 Address DidIt = Address::invalid();
2415 if (!CopyprivateVars.empty()) {
2416 // int32 did_it = 0;
2417 QualType KmpInt32Ty =
2418 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2419 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2420 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2421 }
2422 // Prepare arguments and build a call to __kmpc_single
2423 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2424 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2425 CGM.getModule(), OMPRTL___kmpc_single),
2426 Args,
2427 OMPBuilder.getOrCreateRuntimeFunction(
2428 CGM.getModule(), OMPRTL___kmpc_end_single),
2429 Args,
2430 /*Conditional=*/true);
2431 SingleOpGen.setAction(Action);
2432 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2433 if (DidIt.isValid()) {
2434 // did_it = 1;
2435 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2436 }
2437 Action.Done(CGF);
2438 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2439 // <copy_func>, did_it);
2440 if (DidIt.isValid()) {
2441 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2442 QualType CopyprivateArrayTy = C.getConstantArrayType(
2443 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2444 /*IndexTypeQuals=*/0);
2445 // Create a list of all private variables for copyprivate.
2446 Address CopyprivateList =
2447 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2448 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2449 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2450 CGF.Builder.CreateStore(
2451 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2452 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2453 CGF.VoidPtrTy),
2454 Elem);
2455 }
2456 // Build function that copies private values from single region to all other
2457 // threads in the corresponding parallel region.
2458 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2459 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
2460 SrcExprs, DstExprs, AssignmentOps, Loc);
2461 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2462 Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2463 CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
2464 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2465 llvm::Value *Args[] = {
2466 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2467 getThreadID(CGF, Loc), // i32 <gtid>
2468 BufSize, // size_t <buf_size>
2469 CL.getPointer(), // void *<copyprivate list>
2470 CpyFn, // void (*) (void *, void *) <copy_func>
2471 DidItVal // i32 did_it
2472 };
2473 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2474 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2475 Args);
2476 }
2477}
2478
2479void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2480 const RegionCodeGenTy &OrderedOpGen,
2481 SourceLocation Loc, bool IsThreads) {
2482 if (!CGF.HaveInsertPoint())
2483 return;
2484 // __kmpc_ordered(ident_t *, gtid);
2485 // OrderedOpGen();
2486 // __kmpc_end_ordered(ident_t *, gtid);
2487 // Prepare arguments and build a call to __kmpc_ordered
2488 if (IsThreads) {
2489 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2490 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2491 CGM.getModule(), OMPRTL___kmpc_ordered),
2492 Args,
2493 OMPBuilder.getOrCreateRuntimeFunction(
2494 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2495 Args);
2496 OrderedOpGen.setAction(Action);
2497 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2498 return;
2499 }
2500 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2501}
2502
2503unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2504 unsigned Flags;
2505 if (Kind == OMPD_for)
2506 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2507 else if (Kind == OMPD_sections)
2508 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2509 else if (Kind == OMPD_single)
2510 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2511 else if (Kind == OMPD_barrier)
2512 Flags = OMP_IDENT_BARRIER_EXPL;
2513 else
2514 Flags = OMP_IDENT_BARRIER_IMPL;
2515 return Flags;
2516}
2517
2518void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2519 CodeGenFunction &CGF, const OMPLoopDirective &S,
2520 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2521 // Check if the loop directive is actually a doacross loop directive. In this
2522 // case choose static, 1 schedule.
2523 if (llvm::any_of(
2524 S.getClausesOfKind<OMPOrderedClause>(),
2525 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2526 ScheduleKind = OMPC_SCHEDULE_static;
2527 // Chunk size is 1 in this case.
2528 llvm::APInt ChunkSize(32, 1);
2529 ChunkExpr = IntegerLiteral::Create(
2530 CGF.getContext(), ChunkSize,
2531 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2532 SourceLocation());
2533 }
2534}
2535
// Emit an OpenMP barrier: via the OpenMPIRBuilder when enabled, otherwise as
// a __kmpc_barrier call — or, inside a cancellable region (unless
// ForceSimpleCall), a __kmpc_cancel_barrier call whose non-zero result
// branches out of the construct when EmitChecks is set.
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Encode the originating directive into the location's flags so the runtime
  // can distinguish barrier kinds.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2585
2586/// Map the OpenMP loop schedule to the runtime enumeration.
2587static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2588 bool Chunked, bool Ordered) {
2589 switch (ScheduleKind) {
2590 case OMPC_SCHEDULE_static:
2591 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2592 : (Ordered ? OMP_ord_static : OMP_sch_static);
2593 case OMPC_SCHEDULE_dynamic:
2594 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2595 case OMPC_SCHEDULE_guided:
2596 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2597 case OMPC_SCHEDULE_runtime:
2598 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2599 case OMPC_SCHEDULE_auto:
2600 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2601 case OMPC_SCHEDULE_unknown:
2602 assert(!Chunked && "chunk was specified but schedule kind not known")(static_cast <bool> (!Chunked && "chunk was specified but schedule kind not known"
) ? void (0) : __assert_fail ("!Chunked && \"chunk was specified but schedule kind not known\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2602, __extension__
__PRETTY_FUNCTION__))
;
2603 return Ordered ? OMP_ord_static : OMP_sch_static;
2604 }
2605 llvm_unreachable("Unexpected runtime schedule")::llvm::llvm_unreachable_internal("Unexpected runtime schedule"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2605)
;
2606}
2607
2608/// Map the OpenMP distribute schedule to the runtime enumeration.
2609static OpenMPSchedType
2610getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2611 // only static is allowed for dist_schedule
2612 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2613}
2614
2615bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2616 bool Chunked) const {
2617 OpenMPSchedType Schedule =
2618 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2619 return Schedule == OMP_sch_static;
2620}
2621
2622bool CGOpenMPRuntime::isStaticNonchunked(
2623 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2624 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2625 return Schedule == OMP_dist_sch_static;
2626}
2627
2628bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2629 bool Chunked) const {
2630 OpenMPSchedType Schedule =
2631 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2632 return Schedule == OMP_sch_static_chunked;
2633}
2634
2635bool CGOpenMPRuntime::isStaticChunked(
2636 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2637 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2638 return Schedule == OMP_dist_sch_static_chunked;
2639}
2640
2641bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2642 OpenMPSchedType Schedule =
2643 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2644 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here")(static_cast <bool> (Schedule != OMP_sch_static_chunked
&& "cannot be chunked here") ? void (0) : __assert_fail
("Schedule != OMP_sch_static_chunked && \"cannot be chunked here\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2644, __extension__
__PRETTY_FUNCTION__))
;
2645 return Schedule != OMP_sch_static;
2646}
2647
2648static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2649 OpenMPScheduleClauseModifier M1,
2650 OpenMPScheduleClauseModifier M2) {
2651 int Modifier = 0;
2652 switch (M1) {
2653 case OMPC_SCHEDULE_MODIFIER_monotonic:
2654 Modifier = OMP_sch_modifier_monotonic;
2655 break;
2656 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2657 Modifier = OMP_sch_modifier_nonmonotonic;
2658 break;
2659 case OMPC_SCHEDULE_MODIFIER_simd:
2660 if (Schedule == OMP_sch_static_chunked)
2661 Schedule = OMP_sch_static_balanced_chunked;
2662 break;
2663 case OMPC_SCHEDULE_MODIFIER_last:
2664 case OMPC_SCHEDULE_MODIFIER_unknown:
2665 break;
2666 }
2667 switch (M2) {
2668 case OMPC_SCHEDULE_MODIFIER_monotonic:
2669 Modifier = OMP_sch_modifier_monotonic;
2670 break;
2671 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2672 Modifier = OMP_sch_modifier_nonmonotonic;
2673 break;
2674 case OMPC_SCHEDULE_MODIFIER_simd:
2675 if (Schedule == OMP_sch_static_chunked)
2676 Schedule = OMP_sch_static_balanced_chunked;
2677 break;
2678 case OMPC_SCHEDULE_MODIFIER_last:
2679 case OMPC_SCHEDULE_MODIFIER_unknown:
2680 break;
2681 }
2682 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2683 // If the static schedule kind is specified or if the ordered clause is
2684 // specified, and if the nonmonotonic modifier is not specified, the effect is
2685 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2686 // modifier is specified, the effect is as if the nonmonotonic modifier is
2687 // specified.
2688 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2689 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2690 Schedule == OMP_sch_static_balanced_chunked ||
2691 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2692 Schedule == OMP_dist_sch_static_chunked ||
2693 Schedule == OMP_dist_sch_static))
2694 Modifier = OMP_sch_modifier_nonmonotonic;
2695 }
2696 return Schedule | Modifier;
2697}
2698
2699void CGOpenMPRuntime::emitForDispatchInit(
2700 CodeGenFunction &CGF, SourceLocation Loc,
2701 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2702 bool Ordered, const DispatchRTInput &DispatchValues) {
2703 if (!CGF.HaveInsertPoint())
2704 return;
2705 OpenMPSchedType Schedule = getRuntimeSchedule(
2706 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2707 assert(Ordered ||(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2710, __extension__
__PRETTY_FUNCTION__))
2708 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2710, __extension__
__PRETTY_FUNCTION__))
2709 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2710, __extension__
__PRETTY_FUNCTION__))
2710 Schedule != OMP_sch_static_balanced_chunked))(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2710, __extension__
__PRETTY_FUNCTION__))
;
2711 // Call __kmpc_dispatch_init(
2712 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2713 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2714 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2715
2716 // If the Chunk was not specified in the clause - use default value 1.
2717 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2718 : CGF.Builder.getIntN(IVSize, 1);
2719 llvm::Value *Args[] = {
2720 emitUpdateLocation(CGF, Loc),
2721 getThreadID(CGF, Loc),
2722 CGF.Builder.getInt32(addMonoNonMonoModifier(
2723 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2724 DispatchValues.LB, // Lower
2725 DispatchValues.UB, // Upper
2726 CGF.Builder.getIntN(IVSize, 1), // Stride
2727 Chunk // Chunk
2728 };
2729 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2730}
2731
2732static void emitForStaticInitCall(
2733 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2734 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2735 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2736 const CGOpenMPRuntime::StaticRTInput &Values) {
2737 if (!CGF.HaveInsertPoint())
2738 return;
2739
2740 assert(!Values.Ordered)(static_cast <bool> (!Values.Ordered) ? void (0) : __assert_fail
("!Values.Ordered", "clang/lib/CodeGen/CGOpenMPRuntime.cpp",
2740, __extension__ __PRETTY_FUNCTION__))
;
2741 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2745, __extension__
__PRETTY_FUNCTION__))
2742 Schedule == OMP_sch_static_balanced_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2745, __extension__
__PRETTY_FUNCTION__))
2743 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2745, __extension__
__PRETTY_FUNCTION__))
2744 Schedule == OMP_dist_sch_static ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2745, __extension__
__PRETTY_FUNCTION__))
2745 Schedule == OMP_dist_sch_static_chunked)(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2745, __extension__
__PRETTY_FUNCTION__))
;
2746
2747 // Call __kmpc_for_static_init(
2748 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2749 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2750 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2751 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2752 llvm::Value *Chunk = Values.Chunk;
2753 if (Chunk == nullptr) {
2754 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2756, __extension__
__PRETTY_FUNCTION__))
2755 Schedule == OMP_dist_sch_static) &&(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2756, __extension__
__PRETTY_FUNCTION__))
2756 "expected static non-chunked schedule")(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2756, __extension__
__PRETTY_FUNCTION__))
;
2757 // If the Chunk was not specified in the clause - use default value 1.
2758 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2759 } else {
2760 assert((Schedule == OMP_sch_static_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2764, __extension__
__PRETTY_FUNCTION__))
2761 Schedule == OMP_sch_static_balanced_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2764, __extension__
__PRETTY_FUNCTION__))
2762 Schedule == OMP_ord_static_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2764, __extension__
__PRETTY_FUNCTION__))
2763 Schedule == OMP_dist_sch_static_chunked) &&(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2764, __extension__
__PRETTY_FUNCTION__))
2764 "expected static chunked schedule")(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2764, __extension__
__PRETTY_FUNCTION__))
;
2765 }
2766 llvm::Value *Args[] = {
2767 UpdateLocation,
2768 ThreadId,
2769 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2770 M2)), // Schedule type
2771 Values.IL.getPointer(), // &isLastIter
2772 Values.LB.getPointer(), // &LB
2773 Values.UB.getPointer(), // &UB
2774 Values.ST.getPointer(), // &Stride
2775 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2776 Chunk // Chunk
2777 };
2778 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2779}
2780
2781void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2782 SourceLocation Loc,
2783 OpenMPDirectiveKind DKind,
2784 const OpenMPScheduleTy &ScheduleKind,
2785 const StaticRTInput &Values) {
2786 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2787 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2788 assert(isOpenMPWorksharingDirective(DKind) &&(static_cast <bool> (isOpenMPWorksharingDirective(DKind
) && "Expected loop-based or sections-based directive."
) ? void (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2789, __extension__
__PRETTY_FUNCTION__))
2789 "Expected loop-based or sections-based directive.")(static_cast <bool> (isOpenMPWorksharingDirective(DKind
) && "Expected loop-based or sections-based directive."
) ? void (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2789, __extension__
__PRETTY_FUNCTION__))
;
2790 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2791 isOpenMPLoopDirective(DKind)
2792 ? OMP_IDENT_WORK_LOOP
2793 : OMP_IDENT_WORK_SECTIONS);
2794 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2795 llvm::FunctionCallee StaticInitFunction =
2796 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2797 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2798 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2799 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2800}
2801
2802void CGOpenMPRuntime::emitDistributeStaticInit(
2803 CodeGenFunction &CGF, SourceLocation Loc,
2804 OpenMPDistScheduleClauseKind SchedKind,
2805 const CGOpenMPRuntime::StaticRTInput &Values) {
2806 OpenMPSchedType ScheduleNum =
2807 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2808 llvm::Value *UpdatedLocation =
2809 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2810 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2811 llvm::FunctionCallee StaticInitFunction;
2812 bool isGPUDistribute =
2813 CGM.getLangOpts().OpenMPIsDevice &&
2814 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2815 StaticInitFunction = createForStaticInitFunction(
2816 Values.IVSize, Values.IVSigned, isGPUDistribute);
2817
2818 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2819 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2820 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2821}
2822
2823void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2824 SourceLocation Loc,
2825 OpenMPDirectiveKind DKind) {
2826 if (!CGF.HaveInsertPoint())
2827 return;
2828 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2829 llvm::Value *Args[] = {
2830 emitUpdateLocation(CGF, Loc,
2831 isOpenMPDistributeDirective(DKind)
2832 ? OMP_IDENT_WORK_DISTRIBUTE
2833 : isOpenMPLoopDirective(DKind)
2834 ? OMP_IDENT_WORK_LOOP
2835 : OMP_IDENT_WORK_SECTIONS),
2836 getThreadID(CGF, Loc)};
2837 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2838 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2839 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2840 CGF.EmitRuntimeCall(
2841 OMPBuilder.getOrCreateRuntimeFunction(
2842 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2843 Args);
2844 else
2845 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2846 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2847 Args);
2848}
2849
2850void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2851 SourceLocation Loc,
2852 unsigned IVSize,
2853 bool IVSigned) {
2854 if (!CGF.HaveInsertPoint())
2855 return;
2856 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2857 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2858 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2859}
2860
2861llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2862 SourceLocation Loc, unsigned IVSize,
2863 bool IVSigned, Address IL,
2864 Address LB, Address UB,
2865 Address ST) {
2866 // Call __kmpc_dispatch_next(
2867 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2868 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2869 // kmp_int[32|64] *p_stride);
2870 llvm::Value *Args[] = {
2871 emitUpdateLocation(CGF, Loc),
2872 getThreadID(CGF, Loc),
2873 IL.getPointer(), // &isLastIter
2874 LB.getPointer(), // &Lower
2875 UB.getPointer(), // &Upper
2876 ST.getPointer() // &Stride
2877 };
2878 llvm::Value *Call =
2879 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2880 return CGF.EmitScalarConversion(
2881 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2882 CGF.getContext().BoolTy, Loc);
2883}
2884
2885void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2886 llvm::Value *NumThreads,
2887 SourceLocation Loc) {
2888 if (!CGF.HaveInsertPoint())
2889 return;
2890 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2891 llvm::Value *Args[] = {
2892 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2893 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2894 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2895 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2896 Args);
2897}
2898
2899void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2900 ProcBindKind ProcBind,
2901 SourceLocation Loc) {
2902 if (!CGF.HaveInsertPoint())
2903 return;
2904 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.")(static_cast <bool> (ProcBind != OMP_PROC_BIND_unknown &&
"Unsupported proc_bind value.") ? void (0) : __assert_fail (
"ProcBind != OMP_PROC_BIND_unknown && \"Unsupported proc_bind value.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2904, __extension__
__PRETTY_FUNCTION__))
;
2905 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2906 llvm::Value *Args[] = {
2907 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2908 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2909 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2910 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2911 Args);
2912}
2913
2914void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2915 SourceLocation Loc, llvm::AtomicOrdering AO) {
2916 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2917 OMPBuilder.createFlush(CGF.Builder);
2918 } else {
2919 if (!CGF.HaveInsertPoint())
2920 return;
2921 // Build call void __kmpc_flush(ident_t *loc)
2922 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2923 CGM.getModule(), OMPRTL___kmpc_flush),
2924 emitUpdateLocation(CGF, Loc));
2925 }
2926}
2927
namespace {
/// Indexes of fields for type kmp_task_t, in declaration order; used as GEP
/// indices when accessing the runtime task descriptor.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2953
2954bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2955 return OffloadEntriesTargetRegion.empty() &&
2956 OffloadEntriesDeviceGlobalVar.empty();
2957}
2958
2959/// Initialize target region entry.
2960void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2961 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2962 StringRef ParentName, unsigned LineNum,
2963 unsigned Order) {
2964 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2966, __extension__
__PRETTY_FUNCTION__))
2965 "only required for the device "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2966, __extension__
__PRETTY_FUNCTION__))
2966 "code generation.")(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2966, __extension__
__PRETTY_FUNCTION__))
;
2967 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
2968 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
2969 OMPTargetRegionEntryTargetRegion);
2970 ++OffloadingEntriesNum;
2971}
2972
2973void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
2974 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
2975 StringRef ParentName, unsigned LineNum,
2976 llvm::Constant *Addr, llvm::Constant *ID,
2977 OMPTargetRegionEntryKind Flags) {
2978 // If we are emitting code for a target, the entry is already initialized,
2979 // only has to be registered.
2980 if (CGM.getLangOpts().OpenMPIsDevice) {
2981 // This could happen if the device compilation is invoked standalone.
2982 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
2983 return;
2984 auto &Entry =
2985 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
2986 Entry.setAddress(Addr);
2987 Entry.setID(ID);
2988 Entry.setFlags(Flags);
2989 } else {
2990 if (Flags ==
2991 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
2992 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
2993 /*IgnoreAddressId*/ true))
2994 return;
2995 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&(static_cast <bool> (!hasTargetRegionEntryInfo(DeviceID
, FileID, ParentName, LineNum) && "Target region entry already registered!"
) ? void (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2996, __extension__
__PRETTY_FUNCTION__))
2996 "Target region entry already registered!")(static_cast <bool> (!hasTargetRegionEntryInfo(DeviceID
, FileID, ParentName, LineNum) && "Target region entry already registered!"
) ? void (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2996, __extension__
__PRETTY_FUNCTION__))
;
2997 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
2998 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
2999 ++OffloadingEntriesNum;
3000 }
3001}
3002
3003bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3004 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3005 bool IgnoreAddressId) const {
3006 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3007 if (PerDevice == OffloadEntriesTargetRegion.end())
3008 return false;
3009 auto PerFile = PerDevice->second.find(FileID);
3010 if (PerFile == PerDevice->second.end())
3011 return false;
3012 auto PerParentName = PerFile->second.find(ParentName);
3013 if (PerParentName == PerFile->second.end())
3014 return false;
3015 auto PerLine = PerParentName->second.find(LineNum);
3016 if (PerLine == PerParentName->second.end())
3017 return false;
3018 // Fail if this entry is already registered.
3019 if (!IgnoreAddressId &&
3020 (PerLine->second.getAddress() || PerLine->second.getID()))
3021 return false;
3022 return true;
3023}
3024
3025void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3026 const OffloadTargetRegionEntryInfoActTy &Action) {
3027 // Scan all target region entries and perform the provided action.
3028 for (const auto &D : OffloadEntriesTargetRegion)
3029 for (const auto &F : D.second)
3030 for (const auto &P : F.second)
3031 for (const auto &L : P.second)
3032 Action(D.first, F.first, P.first(), L.first, L.second);
3033}
3034
3035void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3036 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3037 OMPTargetGlobalVarEntryKind Flags,
3038 unsigned Order) {
3039 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3041, __extension__
__PRETTY_FUNCTION__))
3040 "only required for the device "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3041, __extension__
__PRETTY_FUNCTION__))
3041 "code generation.")(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3041, __extension__
__PRETTY_FUNCTION__))
;
3042 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3043 ++OffloadingEntriesNum;
3044}
3045
3046void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3047 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3048 CharUnits VarSize,
3049 OMPTargetGlobalVarEntryKind Flags,
3050 llvm::GlobalValue::LinkageTypes Linkage) {
3051 if (CGM.getLangOpts().OpenMPIsDevice) {
3052 // This could happen if the device compilation is invoked standalone.
3053 if (!hasDeviceGlobalVarEntryInfo(VarName))
3054 return;
3055 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3056 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3057 if (Entry.getVarSize().isZero()) {
3058 Entry.setVarSize(VarSize);
3059 Entry.setLinkage(Linkage);
3060 }
3061 return;
3062 }
3063 Entry.setVarSize(VarSize);
3064 Entry.setLinkage(Linkage);
3065 Entry.setAddress(Addr);
3066 } else {
3067 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3068 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3069 assert(Entry.isValid() && Entry.getFlags() == Flags &&(static_cast <bool> (Entry.isValid() && Entry.getFlags
() == Flags && "Entry not initialized!") ? void (0) :
__assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3070, __extension__
__PRETTY_FUNCTION__))
3070 "Entry not initialized!")(static_cast <bool> (Entry.isValid() && Entry.getFlags
() == Flags && "Entry not initialized!") ? void (0) :
__assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3070, __extension__
__PRETTY_FUNCTION__))
;
3071 if (Entry.getVarSize().isZero()) {
3072 Entry.setVarSize(VarSize);
3073 Entry.setLinkage(Linkage);
3074 }
3075 return;
3076 }
3077 OffloadEntriesDeviceGlobalVar.try_emplace(
3078 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3079 ++OffloadingEntriesNum;
3080 }
3081}
3082
3083void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3084 actOnDeviceGlobalVarEntriesInfo(
3085 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3086 // Scan all target region entries and perform the provided action.
3087 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3088 Action(E.getKey(), E.getValue());
3089}
3090
// Emit one offloading entry via the OpenMPIRBuilder; the symbol name is
// taken from the address constant. NOTE(review): the 'Linkage' parameter is
// not forwarded here — presumably handled inside the builder or unused on
// this path; confirm before relying on it.
void CGOpenMPRuntime::createOffloadEntry(
    llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
    llvm::GlobalValue::LinkageTypes Linkage) {
  OMPBuilder.emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
}
3096
3097void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3098 // Emit the offloading entries and metadata so that the device codegen side
3099 // can easily figure out what to emit. The produced metadata looks like
3100 // this:
3101 //
3102 // !omp_offload.info = !{!1, ...}
3103 //
3104 // Right now we only generate metadata for function that contain target
3105 // regions.
3106
3107 // If we are in simd mode or there are no entries, we don't need to do
3108 // anything.
3109 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3110 return;
3111
3112 llvm::Module &M = CGM.getModule();
3113 llvm::LLVMContext &C = M.getContext();
3114 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3115 SourceLocation, StringRef>,
3116 16>
3117 OrderedEntries(OffloadEntriesInfoManager.size());
3118 llvm::SmallVector<StringRef, 16> ParentFunctions(
3119 OffloadEntriesInfoManager.size());
3120
3121 // Auxiliary methods to create metadata values and strings.
3122 auto &&GetMDInt = [this](unsigned V) {
3123 return llvm::ConstantAsMetadata::get(
3124 llvm::ConstantInt::get(CGM.Int32Ty, V));
3125 };
3126
3127 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3128
3129 // Create the offloading info metadata node.
3130 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3131
3132 // Create function that emits metadata for each target region entry;
3133 auto &&TargetRegionMetadataEmitter =
3134 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3135 &GetMDString](
3136 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3137 unsigned Line,
3138 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3139 // Generate metadata for target regions. Each entry of this metadata
3140 // contains:
3141 // - Entry 0 -> Kind of this type of metadata (0).
3142 // - Entry 1 -> Device ID of the file where the entry was identified.
3143 // - Entry 2 -> File ID of the file where the entry was identified.
3144 // - Entry 3 -> Mangled name of the function where the entry was
3145 // identified.
3146 // - Entry 4 -> Line in the file where the entry was identified.
3147 // - Entry 5 -> Order the entry was created.
3148 // The first element of the metadata node is the kind.
3149 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3150 GetMDInt(FileID), GetMDString(ParentName),
3151 GetMDInt(Line), GetMDInt(E.getOrder())};
3152
3153 SourceLocation Loc;
3154 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3155 E = CGM.getContext().getSourceManager().fileinfo_end();
3156 I != E; ++I) {
3157 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3158 I->getFirst()->getUniqueID().getFile() == FileID) {
3159 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3160 I->getFirst(), Line, 1);
3161 break;
3162 }
3163 }
3164 // Save this entry in the right position of the ordered entries array.
3165 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3166 ParentFunctions[E.getOrder()] = ParentName;
3167
3168 // Add metadata to the named metadata node.
3169 MD->addOperand(llvm::MDNode::get(C, Ops));
3170 };
3171
3172 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3173 TargetRegionMetadataEmitter);
3174
3175 // Create function that emits metadata for each device global variable entry;
3176 auto &&DeviceGlobalVarMetadataEmitter =
3177 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3178 MD](StringRef MangledName,
3179 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3180 &E) {
3181 // Generate metadata for global variables. Each entry of this metadata
3182 // contains:
3183 // - Entry 0 -> Kind of this type of metadata (1).
3184 // - Entry 1 -> Mangled name of the variable.
3185 // - Entry 2 -> Declare target kind.
3186 // - Entry 3 -> Order the entry was created.
3187 // The first element of the metadata node is the kind.
3188 llvm::Metadata *Ops[] = {
3189 GetMDInt(E.getKind()), GetMDString(MangledName),
3190 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3191
3192 // Save this entry in the right position of the ordered entries array.
3193 OrderedEntries[E.getOrder()] =
3194 std::make_tuple(&E, SourceLocation(), MangledName);
3195
3196 // Add metadata to the named metadata node.
3197 MD->addOperand(llvm::MDNode::get(C, Ops));
3198 };
3199
3200 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3201 DeviceGlobalVarMetadataEmitter);
3202
3203 for (const auto &E : OrderedEntries) {
3204 assert(std::get<0>(E) && "All ordered entries must exist!")(static_cast <bool> (std::get<0>(E) && "All ordered entries must exist!"
) ? void (0) : __assert_fail ("std::get<0>(E) && \"All ordered entries must exist!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3204, __extension__
__PRETTY_FUNCTION__))
;
3205 if (const auto *CE =
3206 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3207 std::get<0>(E))) {
3208 if (!CE->getID() || !CE->getAddress()) {
3209 // Do not blame the entry if the parent funtion is not emitted.
3210 StringRef FnName = ParentFunctions[CE->getOrder()];
3211 if (!CGM.GetGlobalValue(FnName))
3212 continue;
3213 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3214 DiagnosticsEngine::Error,
3215 "Offloading entry for target region in %0 is incorrect: either the "
3216 "address or the ID is invalid.");
3217 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3218 continue;
3219 }
3220 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3221 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3222 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3223 OffloadEntryInfoDeviceGlobalVar>(
3224 std::get<0>(E))) {
3225 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3226 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3227 CE->getFlags());
3228 switch (Flags) {
3229 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3230 if (CGM.getLangOpts().OpenMPIsDevice &&
3231 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3232 continue;
3233 if (!CE->getAddress()) {
3234 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3235 DiagnosticsEngine::Error, "Offloading entry for declare target "
3236 "variable %0 is incorrect: the "
3237 "address is invalid.");
3238 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3239 continue;
3240 }
3241 // The vaiable has no definition - no need to add the entry.
3242 if (CE->getVarSize().isZero())
3243 continue;
3244 break;
3245 }
3246 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3247 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3249, __extension__
__PRETTY_FUNCTION__))
3248 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3249, __extension__
__PRETTY_FUNCTION__))
3249 "Declaret target link address is set.")(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3249, __extension__
__PRETTY_FUNCTION__))
;
3250 if (CGM.getLangOpts().OpenMPIsDevice)
3251 continue;
3252 if (!CE->getAddress()) {
3253 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3254 DiagnosticsEngine::Error,
3255 "Offloading entry for declare target variable is incorrect: the "
3256 "address is invalid.");
3257 CGM.getDiags().Report(DiagID);
3258 continue;
3259 }
3260 break;
3261 }
3262
3263 // Hidden or internal symbols on the device are not externally visible. We
3264 // should not attempt to register them by creating an offloading entry.
3265 if (auto *GV = dyn_cast<llvm::GlobalValue>(CE->getAddress()))
3266 if (GV->hasLocalLinkage() || GV->hasHiddenVisibility())
3267 continue;
3268
3269 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3270 CE->getVarSize().getQuantity(), Flags,
3271 CE->getLinkage());
3272 } else {
3273 llvm_unreachable("Unsupported entry kind.")::llvm::llvm_unreachable_internal("Unsupported entry kind.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 3273)
;
3274 }
3275 }
3276}
3277
3278/// Loads all the offload entries information from the host IR
3279/// metadata.
3280void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3281 // If we are in target mode, load the metadata from the host IR. This code has
3282 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3283
3284 if (!CGM.getLangOpts().OpenMPIsDevice)
3285 return;
3286
3287 if (CGM.getLangOpts().OMPHostIRFile.empty())
3288 return;
3289
3290 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3291 if (auto EC = Buf.getError()) {
3292 CGM.getDiags().Report(diag::err_cannot_open_file)
3293 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3294 return;
3295 }
3296
3297 llvm::LLVMContext C;
3298 auto ME = expectedToErrorOrAndEmitErrors(
3299 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3300
3301 if (auto EC = ME.getError()) {
3302 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3303 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3304 CGM.getDiags().Report(DiagID)
3305 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3306 return;
3307 }
3308
3309 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3310 if (!MD)
3311 return;
3312
3313 for (llvm::MDNode *MN : MD->operands()) {
3314 auto &&GetMDInt = [MN](unsigned Idx) {
3315 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3316 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3317 };
3318
3319 auto &&GetMDString = [MN](unsigned Idx) {
3320 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3321 return V->getString();
3322 };
3323
3324 switch (GetMDInt(0)) {
3325 default:
3326 llvm_unreachable("Unexpected metadata!")::llvm::llvm_unreachable_internal("Unexpected metadata!", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 3326)
;
3327 break;
3328 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3329 OffloadingEntryInfoTargetRegion:
3330 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3331 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3332 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3333 /*Order=*/GetMDInt(5));
3334 break;
3335 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3336 OffloadingEntryInfoDeviceGlobalVar:
3337 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3338 /*MangledName=*/GetMDString(1),
3339 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3340 /*Flags=*/GetMDInt(2)),
3341 /*Order=*/GetMDInt(3));
3342 break;
3343 }
3344 }
3345}
3346
3347void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3348 if (!KmpRoutineEntryPtrTy) {
3349 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3350 ASTContext &C = CGM.getContext();
3351 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3352 FunctionProtoType::ExtProtoInfo EPI;
3353 KmpRoutineEntryPtrQTy = C.getPointerType(
3354 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3355 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3356 }
3357}
3358
3359namespace {
3360struct PrivateHelpersTy {
3361 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3362 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3363 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3364 PrivateElemInit(PrivateElemInit) {}
3365 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3366 const Expr *OriginalRef = nullptr;
3367 const VarDecl *Original = nullptr;
3368 const VarDecl *PrivateCopy = nullptr;
3369 const VarDecl *PrivateElemInit = nullptr;
3370 bool isLocalPrivate() const {
3371 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3372 }
3373};
3374typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3375} // anonymous namespace
3376
3377static bool isAllocatableDecl(const VarDecl *VD) {
3378 const VarDecl *CVD = VD->getCanonicalDecl();
3379 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3380 return false;
3381 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3382 // Use the default allocation.
3383 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3384 !AA->getAllocator());
3385}
3386
3387static RecordDecl *
3388createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3389 if (!Privates.empty()) {
3390 ASTContext &C = CGM.getContext();
3391 // Build struct .kmp_privates_t. {
3392 // /* private vars */
3393 // };
3394 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3395 RD->startDefinition();
3396 for (const auto &Pair : Privates) {
3397 const VarDecl *VD = Pair.second.Original;
3398 QualType Type = VD->getType().getNonReferenceType();
3399 // If the private variable is a local variable with lvalue ref type,
3400 // allocate the pointer instead of the pointee type.
3401 if (Pair.second.isLocalPrivate()) {
3402 if (VD->getType()->isLValueReferenceType())
3403 Type = C.getPointerType(Type);
3404 if (isAllocatableDecl(VD))
3405 Type = C.getPointerType(Type);
3406 }
3407 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3408 if (VD->hasAttrs()) {
3409 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3410 E(VD->getAttrs().end());
3411 I != E; ++I)
3412 FD->addAttr(*I);
3413 }
3414 }
3415 RD->completeDefinition();
3416 return RD;
3417 }
3418 return nullptr;
3419}
3420
3421static RecordDecl *
3422createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3423 QualType KmpInt32Ty,
3424 QualType KmpRoutineEntryPointerQTy) {
3425 ASTContext &C = CGM.getContext();
3426 // Build struct kmp_task_t {
3427 // void * shareds;
3428 // kmp_routine_entry_t routine;
3429 // kmp_int32 part_id;
3430 // kmp_cmplrdata_t data1;
3431 // kmp_cmplrdata_t data2;
3432 // For taskloops additional fields:
3433 // kmp_uint64 lb;
3434 // kmp_uint64 ub;
3435 // kmp_int64 st;
3436 // kmp_int32 liter;
3437 // void * reductions;
3438 // };
3439 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3440 UD->startDefinition();
3441 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3442 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3443 UD->completeDefinition();
3444 QualType KmpCmplrdataTy = C.getRecordType(UD);
3445 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3446 RD->startDefinition();
3447 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3448 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3449 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3450 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3451 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3452 if (isOpenMPTaskLoopDirective(Kind)) {
3453 QualType KmpUInt64Ty =
3454 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3455 QualType KmpInt64Ty =
3456 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3457 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3458 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3459 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3460 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3461 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3462 }
3463 RD->completeDefinition();
3464 return RD;
3465}
3466
3467static RecordDecl *
3468createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3469 ArrayRef<PrivateDataTy> Privates) {
3470 ASTContext &C = CGM.getContext();
3471 // Build struct kmp_task_t_with_privates {
3472 // kmp_task_t task_data;
3473 // .kmp_privates_t. privates;
3474 // };
3475 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3476 RD->startDefinition();
3477 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3478 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3479 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3480 RD->completeDefinition();
3481 return RD;
3482}
3483
3484/// Emit a proxy function which accepts kmp_task_t as the second
3485/// argument.
3486/// \code
3487/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3488/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3489/// For taskloops:
3490/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3491/// tt->reductions, tt->shareds);
3492/// return 0;
3493/// }
3494/// \endcode
3495static llvm::Function *
3496emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3497 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3498 QualType KmpTaskTWithPrivatesPtrQTy,
3499 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3500 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3501 llvm::Value *TaskPrivatesMap) {
3502 ASTContext &C = CGM.getContext();
3503 FunctionArgList Args;
3504 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3505 ImplicitParamDecl::Other);
3506 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3507 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3508 ImplicitParamDecl::Other);
3509 Args.push_back(&GtidArg);
3510 Args.push_back(&TaskTypeArg);
3511 const auto &TaskEntryFnInfo =
3512 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3513 llvm::FunctionType *TaskEntryTy =
3514 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3515 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3516 auto *TaskEntry = llvm::Function::Create(
3517 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3518 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3519 TaskEntry->setDoesNotRecurse();
3520 CodeGenFunction CGF(CGM);
3521 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3522 Loc, Loc);
3523
3524 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3525 // tt,
3526 // For taskloops:
3527 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3528 // tt->task_data.shareds);
3529 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3530 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3531 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3532 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3533 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3534 const auto *KmpTaskTWithPrivatesQTyRD =
3535 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3536 LValue Base =
3537 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3538 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3539 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3540 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3541 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3542
3543 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3544 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3545 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3546 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3547 CGF.ConvertTypeForMem(SharedsPtrTy));
3548
3549 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3550 llvm::Value *PrivatesParam;
3551 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3552 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3553 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3554 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3555 } else {
3556 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3557 }
3558
3559 llvm::Value *CommonArgs[] = {
3560 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3561 CGF.Builder
3562 .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
3563 CGF.VoidPtrTy, CGF.Int8Ty)
3564 .getPointer()};
3565 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3566 std::end(CommonArgs));
3567 if (isOpenMPTaskLoopDirective(Kind)) {
3568 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3569 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3570 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3571 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3572 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3573 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3574 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3575 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3576 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3577 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3578 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3579 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3580 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3581 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3582 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3583 CallArgs.push_back(LBParam);
3584 CallArgs.push_back(UBParam);
3585 CallArgs.push_back(StParam);
3586 CallArgs.push_back(LIParam);
3587 CallArgs.push_back(RParam);
3588 }
3589 CallArgs.push_back(SharedsParam);
3590
3591 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3592 CallArgs);
3593 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3594 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3595 CGF.FinishFunction();
3596 return TaskEntry;
3597}
3598
3599static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3600 SourceLocation Loc,
3601 QualType KmpInt32Ty,
3602 QualType KmpTaskTWithPrivatesPtrQTy,
3603 QualType KmpTaskTWithPrivatesQTy) {
3604 ASTContext &C = CGM.getContext();
3605 FunctionArgList Args;
3606 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3607 ImplicitParamDecl::Other);
3608 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3609 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3610 ImplicitParamDecl::Other);
3611 Args.push_back(&GtidArg);
3612 Args.push_back(&TaskTypeArg);
3613 const auto &DestructorFnInfo =
3614 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3615 llvm::FunctionType *DestructorFnTy =
3616 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3617 std::string Name =
3618 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3619 auto *DestructorFn =
3620 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3621 Name, &CGM.getModule());
3622 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3623 DestructorFnInfo);
3624 DestructorFn->setDoesNotRecurse();
3625 CodeGenFunction CGF(CGM);
3626 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3627 Args, Loc, Loc);
3628
3629 LValue Base = CGF.EmitLoadOfPointerLValue(
3630 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3631 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3632 const auto *KmpTaskTWithPrivatesQTyRD =
3633 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3634 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3635 Base = CGF.EmitLValueForField(Base, *FI);
3636 for (const auto *Field :
3637 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3638 if (QualType::DestructionKind DtorKind =
3639 Field->getType().isDestructedType()) {
3640 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3641 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3642 }
3643 }
3644 CGF.FinishFunction();
3645 return DestructorFn;
3646}
3647
3648/// Emit a privates mapping function for correct handling of private and
3649/// firstprivate variables.
3650/// \code
3651/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3652/// **noalias priv1,..., <tyn> **noalias privn) {
3653/// *priv1 = &.privates.priv1;
3654/// ...;
3655/// *privn = &.privates.privn;
3656/// }
3657/// \endcode
3658static llvm::Value *
3659emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3660 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3661 ArrayRef<PrivateDataTy> Privates) {
3662 ASTContext &C = CGM.getContext();
3663 FunctionArgList Args;
3664 ImplicitParamDecl TaskPrivatesArg(
3665 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3666 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3667 ImplicitParamDecl::Other);
3668 Args.push_back(&TaskPrivatesArg);
3669 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3670 unsigned Counter = 1;
3671 for (const Expr *E : Data.PrivateVars) {
3672 Args.push_back(ImplicitParamDecl::Create(
3673 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3674 C.getPointerType(C.getPointerType(E->getType()))
3675 .withConst()
3676 .withRestrict(),
3677 ImplicitParamDecl::Other));
3678 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3679 PrivateVarsPos[VD] = Counter;
3680 ++Counter;
3681 }
3682 for (const Expr *E : Data.FirstprivateVars) {
3683 Args.push_back(ImplicitParamDecl::Create(
3684 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3685 C.getPointerType(C.getPointerType(E->getType()))
3686 .withConst()
3687 .withRestrict(),
3688 ImplicitParamDecl::Other));
3689 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3690 PrivateVarsPos[VD] = Counter;
3691 ++Counter;
3692 }
3693 for (const Expr *E : Data.LastprivateVars) {
3694 Args.push_back(ImplicitParamDecl::Create(
3695 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3696 C.getPointerType(C.getPointerType(E->getType()))
3697 .withConst()
3698 .withRestrict(),
3699 ImplicitParamDecl::Other));
3700 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3701 PrivateVarsPos[VD] = Counter;
3702 ++Counter;
3703 }
3704 for (const VarDecl *VD : Data.PrivateLocals) {
3705 QualType Ty = VD->getType().getNonReferenceType();
3706 if (VD->getType()->isLValueReferenceType())
3707 Ty = C.getPointerType(Ty);
3708 if (isAllocatableDecl(VD))
3709 Ty = C.getPointerType(Ty);
3710 Args.push_back(ImplicitParamDecl::Create(
3711 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3712 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3713 ImplicitParamDecl::Other));
3714 PrivateVarsPos[VD] = Counter;
3715 ++Counter;
3716 }
3717 const auto &TaskPrivatesMapFnInfo =
3718 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3719 llvm::FunctionType *TaskPrivatesMapTy =
3720 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3721 std::string Name =
3722 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3723 auto *TaskPrivatesMap = llvm::Function::Create(
3724 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3725 &CGM.getModule());
3726 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3727 TaskPrivatesMapFnInfo);
3728 if (CGM.getLangOpts().Optimize) {
3729 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3730 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3731 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3732 }
3733 CodeGenFunction CGF(CGM);
3734 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3735 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3736
3737 // *privi = &.privates.privi;
3738 LValue Base = CGF.EmitLoadOfPointerLValue(
3739 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3740 TaskPrivatesArg.getType()->castAs<PointerType>());
3741 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3742 Counter = 0;
3743 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3744 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3745 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3746 LValue RefLVal =
3747 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3748 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3749 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3750 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3751 ++Counter;
3752 }
3753 CGF.FinishFunction();
3754 return TaskPrivatesMap;
3755}
3756
3757/// Emit initialization for private variables in task-based directives.
3758static void emitPrivatesInit(CodeGenFunction &CGF,
3759 const OMPExecutableDirective &D,
3760 Address KmpTaskSharedsPtr, LValue TDBase,
3761 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3762 QualType SharedsTy, QualType SharedsPtrTy,
3763 const OMPTaskDataTy &Data,
3764 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3765 ASTContext &C = CGF.getContext();
3766 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3767 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3768 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3769 ? OMPD_taskloop
3770 : OMPD_task;
3771 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3772 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3773 LValue SrcBase;
3774 bool IsTargetTask =
3775 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3776 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3777 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3778 // PointersArray, SizesArray, and MappersArray. The original variables for
3779 // these arrays are not captured and we get their addresses explicitly.
3780 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3781 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3782 SrcBase = CGF.MakeAddrLValue(
3783 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3784 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
3785 CGF.ConvertTypeForMem(SharedsTy)),
3786 SharedsTy);
3787 }
3788 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3789 for (const PrivateDataTy &Pair : Privates) {
3790 // Do not initialize private locals.
3791 if (Pair.second.isLocalPrivate()) {
3792 ++FI;
3793 continue;
3794 }
3795 const VarDecl *VD = Pair.second.PrivateCopy;
3796 const Expr *Init = VD->getAnyInitializer();
3797 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3798 !CGF.isTrivialInitializer(Init)))) {
3799 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3800 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3801 const VarDecl *OriginalVD = Pair.second.Original;
3802 // Check if the variable is the target-based BasePointersArray,
3803 // PointersArray, SizesArray, or MappersArray.
3804 LValue SharedRefLValue;
3805 QualType Type = PrivateLValue.getType();
3806 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3807 if (IsTargetTask && !SharedField) {
3808 assert(isa<ImplicitParamDecl>(OriginalVD) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
3809 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
3810 cast<CapturedDecl>(OriginalVD->getDeclContext())(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
3811 ->getNumParams() == 0 &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
3812 isa<TranslationUnitDecl>((static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
3813 cast<CapturedDecl>(OriginalVD->getDeclContext())(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
3814 ->getDeclContext()) &&(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
3815 "Expected artificial target data variable.")(static_cast <bool> (isa<ImplicitParamDecl>(OriginalVD
) && isa<CapturedDecl>(OriginalVD->getDeclContext
()) && cast<CapturedDecl>(OriginalVD->getDeclContext
()) ->getNumParams() == 0 && isa<TranslationUnitDecl
>( cast<CapturedDecl>(OriginalVD->getDeclContext(
)) ->getDeclContext()) && "Expected artificial target data variable."
) ? void (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3815, __extension__
__PRETTY_FUNCTION__))
;
3816 SharedRefLValue =
3817 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3818 } else if (ForDup) {
3819 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3820 SharedRefLValue = CGF.MakeAddrLValue(
3821 SharedRefLValue.getAddress(CGF).withAlignment(
3822 C.getDeclAlign(OriginalVD)),
3823 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3824 SharedRefLValue.getTBAAInfo());
3825 } else if (CGF.LambdaCaptureFields.count(
3826 Pair.second.Original->getCanonicalDecl()) > 0 ||
3827 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3828 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3829 } else {
3830 // Processing for implicitly captured variables.
3831 InlinedOpenMPRegionRAII Region(
3832 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3833 /*HasCancel=*/false, /*NoInheritance=*/true);
3834 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3835 }
3836 if (Type->isArrayType()) {
3837 // Initialize firstprivate array.
3838 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3839 // Perform simple memcpy.
3840 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3841 } else {
3842 // Initialize firstprivate array using element-by-element
3843 // initialization.
3844 CGF.EmitOMPAggregateAssign(
3845 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3846 Type,
3847 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3848 Address SrcElement) {
3849 // Clean up any temporaries needed by the initialization.
3850 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3851 InitScope.addPrivate(Elem, SrcElement);
3852 (void)InitScope.Privatize();
3853 // Emit initialization for single element.
3854 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3855 CGF, &CapturesInfo);
3856 CGF.EmitAnyExprToMem(Init, DestElement,
3857 Init->getType().getQualifiers(),
3858 /*IsInitializer=*/false);
3859 });
3860 }
3861 } else {
3862 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3863 InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
3864 (void)InitScope.Privatize();
3865 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3866 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3867 /*capturedByInit=*/false);
3868 }
3869 } else {
3870 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3871 }
3872 }
3873 ++FI;
3874 }
3875}
3876
3877/// Check if duplication function is required for taskloops.
3878static bool checkInitIsRequired(CodeGenFunction &CGF,
3879 ArrayRef<PrivateDataTy> Privates) {
3880 bool InitRequired = false;
3881 for (const PrivateDataTy &Pair : Privates) {
3882 if (Pair.second.isLocalPrivate())
3883 continue;
3884 const VarDecl *VD = Pair.second.PrivateCopy;
3885 const Expr *Init = VD->getAnyInitializer();
3886 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
3887 !CGF.isTrivialInitializer(Init));
3888 if (InitRequired)
3889 break;
3890 }
3891 return InitRequired;
3892}
3893
3894
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Build the signature void(kmp_task_t *dst, kmp_task_t *src, int lastpriv)
  // out of implicit parameter declarations.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  // Load the destination task descriptor from the first argument.
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Firstprivates are copied from the *source* task, so load the shareds
    // pointer out of the second argument's descriptor.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  // Initialization itself is shared with the non-dup path (ForDup=true
  // selects the source-task copy semantics inside emitPrivatesInit).
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3973
3974/// Checks if destructor function is required to be generated.
3975/// \return true if cleanups are required, false otherwise.
3976static bool
3977checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3978 ArrayRef<PrivateDataTy> Privates) {
3979 for (const PrivateDataTy &P : Privates) {
3980 if (P.second.isLocalPrivate())
3981 continue;
3982 QualType Ty = P.second.Original->getType().getNonReferenceType();
3983 if (Ty.isDestructedType())
3984 return true;
3985 }
3986 return false;
3987}
3988
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor privatizes the iterator variables/counters of
/// the given OMPIteratorExpr and opens one loop header per iterator; the
/// destructor emits the matching counter increments, back-branches and exit
/// blocks in reverse order. Code emitted while the scope is alive therefore
/// runs inside the nested iterator loops. A null expression makes the scope
/// a no-op.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator "next iteration" and "loop exit" jump targets, filled by the
  // constructor and consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds *before* privatizing, then create private
    // storage for each iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signedness of the comparison follows the counter's declared type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    // Close loops innermost-first, mirroring the constructor's nesting.
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4064
4065static std::pair<llvm::Value *, llvm::Value *>
4066getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4067 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4068 llvm::Value *Addr;
4069 if (OASE) {
4070 const Expr *Base = OASE->getBase();
4071 Addr = CGF.EmitScalarExpr(Base);
4072 } else {
4073 Addr = CGF.EmitLValue(E).getPointer(CGF);
4074 }
4075 llvm::Value *SizeVal;
4076 QualType Ty = E->getType();
4077 if (OASE) {
4078 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4079 for (const Expr *SE : OASE->getDimensions()) {
4080 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4081 Sz = CGF.EmitScalarConversion(
4082 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4083 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4084 }
4085 } else if (const auto *ASE =
4086 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4087 LValue UpAddrLVal =
4088 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4089 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4090 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4091 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4092 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4093 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4094 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4095 } else {
4096 SizeVal = CGF.getTypeSize(Ty);
4097 }
4098 return std::make_pair(Addr, SizeVal);
4099}
4100
4101/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4102static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4103 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4104 if (KmpTaskAffinityInfoTy.isNull()) {
4105 RecordDecl *KmpAffinityInfoRD =
4106 C.buildImplicitRecord("kmp_task_affinity_info_t");
4107 KmpAffinityInfoRD->startDefinition();
4108 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4109 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4110 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4111 KmpAffinityInfoRD->completeDefinition();
4112 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4113 }
4114}
4115
/// Emits the kmp_task_t allocation and initialization for a task/taskloop/
/// target directive: gathers and sorts privates, builds the task record
/// types, emits the proxy entry/dup/destructor helper functions, allocates
/// the task via the runtime, copies shareds, and handles detach/affinity/
/// priority clauses. Returns the pieces callers need to emit the actual
/// task-spawning call.
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  // Firstprivates additionally carry the element-init variable used when
  // copy-constructing from the original.
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    // Allocatable locals are stored as pointers into the task record.
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  // Descending alignment order minimizes padding in the privates record;
  // stable sort keeps clause order for equal alignments.
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet). Taskloop and plain task use
  // different cached record types.
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    // No privates: the proxy function receives a null mapping function.
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  // final(expr) is either a compile-time constant (getInt) or a runtime
  // value (getPointer) selected into the flags word.
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    // Iterator-modified clauses contribute a runtime count (product of the
    // iterator upper bounds); plain clauses contribute a static count.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      // Runtime-sized case: build a VLA of affinity records.
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      // Statically-sized case: a constant array temp suffices.
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      // Iterator-modified clauses fill the array with a runtime index that
      // continues after the statically-filled prefix.
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  // NOTE(review): assumes SharedsTy is always a structure type here;
  // getAsStructureType() returns null otherwise — confirm callers guarantee
  // this (same defect class as the analyzer's null-pointer warning).
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    // Taskloops additionally need a task_dup helper when lastprivates exist
    // or any private requires non-trivial initialization.
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
4503
4504namespace {
4505/// Dependence kind for RTL.
// Bit values written into the `flags` field of kmp_depend_info records; they
// must match the flag encoding the libomp runtime expects (in/out bits,
// mutexinoutset, inoutset, and the special omp_all_memory marker).
4506enum RTLDependenceKindTy {
4507 DepIn = 0x01,
4508 DepInOut = 0x3,
4509 DepMutexInOutSet = 0x4,
4510 DepInOutSet = 0x8,
4511 DepOmpAllMem = 0x80,
4512};
4513/// Fields ids in kmp_depend_info record.
// Field order must match the record layout built in getDependTypes():
// { intptr_t base_addr; size_t len; <flags int> }.
4514enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4515} // namespace
4516
4517/// Translates internal dependency kind into the runtime kind.
4518static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4519 RTLDependenceKindTy DepKind;
4520 switch (K) {
4521 case OMPC_DEPEND_in:
4522 DepKind = DepIn;
4523 break;
4524 // Out and InOut dependencies must use the same code.
4525 case OMPC_DEPEND_out:
4526 case OMPC_DEPEND_inout:
4527 DepKind = DepInOut;
4528 break;
4529 case OMPC_DEPEND_mutexinoutset:
4530 DepKind = DepMutexInOutSet;
4531 break;
4532 case OMPC_DEPEND_inoutset:
4533 DepKind = DepInOutSet;
4534 break;
4535 case OMPC_DEPEND_outallmemory:
4536 DepKind = DepOmpAllMem;
4537 break;
4538 case OMPC_DEPEND_source:
4539 case OMPC_DEPEND_sink:
4540 case OMPC_DEPEND_depobj:
4541 case OMPC_DEPEND_inoutallmemory:
4542 case OMPC_DEPEND_unknown:
4543 llvm_unreachable("Unknown task dependence type")::llvm::llvm_unreachable_internal("Unknown task dependence type"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4543)
;
4544 }
4545 return DepKind;
4546}
4547
4548/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4549static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4550 QualType &FlagsTy) {
4551 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4552 if (KmpDependInfoTy.isNull()) {
4553 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4554 KmpDependInfoRD->startDefinition();
4555 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4556 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4557 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4558 KmpDependInfoRD->completeDefinition();
4559 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4560 }
4561}
4562
4563std::pair<llvm::Value *, LValue>
4564CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4565 SourceLocation Loc) {
4566 ASTContext &C = CGM.getContext();
4567 QualType FlagsTy;
4568 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4569 RecordDecl *KmpDependInfoRD =
4570 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4571 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4572 LValue Base = CGF.EmitLoadOfPointerLValue(
4573 CGF.Builder.CreateElementBitCast(
4574 DepobjLVal.getAddress(CGF),
4575 CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
4576 KmpDependInfoPtrTy->castAs<PointerType>());
4577 Address DepObjAddr = CGF.Builder.CreateGEP(
4578 Base.getAddress(CGF),
4579 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4580 LValue NumDepsBase = CGF.MakeAddrLValue(
4581 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4582 // NumDeps = deps[i].base_addr;
4583 LValue BaseAddrLVal = CGF.EmitLValueForField(
4584 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4585 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4586 return std::make_pair(NumDeps, Base);
4587}
4588
4589static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4590 llvm::PointerUnion<unsigned *, LValue *> Pos,
4591 const OMPTaskDataTy::DependData &Data,
4592 Address DependenciesArray) {
4593 CodeGenModule &CGM = CGF.CGM;
4594 ASTContext &C = CGM.getContext();
4595 QualType FlagsTy;
4596 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4597 RecordDecl *KmpDependInfoRD =
4598 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4599 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4600
4601 OMPIteratorGeneratorScope IteratorScope(
4602 CGF, cast_or_null<OMPIteratorExpr>(
4603 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4604 : nullptr));
4605 for (const Expr *E : Data.DepExprs) {
4606 llvm::Value *Addr;
4607 llvm::Value *Size;
4608
4609 // The expression will be a nullptr in the 'omp_all_memory' case.
4610 if (E) {
4611 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4612 Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
4613 } else {
4614 Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4615 Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
4616 }
4617 LValue Base;
4618 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4619 Base = CGF.MakeAddrLValue(
4620 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4621 } else {
4622 assert(E && "Expected a non-null expression")(static_cast <bool> (E && "Expected a non-null expression"
) ? void (0) : __assert_fail ("E && \"Expected a non-null expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4622, __extension__
__PRETTY_FUNCTION__))
;
4623 LValue &PosLVal = *Pos.get<LValue *>();
4624 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4625 Base = CGF.MakeAddrLValue(
4626 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4627 }
4628 // deps[i].base_addr = &<Dependencies[i].second>;
4629 LValue BaseAddrLVal = CGF.EmitLValueForField(
4630 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4631 CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
4632 // deps[i].len = sizeof(<Dependencies[i].second>);
4633 LValue LenLVal = CGF.EmitLValueForField(
4634 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4635 CGF.EmitStoreOfScalar(Size, LenLVal);
4636 // deps[i].flags = <Dependencies[i].first>;
4637 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4638 LValue FlagsLVal = CGF.EmitLValueForField(
4639 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4640 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4641 FlagsLVal);
4642 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4643 ++(*P);
4644 } else {
4645 LValue &PosLVal = *Pos.get<LValue *>();
4646 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4647 Idx = CGF.Builder.CreateNUWAdd(Idx,
4648 llvm::ConstantInt::get(Idx->getType(), 1));
4649 CGF.EmitStoreOfScalar(Idx, PosLVal);
4650 }
4651 }
4652}
4653
4654SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4655 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4656 const OMPTaskDataTy::DependData &Data) {
4657 assert(Data.DepKind == OMPC_DEPEND_depobj &&(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependency kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependency kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4658, __extension__
__PRETTY_FUNCTION__))
4658 "Expected depobj dependency kind.")(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependency kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependency kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4658, __extension__
__PRETTY_FUNCTION__))
;
4659 SmallVector<llvm::Value *, 4> Sizes;
4660 SmallVector<LValue, 4> SizeLVals;
4661 ASTContext &C = CGF.getContext();
4662 {
4663 OMPIteratorGeneratorScope IteratorScope(
4664 CGF, cast_or_null<OMPIteratorExpr>(
4665 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4666 : nullptr));
4667 for (const Expr *E : Data.DepExprs) {
4668 llvm::Value *NumDeps;
4669 LValue Base;
4670 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4671 std::tie(NumDeps, Base) =
4672 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4673 LValue NumLVal = CGF.MakeAddrLValue(
4674 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4675 C.getUIntPtrType());
4676 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4677 NumLVal.getAddress(CGF));
4678 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4679 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4680 CGF.EmitStoreOfScalar(Add, NumLVal);
4681 SizeLVals.push_back(NumLVal);
4682 }
4683 }
4684 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4685 llvm::Value *Size =
4686 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4687 Sizes.push_back(Size);
4688 }
4689 return Sizes;
4690}
4691
4692void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4693 QualType &KmpDependInfoTy,
4694 LValue PosLVal,
4695 const OMPTaskDataTy::DependData &Data,
4696 Address DependenciesArray) {
4697 assert(Data.DepKind == OMPC_DEPEND_depobj &&(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependency kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependency kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4698, __extension__
__PRETTY_FUNCTION__))
4698 "Expected depobj dependency kind.")(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependency kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependency kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4698, __extension__
__PRETTY_FUNCTION__))
;
4699 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4700 {
4701 OMPIteratorGeneratorScope IteratorScope(
4702 CGF, cast_or_null<OMPIteratorExpr>(
4703 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4704 : nullptr));
4705 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4706 const Expr *E = Data.DepExprs[I];
4707 llvm::Value *NumDeps;
4708 LValue Base;
4709 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4710 std::tie(NumDeps, Base) =
4711 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4712
4713 // memcopy dependency data.
4714 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4715 ElSize,
4716 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4717 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4718 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4719 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4720
4721 // Increase pos.
4722 // pos += size;
4723 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4724 CGF.EmitStoreOfScalar(Add, PosLVal);
4725 }
4726 }
4727}
4728
4729std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4730 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4731 SourceLocation Loc) {
4732 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4733 return D.DepExprs.empty();
4734 }))
4735 return std::make_pair(nullptr, Address::invalid());
4736 // Process list of dependencies.
4737 ASTContext &C = CGM.getContext();
4738 Address DependenciesArray = Address::invalid();
4739 llvm::Value *NumOfElements = nullptr;
4740 unsigned NumDependencies = std::accumulate(
4741 Dependencies.begin(), Dependencies.end(), 0,
4742 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4743 return D.DepKind == OMPC_DEPEND_depobj
4744 ? V
4745 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4746 });
4747 QualType FlagsTy;
4748 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4749 bool HasDepobjDeps = false;
4750 bool HasRegularWithIterators = false;
4751 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4752 llvm::Value *NumOfRegularWithIterators =
4753 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4754 // Calculate number of depobj dependencies and regular deps with the
4755 // iterators.
4756 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4757 if (D.DepKind == OMPC_DEPEND_depobj) {
4758 SmallVector<llvm::Value *, 4> Sizes =
4759 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4760 for (llvm::Value *Size : Sizes) {
4761 NumOfDepobjElements =
4762 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4763 }
4764 HasDepobjDeps = true;
4765 continue;
4766 }
4767 // Include number of iterations, if any.
4768
4769 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4770 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4771 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4772 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4773 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4774 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4775 NumOfRegularWithIterators =
4776 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4777 }
4778 HasRegularWithIterators = true;
4779 continue;
4780 }
4781 }
4782
4783 QualType KmpDependInfoArrayTy;
4784 if (HasDepobjDeps || HasRegularWithIterators) {
4785 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4786 /*isSigned=*/false);
4787 if (HasDepobjDeps) {
4788 NumOfElements =
4789 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4790 }
4791 if (HasRegularWithIterators) {
4792 NumOfElements =
4793 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4794 }
4795 auto *OVE = new (C) OpaqueValueExpr(
4796 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4797 VK_PRValue);
4798 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4799 RValue::get(NumOfElements));
4800 KmpDependInfoArrayTy =
4801 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4802 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4803 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4804 // Properly emit variable-sized array.
4805 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4806 ImplicitParamDecl::Other);
4807 CGF.EmitVarDecl(*PD);
4808 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4809 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4810 /*isSigned=*/false);
4811 } else {
4812 KmpDependInfoArrayTy = C.getConstantArrayType(
4813 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4814 ArrayType::Normal, /*IndexTypeQuals=*/0);
4815 DependenciesArray =
4816 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4817 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4818 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4819 /*isSigned=*/false);
4820 }
4821 unsigned Pos = 0;
4822 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4823 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4824 Dependencies[I].IteratorExpr)
4825 continue;
4826 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4827 DependenciesArray);
4828 }
4829 // Copy regular dependencies with iterators.
4830 LValue PosLVal = CGF.MakeAddrLValue(
4831 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4832 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4833 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4834 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4835 !Dependencies[I].IteratorExpr)
4836 continue;
4837 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4838 DependenciesArray);
4839 }
4840 // Copy final depobj arrays without iterators.
4841 if (HasDepobjDeps) {
4842 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4843 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4844 continue;
4845 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4846 DependenciesArray);
4847 }
4848 }
4849 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4850 DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
4851 return std::make_pair(NumOfElements, DependenciesArray);
4852}
4853
4854Address CGOpenMPRuntime::emitDepobjDependClause(
4855 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4856 SourceLocation Loc) {
4857 if (Dependencies.DepExprs.empty())
4858 return Address::invalid();
4859 // Process list of dependencies.
4860 ASTContext &C = CGM.getContext();
4861 Address DependenciesArray = Address::invalid();
4862 unsigned NumDependencies = Dependencies.DepExprs.size();
4863 QualType FlagsTy;
4864 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4865 RecordDecl *KmpDependInfoRD =
4866 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4867
4868 llvm::Value *Size;
4869 // Define type kmp_depend_info[<Dependencies.size()>];
4870 // For depobj reserve one extra element to store the number of elements.
4871 // It is required to handle depobj(x) update(in) construct.
4872 // kmp_depend_info[<Dependencies.size()>] deps;
4873 llvm::Value *NumDepsVal;
4874 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4875 if (const auto *IE =
4876 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4877 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4878 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4879 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4880 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4881 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4882 }
4883 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
4884 NumDepsVal);
4885 CharUnits SizeInBytes =
4886 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4887 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
4888 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
4889 NumDepsVal =
4890 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
4891 } else {
4892 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4893 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4894 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
4895 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
4896 Size = CGM.getSize(Sz.alignTo(Align));
4897 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
4898 }
4899 // Need to allocate on the dynamic memory.
4900 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4901 // Use default allocator.
4902 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4903 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4904
4905 llvm::Value *Addr =
4906 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4907 CGM.getModule(), OMPRTL___kmpc_alloc),
4908 Args, ".dep.arr.addr");
4909 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4910 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4911 Addr, KmpDependInfoLlvmTy->getPointerTo());
4912 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4913 // Write number of elements in the first element of array for depobj.
4914 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4915 // deps[i].base_addr = NumDependencies;
4916 LValue BaseAddrLVal = CGF.EmitLValueForField(
4917 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4918 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
4919 llvm::PointerUnion<unsigned *, LValue *> Pos;
4920 unsigned Idx = 1;
4921 LValue PosLVal;
4922 if (Dependencies.IteratorExpr) {
4923 PosLVal = CGF.MakeAddrLValue(
4924 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
4925 C.getSizeType());
4926 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
4927 /*IsInit=*/true);
4928 Pos = &PosLVal;
4929 } else {
4930 Pos = &Idx;
4931 }
4932 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4933 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4934 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
4935 CGF.Int8Ty);
4936 return DependenciesArray;
4937}
4938
4939void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4940 SourceLocation Loc) {
4941 ASTContext &C = CGM.getContext();
4942 QualType FlagsTy;
4943 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4944 LValue Base = CGF.EmitLoadOfPointerLValue(
4945 DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
4946 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4947 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4948 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4949 CGF.ConvertTypeForMem(KmpDependInfoTy));
4950 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4951 Addr.getElementType(), Addr.getPointer(),
4952 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4953 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
4954 CGF.VoidPtrTy);
4955 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4956 // Use default allocator.
4957 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4958 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4959
4960 // _kmpc_free(gtid, addr, nullptr);
4961 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4962 CGM.getModule(), OMPRTL___kmpc_free),
4963 Args);
4964}
4965
4966void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4967 OpenMPDependClauseKind NewDepKind,
4968 SourceLocation Loc) {
4969 ASTContext &C = CGM.getContext();
4970 QualType FlagsTy;
4971 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4972 RecordDecl *KmpDependInfoRD =
4973 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4974 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4975 llvm::Value *NumDeps;
4976 LValue Base;
4977 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
4978
4979 Address Begin = Base.getAddress(CGF);
4980 // Cast from pointer to array type to pointer to single element.
4981 llvm::Value *End = CGF.Builder.CreateGEP(
4982 Begin.getElementType(), Begin.getPointer(), NumDeps);
4983 // The basic structure here is a while-do loop.
4984 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
4985 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
4986 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4987 CGF.EmitBlock(BodyBB);
4988 llvm::PHINode *ElementPHI =
4989 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
4990 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
4991 Begin = Begin.withPointer(ElementPHI);
4992 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4993 Base.getTBAAInfo());
4994 // deps[i].flags = NewDepKind;
4995 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
4996 LValue FlagsLVal = CGF.EmitLValueForField(
4997 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4998 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4999 FlagsLVal);
5000
5001 // Shift the address forward by one element.
5002 Address ElementNext =
5003 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5004 ElementPHI->addIncoming(ElementNext.getPointer(),
5005 CGF.Builder.GetInsertBlock());
5006 llvm::Value *IsEmpty =
5007 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5008 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5009 // Done.
5010 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5011}
5012
5013void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5014 const OMPExecutableDirective &D,
5015 llvm::Function *TaskFunction,
5016 QualType SharedsTy, Address Shareds,
5017 const Expr *IfCond,
5018 const OMPTaskDataTy &Data) {
5019 if (!CGF.HaveInsertPoint())
5020 return;
5021
5022 TaskResultTy Result =
5023 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5024 llvm::Value *NewTask = Result.NewTask;
5025 llvm::Function *TaskEntry = Result.TaskEntry;
5026 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5027 LValue TDBase = Result.TDBase;
5028 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5029 // Process list of dependences.
5030 Address DependenciesArray = Address::invalid();
5031 llvm::Value *NumOfElements;
5032 std::tie(NumOfElements, DependenciesArray) =
5033 emitDependClause(CGF, Data.Dependences, Loc);
5034
5035 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5036 // libcall.
5037 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5038 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5039 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5040 // list is not empty
5041 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5042 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5043 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5044 llvm::Value *DepTaskArgs[7];
5045 if (!Data.Dependences.empty()) {
5046 DepTaskArgs[0] = UpLoc;
5047 DepTaskArgs[1] = ThreadID;
5048 DepTaskArgs[2] = NewTask;
5049 DepTaskArgs[3] = NumOfElements;
5050 DepTaskArgs[4] = DependenciesArray.getPointer();
5051 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5052 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5053 }
5054 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5055 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5056 if (!Data.Tied) {
5057 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5058 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5059 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5060 }
5061 if (!Data.Dependences.empty()) {
5062 CGF.EmitRuntimeCall(
5063 OMPBuilder.getOrCreateRuntimeFunction(
5064 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5065 DepTaskArgs);
5066 } else {
5067 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5068 CGM.getModule(), OMPRTL___kmpc_omp_task),
5069 TaskArgs);
5070 }
5071 // Check if parent region is untied and build return for untied task;
5072 if (auto *Region =
5073 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5074 Region->emitUntiedSwitch(CGF);
5075 };
5076
5077 llvm::Value *DepWaitTaskArgs[6];
5078 if (!Data.Dependences.empty()) {
5079 DepWaitTaskArgs[0] = UpLoc;
5080 DepWaitTaskArgs[1] = ThreadID;
5081 DepWaitTaskArgs[2] = NumOfElements;
5082 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5083 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5084 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5085 }
5086 auto &M = CGM.getModule();
5087 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5088 TaskEntry, &Data, &DepWaitTaskArgs,
5089 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5090 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5091 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5092 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5093 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5094 // is specified.
5095 if (!Data.Dependences.empty())
5096 CGF.EmitRuntimeCall(
5097 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5098 DepWaitTaskArgs);
5099 // Call proxy_task_entry(gtid, new_task);
5100 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5101 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5102 Action.Enter(CGF);
5103 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5104 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5105 OutlinedFnArgs);
5106 };
5107
5108 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5109 // kmp_task_t *new_task);
5110 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5111 // kmp_task_t *new_task);
5112 RegionCodeGenTy RCG(CodeGen);
5113 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5114 M, OMPRTL___kmpc_omp_task_begin_if0),
5115 TaskArgs,
5116 OMPBuilder.getOrCreateRuntimeFunction(
5117 M, OMPRTL___kmpc_omp_task_complete_if0),
5118 TaskArgs);
5119 RCG.setAction(Action);
5120 RCG(CGF);
5121 };
5122
5123 if (IfCond) {
5124 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5125 } else {
5126 RegionCodeGenTy ThenRCG(ThenCodeGen);
5127 ThenRCG(CGF);
5128 }
5129}
5130
5131void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5132 const OMPLoopDirective &D,
5133 llvm::Function *TaskFunction,
5134 QualType SharedsTy, Address Shareds,
5135 const Expr *IfCond,
5136 const OMPTaskDataTy &Data) {
5137 if (!CGF.HaveInsertPoint())
5138 return;
5139 TaskResultTy Result =
5140 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5141 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5142 // libcall.
5143 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5144 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5145 // sched, kmp_uint64 grainsize, void *task_dup);
5146 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5147 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5148 llvm::Value *IfVal;
5149 if (IfCond) {
5150 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5151 /*isSigned=*/true);
5152 } else {
5153 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5154 }
5155
5156 LValue LBLVal = CGF.EmitLValueForField(
5157 Result.TDBase,
5158 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5159 const auto *LBVar =
5160 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5161 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5162 LBLVal.getQuals(),
5163 /*IsInitializer=*/true);
5164 LValue UBLVal = CGF.EmitLValueForField(
5165 Result.TDBase,
5166 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5167 const auto *UBVar =
5168 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5169 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5170 UBLVal.getQuals(),
5171 /*IsInitializer=*/true);
5172 LValue StLVal = CGF.EmitLValueForField(
5173 Result.TDBase,
5174 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5175 const auto *StVar =
5176 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5177 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5178 StLVal.getQuals(),
5179 /*IsInitializer=*/true);
5180 // Store reductions address.
5181 LValue RedLVal = CGF.EmitLValueForField(
5182 Result.TDBase,
5183 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5184 if (Data.Reductions) {
5185 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5186 } else {
5187 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5188 CGF.getContext().VoidPtrTy);
5189 }
5190 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5191 llvm::Value *TaskArgs[] = {
5192 UpLoc,
5193 ThreadID,
5194 Result.NewTask,
5195 IfVal,
5196 LBLVal.getPointer(CGF),
5197 UBLVal.getPointer(CGF),
5198 CGF.EmitLoadOfScalar(StLVal, Loc),
5199 llvm::ConstantInt::getSigned(
5200 CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5201 llvm::ConstantInt::getSigned(
5202 CGF.IntTy, Data.Schedule.getPointer()
5203 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5204 : NoSchedule),
5205 Data.Schedule.getPointer()
5206 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5207 /*isSigned=*/false)
5208 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5209 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5210 Result.TaskDupFn, CGF.VoidPtrTy)
5211 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5212 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5213 CGM.getModule(), OMPRTL___kmpc_taskloop),
5214 TaskArgs);
5215}
5216
5217/// Emit reduction operation for each element of array (required for
5218/// array sections) LHS op = RHS.
5219/// \param Type Type of array.
5220/// \param LHSVar Variable on the left side of the reduction operation
5221/// (references element of array in original variable).
5222/// \param RHSVar Variable on the right side of the reduction operation
5223/// (references element of array in original variable).
5224/// \param RedOpGen Generator of reduction operation with use of LHSVar and
5225/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  // LHSEnd is the one-past-the-end element pointer used as the loop bound.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array: jump straight to DoneBB.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHI nodes carry the current source/destination element pointers from one
  // iteration to the next (second incoming edge is added after the latch).
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so that
  // RedOpGen (which refers to the variables) operates on single elements.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  // The back edge comes from the current insert block, which RedOpGen may
  // have changed from BodyBB by emitting additional blocks.
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5299
5300/// Emit reduction combiner. If the combiner is a simple expression emit it as
5301/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5302/// UDR combiner function.
5303static void emitReductionCombiner(CodeGenFunction &CGF,
5304 const Expr *ReductionOp) {
5305 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5306 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5307 if (const auto *DRE =
5308 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5309 if (const auto *DRD =
5310 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5311 std::pair<llvm::Function *, llvm::Function *> Reduction =
5312 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5313 RValue Func = RValue::get(Reduction.first);
5314 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5315 CGF.EmitIgnoredExpr(ReductionOp);
5316 return;
5317 }
5318 CGF.EmitIgnoredExpr(ReductionOp);
5319}
5320
/// Emits the outlined reduction function
///   void reduction_func(void *LHSArg, void *RHSArg);
/// which performs, for each reduction item i:
///   *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
/// LHSArg/RHSArg point to arrays (of element type \p ArgsElemType) of
/// pointers to the reduction items; for variably-modified types an extra
/// slot carries the VLA size.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Remap each LHS/RHS variable to the corresponding slot in the argument
  // arrays; Idx tracks the slot index and is bumped an extra time when a
  // VLA-size slot follows the item's pointer slot.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      // Bind the VLA size expression to the value loaded from the extra slot
      // before emitting the variably-modified type.
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5410
5411void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5412 const Expr *ReductionOp,
5413 const Expr *PrivateRef,
5414 const DeclRefExpr *LHS,
5415 const DeclRefExpr *RHS) {
5416 if (PrivateRef->getType()->isArrayType()) {
5417 // Emit reduction for array section.
5418 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5419 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5420 EmitOMPAggregateReduction(
5421 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5422 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5423 emitReductionCombiner(CGF, ReductionOp);
5424 });
5425 } else {
5426 // Emit reduction for array subscript or single variable.
5427 emitReductionCombiner(CGF, ReductionOp);
5428 }
5429}
5430
/// Emits code for an OpenMP reduction clause: builds the RedList of item
/// addresses, outlines reduce_func, calls __kmpc_reduce{_nowait} and switches
/// on its result to emit either the tree-reduce (case 1) or the atomic
/// (case 2) combination path. With Options.SimpleReduction only the plain
/// combiners are emitted (no runtime calls).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime involvement: just emit each combiner in sequence.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      // The size occupies the extra slot reserved above, encoded as a
      // pointer-sized integer cast to void*.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  // The CommonActionTy wraps the combiners so that __kmpc_end_reduce{_nowait}
  // is called when the region finishes.
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update expr>' so the update can be attempted as a
      // simple atomic; anything else falls back to a critical region below.
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // For the compare-exchange fallback: store the loaded value
                // into a temporary remapped as VD and re-emit the update.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    // Nowait reductions do not call __kmpc_end_reduce on the atomic path.
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
5734
5735/// Generates unique name for artificial threadprivate variables.
5736/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5737static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5738 const Expr *Ref) {
5739 SmallString<256> Buffer;
5740 llvm::raw_svector_ostream Out(Buffer);
5741 const clang::DeclRefExpr *DE;
5742 const VarDecl *D = ::getBaseDecl(Ref, DE);
5743 if (!D)
5744 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5745 D = D->getCanonicalDecl();
5746 std::string Name = CGM.getOpenMPRuntime().getName(
5747 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5748 Out << Prefix << Name << "_"
5749 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5750 return std::string(Out.str());
5751}
5752
5753/// Emits reduction initializer function:
5754/// \code
5755/// void @.red_init(void* %arg, void* %orig) {
5756/// %0 = bitcast void* %arg to <type>*
5757/// store <type> <init>, <type>* %0
5758/// ret void
5759/// }
5760/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  // Two void* restrict parameters: the private copy and the original item.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by the
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5819
5820/// Emits reduction combiner function:
5821/// \code
5822/// void @.red_comb(void* %arg0, void* %arg1) {
5823/// %lhs = bitcast void* %arg0 to <type>*
5824/// %rhs = bitcast void* %arg1 to <type>*
5825/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5826/// store <type> %2, <type>* %lhs
5827/// ret void
5828/// }
5829/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  // Two void* parameters: in/out accumulator (lhs) and input item (rhs).
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamIn),
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
5897
5898/// Emits reduction finalizer function:
5899/// \code
5900/// void @.red_fini(void* %arg) {
5901/// %0 = bitcast void* %arg to <type>*
5902/// <destroy>(<type>* %0)
5903/// ret void
5904/// }
5905/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // No finalizer is needed (and none is emitted) when the reduction item has
  // no cleanups; callers must handle the nullptr result.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Single void* parameter: the private reduction item to destroy.
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
5945
/// Emits the runtime initialization for task reductions.
///
/// Builds an on-stack array of kmp_taskred_input_t descriptors — one per
/// reduction item in \p Data — filling in the shared/original item addresses,
/// the item size, and the generated init/fini/combiner callbacks, then hands
/// the array to __kmpc_taskred_modifier_init (for reductions with a task
/// modifier) or __kmpc_taskred_init. Returns the runtime's opaque taskgroup
/// data pointer, or nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    // A null fini function means no finalization is required; the runtime
    // accepts a null pointer for that case.
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      // Flag value 1 marks a lazily-created (VLA/array-section) item.
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
6074
6075void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6076 SourceLocation Loc,
6077 bool IsWorksharingReduction) {
6078 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6079 // is_ws, int num, void *data);
6080 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6081 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6082 CGM.IntTy, /*isSigned=*/true);
6083 llvm::Value *Args[] = {IdentTLoc, GTid,
6084 llvm::ConstantInt::get(CGM.IntTy,
6085 IsWorksharingReduction ? 1 : 0,
6086 /*isSigned=*/true)};
6087 (void)CGF.EmitRuntimeCall(
6088 OMPBuilder.getOrCreateRuntimeFunction(
6089 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6090 Args);
6091}
6092
6093void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6094 SourceLocation Loc,
6095 ReductionCodeGen &RCG,
6096 unsigned N) {
6097 auto Sizes = RCG.getSizes(N);
6098 // Emit threadprivate global variable if the type is non-constant
6099 // (Sizes.second = nullptr).
6100 if (Sizes.second) {
6101 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6102 /*isSigned=*/false);
6103 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6104 CGF, CGM.getContext().getSizeType(),
6105 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6106 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6107 }
6108}
6109
6110Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6111 SourceLocation Loc,
6112 llvm::Value *ReductionsPtr,
6113 LValue SharedLVal) {
6114 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6115 // *d);
6116 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6117 CGM.IntTy,
6118 /*isSigned=*/true),
6119 ReductionsPtr,
6120 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6121 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6122 return Address(
6123 CGF.EmitRuntimeCall(
6124 OMPBuilder.getOrCreateRuntimeFunction(
6125 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6126 Args),
6127 CGF.Int8Ty, SharedLVal.getAlignment());
6128}
6129
/// Emits code for the 'taskwait' directive: via the OpenMPIRBuilder when it
/// is enabled and there are no dependences, otherwise as a direct runtime
/// call to __kmpc_omp_wait_deps (with dependences) or __kmpc_omp_taskwait.
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the dependence list (if any) into an array.
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  // Inside an OpenMP region, untied tasks need a resume point after the wait.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
6180
/// Emits an inlined (non-outlined) OpenMP region for directive \p InnerKind.
/// The InlinedOpenMPRegionRAII swaps CGF.CapturedStmtInfo for the duration of
/// the call so the region body is emitted directly into the current function.
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // NOTE(review): the final flag is false only for critical/master/masked —
  // confirm its exact semantics against the InlinedOpenMPRegionRAII ctor.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  // The RAII above guarantees CapturedStmtInfo is set here.
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
6193
namespace {
/// Cancellation kinds passed as the kmp_int32 cncl_kind argument of the
/// __kmpc_cancel / __kmpc_cancellationpoint runtime entry points.
enum RTCancelKind {
  CancelNoreq = 0,     // no cancellation requested
  CancelParallel = 1,  // cancel a parallel region
  CancelLoop = 2,      // cancel a worksharing loop
  CancelSections = 3,  // cancel a sections construct
  CancelTaskgroup = 4  // cancel a taskgroup
};
} // anonymous namespace
6203
6204static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6205 RTCancelKind CancelKind = CancelNoreq;
6206 if (CancelRegion == OMPD_parallel)
6207 CancelKind = CancelParallel;
6208 else if (CancelRegion == OMPD_for)
6209 CancelKind = CancelLoop;
6210 else if (CancelRegion == OMPD_sections)
6211 CancelKind = CancelSections;
6212 else {
6213 assert(CancelRegion == OMPD_taskgroup)(static_cast <bool> (CancelRegion == OMPD_taskgroup) ? void
(0) : __assert_fail ("CancelRegion == OMPD_taskgroup", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 6213, __extension__ __PRETTY_FUNCTION__))
;
6214 CancelKind = CancelTaskgroup;
6215 }
6216 return CancelKind;
6217}
6218
/// Emits code for '#pragma omp cancellation point': calls
/// __kmpc_cancellationpoint and, when it returns non-zero, branches out of
/// the innermost cancellable construct (emitting a cancel barrier first for
/// parallel regions).
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
6258
/// Emits code for '#pragma omp cancel' (optionally guarded by an if-clause):
/// calls __kmpc_cancel and, when it reports an active cancellation, exits the
/// enclosing construct through its cleanups (with a cancel barrier first for
/// parallel regions).
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // Emit cancellation only on the 'then' branch of the if-clause.
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6304
namespace {
/// Cleanup action for uses_allocators support.
/// On region entry every (allocator, traits) pair is initialized via the
/// runtime; on region exit each allocator is destroyed again.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  // (allocator expression, allocator-traits expression) pairs.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      // Initialize each allocator from its traits expression.
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
6332
6333void CGOpenMPRuntime::emitTargetOutlinedFunction(
6334 const OMPExecutableDirective &D, StringRef ParentName,
6335 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6336 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6337 assert(!ParentName.empty() && "Invalid target region parent name!")(static_cast <bool> (!ParentName.empty() && "Invalid target region parent name!"
) ? void (0) : __assert_fail ("!ParentName.empty() && \"Invalid target region parent name!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6337, __extension__
__PRETTY_FUNCTION__))
;
6338 HasEmittedTargetRegion = true;
6339 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6340 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6341 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6342 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6343 if (!D.AllocatorTraits)
6344 continue;
6345 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6346 }
6347 }
6348 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6349 CodeGen.setAction(UsesAllocatorAction);
6350 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6351 IsOffloadEntry, CodeGen);
6352}
6353
/// Emits initialization of one allocator from a 'uses_allocators' clause:
/// loads the address of the traits array, calls __kmpc_init_allocator with
/// the default memspace, and stores the resulting handle into the (locally
/// emitted) allocator variable.
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // The number of traits is the extent of the traits constant-array type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  // Reinterpret the traits array address as a void* for the runtime call.
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
6388
6389void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6390 const Expr *Allocator) {
6391 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6392 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6393 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6394 llvm::Value *AllocatorVal =
6395 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6396 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6397 CGF.getContext().VoidPtrTy,
6398 Allocator->getExprLoc());
6399 (void)CGF.EmitRuntimeCall(
6400 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6401 OMPRTL___kmpc_destroy_allocator),
6402 {ThreadId, AllocatorVal});
6403}
6404
/// Shared implementation for emitting a target region's outlined function and
/// registering its offload entry with the OffloadEntriesInfoManager.
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region.

  // Host compilations with mandatory offload emit no host outlined function.
  // NOTE(review): when BuildOutlinedFn is false, OutlinedFn is never assigned
  // here — callers are presumed to have initialized it (to null); the later
  // uses guard on OutlinedFn or on BuildOutlinedFn. Confirm at call sites.
  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
                           Line);
  SmallString<64> EntryFnName;
  {
    llvm::raw_svector_ostream OS(EntryFnName);
    OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
  }

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. In the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can retrieved from
  // the offloading entry and launched by the runtime library. We also mark the
  // outlined function to have external linkage in case we are emitting code for
  // the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    // On the device BuildOutlinedFn is always true, so OutlinedFn was set.
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      DeviceID, FileID, ParentName, Line, TargetRegionEntryAddr, OutlinedFnID,
      OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
6504
6505/// Checks if the expression is constant or does not have non-trivial function
6506/// calls.
6507static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6508 // We can skip constant expressions.
6509 // We can skip expressions with trivial calls or simple expressions.
6510 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6511 !E->hasNonTrivialCall(Ctx)) &&
6512 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6513}
6514
/// Walks \p Body, skipping containers and ignorable statements (trivial
/// expressions, asm/null statements, flush/barrier/taskyield directives and
/// benign declarations), and returns the single remaining child statement —
/// or nullptr when there is none or more than one.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              // These declaration kinds produce no code in the region body.
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              // Globals and unused locals are likewise ignorable.
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6556
/// Returns the num_teams clause expression governing a target-based directive
/// (or nullptr when no expression needs evaluating) and sets \p DefaultVal to
/// the statically known team count: a positive constant when known, 0 when a
/// num_teams clause exists but is not a constant, 1 when exactly one team is
/// implied, or -1 when no teams region is needed at all.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a bare 'target', inspect the single nested directive (if any) to
    // determine how many teams it implies.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  // None of the remaining directive kinds is a target-based executable
  // directive, so reaching the switch with one of them is a bug.
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6685
/// Emits the host-side number-of-teams value for a target directive: the
/// evaluated num_teams clause expression (cast to i32) when one applies,
/// otherwise the default constant from getNumTeamsExprForTargetDirective.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // The clause comes from a nested teams directive, so evaluate it in the
      // context of the captured statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}
6725
// Compute the i32 thread count for the single directive nested directly
// inside captured statement \p CS, combining an inner 'parallel' directive's
// if/num_threads clauses with an already-computed outer thread_limit value
// (\p DefaultThreadLimitVal, may be null).
//
// Result shape when an inner parallel directive is found:
//   <cond> ? (<numthreads> ? min(numthreads, thread_limit) : 0) : 1
// where a statically-false if-clause short-circuits to 1.  An inner simd
// directive yields the constant 1.  Otherwise the default value is passed
// through (or 0, meaning "runtime default", when none was given).
6726static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6727 llvm::Value *DefaultThreadLimitVal) {
6728 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6729 CGF.getContext(), CS->getCapturedStmt());
6730 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6731 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6732 llvm::Value *NumThreads = nullptr;
6733 llvm::Value *CondVal = nullptr;
6734 // Handle if clause. If if clause present, the number of threads is
6735 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6736 if (Dir->hasClausesOfKind<OMPIfClause>()) {
        // Clause expressions reference target-captured variables, so emit
        // them in the inner-expression codegen context.
6737 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6738 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6739 const OMPIfClause *IfClause = nullptr;
        // Only an unmodified if-clause or one with the 'parallel' name
        // modifier applies to the inner parallel region.
6740 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6741 if (C->getNameModifier() == OMPD_unknown ||
6742 C->getNameModifier() == OMPD_parallel) {
6743 IfClause = C;
6744 break;
6745 }
6746 }
6747 if (IfClause) {
6748 const Expr *Cond = IfClause->getCondition();
6749 bool Result;
6750 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            // Statically-false condition: the region runs serially.
6751 if (!Result)
6752 return CGF.Builder.getInt32(1);
6753 } else {
6754 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            // Emit any pre-init declarations the clause carries before
            // evaluating the condition at runtime.
6755 if (const auto *PreInit =
6756 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6757 for (const auto *I : PreInit->decls()) {
6758 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6759 CGF.EmitVarDecl(cast<VarDecl>(*I));
6760 } else {
6761 CodeGenFunction::AutoVarEmission Emission =
6762 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6763 CGF.EmitAutoVarCleanups(Emission);
6764 }
6765 }
6766 }
6767 CondVal = CGF.EvaluateExprAsBool(Cond);
6768 }
6769 }
6770 }
6771 // Check the value of num_threads clause iff if clause was not specified
6772 // or is not evaluated to false.
6773 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6774 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6775 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6776 const auto *NumThreadsClause =
6777 Dir->getSingleClause<OMPNumThreadsClause>();
6778 CodeGenFunction::LexicalScope Scope(
6779 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6780 if (const auto *PreInit =
6781 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6782 for (const auto *I : PreInit->decls()) {
6783 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6784 CGF.EmitVarDecl(cast<VarDecl>(*I));
6785 } else {
6786 CodeGenFunction::AutoVarEmission Emission =
6787 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6788 CGF.EmitAutoVarCleanups(Emission);
6789 }
6790 }
6791 }
6792 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6793 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6794 /*isSigned=*/false);
        // Clamp num_threads by the outer thread_limit (unsigned min).
6795 if (DefaultThreadLimitVal)
6796 NumThreads = CGF.Builder.CreateSelect(
6797 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6798 DefaultThreadLimitVal, NumThreads);
6799 } else {
        // No num_threads clause: inherit thread_limit, or 0 = runtime picks.
6800 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6801 : CGF.Builder.getInt32(0);
6802 }
6803 // Process condition of the if clause.
6804 if (CondVal) {
6805 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6806 CGF.Builder.getInt32(1));
6807 }
6808 return NumThreads;
6809 }
    // An inner simd region executes with a single thread.
6810 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6811 return CGF.Builder.getInt32(1);
6812 return DefaultThreadLimitVal;
6813 }
  // No single nested directive: fall back to the default, or 0 meaning
  // "let the runtime decide".
6814 return DefaultThreadLimitVal ? DefaultThreadLimitVal
6815 : CGF.Builder.getInt32(0);
6816}
6817
// Return the clause expression that bounds the number of threads for a
// target-based executable directive \p D, or nullptr when no such clause
// applies.  When the winning expression is an integer constant, its value is
// also written to \p DefaultVal so callers can use a compile-time constant.
// For the combined target+parallel forms the tighter of thread_limit and
// num_threads wins.  Non-target directives are unreachable here (asserted).
6818const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6819 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6820 int32_t &DefaultVal) {
6821 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6822 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6823, __extension__
__PRETTY_FUNCTION__))
6823 "Expected target-based executable directive.")(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6823, __extension__
__PRETTY_FUNCTION__))
;
6824
6825 switch (DirectiveKind) {
6826 case OMPD_target:
6827 // Teams have no clause thread_limit
6828 return nullptr;
6829 case OMPD_target_teams:
6830 case OMPD_target_teams_distribute:
    // thread_limit alone bounds the thread count for these forms.
6831 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6832 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6833 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6834 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6835 if (auto Constant =
6836 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6837 DefaultVal = Constant->getExtValue();
6838 return ThreadLimit;
6839 }
6840 return nullptr;
6841 case OMPD_target_parallel:
6842 case OMPD_target_parallel_for:
6843 case OMPD_target_parallel_for_simd:
6844 case OMPD_target_teams_distribute_parallel_for:
6845 case OMPD_target_teams_distribute_parallel_for_simd: {
    // Both thread_limit and num_threads may apply; prefer the smaller
    // constant when both fold.
6846 Expr *ThreadLimit = nullptr;
6847 Expr *NumThreads = nullptr;
6848 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6849 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6850 ThreadLimit = ThreadLimitClause->getThreadLimit();
6851 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6852 if (auto Constant =
6853 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6854 DefaultVal = Constant->getExtValue();
6855 }
6856 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6857 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6858 NumThreads = NumThreadsClause->getNumThreads();
6859 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6860 if (auto Constant =
6861 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          // NOTE(review): when no thread_limit clause set DefaultVal above,
          // this compares against the caller's incoming DefaultVal (e.g. -1
          // from emitNumTeamsForTargetDirective-style callers) — confirm the
          // intended semantics for the clause-absent case.
6862 if (Constant->getExtValue() < DefaultVal) {
6863 DefaultVal = Constant->getExtValue();
6864 ThreadLimit = NumThreads;
6865 }
6866 }
6867 }
6868 }
6869 return ThreadLimit;
6870 }
6871 case OMPD_target_teams_distribute_simd:
6872 case OMPD_target_simd:
    // simd regions execute with a single thread.
6873 DefaultVal = 1;
6874 return nullptr;
  // All remaining directive kinds are not target-based and cannot reach
  // this function (guarded by the assert above).
6875 case OMPD_parallel:
6876 case OMPD_for:
6877 case OMPD_parallel_for:
6878 case OMPD_parallel_master:
6879 case OMPD_parallel_sections:
6880 case OMPD_for_simd:
6881 case OMPD_parallel_for_simd:
6882 case OMPD_cancel:
6883 case OMPD_cancellation_point:
6884 case OMPD_ordered:
6885 case OMPD_threadprivate:
6886 case OMPD_allocate:
6887 case OMPD_task:
6888 case OMPD_simd:
6889 case OMPD_tile:
6890 case OMPD_unroll:
6891 case OMPD_sections:
6892 case OMPD_section:
6893 case OMPD_single:
6894 case OMPD_master:
6895 case OMPD_critical:
6896 case OMPD_taskyield:
6897 case OMPD_barrier:
6898 case OMPD_taskwait:
6899 case OMPD_taskgroup:
6900 case OMPD_atomic:
6901 case OMPD_flush:
6902 case OMPD_depobj:
6903 case OMPD_scan:
6904 case OMPD_teams:
6905 case OMPD_target_data:
6906 case OMPD_target_exit_data:
6907 case OMPD_target_enter_data:
6908 case OMPD_distribute:
6909 case OMPD_distribute_simd:
6910 case OMPD_distribute_parallel_for:
6911 case OMPD_distribute_parallel_for_simd:
6912 case OMPD_teams_distribute:
6913 case OMPD_teams_distribute_simd:
6914 case OMPD_teams_distribute_parallel_for:
6915 case OMPD_teams_distribute_parallel_for_simd:
6916 case OMPD_target_update:
6917 case OMPD_declare_simd:
6918 case OMPD_declare_variant:
6919 case OMPD_begin_declare_variant:
6920 case OMPD_end_declare_variant:
6921 case OMPD_declare_target:
6922 case OMPD_end_declare_target:
6923 case OMPD_declare_reduction:
6924 case OMPD_declare_mapper:
6925 case OMPD_taskloop:
6926 case OMPD_taskloop_simd:
6927 case OMPD_master_taskloop:
6928 case OMPD_master_taskloop_simd:
6929 case OMPD_parallel_master_taskloop:
6930 case OMPD_parallel_master_taskloop_simd:
6931 case OMPD_requires:
6932 case OMPD_unknown:
6933 break;
6934 default:
6935 break;
6936 }
6937 llvm_unreachable("Unsupported directive kind.")::llvm::llvm_unreachable_internal("Unsupported directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6937)
;
6938}
6939
// Emit the runtime i32 value for the number of threads of a target-based
// directive, on the HOST side only.  Combines the directive's own
// thread_limit/num_threads/if clauses with those of nested teams/parallel/
// distribute regions (via getNumThreads).  The returned value follows the
// convention: 0 = "runtime decides", 1 = serial, otherwise an upper bound.
6940llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
6941 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6942 assert(!CGF.getLangOpts().OpenMPIsDevice &&(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6944, __extension__
__PRETTY_FUNCTION__))
6943 "Clauses associated with the teams directive expected to be emitted "(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6944, __extension__
__PRETTY_FUNCTION__))
6944 "only for the host!")(static_cast <bool> (!CGF.getLangOpts().OpenMPIsDevice &&
"Clauses associated with the teams directive expected to be emitted "
"only for the host!") ? void (0) : __assert_fail ("!CGF.getLangOpts().OpenMPIsDevice && \"Clauses associated with the teams directive expected to be emitted \" \"only for the host!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6944, __extension__
__PRETTY_FUNCTION__))
;
6945 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6946 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6947, __extension__
__PRETTY_FUNCTION__))
6947 "Expected target-based executable directive.")(static_cast <bool> (isOpenMPTargetExecutionDirective(DirectiveKind
) && "Expected target-based executable directive.") ?
void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(DirectiveKind) && \"Expected target-based executable directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6947, __extension__
__PRETTY_FUNCTION__))
;
6948 CGBuilderTy &Bld = CGF.Builder;
6949 llvm::Value *ThreadLimitVal = nullptr;
6950 llvm::Value *NumThreadsVal = nullptr;
6951 switch (DirectiveKind) {
6952 case OMPD_target: {
    // Plain 'target': all clauses live on nested directives.  First try a
    // directly nested parallel region...
6953 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6954 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6955 return NumThreads;
6956 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6957 CGF.getContext(), CS->getCapturedStmt());
6958 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // ...otherwise look at the nested directive for a thread_limit clause,
      // emitted in the captured-statement context since it references
      // target-captured variables.
6959 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6960 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6961 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6962 const auto *ThreadLimitClause =
6963 Dir->getSingleClause<OMPThreadLimitClause>();
6964 CodeGenFunction::LexicalScope Scope(
6965 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
6966 if (const auto *PreInit =
6967 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6968 for (const auto *I : PreInit->decls()) {
6969 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6970 CGF.EmitVarDecl(cast<VarDecl>(*I));
6971 } else {
6972 CodeGenFunction::AutoVarEmission Emission =
6973 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6974 CGF.EmitAutoVarCleanups(Emission);
6975 }
6976 }
6977 }
6978 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6979 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6980 ThreadLimitVal =
6981 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6982 }
      // For a pure teams (non-distribute) directive, descend one more level
      // to find the directive nested inside the teams region.
6983 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6984 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6985 CS = Dir->getInnermostCapturedStmt();
6986 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6987 CGF.getContext(), CS->getCapturedStmt());
6988 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6989 }
      // A (non-simd) distribute region may itself wrap a parallel region
      // carrying num_threads.
6990 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6991 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6992 CS = Dir->getInnermostCapturedStmt();
6993 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6994 return NumThreads;
6995 }
6996 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6997 return Bld.getInt32(1);
6998 }
6999 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7000 }
7001 case OMPD_target_teams: {
    // thread_limit may appear on the combined directive itself.
7002 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7003 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7004 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7005 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7006 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7007 ThreadLimitVal =
7008 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7009 }
7010 const CapturedStmt *CS = D.getInnermostCapturedStmt();
7011 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7012 return NumThreads;
    // A nested plain 'distribute' is transparent: inspect what it wraps.
7013 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7014 CGF.getContext(), CS->getCapturedStmt());
7015 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7016 if (Dir->getDirectiveKind() == OMPD_distribute) {
7017 CS = Dir->getInnermostCapturedStmt();
7018 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7019 return NumThreads;
7020 }
7021 }
7022 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7023 }
7024 case OMPD_target_teams_distribute:
7025 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7026 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7027 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7028 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7029 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7030 ThreadLimitVal =
7031 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7032 }
7033 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7034 case OMPD_target_parallel:
7035 case OMPD_target_parallel_for:
7036 case OMPD_target_parallel_for_simd:
7037 case OMPD_target_teams_distribute_parallel_for:
7038 case OMPD_target_teams_distribute_parallel_for_simd: {
7039 llvm::Value *CondVal = nullptr;
7040 // Handle if clause. If if clause present, the number of threads is
7041 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7042 if (D.hasClausesOfKind<OMPIfClause>()) {
7043 const OMPIfClause *IfClause = nullptr;
      // Only if-clauses that apply to 'parallel' (explicitly or unmodified)
      // affect the thread count.
7044 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7045 if (C->getNameModifier() == OMPD_unknown ||
7046 C->getNameModifier() == OMPD_parallel) {
7047 IfClause = C;
7048 break;
7049 }
7050 }
7051 if (IfClause) {
7052 const Expr *Cond = IfClause->getCondition();
7053 bool Result;
7054 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          // Statically-false condition: serial execution.
7055 if (!Result)
7056 return Bld.getInt32(1);
7057 } else {
7058 CodeGenFunction::RunCleanupsScope Scope(CGF);
7059 CondVal = CGF.EvaluateExprAsBool(Cond);
7060 }
7061 }
7062 }
7063 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7064 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7065 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7066 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7067 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7068 ThreadLimitVal =
7069 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7070 }
7071 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7072 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7073 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7074 llvm::Value *NumThreads = CGF.EmitScalarExpr(
7075 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7076 NumThreadsVal =
7077 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Effective bound = unsigned min(num_threads, thread_limit) when both
      // are present; otherwise whichever one was given.
7078 ThreadLimitVal = ThreadLimitVal
7079 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7080 ThreadLimitVal),
7081 NumThreadsVal, ThreadLimitVal)
7082 : NumThreadsVal;
7083 }
    // Neither clause present: 0 means the runtime chooses.
7084 if (!ThreadLimitVal)
7085 ThreadLimitVal = Bld.getInt32(0);
7086 if (CondVal)
7087 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7088 return ThreadLimitVal;
7089 }
7090 case OMPD_target_teams_distribute_simd:
7091 case OMPD_target_simd:
    // simd regions execute with a single thread.
7092 return Bld.getInt32(1);
  // All remaining directive kinds are not target-based and cannot reach
  // this function (guarded by the assert above).
7093 case OMPD_parallel:
7094 case OMPD_for:
7095 case OMPD_parallel_for:
7096 case OMPD_parallel_master:
7097 case OMPD_parallel_sections:
7098 case OMPD_for_simd:
7099 case OMPD_parallel_for_simd:
7100 case OMPD_cancel:
7101 case OMPD_cancellation_point:
7102 case OMPD_ordered:
7103 case OMPD_threadprivate:
7104 case OMPD_allocate:
7105 case OMPD_task:
7106 case OMPD_simd:
7107 case OMPD_tile:
7108 case OMPD_unroll:
7109 case OMPD_sections:
7110 case OMPD_section:
7111 case OMPD_single:
7112 case OMPD_master:
7113 case OMPD_critical:
7114 case OMPD_taskyield:
7115 case OMPD_barrier:
7116 case OMPD_taskwait:
7117 case OMPD_taskgroup:
7118 case OMPD_atomic:
7119 case OMPD_flush:
7120 case OMPD_depobj:
7121 case OMPD_scan:
7122 case OMPD_teams:
7123 case OMPD_target_data:
7124 case OMPD_target_exit_data:
7125 case OMPD_target_enter_data:
7126 case OMPD_distribute:
7127 case OMPD_distribute_simd:
7128 case OMPD_distribute_parallel_for:
7129 case OMPD_distribute_parallel_for_simd:
7130 case OMPD_teams_distribute:
7131 case OMPD_teams_distribute_simd:
7132 case OMPD_teams_distribute_parallel_for:
7133 case OMPD_teams_distribute_parallel_for_simd:
7134 case OMPD_target_update:
7135 case OMPD_declare_simd:
7136 case OMPD_declare_variant:
7137 case OMPD_begin_declare_variant:
7138 case OMPD_end_declare_variant:
7139 case OMPD_declare_target:
7140 case OMPD_end_declare_target:
7141 case OMPD_declare_reduction:
7142 case OMPD_declare_mapper:
7143 case OMPD_taskloop:
7144 case OMPD_taskloop_simd:
7145 case OMPD_master_taskloop:
7146 case OMPD_master_taskloop_simd:
7147 case OMPD_parallel_master_taskloop:
7148 case OMPD_parallel_master_taskloop_simd:
7149 case OMPD_requires:
7150 case OMPD_metadirective:
7151 case OMPD_unknown:
7152 break;
7153 default:
7154 break;
7155 }
7156 llvm_unreachable("Unsupported directive kind.")::llvm::llvm_unreachable_internal("Unsupported directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7156)
;
7157}
7158
7159namespace {
7160LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()using ::llvm::BitmaskEnumDetail::operator~; using ::llvm::BitmaskEnumDetail
::operator|; using ::llvm::BitmaskEnumDetail::operator&; using
::llvm::BitmaskEnumDetail::operator^; using ::llvm::BitmaskEnumDetail
::operator|=; using ::llvm::BitmaskEnumDetail::operator&=
; using ::llvm::BitmaskEnumDetail::operator^=
;
7161
7162// Utility to handle information from clauses associated with a given
7163// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7164// It provides a convenient interface to obtain the information and generate
7165// code for that information.
7166class MappableExprsHandler {
7167public:
7168 /// Values for bit flags used to specify the mapping type for
7169 /// offloading.
7170 enum OpenMPOffloadMappingFlags : uint64_t {
7171 /// No flags
7172 OMP_MAP_NONE = 0x0,
7173 /// Allocate memory on the device and move data from host to device.
7174 OMP_MAP_TO = 0x01,
7175 /// Allocate memory on the device and move data from device to host.
7176 OMP_MAP_FROM = 0x02,
7177 /// Always perform the requested mapping action on the element, even
7178 /// if it was already mapped before.
7179 OMP_MAP_ALWAYS = 0x04,
7180 /// Delete the element from the device environment, ignoring the
7181 /// current reference count associated with the element.
7182 OMP_MAP_DELETE = 0x08,
7183 /// The element being mapped is a pointer-pointee pair; both the
7184 /// pointer and the pointee should be mapped.
7185 OMP_MAP_PTR_AND_OBJ = 0x10,
7186 /// This flags signals that the base address of an entry should be
7187 /// passed to the target kernel as an argument.
7188 OMP_MAP_TARGET_PARAM = 0x20,
7189 /// Signal that the runtime library has to return the device pointer
7190 /// in the current position for the data being mapped. Used when we have the
7191 /// use_device_ptr or use_device_addr clause.
7192 OMP_MAP_RETURN_PARAM = 0x40,
7193 /// This flag signals that the reference being passed is a pointer to
7194 /// private data.
7195 OMP_MAP_PRIVATE = 0x80,
7196 /// Pass the element to the device by value.
7197 OMP_MAP_LITERAL = 0x100,
7198 /// Implicit map
7199 OMP_MAP_IMPLICIT = 0x200,
7200 /// Close is a hint to the runtime to allocate memory close to
7201 /// the target device.
7202 OMP_MAP_CLOSE = 0x400,
7203 /// 0x800 is reserved for compatibility with XLC.
7204 /// Produce a runtime error if the data is not already allocated.
7205 OMP_MAP_PRESENT = 0x1000,
7206 // Increment and decrement a separate reference counter so that the data
7207 // cannot be unmapped within the associated region. Thus, this flag is
7208 // intended to be used on 'target' and 'target data' directives because they
7209 // are inherently structured. It is not intended to be used on 'target
7210 // enter data' and 'target exit data' directives because they are inherently
7211 // dynamic.
7212 // This is an OpenMP extension for the sake of OpenACC support.
7213 OMP_MAP_OMPX_HOLD = 0x2000,
7214 /// Signal that the runtime library should use args as an array of
7215 /// descriptor_dim pointers and use args_size as dims. Used when we have
7216 /// non-contiguous list items in target update directive
7217 OMP_MAP_NON_CONTIG = 0x100000000000,
7218 /// The 16 MSBs of the flags indicate whether the entry is member of some
7219 /// struct/class.
7220 OMP_MAP_MEMBER_OF = 0xffff000000000000,
7221 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_MAP_MEMBER_OF,
7222 };
7223
7224 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7225 static unsigned getFlagMemberOffset() {
7226 unsigned Offset = 0;
7227 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7228 Remain = Remain >> 1)
7229 Offset++;
7230 return Offset;
7231 }
7232
7233 /// Class that holds debugging information for a data mapping to be passed to
7234 /// the runtime library.
7235 class MappingExprInfo {
7236 /// The variable declaration used for the data mapping.
7237 const ValueDecl *MapDecl = nullptr;
7238 /// The original expression used in the map clause, or null if there is
7239 /// none.
7240 const Expr *MapExpr = nullptr;
7241
7242 public:
7243 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7244 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7245
7246 const ValueDecl *getMapDecl() const { return MapDecl; }
7247 const Expr *getMapExpr() const { return MapExpr; }
7248 };
7249
7250 /// Class that associates information with a base pointer to be passed to the
7251 /// runtime library.
7252 class BasePointerInfo {
7253 /// The base pointer.
7254 llvm::Value *Ptr = nullptr;
7255 /// The base declaration that refers to this device pointer, or null if
7256 /// there is none.
7257 const ValueDecl *DevPtrDecl = nullptr;
7258
7259 public:
7260 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7261 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7262 llvm::Value *operator*() const { return Ptr; }
7263 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7264 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7265 };
7266
7267 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7268 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7269 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7270 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7271 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7272 using MapDimArrayTy = SmallVector<uint64_t, 4>;
7273 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7274
7275 /// This structure contains combined information generated for mappable
7276 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7277 /// mappers, and non-contiguous information.
7278 struct MapCombinedInfoTy {
7279 struct StructNonContiguousInfo {
7280 bool IsNonContiguous = false;
7281 MapDimArrayTy Dims;
7282 MapNonContiguousArrayTy Offsets;
7283 MapNonContiguousArrayTy Counts;
7284 MapNonContiguousArrayTy Strides;
7285 };
7286 MapExprsArrayTy Exprs;
7287 MapBaseValuesArrayTy BasePointers;
7288 MapValuesArrayTy Pointers;
7289 MapValuesArrayTy Sizes;
7290 MapFlagsArrayTy Types;
7291 MapMappersArrayTy Mappers;
7292 StructNonContiguousInfo NonContigInfo;
7293
7294 /// Append arrays in \a CurInfo.
7295 void append(MapCombinedInfoTy &CurInfo) {
7296 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7297 BasePointers.append(CurInfo.BasePointers.begin(),
7298 CurInfo.BasePointers.end());
7299 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7300 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7301 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7302 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7303 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7304 CurInfo.NonContigInfo.Dims.end());
7305 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7306 CurInfo.NonContigInfo.Offsets.end());
7307 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7308 CurInfo.NonContigInfo.Counts.end());
7309 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7310 CurInfo.NonContigInfo.Strides.end());
7311 }
7312 };
7313
7314 /// Map between a struct and the its lowest & highest elements which have been
7315 /// mapped.
7316 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7317 /// HE(FieldIndex, Pointer)}
7318 struct StructRangeInfoTy {
7319 MapCombinedInfoTy PreliminaryMapData;
7320 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7321 0, Address::invalid()};
7322 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7323 0, Address::invalid()};
7324 Address Base = Address::invalid();
7325 Address LB = Address::invalid();
7326 bool IsArraySection = false;
7327 bool HasCompleteRecord = false;
7328 };
7329
7330private:
7331 /// Kind that defines how a device pointer has to be returned.
7332 struct MapInfo {
7333 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7334 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7335 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7336 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7337 bool ReturnDevicePointer = false;
7338 bool IsImplicit = false;
7339 const ValueDecl *Mapper = nullptr;
7340 const Expr *VarRef = nullptr;
7341 bool ForDeviceAddr = false;
7342
7343 MapInfo() = default;
7344 MapInfo(
7345 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7346 OpenMPMapClauseKind MapType,
7347 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7348 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7349 bool ReturnDevicePointer, bool IsImplicit,
7350 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7351 bool ForDeviceAddr = false)
7352 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7353 MotionModifiers(MotionModifiers),
7354 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7355 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7356 };
7357
7358 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7359 /// member and there is no map information about it, then emission of that
7360 /// entry is deferred until the whole struct has been processed.
7361 struct DeferredDevicePtrEntryTy {
7362 const Expr *IE = nullptr;
7363 const ValueDecl *VD = nullptr;
7364 bool ForDeviceAddr = false;
7365
7366 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7367 bool ForDeviceAddr)
7368 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7369 };
7370
7371 /// The target directive from where the mappable clauses were extracted. It
7372 /// is either a executable directive or a user-defined mapper directive.
7373 llvm::PointerUnion<const OMPExecutableDirective *,
7374 const OMPDeclareMapperDecl *>
7375 CurDir;
7376
7377 /// Function the directive is being generated for.
7378 CodeGenFunction &CGF;
7379
7380 /// Set of all first private variables in the current directive.
7381 /// bool data is set to true if the variable is implicitly marked as
7382 /// firstprivate, false otherwise.
7383 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7384
7385 /// Map between device pointer declarations and their expression components.
7386 /// The key value for declarations in 'this' is null.
7387 llvm::DenseMap<
7388 const ValueDecl *,
7389 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7390 DevPointersMap;
7391
7392 /// Map between device addr declarations and their expression components.
7393 /// The key value for declarations in 'this' is null.
7394 llvm::DenseMap<
7395 const ValueDecl *,
7396 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7397 HasDevAddrsMap;
7398
7399 /// Map between lambda declarations and their map type.
7400 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7401
/// Compute the number of bytes to map for expression \a E, as a runtime
/// llvm::Value of size_t type. Array shaping expressions and array sections
/// get a dynamically computed size; everything else falls back to the
/// static size of the expression's (non-reference) type.
7402 llvm::Value *getExprTypeSize(const Expr *E) const {
7403 QualType ExprTy = E->getType().getCanonicalType();
7404
7405 // Calculate the size for array shaping expression.
7406 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
// Size = sizeof(pointee) * dim0 * dim1 * ..., each dimension emitted and
// converted to size_t before the (no-unsigned-wrap) multiply.
7407 llvm::Value *Size =
7408 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7409 for (const Expr *SE : OAE->getDimensions()) {
7410 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7411 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7412 CGF.getContext().getSizeType(),
7413 SE->getExprLoc());
7414 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7415 }
7416 return Size;
7417 }
7418
7419 // Reference types are ignored for mapping purposes.
7420 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7421 ExprTy = RefTy->getPointeeType().getCanonicalType();
7422
7423 // Given that an array section is considered a built-in type, we need to
7424 // do the calculation based on the length of the section instead of relying
7425 // on CGF.getTypeSize(E->getType()).
7426 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7427 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7428 OAE->getBase()->IgnoreParenImpCasts())
7429 .getCanonicalType();
7430
7431 // If there is no length associated with the expression and lower bound is
7432 // not specified too, that means we are using the whole length of the
7433 // base.
7434 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7435 !OAE->getLowerBound())
7436 return CGF.getTypeSize(BaseTy);
7437
// Element size comes from the pointee type for pointer bases and from the
// element type for array bases; no other base kinds are expected here.
7438 llvm::Value *ElemSize;
7439 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7440 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7441 } else {
7442 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7443 assert(ATy && "Expecting array type if not a pointer type.")(static_cast <bool> (ATy && "Expecting array type if not a pointer type."
) ? void (0) : __assert_fail ("ATy && \"Expecting array type if not a pointer type.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7443, __extension__
__PRETTY_FUNCTION__))
;
7444 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7445 }
7446
7447 // If we don't have a length at this point, that is because we have an
7448 // array section with a single element.
7449 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7450 return ElemSize;
7451
// Explicit length: Size = length * sizeof(element), with the length
// converted to size_t first.
7452 if (const Expr *LenExpr = OAE->getLength()) {
7453 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7454 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7455 CGF.getContext().getSizeType(),
7456 LenExpr->getExprLoc());
7457 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7458 }
// Remaining form: a[lb:] — lower bound but no length.
7459 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&(static_cast <bool> (!OAE->getLength() && OAE
->getColonLocFirst().isValid() && OAE->getLowerBound
() && "expected array_section[lb:].") ? void (0) : __assert_fail
("!OAE->getLength() && OAE->getColonLocFirst().isValid() && OAE->getLowerBound() && \"expected array_section[lb:].\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7460, __extension__
__PRETTY_FUNCTION__))
7460 OAE->getLowerBound() && "expected array_section[lb:].")(static_cast <bool> (!OAE->getLength() && OAE
->getColonLocFirst().isValid() && OAE->getLowerBound
() && "expected array_section[lb:].") ? void (0) : __assert_fail
("!OAE->getLength() && OAE->getColonLocFirst().isValid() && OAE->getLowerBound() && \"expected array_section[lb:].\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7460, __extension__
__PRETTY_FUNCTION__))
;
7461 // Size = sizetype - lb * elemtype;
7462 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7463 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7464 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7465 CGF.getContext().getSizeType(),
7466 OAE->getLowerBound()->getExprLoc());
7467 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
// Clamp the result to zero when the (scaled) lower bound is not strictly
// below the total size, so the subtraction below cannot wrap.
7468 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7469 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7470 LengthVal = CGF.Builder.CreateSelect(
7471 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7472 return LengthVal;
7473 }
7474 return CGF.getTypeSize(ExprTy);
7475 }
7476
7477 /// Return the corresponding bits for a given map clause modifier. Add
7478 /// a flag marking the map as a pointer if requested. Add a flag marking the
7479 /// map as the first one of a series of maps that relate to the same map
7480 /// expression.
7481 OpenMPOffloadMappingFlags getMapTypeBits(
7482 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7483 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7484 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7485 OpenMPOffloadMappingFlags Bits =
7486 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7487 switch (MapType) {
7488 case OMPC_MAP_alloc:
7489 case OMPC_MAP_release:
7490 // alloc and release is the default behavior in the runtime library, i.e.
7491 // if we don't pass any bits alloc/release that is what the runtime is
7492 // going to do. Therefore, we don't need to signal anything for these two
7493 // type modifiers.
7494 break;
7495 case OMPC_MAP_to:
7496 Bits |= OMP_MAP_TO;
7497 break;
7498 case OMPC_MAP_from:
7499 Bits |= OMP_MAP_FROM;
7500 break;
7501 case OMPC_MAP_tofrom:
7502 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7503 break;
7504 case OMPC_MAP_delete:
7505 Bits |= OMP_MAP_DELETE;
7506 break;
7507 case OMPC_MAP_unknown:
7508 llvm_unreachable("Unexpected map type!")::llvm::llvm_unreachable_internal("Unexpected map type!", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 7508)
;
7509 }
7510 if (AddPtrFlag)
7511 Bits |= OMP_MAP_PTR_AND_OBJ;
7512 if (AddIsTargetParamFlag)
7513 Bits |= OMP_MAP_TARGET_PARAM;
7514 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7515 Bits |= OMP_MAP_ALWAYS;
7516 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7517 Bits |= OMP_MAP_CLOSE;
7518 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7519 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7520 Bits |= OMP_MAP_PRESENT;
7521 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7522 Bits |= OMP_MAP_OMPX_HOLD;
7523 if (IsNonContiguous)
7524 Bits |= OMP_MAP_NON_CONTIG;
7525 return Bits;
7526 }
7527
7528 /// Return true if the provided expression is a final array section. A
7529 /// final array section, is one whose length can't be proved to be one.
7530 bool isFinalArraySectionExpression(const Expr *E) const {
7531 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7532
7533 // It is not an array section and therefore not a unity-size one.
7534 if (!OASE)
7535 return false;
7536
7537 // An array section with no colon always refer to a single element.
7538 if (OASE->getColonLocFirst().isInvalid())
7539 return false;
7540
7541 const Expr *Length = OASE->getLength();
7542
7543 // If we don't have a length we have to check if the array has size 1
7544 // for this dimension. Also, we should always expect a length if the
7545 // base type is pointer.
7546 if (!Length) {
7547 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7548 OASE->getBase()->IgnoreParenImpCasts())
7549 .getCanonicalType();
7550 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7551 return ATy->getSize().getSExtValue() != 1;
7552 // If we don't have a constant dimension length, we have to consider
7553 // the current section as having any size, so it is not necessarily
7554 // unitary. If it happen to be unity size, that's user fault.
7555 return true;
7556 }
7557
7558 // Check if the length evaluates to 1.
7559 Expr::EvalResult Result;
7560 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7561 return true; // Can have more that size 1.
7562
7563 llvm::APSInt ConstLength = Result.Val.getInt();
7564 return ConstLength.getSExtValue() != 1;
7565 }
7566
7567 /// Generate the base pointers, section pointers, sizes, map type bits, and
7568 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7569 /// map type, map or motion modifiers, and expression components.
7570 /// \a IsFirstComponent should be set to true if the provided set of
7571 /// components is the first associated with a capture.
7572 void generateInfoForComponentList(
7573 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7574 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7575 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7576 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7577 bool IsFirstComponentList, bool IsImplicit,
7578 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7579 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7580 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7581 OverlappedElements = llvm::None) const {
7582 // The following summarizes what has to be generated for each map and the
7583 // types below. The generated information is expressed in this order:
7584 // base pointer, section pointer, size, flags
7585 // (to add to the ones that come from the map type and modifier).
7586 //
7587 // double d;
7588 // int i[100];
7589 // float *p;
7590 //
7591 // struct S1 {
7592 // int i;
7593 // float f[50];
7594 // }
7595 // struct S2 {
7596 // int i;
7597 // float f[50];
7598 // S1 s;
7599 // double *p;
7600 // struct S2 *ps;
7601 // int &ref;
7602 // }
7603 // S2 s;
7604 // S2 *ps;
7605 //
7606 // map(d)
7607 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7608 //
7609 // map(i)
7610 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7611 //
7612 // map(i[1:23])
7613 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7614 //
7615 // map(p)
7616 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7617 //
7618 // map(p[1:24])
7619 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7620 // in unified shared memory mode or for local pointers
7621 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7622 //
7623 // map(s)
7624 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7625 //
7626 // map(s.i)
7627 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7628 //
7629 // map(s.s.f)
7630 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7631 //
7632 // map(s.p)
7633 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7634 //
7635 // map(to: s.p[:22])
7636 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7637 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7638 // &(s.p), &(s.p[0]), 22*sizeof(double),
7639 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7640 // (*) alloc space for struct members, only this is a target parameter
7641 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7642 // optimizes this entry out, same in the examples below)
7643 // (***) map the pointee (map: to)
7644 //
7645 // map(to: s.ref)
7646 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7647 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7648 // (*) alloc space for struct members, only this is a target parameter
7649 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7650 // optimizes this entry out, same in the examples below)
7651 // (***) map the pointee (map: to)
7652 //
7653 // map(s.ps)
7654 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7655 //
7656 // map(from: s.ps->s.i)
7657 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7658 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7659 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7660 //
7661 // map(to: s.ps->ps)
7662 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7663 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7664 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7665 //
7666 // map(s.ps->ps->ps)
7667 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7668 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7669 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7670 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7671 //
7672 // map(to: s.ps->ps->s.f[:22])
7673 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7674 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7675 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7676 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7677 //
7678 // map(ps)
7679 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7680 //
7681 // map(ps->i)
7682 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7683 //
7684 // map(ps->s.f)
7685 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7686 //
7687 // map(from: ps->p)
7688 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7689 //
7690 // map(to: ps->p[:22])
7691 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7692 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7693 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7694 //
7695 // map(ps->ps)
7696 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7697 //
7698 // map(from: ps->ps->s.i)
7699 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7700 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7701 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7702 //
7703 // map(from: ps->ps->ps)
7704 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7705 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7706 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7707 //
7708 // map(ps->ps->ps->ps)
7709 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7710 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7711 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7712 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7713 //
7714 // map(to: ps->ps->ps->s.f[:22])
7715 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7716 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7717 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7718 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7719 //
7720 // map(to: s.f[:22]) map(from: s.p[:33])
7721 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7722 // sizeof(double*) (**), TARGET_PARAM
7723 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7724 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7725 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7726 // (*) allocate contiguous space needed to fit all mapped members even if
7727 // we allocate space for members not mapped (in this example,
7728 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7729 // them as well because they fall between &s.f[0] and &s.p)
7730 //
7731 // map(from: s.f[:22]) map(to: ps->p[:33])
7732 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7733 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7734 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7735 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7736 // (*) the struct this entry pertains to is the 2nd element in the list of
7737 // arguments, hence MEMBER_OF(2)
7738 //
7739 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7740 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7741 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7742 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7743 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7744 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7745 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7746 // (*) the struct this entry pertains to is the 4th element in the list
7747 // of arguments, hence MEMBER_OF(4)
7748
7749 // Track if the map information being generated is the first for a capture.
7750 bool IsCaptureFirstInfo = IsFirstComponentList;
7751 // When the variable is on a declare target link or in a to clause with
7752 // unified memory, a reference is needed to hold the host/device address
7753 // of the variable.
7754 bool RequiresReference = false;
7755
7756 // Scan the components from the base to the complete expression.
7757 auto CI = Components.rbegin();
7758 auto CE = Components.rend();
7759 auto I = CI;
7760
7761 // Track if the map information being generated is the first for a list of
7762 // components.
7763 bool IsExpressionFirstInfo = true;
7764 bool FirstPointerInComplexData = false;
7765 Address BP = Address::invalid();
7766 const Expr *AssocExpr = I->getAssociatedExpression();
7767 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7768 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7769 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
7770
7771 if (isa<MemberExpr>(AssocExpr)) {
7772 // The base is the 'this' pointer. The content of the pointer is going
7773 // to be the base of the field being mapped.
7774 BP = CGF.LoadCXXThisAddress();
7775 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7776 (OASE &&
7777 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7778 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7779 } else if (OAShE &&
7780 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7781 BP = Address(
7782 CGF.EmitScalarExpr(OAShE->getBase()),
7783 CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
7784 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7785 } else {
7786 // The base is the reference to the variable.
7787 // BP = &Var.
7788 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7789 if (const auto *VD =
7790 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7791 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7792 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7793 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7794 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7795 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7796 RequiresReference = true;
7797 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7798 }
7799 }
7800 }
7801
7802 // If the variable is a pointer and is being dereferenced (i.e. is not
7803 // the last component), the base has to be the pointer itself, not its
7804 // reference. References are ignored for mapping purposes.
7805 QualType Ty =
7806 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7807 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7808 // No need to generate individual map information for the pointer, it
7809 // can be associated with the combined storage if shared memory mode is
7810 // active or the base declaration is not global variable.
7811 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7812 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7813 !VD || VD->hasLocalStorage())
7814 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7815 else
7816 FirstPointerInComplexData = true;
7817 ++I;
7818 }
7819 }
7820
7821 // Track whether a component of the list should be marked as MEMBER_OF some
7822 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7823 // in a component list should be marked as MEMBER_OF, all subsequent entries
7824 // do not belong to the base struct. E.g.
7825 // struct S2 s;
7826 // s.ps->ps->ps->f[:]
7827 // (1) (2) (3) (4)
7828 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7829 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7830 // is the pointee of ps(2) which is not member of struct s, so it should not
7831 // be marked as such (it is still PTR_AND_OBJ).
7832 // The variable is initialized to false so that PTR_AND_OBJ entries which
7833 // are not struct members are not considered (e.g. array of pointers to
7834 // data).
7835 bool ShouldBeMemberOf = false;
7836
7837 // Variable keeping track of whether or not we have encountered a component
7838 // in the component list which is a member expression. Useful when we have a
7839 // pointer or a final array section, in which case it is the previous
7840 // component in the list which tells us whether we have a member expression.
7841 // E.g. X.f[:]
7842 // While processing the final array section "[:]" it is "f" which tells us
7843 // whether we are dealing with a member of a declared struct.
7844 const MemberExpr *EncounteredME = nullptr;
7845
7846 // Track for the total number of dimension. Start from one for the dummy
7847 // dimension.
7848 uint64_t DimSize = 1;
7849
7850 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7851 bool IsPrevMemberReference = false;
7852
7853 for (; I != CE; ++I) {
7854 // If the current component is member of a struct (parent struct) mark it.
7855 if (!EncounteredME) {
7856 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7857 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7858 // as MEMBER_OF the parent struct.
7859 if (EncounteredME) {
7860 ShouldBeMemberOf = true;
7861 // Do not emit as complex pointer if this is actually not array-like
7862 // expression.
7863 if (FirstPointerInComplexData) {
7864 QualType Ty = std::prev(I)
7865 ->getAssociatedDeclaration()
7866 ->getType()
7867 .getNonReferenceType();
7868 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7869 FirstPointerInComplexData = false;
7870 }
7871 }
7872 }
7873
7874 auto Next = std::next(I);
7875
7876 // We need to generate the addresses and sizes if this is the last
7877 // component, if the component is a pointer or if it is an array section
7878 // whose length can't be proved to be one. If this is a pointer, it
7879 // becomes the base address for the following components.
7880
7881 // A final array section, is one whose length can't be proved to be one.
7882 // If the map item is non-contiguous then we don't treat any array section
7883 // as final array section.
7884 bool IsFinalArraySection =
7885 !IsNonContiguous &&
7886 isFinalArraySectionExpression(I->getAssociatedExpression());
7887
7888 // If we have a declaration for the mapping use that, otherwise use
7889 // the base declaration of the map clause.
7890 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7891 ? I->getAssociatedDeclaration()
7892 : BaseDecl;
7893 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7894 : MapExpr;
7895
7896 // Get information on whether the element is a pointer. Have to do a
7897 // special treatment for array sections given that they are built-in
7898 // types.
7899 const auto *OASE =
7900 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7901 const auto *OAShE =
7902 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7903 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
7904 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
7905 bool IsPointer =
7906 OAShE ||
7907 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
7908 .getCanonicalType()
7909 ->isAnyPointerType()) ||
7910 I->getAssociatedExpression()->getType()->isAnyPointerType();
7911 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
7912 MapDecl &&
7913 MapDecl->getType()->isLValueReferenceType();
7914 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
7915
7916 if (OASE)
7917 ++DimSize;
7918
7919 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7920 IsFinalArraySection) {
7921 // If this is not the last component, we expect the pointer to be
7922 // associated with an array expression or member expression.
7923 assert((Next == CE ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
7924 isa<MemberExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
7925 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
7926 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
7927 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
7928 isa<UnaryOperator>(Next->getAssociatedExpression()) ||(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
7929 isa<BinaryOperator>(Next->getAssociatedExpression())) &&(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
7930 "Unexpected expression")(static_cast <bool> ((Next == CE || isa<MemberExpr>
(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr
>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr
>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr
>(Next->getAssociatedExpression()) || isa<UnaryOperator
>(Next->getAssociatedExpression()) || isa<BinaryOperator
>(Next->getAssociatedExpression())) && "Unexpected expression"
) ? void (0) : __assert_fail ("(Next == CE || isa<MemberExpr>(Next->getAssociatedExpression()) || isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) || isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) || isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) || isa<UnaryOperator>(Next->getAssociatedExpression()) || isa<BinaryOperator>(Next->getAssociatedExpression())) && \"Unexpected expression\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7930, __extension__
__PRETTY_FUNCTION__))
;
7931
7932 Address LB = Address::invalid();
7933 Address LowestElem = Address::invalid();
7934 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7935 const MemberExpr *E) {
7936 const Expr *BaseExpr = E->getBase();
7937 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7938 // scalar.
7939 LValue BaseLV;
7940 if (E->isArrow()) {
7941 LValueBaseInfo BaseInfo;
7942 TBAAAccessInfo TBAAInfo;
7943 Address Addr =
7944 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
7945 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7946 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
7947 } else {
7948 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
7949 }
7950 return BaseLV;
7951 };
7952 if (OAShE) {
7953 LowestElem = LB =
7954 Address(CGF.EmitScalarExpr(OAShE->getBase()),
7955 CGF.ConvertTypeForMem(
7956 OAShE->getBase()->getType()->getPointeeType()),
7957 CGF.getContext().getTypeAlignInChars(
7958 OAShE->getBase()->getType()));
7959 } else if (IsMemberReference) {
7960 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
7961 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7962 LowestElem = CGF.EmitLValueForFieldInitialization(
7963 BaseLVal, cast<FieldDecl>(MapDecl))
7964 .getAddress(CGF);
7965 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
7966 .getAddress(CGF);
7967 } else {
7968 LowestElem = LB =
7969 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
7970 .getAddress(CGF);
7971 }
7972
7973 // If this component is a pointer inside the base struct then we don't
7974 // need to create any entry for it - it will be combined with the object
7975 // it is pointing to into a single PTR_AND_OBJ entry.
7976 bool IsMemberPointerOrAddr =
7977 EncounteredME &&
7978 (((IsPointer || ForDeviceAddr) &&
7979 I->getAssociatedExpression() == EncounteredME) ||
7980 (IsPrevMemberReference && !IsPointer) ||
7981 (IsMemberReference && Next != CE &&
7982 !Next->getAssociatedExpression()->getType()->isPointerType()));
7983 if (!OverlappedElements.empty() && Next == CE) {
7984 // Handle base element with the info for overlapped elements.
7985 assert(!PartialStruct.Base.isValid() && "The base element is set.")(static_cast <bool> (!PartialStruct.Base.isValid() &&
"The base element is set.") ? void (0) : __assert_fail ("!PartialStruct.Base.isValid() && \"The base element is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7985, __extension__
__PRETTY_FUNCTION__))
;
7986 assert(!IsPointer &&(static_cast <bool> (!IsPointer && "Unexpected base element with the pointer type."
) ? void (0) : __assert_fail ("!IsPointer && \"Unexpected base element with the pointer type.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7987, __extension__
__PRETTY_FUNCTION__))
7987 "Unexpected base element with the pointer type.")(static_cast <bool> (!IsPointer && "Unexpected base element with the pointer type."
) ? void (0) : __assert_fail ("!IsPointer && \"Unexpected base element with the pointer type.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7987, __extension__
__PRETTY_FUNCTION__))
;
7988 // Mark the whole struct as the struct that requires allocation on the
7989 // device.
7990 PartialStruct.LowestElem = {0, LowestElem};
7991 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7992 I->getAssociatedExpression()->getType());
7993 Address HB = CGF.Builder.CreateConstGEP(
7994 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7995 LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
7996 TypeSize.getQuantity() - 1);
7997 PartialStruct.HighestElem = {
7998 std::numeric_limits<decltype(
7999 PartialStruct.HighestElem.first)>::max(),
8000 HB};
8001 PartialStruct.Base = BP;
8002 PartialStruct.LB = LB;
8003 assert((static_cast <bool> (PartialStruct.PreliminaryMapData.BasePointers
.empty() && "Overlapped elements must be used only once for the variable."
) ? void (0) : __assert_fail ("PartialStruct.PreliminaryMapData.BasePointers.empty() && \"Overlapped elements must be used only once for the variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8005, __extension__
__PRETTY_FUNCTION__))
8004 PartialStruct.PreliminaryMapData.BasePointers.empty() &&(static_cast <bool> (PartialStruct.PreliminaryMapData.BasePointers
.empty() && "Overlapped elements must be used only once for the variable."
) ? void (0) : __assert_fail ("PartialStruct.PreliminaryMapData.BasePointers.empty() && \"Overlapped elements must be used only once for the variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8005, __extension__
__PRETTY_FUNCTION__))
8005 "Overlapped elements must be used only once for the variable.")(static_cast <bool> (PartialStruct.PreliminaryMapData.BasePointers
.empty() && "Overlapped elements must be used only once for the variable."
) ? void (0) : __assert_fail ("PartialStruct.PreliminaryMapData.BasePointers.empty() && \"Overlapped elements must be used only once for the variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8005, __extension__
__PRETTY_FUNCTION__))
;
8006 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8007 // Emit data for non-overlapped data.
8008 OpenMPOffloadMappingFlags Flags =
8009 OMP_MAP_MEMBER_OF |
8010 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8011 /*AddPtrFlag=*/false,
8012 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8013 llvm::Value *Size = nullptr;
8014 // Do bitcopy of all non-overlapped structure elements.
8015 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8016 Component : OverlappedElements) {
8017 Address ComponentLB = Address::invalid();
8018 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8019 Component) {
8020 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8021 const auto *FD = dyn_cast<FieldDecl>(VD);
8022 if (FD && FD->getType()->isLValueReferenceType()) {
8023 const auto *ME =
8024 cast<MemberExpr>(MC.getAssociatedExpression());
8025 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8026 ComponentLB =
8027 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8028 .getAddress(CGF);
8029 } else {
8030 ComponentLB =
8031 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8032 .getAddress(CGF);
8033 }
8034 Size = CGF.Builder.CreatePtrDiff(
8035 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8036 CGF.EmitCastToVoidPtr(LB.getPointer()));
8037 break;
8038 }
8039 }
8040 assert(Size && "Failed to determine structure size")(static_cast <bool> (Size && "Failed to determine structure size"
) ? void (0) : __assert_fail ("Size && \"Failed to determine structure size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8040, __extension__
__PRETTY_FUNCTION__))
;
8041 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8042 CombinedInfo.BasePointers.push_back(BP.getPointer());
8043 CombinedInfo.Pointers.push_back(LB.getPointer());
8044 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8045 Size, CGF.Int64Ty, /*isSigned=*/true));
8046 CombinedInfo.Types.push_back(Flags);
8047 CombinedInfo.Mappers.push_back(nullptr);
8048 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8049 : 1);
8050 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8051 }
8052 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8053 CombinedInfo.BasePointers.push_back(BP.getPointer());
8054 CombinedInfo.Pointers.push_back(LB.getPointer());
8055 Size = CGF.Builder.CreatePtrDiff(
8056 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8057 CGF.EmitCastToVoidPtr(LB.getPointer()));
8058 CombinedInfo.Sizes.push_back(
8059 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8060 CombinedInfo.Types.push_back(Flags);
8061 CombinedInfo.Mappers.push_back(nullptr);
8062 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8063 : 1);
8064 break;
8065 }
8066 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8067 if (!IsMemberPointerOrAddr ||
8068 (Next == CE && MapType != OMPC_MAP_unknown)) {
8069 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8070 CombinedInfo.BasePointers.push_back(BP.getPointer());
8071 CombinedInfo.Pointers.push_back(LB.getPointer());
8072 CombinedInfo.Sizes.push_back(
8073 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8074 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8075 : 1);
8076
8077 // If Mapper is valid, the last component inherits the mapper.
8078 bool HasMapper = Mapper && Next == CE;
8079 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8080
8081 // We need to add a pointer flag for each map that comes from the
8082 // same expression except for the first one. We also need to signal
8083 // this map is the first one that relates with the current capture
8084 // (there is a set of entries for each capture).
8085 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8086 MapType, MapModifiers, MotionModifiers, IsImplicit,
8087 !IsExpressionFirstInfo || RequiresReference ||
8088 FirstPointerInComplexData || IsMemberReference,
8089 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8090
8091 if (!IsExpressionFirstInfo || IsMemberReference) {
8092 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8093 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8094 if (IsPointer || (IsMemberReference && Next != CE))
8095 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8096 OMP_MAP_DELETE | OMP_MAP_CLOSE);
8097
8098 if (ShouldBeMemberOf) {
8099 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8100 // should be later updated with the correct value of MEMBER_OF.
8101 Flags |= OMP_MAP_MEMBER_OF;
8102 // From now on, all subsequent PTR_AND_OBJ entries should not be
8103 // marked as MEMBER_OF.
8104 ShouldBeMemberOf = false;
8105 }
8106 }
8107
8108 CombinedInfo.Types.push_back(Flags);
8109 }
8110
8111 // If we have encountered a member expression so far, keep track of the
8112 // mapped member. If the parent is "*this", then the value declaration
8113 // is nullptr.
8114 if (EncounteredME) {
8115 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8116 unsigned FieldIndex = FD->getFieldIndex();
8117
8118 // Update info about the lowest and highest elements for this struct
8119 if (!PartialStruct.Base.isValid()) {
8120 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8121 if (IsFinalArraySection) {
8122 Address HB =
8123 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8124 .getAddress(CGF);
8125 PartialStruct.HighestElem = {FieldIndex, HB};
8126 } else {
8127 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8128 }
8129 PartialStruct.Base = BP;
8130 PartialStruct.LB = BP;
8131 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8132 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8133 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8134 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8135 }
8136 }
8137
8138 // Need to emit combined struct for array sections.
8139 if (IsFinalArraySection || IsNonContiguous)
8140 PartialStruct.IsArraySection = true;
8141
8142 // If we have a final array section, we are done with this expression.
8143 if (IsFinalArraySection)
8144 break;
8145
8146 // The pointer becomes the base for the next element.
8147 if (Next != CE)
8148 BP = IsMemberReference ? LowestElem : LB;
8149
8150 IsExpressionFirstInfo = false;
8151 IsCaptureFirstInfo = false;
8152 FirstPointerInComplexData = false;
8153 IsPrevMemberReference = IsMemberReference;
8154 } else if (FirstPointerInComplexData) {
8155 QualType Ty = Components.rbegin()
8156 ->getAssociatedDeclaration()
8157 ->getType()
8158 .getNonReferenceType();
8159 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8160 FirstPointerInComplexData = false;
8161 }
8162 }
8163 // If ran into the whole component - allocate the space for the whole
8164 // record.
8165 if (!EncounteredME)
8166 PartialStruct.HasCompleteRecord = true;
8167
8168 if (!IsNonContiguous)
8169 return;
8170
8171 const ASTContext &Context = CGF.getContext();
8172
8173 // For supporting stride in array section, we need to initialize the first
8174 // dimension size as 1, first offset as 0, and first count as 1
8175 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8176 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8177 MapValuesArrayTy CurStrides;
8178 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8179 uint64_t ElementTypeSize;
8180
8181 // Collect Size information for each dimension and get the element size as
8182 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8183 // should be [10, 10] and the first stride is 4 btyes.
8184 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8185 Components) {
8186 const Expr *AssocExpr = Component.getAssociatedExpression();
8187 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8188
8189 if (!OASE)
8190 continue;
8191
8192 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8193 auto *CAT = Context.getAsConstantArrayType(Ty);
8194 auto *VAT = Context.getAsVariableArrayType(Ty);
8195
8196 // We need all the dimension size except for the last dimension.
8197 assert((VAT || CAT || &Component == &*Components.begin()) &&(static_cast <bool> ((VAT || CAT || &Component == &
*Components.begin()) && "Should be either ConstantArray or VariableArray if not the "
"first Component") ? void (0) : __assert_fail ("(VAT || CAT || &Component == &*Components.begin()) && \"Should be either ConstantArray or VariableArray if not the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8199, __extension__
__PRETTY_FUNCTION__))
8198 "Should be either ConstantArray or VariableArray if not the "(static_cast <bool> ((VAT || CAT || &Component == &
*Components.begin()) && "Should be either ConstantArray or VariableArray if not the "
"first Component") ? void (0) : __assert_fail ("(VAT || CAT || &Component == &*Components.begin()) && \"Should be either ConstantArray or VariableArray if not the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8199, __extension__
__PRETTY_FUNCTION__))
8199 "first Component")(static_cast <bool> ((VAT || CAT || &Component == &
*Components.begin()) && "Should be either ConstantArray or VariableArray if not the "
"first Component") ? void (0) : __assert_fail ("(VAT || CAT || &Component == &*Components.begin()) && \"Should be either ConstantArray or VariableArray if not the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8199, __extension__
__PRETTY_FUNCTION__))
;
8200
8201 // Get element size if CurStrides is empty.
8202 if (CurStrides.empty()) {
8203 const Type *ElementType = nullptr;
8204 if (CAT)
8205 ElementType = CAT->getElementType().getTypePtr();
8206 else if (VAT)
8207 ElementType = VAT->getElementType().getTypePtr();
8208 else
8209 assert(&Component == &*Components.begin() &&(static_cast <bool> (&Component == &*Components
.begin() && "Only expect pointer (non CAT or VAT) when this is the "
"first Component") ? void (0) : __assert_fail ("&Component == &*Components.begin() && \"Only expect pointer (non CAT or VAT) when this is the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8211, __extension__
__PRETTY_FUNCTION__))
8210 "Only expect pointer (non CAT or VAT) when this is the "(static_cast <bool> (&Component == &*Components
.begin() && "Only expect pointer (non CAT or VAT) when this is the "
"first Component") ? void (0) : __assert_fail ("&Component == &*Components.begin() && \"Only expect pointer (non CAT or VAT) when this is the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8211, __extension__
__PRETTY_FUNCTION__))
8211 "first Component")(static_cast <bool> (&Component == &*Components
.begin() && "Only expect pointer (non CAT or VAT) when this is the "
"first Component") ? void (0) : __assert_fail ("&Component == &*Components.begin() && \"Only expect pointer (non CAT or VAT) when this is the \" \"first Component\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8211, __extension__
__PRETTY_FUNCTION__))
;
8212 // If ElementType is null, then it means the base is a pointer
8213 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8214 // for next iteration.
8215 if (ElementType) {
8216 // For the case that having pointer as base, we need to remove one
8217 // level of indirection.
8218 if (&Component != &*Components.begin())
8219 ElementType = ElementType->getPointeeOrArrayElementType();
8220 ElementTypeSize =
8221 Context.getTypeSizeInChars(ElementType).getQuantity();
8222 CurStrides.push_back(
8223 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8224 }
8225 }
8226 // Get dimension value except for the last dimension since we don't need
8227 // it.
8228 if (DimSizes.size() < Components.size() - 1) {
8229 if (CAT)
8230 DimSizes.push_back(llvm::ConstantInt::get(
8231 CGF.Int64Ty, CAT->getSize().getZExtValue()));
8232 else if (VAT)
8233 DimSizes.push_back(CGF.Builder.CreateIntCast(
8234 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8235 /*IsSigned=*/false));
8236 }
8237 }
8238
8239 // Skip the dummy dimension since we have already have its information.
8240 auto *DI = DimSizes.begin() + 1;
8241 // Product of dimension.
8242 llvm::Value *DimProd =
8243 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
8244
8245 // Collect info for non-contiguous. Notice that offset, count, and stride
8246 // are only meaningful for array-section, so we insert a null for anything
8247 // other than array-section.
8248 // Also, the size of offset, count, and stride are not the same as
8249 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
8250 // count, and stride are the same as the number of non-contiguous
8251 // declaration in target update to/from clause.
8252 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8253 Components) {
8254 const Expr *AssocExpr = Component.getAssociatedExpression();
8255
8256 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8257 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8258 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8259 /*isSigned=*/false);
8260 CurOffsets.push_back(Offset);
8261 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8262 CurStrides.push_back(CurStrides.back());
8263 continue;
8264 }
8265
8266 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8267
8268 if (!OASE)
8269 continue;
8270
8271 // Offset
8272 const Expr *OffsetExpr = OASE->getLowerBound();
8273 llvm::Value *Offset = nullptr;
8274 if (!OffsetExpr) {
8275 // If offset is absent, then we just set it to zero.
8276 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8277 } else {
8278 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8279 CGF.Int64Ty,
8280 /*isSigned=*/false);
8281 }
8282 CurOffsets.push_back(Offset);
8283
8284 // Count
8285 const Expr *CountExpr = OASE->getLength();
8286 llvm::Value *Count = nullptr;
8287 if (!CountExpr) {
8288 // In Clang, once a high dimension is an array section, we construct all
8289 // the lower dimension as array section, however, for case like
8290 // arr[0:2][2], Clang construct the inner dimension as an array section
8291 // but it actually is not in an array section form according to spec.
8292 if (!OASE->getColonLocFirst().isValid() &&
8293 !OASE->getColonLocSecond().isValid()) {
8294 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8295 } else {
8296 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8297 // When the length is absent it defaults to ⌈(size −
8298 // lower-bound)/stride⌉, where size is the size of the array
8299 // dimension.
8300 const Expr *StrideExpr = OASE->getStride();
8301 llvm::Value *Stride =
8302 StrideExpr
8303 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8304 CGF.Int64Ty, /*isSigned=*/false)
8305 : nullptr;
8306 if (Stride)
8307 Count = CGF.Builder.CreateUDiv(
8308 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8309 else
8310 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8311 }
8312 } else {
8313 Count = CGF.EmitScalarExpr(CountExpr);
8314 }
8315 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8316 CurCounts.push_back(Count);
8317
8318 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8319 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8320 // Offset Count Stride
8321 // D0 0 1 4 (int) <- dummy dimension
8322 // D1 0 2 8 (2 * (1) * 4)
8323 // D2 1 2 20 (1 * (1 * 5) * 4)
8324 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
8325 const Expr *StrideExpr = OASE->getStride();
8326 llvm::Value *Stride =
8327 StrideExpr
8328 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8329 CGF.Int64Ty, /*isSigned=*/false)
8330 : nullptr;
8331 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8332 if (Stride)
8333 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8334 else
8335 CurStrides.push_back(DimProd);
8336 if (DI != DimSizes.end())
8337 ++DI;
8338 }
8339
8340 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8341 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8342 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8343 }
8344
8345 /// Return the adjusted map modifiers if the declaration a capture refers to
8346 /// appears in a first-private clause. This is expected to be used only with
8347 /// directives that start with 'target'.
8348 MappableExprsHandler::OpenMPOffloadMappingFlags
8349 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8350 assert(Cap.capturesVariable() && "Expected capture by reference only!")(static_cast <bool> (Cap.capturesVariable() && "Expected capture by reference only!"
) ? void (0) : __assert_fail ("Cap.capturesVariable() && \"Expected capture by reference only!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8350, __extension__
__PRETTY_FUNCTION__))
;
8351
8352 // A first private variable captured by reference will use only the
8353 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8354 // declaration is known as first-private in this handler.
8355 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8356 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8357 return MappableExprsHandler::OMP_MAP_TO |
8358 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8359 return MappableExprsHandler::OMP_MAP_PRIVATE |
8360 MappableExprsHandler::OMP_MAP_TO;
8361 }
8362 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8363 if (I != LambdasMap.end())
8364 // for map(to: lambda): using user specified map type.
8365 return getMapTypeBits(
8366 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8367 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8368 /*AddPtrFlag=*/false,
8369 /*AddIsTargetParamFlag=*/false,
8370 /*isNonContiguous=*/false);
8371 return MappableExprsHandler::OMP_MAP_TO |
8372 MappableExprsHandler::OMP_MAP_FROM;
8373 }
8374
8375 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8376 // Rotate by getFlagMemberOffset() bits.
8377 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8378 << getFlagMemberOffset());
8379 }
8380
8381 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8382 OpenMPOffloadMappingFlags MemberOfFlag) {
8383 // If the entry is PTR_AND_OBJ but has not been marked with the special
8384 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8385 // marked as MEMBER_OF.
8386 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8387 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8388 return;
8389
8390 // Reset the placeholder value to prepare the flag for the assignment of the
8391 // proper MEMBER_OF value.
8392 Flags &= ~OMP_MAP_MEMBER_OF;
8393 Flags |= MemberOfFlag;
8394 }
8395
8396 void getPlainLayout(const CXXRecordDecl *RD,
8397 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8398 bool AsBase) const {
8399 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8400
8401 llvm::StructType *St =
8402 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8403
8404 unsigned NumElements = St->getNumElements();
8405 llvm::SmallVector<
8406 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8407 RecordLayout(NumElements);
8408
8409 // Fill bases.
8410 for (const auto &I : RD->bases()) {
8411 if (I.isVirtual())
8412 continue;
8413 const auto *Base = I.getType()->getAsCXXRecordDecl();
8414 // Ignore empty bases.
8415 if (Base->isEmpty() || CGF.getContext()
8416 .getASTRecordLayout(Base)
8417 .getNonVirtualSize()
8418 .isZero())
8419 continue;
8420
8421 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8422 RecordLayout[FieldIndex] = Base;
8423 }
8424 // Fill in virtual bases.
8425 for (const auto &I : RD->vbases()) {
8426 const auto *Base = I.getType()->getAsCXXRecordDecl();
8427 // Ignore empty bases.
8428 if (Base->isEmpty())
8429 continue;
8430 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8431 if (RecordLayout[FieldIndex])
8432 continue;
8433 RecordLayout[FieldIndex] = Base;
8434 }
8435 // Fill in all the fields.
8436 assert(!RD->isUnion() && "Unexpected union.")(static_cast <bool> (!RD->isUnion() && "Unexpected union."
) ? void (0) : __assert_fail ("!RD->isUnion() && \"Unexpected union.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8436, __extension__
__PRETTY_FUNCTION__))
;
8437 for (const auto *Field : RD->fields()) {
8438 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8439 // will fill in later.)
8440 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8441 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8442 RecordLayout[FieldIndex] = Field;
8443 }
8444 }
8445 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8446 &Data : RecordLayout) {
8447 if (Data.isNull())
8448 continue;
8449 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8450 getPlainLayout(Base, Layout, /*AsBase=*/true);
8451 else
8452 Layout.push_back(Data.get<const FieldDecl *>());
8453 }
8454 }
8455
8456 /// Generate all the base pointers, section pointers, sizes, map types, and
8457 /// mappers for the extracted mappable expressions (all included in \a
8458 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8459 /// pair of the relevant declaration and index where it occurs is appended to
8460 /// the device pointers info array.
8461 void generateAllInfoForClauses(
8462 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8463 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8464 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8465 // We have to process the component lists that relate with the same
8466 // declaration in a single chunk so that we can generate the map flags
8467 // correctly. Therefore, we organize all lists in a map.
8468 enum MapKind { Present, Allocs, Other, Total };
8469 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8470 SmallVector<SmallVector<MapInfo, 8>, 4>>
8471 Info;
8472
8473 // Helper function to fill the information map for the different supported
8474 // clauses.
8475 auto &&InfoGen =
8476 [&Info, &SkipVarSet](
8477 const ValueDecl *D, MapKind Kind,
8478 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8479 OpenMPMapClauseKind MapType,
8480 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8481 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8482 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8483 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8484 if (SkipVarSet.contains(D))
8485 return;
8486 auto It = Info.find(D);
8487 if (It == Info.end())
8488 It = Info
8489 .insert(std::make_pair(
8490 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8491 .first;
8492 It->second[Kind].emplace_back(
8493 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8494 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8495 };
8496
8497 for (const auto *Cl : Clauses) {
8498 const auto *C = dyn_cast<OMPMapClause>(Cl);
8499 if (!C)
8500 continue;
8501 MapKind Kind = Other;
8502 if (llvm::is_contained(C->getMapTypeModifiers(),
8503 OMPC_MAP_MODIFIER_present))
8504 Kind = Present;
8505 else if (C->getMapType() == OMPC_MAP_alloc)
8506 Kind = Allocs;
8507 const auto *EI = C->getVarRefs().begin();
8508 for (const auto L : C->component_lists()) {
8509 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8510 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8511 C->getMapTypeModifiers(), llvm::None,
8512 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8513 E);
8514 ++EI;
8515 }
8516 }
8517 for (const auto *Cl : Clauses) {
8518 const auto *C = dyn_cast<OMPToClause>(Cl);
8519 if (!C)
8520 continue;
8521 MapKind Kind = Other;
8522 if (llvm::is_contained(C->getMotionModifiers(),
8523 OMPC_MOTION_MODIFIER_present))
8524 Kind = Present;
8525 const auto *EI = C->getVarRefs().begin();
8526 for (const auto L : C->component_lists()) {
8527 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8528 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8529 C->isImplicit(), std::get<2>(L), *EI);
8530 ++EI;
8531 }
8532 }
8533 for (const auto *Cl : Clauses) {
8534 const auto *C = dyn_cast<OMPFromClause>(Cl);
8535 if (!C)
8536 continue;
8537 MapKind Kind = Other;
8538 if (llvm::is_contained(C->getMotionModifiers(),
8539 OMPC_MOTION_MODIFIER_present))
8540 Kind = Present;
8541 const auto *EI = C->getVarRefs().begin();
8542 for (const auto L : C->component_lists()) {
8543 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8544 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8545 C->isImplicit(), std::get<2>(L), *EI);
8546 ++EI;
8547 }
8548 }
8549
8550 // Look at the use_device_ptr and use_device_addr clauses information and
8551 // mark the existing map entries as such. If there is no map information for
8552 // an entry in the use_device_ptr and use_device_addr list, we create one
8553 // with map type 'alloc' and zero size section. It is the user fault if that
8554 // was not mapped before. If there is no map information and the pointer is
8555 // a struct member, then we defer the emission of that entry until the whole
8556 // struct has been processed.
8557 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8558 SmallVector<DeferredDevicePtrEntryTy, 4>>
8559 DeferredInfo;
8560 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8561
8562 auto &&UseDeviceDataCombinedInfoGen =
8563 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8564 CodeGenFunction &CGF) {
8565 UseDeviceDataCombinedInfo.Exprs.push_back(VD);
8566 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8567 UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
8568 UseDeviceDataCombinedInfo.Sizes.push_back(
8569 llvm::Constant::getNullValue(CGF.Int64Ty));
8570 UseDeviceDataCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8571 UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
8572 };
8573
8574 auto &&MapInfoGen =
8575 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8576 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8577 OMPClauseMappableExprCommon::MappableExprComponentListRef
8578 Components,
8579 bool IsImplicit, bool IsDevAddr) {
8580 // We didn't find any match in our map information - generate a zero
8581 // size array section - if the pointer is a struct member we defer
8582 // this action until the whole struct has been processed.
8583 if (isa<MemberExpr>(IE)) {
8584 // Insert the pointer into Info to be processed by
8585 // generateInfoForComponentList. Because it is a member pointer
8586 // without a pointee, no entry will be generated for it, therefore
8587 // we need to generate one after the whole struct has been
8588 // processed. Nonetheless, generateInfoForComponentList must be
8589 // called to take the pointer into account for the calculation of
8590 // the range of the partial struct.
8591 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8592 llvm::None, /*ReturnDevicePointer=*/false, IsImplicit,
8593 nullptr, nullptr, IsDevAddr);
8594 DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
8595 } else {
8596 llvm::Value *Ptr;
8597 if (IsDevAddr) {
8598 if (IE->isGLValue())
8599 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8600 else
8601 Ptr = CGF.EmitScalarExpr(IE);
8602 } else {
8603 Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8604 }
8605 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
8606 }
8607 };
8608
8609 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8610 const Expr *IE, bool IsDevAddr) -> bool {
8611 // We potentially have map information for this declaration already.
8612 // Look for the first set of components that refer to it. If found,
8613 // return true.
8614 // If the first component is a member expression, we have to look into
8615 // 'this', which maps to null in the map of map information. Otherwise
8616 // look directly for the information.
8617 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8618 if (It != Info.end()) {
8619 bool Found = false;
8620 for (auto &Data : It->second) {
8621 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8622 return MI.Components.back().getAssociatedDeclaration() == VD;
8623 });
8624 // If we found a map entry, signal that the pointer has to be
8625 // returned and move on to the next declaration. Exclude cases where
8626 // the base pointer is mapped as array subscript, array section or
8627 // array shaping. The base address is passed as a pointer to base in
8628 // this case and cannot be used as a base for use_device_ptr list
8629 // item.
8630 if (CI != Data.end()) {
8631 if (IsDevAddr) {
8632 CI->ReturnDevicePointer = true;
8633 Found = true;
8634 break;
8635 } else {
8636 auto PrevCI = std::next(CI->Components.rbegin());
8637 const auto *VarD = dyn_cast<VarDecl>(VD);
8638 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8639 isa<MemberExpr>(IE) ||
8640 !VD->getType().getNonReferenceType()->isPointerType() ||
8641 PrevCI == CI->Components.rend() ||
8642 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8643 VarD->hasLocalStorage()) {
8644 CI->ReturnDevicePointer = true;
8645 Found = true;
8646 break;
8647 }
8648 }
8649 }
8650 }
8651 return Found;
8652 }
8653 return false;
8654 };
8655
8656 // Look at the use_device_ptr clause information and mark the existing map
8657 // entries as such. If there is no map information for an entry in the
8658 // use_device_ptr list, we create one with map type 'alloc' and zero size
8659 // section. It is the user fault if that was not mapped before. If there is
8660 // no map information and the pointer is a struct member, then we defer the
8661 // emission of that entry until the whole struct has been processed.
8662 for (const auto *Cl : Clauses) {
8663 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8664 if (!C)
8665 continue;
8666 for (const auto L : C->component_lists()) {
8667 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8668 std::get<1>(L);
8669 assert(!Components.empty() &&(static_cast <bool> (!Components.empty() && "Not expecting empty list of components!"
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8670, __extension__
__PRETTY_FUNCTION__))
8670 "Not expecting empty list of components!")(static_cast <bool> (!Components.empty() && "Not expecting empty list of components!"
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8670, __extension__
__PRETTY_FUNCTION__))
;
8671 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8672 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8673 const Expr *IE = Components.back().getAssociatedExpression();
8674 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8675 continue;
8676 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8677 /*IsDevAddr=*/false);
8678 }
8679 }
8680
8681 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8682 for (const auto *Cl : Clauses) {
8683 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8684 if (!C)
8685 continue;
8686 for (const auto L : C->component_lists()) {
8687 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8688 std::get<1>(L);
8689 assert(!std::get<1>(L).empty() &&(static_cast <bool> (!std::get<1>(L).empty() &&
"Not expecting empty list of components!") ? void (0) : __assert_fail
("!std::get<1>(L).empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8690, __extension__
__PRETTY_FUNCTION__))
8690 "Not expecting empty list of components!")(static_cast <bool> (!std::get<1>(L).empty() &&
"Not expecting empty list of components!") ? void (0) : __assert_fail
("!std::get<1>(L).empty() && \"Not expecting empty list of components!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8690, __extension__
__PRETTY_FUNCTION__))
;
8691 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8692 if (!Processed.insert(VD).second)
8693 continue;
8694 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8695 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8696 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8697 continue;
8698 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8699 /*IsDevAddr=*/true);
8700 }
8701 }
8702
8703 for (const auto &Data : Info) {
8704 StructRangeInfoTy PartialStruct;
8705 // Temporary generated information.
8706 MapCombinedInfoTy CurInfo;
8707 const Decl *D = Data.first;
8708 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8709 for (const auto &M : Data.second) {
8710 for (const MapInfo &L : M) {
8711 assert(!L.Components.empty() &&(static_cast <bool> (!L.Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!L.Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8712, __extension__
__PRETTY_FUNCTION__))
8712 "Not expecting declaration with no component lists.")(static_cast <bool> (!L.Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!L.Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8712, __extension__
__PRETTY_FUNCTION__))
;
8713
8714 // Remember the current base pointer index.
8715 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8716 CurInfo.NonContigInfo.IsNonContiguous =
8717 L.Components.back().isNonContiguous();
8718 generateInfoForComponentList(
8719 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8720 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8721 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8722
8723 // If this entry relates with a device pointer, set the relevant
8724 // declaration and add the 'return pointer' flag.
8725 if (L.ReturnDevicePointer) {
8726 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&(static_cast <bool> (CurInfo.BasePointers.size() > CurrentBasePointersIdx
&& "Unexpected number of mapped base pointers.") ? void
(0) : __assert_fail ("CurInfo.BasePointers.size() > CurrentBasePointersIdx && \"Unexpected number of mapped base pointers.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8727, __extension__
__PRETTY_FUNCTION__))
8727 "Unexpected number of mapped base pointers.")(static_cast <bool> (CurInfo.BasePointers.size() > CurrentBasePointersIdx
&& "Unexpected number of mapped base pointers.") ? void
(0) : __assert_fail ("CurInfo.BasePointers.size() > CurrentBasePointersIdx && \"Unexpected number of mapped base pointers.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8727, __extension__
__PRETTY_FUNCTION__))
;
8728
8729 const ValueDecl *RelevantVD =
8730 L.Components.back().getAssociatedDeclaration();
8731 assert(RelevantVD &&(static_cast <bool> (RelevantVD && "No relevant declaration related with device pointer??"
) ? void (0) : __assert_fail ("RelevantVD && \"No relevant declaration related with device pointer??\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8732, __extension__
__PRETTY_FUNCTION__))
8732 "No relevant declaration related with device pointer??")(static_cast <bool> (RelevantVD && "No relevant declaration related with device pointer??"
) ? void (0) : __assert_fail ("RelevantVD && \"No relevant declaration related with device pointer??\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8732, __extension__
__PRETTY_FUNCTION__))
;
8733
8734 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8735 RelevantVD);
8736 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8737 }
8738 }
8739 }
8740
8741 // Append any pending zero-length pointers which are struct members and
8742 // used with use_device_ptr or use_device_addr.
8743 auto CI = DeferredInfo.find(Data.first);
8744 if (CI != DeferredInfo.end()) {
8745 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8746 llvm::Value *BasePtr;
8747 llvm::Value *Ptr;
8748 if (L.ForDeviceAddr) {
8749 if (L.IE->isGLValue())
8750 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8751 else
8752 Ptr = this->CGF.EmitScalarExpr(L.IE);
8753 BasePtr = Ptr;
8754 // Entry is RETURN_PARAM. Also, set the placeholder value
8755 // MEMBER_OF=FFFF so that the entry is later updated with the
8756 // correct value of MEMBER_OF.
8757 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8758 } else {
8759 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8760 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8761 L.IE->getExprLoc());
8762 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8763 // placeholder value MEMBER_OF=FFFF so that the entry is later
8764 // updated with the correct value of MEMBER_OF.
8765 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8766 OMP_MAP_MEMBER_OF);
8767 }
8768 CurInfo.Exprs.push_back(L.VD);
8769 CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8770 CurInfo.Pointers.push_back(Ptr);
8771 CurInfo.Sizes.push_back(
8772 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8773 CurInfo.Mappers.push_back(nullptr);
8774 }
8775 }
8776 // If there is an entry in PartialStruct it means we have a struct with
8777 // individual members mapped. Emit an extra combined entry.
8778 if (PartialStruct.Base.isValid()) {
8779 CurInfo.NonContigInfo.Dims.push_back(0);
8780 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8781 }
8782
8783 // We need to append the results of this capture to what we already
8784 // have.
8785 CombinedInfo.append(CurInfo);
8786 }
8787 // Append data for use_device_ptr clauses.
8788 CombinedInfo.append(UseDeviceDataCombinedInfo);
8789 }
8790
8791public:
8792 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8793 : CurDir(&Dir), CGF(CGF) {
8794 // Extract firstprivate clause information.
8795 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8796 for (const auto *D : C->varlists())
8797 FirstPrivateDecls.try_emplace(
8798 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8799 // Extract implicit firstprivates from uses_allocators clauses.
8800 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8801 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8802 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8803 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8804 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8805 /*Implicit=*/true);
8806 else if (const auto *VD = dyn_cast<VarDecl>(
8807 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8808 ->getDecl()))
8809 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8810 }
8811 }
8812 // Extract device pointer clause information.
8813 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8814 for (auto L : C->component_lists())
8815 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8816 // Extract device addr clause information.
8817 for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
8818 for (auto L : C->component_lists())
8819 HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
8820 // Extract map information.
8821 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8822 if (C->getMapType() != OMPC_MAP_to)
8823 continue;
8824 for (auto L : C->component_lists()) {
8825 const ValueDecl *VD = std::get<0>(L);
8826 const auto *RD = VD ? VD->getType()
8827 .getCanonicalType()
8828 .getNonReferenceType()
8829 ->getAsCXXRecordDecl()
8830 : nullptr;
8831 if (RD && RD->isLambda())
8832 LambdasMap.try_emplace(std::get<0>(L), C);
8833 }
8834 }
8835 }
8836
  /// Constructor for the declare mapper directive. No clause information is
  /// pre-extracted here; CurDir records the mapper declaration so later
  /// queries (e.g. generateAllInfoForMapper) can retrieve its clauses.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8840
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo [out] receives the single combined entry (base
  ///        pointer, pointer, size, type, mapper) covering the whole struct.
  /// \param CurTypes [in,out] flags of the already-emitted member entries;
  ///        updated in place (TARGET_PARAM cleared, MEMBER_OF/OMPX_HOLD set).
  /// \param PartialStruct base and lowest/highest element range collected
  ///        while emitting the individual members.
  /// \param VD the mapped declaration, if any.
  /// \param NotTargetParams true if the combined entry must not be flagged as
  ///        a target parameter.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    // A single entry that is not MEMBER_OF anything and not an array section
    // needs no combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      // Whole record mapped: both bounds collapse to the record base, so the
      // +1 GEP below spans exactly one complete record.
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element: the combined entry is
    // now the argument presented to the runtime.
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
8912
8913 /// Generate all the base pointers, section pointers, sizes, map types, and
8914 /// mappers for the extracted mappable expressions (all included in \a
8915 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8916 /// pair of the relevant declaration and index where it occurs is appended to
8917 /// the device pointers info array.
8918 void generateAllInfo(
8919 MapCombinedInfoTy &CombinedInfo,
8920 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8921 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8922 assert(CurDir.is<const OMPExecutableDirective *>() &&(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8923, __extension__
__PRETTY_FUNCTION__))
8923 "Expect a executable directive")(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8923, __extension__
__PRETTY_FUNCTION__))
;
8924 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
8925 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
8926 }
8927
8928 /// Generate all the base pointers, section pointers, sizes, map types, and
8929 /// mappers for the extracted map clauses of user-defined mapper (all included
8930 /// in \a CombinedInfo).
8931 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
8932 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&(static_cast <bool> (CurDir.is<const OMPDeclareMapperDecl
*>() && "Expect a declare mapper directive") ? void
(0) : __assert_fail ("CurDir.is<const OMPDeclareMapperDecl *>() && \"Expect a declare mapper directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8933, __extension__
__PRETTY_FUNCTION__))
8933 "Expect a declare mapper directive")(static_cast <bool> (CurDir.is<const OMPDeclareMapperDecl
*>() && "Expect a declare mapper directive") ? void
(0) : __assert_fail ("CurDir.is<const OMPDeclareMapperDecl *>() && \"Expect a declare mapper directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 8933, __extension__
__PRETTY_FUNCTION__))
;
8934 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
8935 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
8936 }
8937
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// If \p VD is (a reference to) a lambda closure object located at \p Arg,
  /// emit one PTR_AND_OBJ map entry for the captured 'this' (if any) and one
  /// per by-reference/pointer capture, and record in \p LambdaPointers which
  /// lambda object each entry belongs to so that
  /// adjustMemberOfForLambdaCaptures can later fix up the MEMBER_OF bits.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Nothing to do unless the captured variable is a lambda closure object.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    // Map each captured variable to the closure field that stores it.
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      // Emit an entry for the captured 'this' pointer.
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      // Only by-reference captures and captured pointers need a map entry.
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Captured pointer: map the loaded pointer value with zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
9002
  /// Set correct indices for lambdas captures.
  ///
  /// For every implicit lambda-capture entry emitted by
  /// generateInfoForLambdaCaptures, look up the map entry of the enclosing
  /// lambda object (via \p LambdaPointers) and rewrite the entry's MEMBER_OF
  /// bits to reference that parent entry's index.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures. Only
      // entries carrying exactly this flag combination were produced for
      // lambda captures; everything else is left untouched.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.")
      ;
      int TgtIdx = -1;
      // Scan backwards from this entry for the one whose pointer is the
      // lambda object; that entry is the parent this capture belongs to.
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.")
      ;
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
9031
9032 /// Generate the base pointers, section pointers, sizes, map types, and
9033 /// mappers associated to a given capture (all included in \a CombinedInfo).
9034 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9035 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9036 StructRangeInfoTy &PartialStruct) const {
9037 assert(!Cap->capturesVariableArrayType() &&(static_cast <bool> (!Cap->capturesVariableArrayType
() && "Not expecting to generate map info for a variable array type!"
) ? void (0) : __assert_fail ("!Cap->capturesVariableArrayType() && \"Not expecting to generate map info for a variable array type!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9038, __extension__
__PRETTY_FUNCTION__))
1
'?' condition is true
9038 "Not expecting to generate map info for a variable array type!")(static_cast <bool> (!Cap->capturesVariableArrayType
() && "Not expecting to generate map info for a variable array type!"
) ? void (0) : __assert_fail ("!Cap->capturesVariableArrayType() && \"Not expecting to generate map info for a variable array type!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9038, __extension__
__PRETTY_FUNCTION__))
;
9039
9040 // We need to know when we generating information for the first component
9041 const ValueDecl *VD = Cap->capturesThis()
2
'?' condition is false
9042 ? nullptr
9043 : Cap->getCapturedVar()->getCanonicalDecl();
9044
9045 // for map(to: lambda): skip here, processing it in
9046 // generateDefaultMapInfo
9047 if (LambdasMap.count(VD))
3
Assuming the condition is false
9048 return;
9049
9050 // If this declaration appears in a is_device_ptr clause we just have to
9051 // pass the pointer by value. If it is a reference to a declaration, we just
9052 // pass its value.
9053 if (VD && DevPointersMap.count(VD)) {
4
Assuming 'VD' is non-null
5
Assuming the condition is false
9054 CombinedInfo.Exprs.push_back(VD);
9055 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9056 CombinedInfo.Pointers.push_back(Arg);
9057 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9058 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9059 /*isSigned=*/true));
9060 CombinedInfo.Types.push_back(
9061 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9062 OMP_MAP_TARGET_PARAM);
9063 CombinedInfo.Mappers.push_back(nullptr);
9064 return;
9065 }
9066 if (VD
5.1
'VD' is non-null
&& HasDevAddrsMap.count(VD)) {
6
Assuming the condition is true
7
Taking true branch
9067 auto I = HasDevAddrsMap.find(VD);
9068 CombinedInfo.Exprs.push_back(VD);
9069 Expr *E = nullptr;
8
'E' initialized to a null pointer value
9070 for (auto &MCL : I->second) {
9
Assuming '__begin3' is equal to '__end3'
9071 E = MCL.begin()->getAssociatedExpression();
9072 break;
9073 }
9074 llvm::Value *Ptr = nullptr;
9075 if (E->isGLValue())
10
Called C++ object pointer is null
9076 Ptr = CGF.EmitLValue(E).getPointer(CGF);
9077 else
9078 Ptr = CGF.EmitScalarExpr(E);
9079 CombinedInfo.BasePointers.emplace_back(Ptr, VD);
9080 CombinedInfo.Pointers.push_back(Ptr);
9081 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9082 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9083 /*isSigned=*/true));
9084 CombinedInfo.Types.push_back(
9085 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9086 OMP_MAP_TARGET_PARAM);
9087 CombinedInfo.Mappers.push_back(nullptr);
9088 return;
9089 }
9090
9091 using MapData =
9092 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9093 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9094 const ValueDecl *, const Expr *>;
9095 SmallVector<MapData, 4> DeclComponentLists;
9096 // For member fields list in is_device_ptr, store it in
9097 // DeclComponentLists for generating components info.
9098 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
9099 auto It = DevPointersMap.find(VD);
9100 if (It != DevPointersMap.end())
9101 for (const auto &MCL : It->second)
9102 DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
9103 /*IsImpicit = */ true, nullptr,
9104 nullptr);
9105 auto I = HasDevAddrsMap.find(VD);
9106 if (I != HasDevAddrsMap.end())
9107 for (const auto &MCL : I->second)
9108 DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
9109 /*IsImpicit = */ true, nullptr,
9110 nullptr);
9111 assert(CurDir.is<const OMPExecutableDirective *>() &&(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9112, __extension__
__PRETTY_FUNCTION__))
9112 "Expect a executable directive")(static_cast <bool> (CurDir.is<const OMPExecutableDirective
*>() && "Expect a executable directive") ? void (
0) : __assert_fail ("CurDir.is<const OMPExecutableDirective *>() && \"Expect a executable directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9112, __extension__
__PRETTY_FUNCTION__))
;
9113 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9114 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9115 const auto *EI = C->getVarRefs().begin();
9116 for (const auto L : C->decl_component_lists(VD)) {
9117 const ValueDecl *VDecl, *Mapper;
9118 // The Expression is not correct if the mapping is implicit
9119 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9120 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9121 std::tie(VDecl, Components, Mapper) = L;
9122 assert(VDecl == VD && "We got information for the wrong declaration??")(static_cast <bool> (VDecl == VD && "We got information for the wrong declaration??"
) ? void (0) : __assert_fail ("VDecl == VD && \"We got information for the wrong declaration??\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9122, __extension__
__PRETTY_FUNCTION__))
;
9123 assert(!Components.empty() &&(static_cast <bool> (!Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9124, __extension__
__PRETTY_FUNCTION__))
9124 "Not expecting declaration with no component lists.")(static_cast <bool> (!Components.empty() && "Not expecting declaration with no component lists."
) ? void (0) : __assert_fail ("!Components.empty() && \"Not expecting declaration with no component lists.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 9124, __extension__
__PRETTY_FUNCTION__))
;
9125 DeclComponentLists.emplace_back(Components, C->getMapType(),
9126 C->getMapTypeModifiers(),
9127 C->isImplicit(), Mapper, E);
9128 ++EI;
9129 }
9130 }
9131 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9132 const MapData &RHS) {
9133 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9134 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9135 bool HasPresent =
9136 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9137 bool HasAllocs = MapType == OMPC_MAP_alloc;
9138 MapModifiers = std::get<2>(RHS);
9139 MapType = std::get<1>(LHS);
9140 bool HasPresentR =
9141 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9142 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9143 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9144 });
9145
9146 // Find overlapping elements (including the offset from the base element).
9147 llvm::SmallDenseMap<
9148 const MapData *,
9149 llvm::SmallVector<
9150 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9151 4>
9152 OverlappedData;
9153 size_t Count = 0;
9154 for (const MapData &L : DeclComponentLists) {
9155 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9156 OpenMPMapClauseKind MapType;
9157 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9158 bool IsImplicit;
9159 const ValueDecl *Mapper;
9160 const Expr *VarRef;
9161 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9162 L;
9163 ++Count;
9164 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9165 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9166 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9167 VarRef) = L1;
9168 auto CI = Components.rbegin();
9169 auto CE = Components.rend();
9170 auto SI = Components1.rbegin();
9171 auto SE = Components1.rend();
9172 for (; CI != CE && SI != SE; ++CI, ++SI) {
9173 if (CI->getAssociatedExpression()->getStmtClass() !=
9174 SI->getAssociatedExpression()->getStmtClass())
9175 break;
9176 // Are we dealing with different variables/fields?
9177 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9178 break;
9179 }
9180 // Found overlapping if, at least for one component, reached the head
9181 // of the components list.
9182 if (CI == CE || SI == SE) {
9183 // Ignore it if it is the same component.
9184 if (CI == CE && SI == SE)
9185 continue;
9186 const auto It = (SI == SE) ? CI : SI;
9187 // If one component is a pointer and another one is a kind of
9188 // dereference of this pointer (array subscript, section, dereference,
9189 // etc.), it is not an overlapping.
9190 // Same, if one component is a base and another component is a
9191 // dereferenced pointer memberexpr with the same base.
9192 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9193 (std::prev(It)->getAssociatedDeclaration() &&
9194 std::prev(It)
9195 ->getAssociatedDeclaration()
9196 ->getType()
9197 ->isPointerType()) ||
9198 (It->getAssociatedDeclaration() &&
9199 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9200 std::next(It) != CE && std::next(It) != SE))
9201 continue;
9202 const MapData &BaseData = CI == CE ? L : L1;
9203 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9204 SI == SE ? Components : Components1;
9205 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9206 OverlappedElements.getSecond().push_back(SubData);
9207 }
9208 }
9209 }
9210 // Sort the overlapped elements for each item.
9211 llvm::SmallVector<const FieldDecl *, 4> Layout;
9212 if (!OverlappedData.empty()) {
9213 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9214 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9215 while (BaseType != OrigType) {
9216 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9217 OrigType = BaseType->getPointeeOrArrayElementType();
9218 }
9219
9220 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9221 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9222 else {
9223 const auto *RD = BaseType->getAsRecordDecl();
9224 Layout.append(RD->field_begin(), RD->field_end());
9225 }
9226 }
9227 for (auto &Pair : OverlappedData) {
9228 llvm::stable_sort(
9229 Pair.getSecond(),
9230 [&Layout](
9231 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9232 OMPClauseMappableExprCommon::MappableExprComponentListRef
9233 Second) {
9234 auto CI = First.rbegin();
9235 auto CE = First.rend();
9236 auto SI = Second.rbegin();
9237 auto SE = Second.rend();
9238 for (; CI != CE && SI != SE; ++CI, ++SI) {
9239 if (CI->getAssociatedExpression()->getStmtClass() !=
9240 SI->getAssociatedExpression()->getStmtClass())
9241 break;
9242 // Are we dealing with different variables/fields?
9243 if (CI->getAssociatedDeclaration() !=
9244 SI->getAssociatedDeclaration())
9245 break;
9246 }
9247
9248 // Lists contain the same elements.
9249 if (CI == CE && SI == SE)
9250 return false;
9251
9252 // List with less elements is less than list with more elements.
9253 if (CI == CE || SI == SE)
9254 return CI == CE;
9255
9256 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9257 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9258 if (FD1->getParent() == FD2->getParent())
9259 return FD1->getFieldIndex() < FD2->getFieldIndex();
9260 const auto *It =
9261 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9262 return FD == FD1 || FD == FD2;
9263 });
9264 return *It == FD1;
9265 });
9266 }
9267
9268 // Associated with a capture, because the mapping flags depend on it.
9269 // Go through all of the elements with the overlapped elements.
9270 bool IsFirstComponentList = true;
9271 for (const auto &Pair : OverlappedData) {
9272 const MapData &L = *Pair.getFirst();
9273 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9274 OpenMPMapClauseKind MapType;
9275 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9276 bool IsImplicit;
9277 const ValueDecl *Mapper;
9278 const Expr *VarRef;
9279 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9280 L;
9281 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9282 OverlappedComponents = Pair.getSecond();
9283 generateInfoForComponentList(
9284 MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9285 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9286 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9287 IsFirstComponentList = false;
9288 }
9289 // Go through other elements without overlapped elements.
9290 for (const MapData &L : DeclComponentLists) {
9291 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9292 OpenMPMapClauseKind MapType;
9293 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9294 bool IsImplicit;
9295 const ValueDecl *Mapper;
9296 const Expr *VarRef;
9297 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9298 L;
9299 auto It = OverlappedData.find(&L);
9300 if (It == OverlappedData.end())
9301 generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9302 Components, CombinedInfo, PartialStruct,
9303 IsFirstComponentList, IsImplicit, Mapper,
9304 /*ForDeviceAddr=*/false, VD, VarRef);
9305 IsFirstComponentList = false;
9306 }
9307 }
9308
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry is appended to each of the parallel CombinedInfo
  /// vectors (Exprs/BasePointers/Pointers/Sizes/Types/Mappers), so the
  /// vectors stay index-aligned.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Capture of 'this': map the pointed-to object, sized from the
      // pointee type of the record field.
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      // For variables recorded in FirstPrivateDecls, whether the map is
      // implicit was decided when the clause was processed.
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer captured by reference: pass the loaded
        // pointer value rather than the reference itself.
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
9381};
9382} // anonymous namespace
9383
/// Build one on-stack "descriptor_dim" array per non-contiguous mapped base
/// declaration and store its address into the matching slot of
/// Info.PointersArray (see the "args[I] = &dims" store below).
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variable here since the size of "Dims" is the same as the
  // size of Components, however, the size of offset, count, and stride is equal
  // to the size of base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting ir if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      // NOTE(review): dimensions are written in reverse order (RevIdx),
      // presumably because the runtime descriptor expects the opposite
      // ordering from how Offsets/Counts/Strides were collected — confirm.
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    // Advance the per-base index only for dimensions actually emitted.
    ++L;
  }
}
9451
9452// Try to extract the base declaration from a `this->x` expression if possible.
9453static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9454 if (!E)
9455 return nullptr;
9456
9457 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9458 if (const MemberExpr *ME =
9459 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9460 return ME->getMemberDecl();
9461 return nullptr;
9462}
9463
9464/// Emit a string constant containing the names of the values mapped to the
9465/// offloading runtime library.
9466llvm::Constant *
9467emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9468 MappableExprsHandler::MappingExprInfo &MapExprs) {
9469
9470 uint32_t SrcLocStrSize;
9471 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9472 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9473
9474 SourceLocation Loc;
9475 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9476 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9477 Loc = VD->getLocation();
9478 else
9479 Loc = MapExprs.getMapExpr()->getExprLoc();
9480 } else {
9481 Loc = MapExprs.getMapDecl()->getLocation();
9482 }
9483
9484 std::string ExprName;
9485 if (MapExprs.getMapExpr()) {
9486 PrintingPolicy P(CGF.getContext().getLangOpts());
9487 llvm::raw_string_ostream OS(ExprName);
9488 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9489 OS.flush();
9490 } else {
9491 ExprName = MapExprs.getMapDecl()->getNameAsString();
9492 }
9493
9494 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9495 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9496 PLoc.getLine(), PLoc.getColumn(),
9497 SrcLocStrSize);
9498}
9499
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Populates Info with the .offload_baseptrs/.offload_ptrs/.offload_sizes/
/// .offload_mappers temporaries plus the constant maptypes/mapnames globals,
/// then (when \p IsNonContiguous) emits the per-dimension descriptors.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    // One stack slot per mapped entry for base pointers, pointers and mappers.
    Info.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          // For non-contiguous entries the size slot carries the number of
          // dimensions rather than a byte count.
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      // All sizes need runtime evaluation: a plain stack array filled in by
      // the per-entry loop below.
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      // At least some sizes are compile-time constants: emit them as a
      // private unnamed-addr global.
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        // Mixed constant/runtime sizes: memcpy the constant global into a
        // stack buffer so runtime slots can be stored over it below.
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.SizesArray = Buffer.getPointer();
      } else {
        Info.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    // Store each entry's base pointer, pointer, runtime-evaluated size and
    // mapper function into its slot of the arrays created above.
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      // Record where device-pointer captures live so they can be looked up
      // by declaration later.
      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
9690
namespace {
/// Additional arguments for emitOffloadingArraysArgument function.
struct ArgumentsOptions {
  /// When true, emit the map types intended for the end of the region.
  bool ForEndCall = false;
  ArgumentsOptions() = default;
  ArgumentsOptions(bool IsForEndCall) : ForEndCall(IsForEndCall) {}
};
} // namespace
9699
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead of
/// the beginning.
///
/// All six out-parameters are always assigned: array decays to first-element
/// pointers when Info.NumberOfPtrs is non-zero, null pointers otherwise.
static void emitOffloadingArraysArgument(
    CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
    llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
    llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
    llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
    const ArgumentsOptions &Options = ArgumentsOptions()) {
  assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");
  CodeGenModule &CGM = CGF.CGM;
  if (Info.NumberOfPtrs) {
    BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.BasePointersArray,
        /*Idx0=*/0, /*Idx1=*/0);
    PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.PointersArray,
        /*Idx0=*/0,
        /*Idx1=*/0);
    SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
        /*Idx0=*/0, /*Idx1=*/0);
    // For the end-of-region call prefer the end-specific map-types array
    // when one was generated (present modifiers stripped).
    MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
        Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
                                                    : Info.MapTypesArray,
        /*Idx0=*/0,
        /*Idx1=*/0);

    // Only emit the mapper information arrays if debug information is
    // requested.
    if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
      MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.MapNamesArray,
          /*Idx0=*/0,
          /*Idx1=*/0);
    // If there is no user-defined mapper, set the mapper array to nullptr to
    // avoid an unnecessary data privatization
    if (!Info.HasMapper)
      MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    else
      MappersArrayArg =
          CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
  } else {
    // No mapped entries: pass null for every runtime array argument.
    BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapTypesArrayArg =
        llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
    MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
    MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
  }
}
9760
9761/// Check for inner distribute directive.
9762static const OMPExecutableDirective *
9763getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9764 const auto *CS = D.getInnermostCapturedStmt();
9765 const auto *Body =
9766 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9767 const Stmt *ChildStmt =
9768 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9769
9770 if (const auto *NestedDir =
9771 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9772 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9773 switch (D.getDirectiveKind()) {
9774 case OMPD_target:
9775 if (isOpenMPDistributeDirective(DKind))
9776 return NestedDir;
9777 if (DKind == OMPD_teams) {
9778 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9779 /*IgnoreCaptured=*/true);
9780 if (!Body)
9781 return nullptr;
9782 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9783 if (const auto *NND =
9784 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9785 DKind = NND->getDirectiveKind();
9786 if (isOpenMPDistributeDirective(DKind))
9787 return NND;
9788 }
9789 }
9790 return nullptr;
9791 case OMPD_target_teams:
9792 if (isOpenMPDistributeDirective(DKind))
9793 return NestedDir;
9794 return nullptr;
9795 case OMPD_target_parallel:
9796 case OMPD_target_simd:
9797 case OMPD_target_parallel_for:
9798 case OMPD_target_parallel_for_simd:
9799 return nullptr;
9800 case OMPD_target_teams_distribute:
9801 case OMPD_target_teams_distribute_simd:
9802 case OMPD_target_teams_distribute_parallel_for:
9803 case OMPD_target_teams_distribute_parallel_for_simd:
9804 case OMPD_parallel:
9805 case OMPD_for:
9806 case OMPD_parallel_for:
9807 case OMPD_parallel_master:
9808 case OMPD_parallel_sections:
9809 case OMPD_for_simd:
9810 case OMPD_parallel_for_simd:
9811 case OMPD_cancel:
9812 case OMPD_cancellation_point:
9813 case OMPD_ordered:
9814 case OMPD_threadprivate:
9815 case OMPD_allocate:
9816 case OMPD_task:
9817 case OMPD_simd:
9818 case OMPD_tile:
9819 case OMPD_unroll:
9820 case OMPD_sections:
9821 case OMPD_section:
9822 case OMPD_single:
9823 case OMPD_master:
9824 case OMPD_critical:
9825 case OMPD_taskyield:
9826 case OMPD_barrier:
9827 case OMPD_taskwait:
9828 case OMPD_taskgroup:
9829 case OMPD_atomic:
9830 case OMPD_flush:
9831 case OMPD_depobj:
9832 case OMPD_scan:
9833 case OMPD_teams:
9834 case OMPD_target_data:
9835 case OMPD_target_exit_data:
9836 case OMPD_target_enter_data:
9837 case OMPD_distribute:
9838 case OMPD_distribute_simd:
9839 case OMPD_distribute_parallel_for:
9840 case OMPD_distribute_parallel_for_simd:
9841 case OMPD_teams_distribute:
9842 case OMPD_teams_distribute_simd:
9843 case OMPD_teams_distribute_parallel_for:
9844 case OMPD_teams_distribute_parallel_for_simd:
9845 case OMPD_target_update:
9846 case OMPD_declare_simd:
9847 case OMPD_declare_variant:
9848 case OMPD_begin_declare_variant:
9849 case OMPD_end_declare_variant:
9850 case OMPD_declare_target:
9851 case OMPD_end_declare_target:
9852 case OMPD_declare_reduction:
9853 case OMPD_declare_mapper:
9854 case OMPD_taskloop:
9855 case OMPD_taskloop_simd:
9856 case OMPD_master_taskloop:
9857 case OMPD_master_taskloop_simd:
9858 case OMPD_parallel_master_taskloop:
9859 case OMPD_parallel_master_taskloop_simd:
9860 case OMPD_requires:
9861 case OMPD_metadirective:
9862 case OMPD_unknown:
9863 default:
9864 llvm_unreachable("Unexpected directive.")::llvm::llvm_unreachable_internal("Unexpected directive.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 9864)
;
9865 }
9866 }
9867
9868 return nullptr;
9869}
9870
9871/// Emit the user-defined mapper function. The code generation follows the
9872/// pattern in the example below.
9873/// \code
9874/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9875/// void *base, void *begin,
9876/// int64_t size, int64_t type,
9877/// void *name = nullptr) {
9878/// // Allocate space for an array section first or add a base/begin for
9879/// // pointer dereference.
9880/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9881/// !maptype.IsDelete)
9882/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9883/// size*sizeof(Ty), clearToFromMember(type));
9884/// // Map members.
9885/// for (unsigned i = 0; i < size; i++) {
9886/// // For each component specified by this mapper:
9887/// for (auto c : begin[i]->all_components) {
9888/// if (c.hasMapper())
9889/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9890/// c.arg_type, c.arg_name);
9891/// else
9892/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9893/// c.arg_begin, c.arg_size, c.arg_type,
9894/// c.arg_name);
9895/// }
9896/// }
9897/// // Delete the array section.
9898/// if (size > 1 && maptype.IsDelete)
9899/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9900/// size*sizeof(Ty), clearToFromMember(type));
9901/// }
9902/// \endcode
9903void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9904 CodeGenFunction *CGF) {
9905 if (UDMMap.count(D) > 0)
9906 return;
9907 ASTContext &C = CGM.getContext();
9908 QualType Ty = D->getType();
9909 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9910 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9911 auto *MapperVarDecl =
9912 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9913 SourceLocation Loc = D->getLocation();
9914 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9915 llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9916
9917 // Prepare mapper function arguments and attributes.
9918 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9919 C.VoidPtrTy, ImplicitParamDecl::Other);
9920 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9921 ImplicitParamDecl::Other);
9922 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9923 C.VoidPtrTy, ImplicitParamDecl::Other);
9924 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9925 ImplicitParamDecl::Other);
9926 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9927 ImplicitParamDecl::Other);
9928 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9929 ImplicitParamDecl::Other);
9930 FunctionArgList Args;
9931 Args.push_back(&HandleArg);
9932 Args.push_back(&BaseArg);
9933 Args.push_back(&BeginArg);
9934 Args.push_back(&SizeArg);
9935 Args.push_back(&TypeArg);
9936 Args.push_back(&NameArg);
9937 const CGFunctionInfo &FnInfo =
9938 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9939 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9940 SmallString<64> TyStr;
9941 llvm::raw_svector_ostream Out(TyStr);
9942 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9943 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9944 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9945 Name, &CGM.getModule());
9946 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9947 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9948 // Start the mapper function code generation.
9949 CodeGenFunction MapperCGF(CGM);
9950 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9951 // Compute the starting and end addresses of array elements.
9952 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
9953 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
9954 C.getPointerType(Int64Ty), Loc);
9955 // Prepare common arguments for array initiation and deletion.
9956 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
9957 MapperCGF.GetAddrOfLocalVar(&HandleArg),
9958 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9959 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
9960 MapperCGF.GetAddrOfLocalVar(&BaseArg),
9961 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9962 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
9963 MapperCGF.GetAddrOfLocalVar(&BeginArg),
9964 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9965 // Convert the size in bytes into the number of array elements.
9966 Size = MapperCGF.Builder.CreateExactUDiv(
9967 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
9968 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
9969 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
9970 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
9971 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
9972 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
9973 C.getPointerType(Int64Ty), Loc);
9974 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
9975 MapperCGF.GetAddrOfLocalVar(&NameArg),
9976 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
9977
9978 // Emit array initiation if this is an array section and \p MapType indicates
9979 // that memory allocation is required.
9980 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
9981 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
9982 MapName, ElementSize, HeadBB, /*IsInit=*/true);
9983
9984 // Emit a for loop to iterate through SizeArg of elements and map all of them.
9985
9986 // Emit the loop header block.
9987 MapperCGF.EmitBlock(HeadBB);
9988 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
9989 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
9990 // Evaluate whether the initial condition is satisfied.
9991 llvm::Value *IsEmpty =
9992 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9993 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9994 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
9995
9996 // Emit the loop body block.
9997 MapperCGF.EmitBlock(BodyBB);
9998 llvm::BasicBlock *LastBB = BodyBB;
9999 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
10000 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
10001 PtrPHI->addIncoming(PtrBegin, EntryBB);
10002 Address PtrCurrent(PtrPHI, ElemTy,
10003 MapperCGF.GetAddrOfLocalVar(&BeginArg)
10004 .getAlignment()
10005 .alignmentOfArrayElement(ElementSize));
10006 // Privatize the declared variable of mapper to be the current array element.
10007 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10008 Scope.addPrivate(MapperVarDecl, PtrCurrent);
10009 (void)Scope.Privatize();
10010
10011 // Get map clause information. Fill up the arrays with all mapped variables.
10012 MappableExprsHandler::MapCombinedInfoTy Info;
10013 MappableExprsHandler MEHandler(*D, MapperCGF);
10014 MEHandler.generateAllInfoForMapper(Info);
10015
10016 // Call the runtime API __tgt_mapper_num_components to get the number of
10017 // pre-existing components.
10018 llvm::Value *OffloadingArgs[] = {Handle};
10019 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
10020 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10021 OMPRTL___tgt_mapper_num_components),
10022 OffloadingArgs);
10023 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
10024 PreviousSize,
10025 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
10026
10027 // Fill up the runtime mapper handle for all components.
10028 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
10029 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
10030 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10031 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
10032 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10033 llvm::Value *CurSizeArg = Info.Sizes[I];
10034 llvm::Value *CurNameArg =
10035 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10036 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10037 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10038
10039 // Extract the MEMBER_OF field from the map type.
10040 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10041 llvm::Value *MemberMapType =
10042 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10043
10044 // Combine the map type inherited from user-defined mapper with that
10045 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10046 // bits of the \a MapType, which is the input argument of the mapper
10047 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10048 // bits of MemberMapType.
10049 // [OpenMP 5.0], 1.2.6. map-type decay.
10050 // | alloc | to | from | tofrom | release | delete
10051 // ----------------------------------------------------------
10052 // alloc | alloc | alloc | alloc | alloc | release | delete
10053 // to | alloc | to | alloc | to | release | delete
10054 // from | alloc | alloc | from | from | release | delete
10055 // tofrom | alloc | to | from | tofrom | release | delete
10056 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10057 MapType,
10058 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10059 MappableExprsHandler::OMP_MAP_FROM));
10060 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10061 llvm::BasicBlock *AllocElseBB =
10062 MapperCGF.createBasicBlock("omp.type.alloc.else");
10063 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10064 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10065 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10066 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10067 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10068 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10069 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10070 MapperCGF.EmitBlock(AllocBB);
10071 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10072 MemberMapType,
10073 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10074 MappableExprsHandler::OMP_MAP_FROM)));
10075 MapperCGF.Builder.CreateBr(EndBB);
10076 MapperCGF.EmitBlock(AllocElseBB);
10077 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10078 LeftToFrom,
10079 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10080 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10081 // In case of to, clear OMP_MAP_FROM.
10082 MapperCGF.EmitBlock(ToBB);
10083 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10084 MemberMapType,
10085 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10086 MapperCGF.Builder.CreateBr(EndBB);
10087 MapperCGF.EmitBlock(ToElseBB);
10088 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10089 LeftToFrom,
10090 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10091 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10092 // In case of from, clear OMP_MAP_TO.
10093 MapperCGF.EmitBlock(FromBB);
10094 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10095 MemberMapType,
10096 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10097 // In case of tofrom, do nothing.
10098 MapperCGF.EmitBlock(EndBB);
10099 LastBB = EndBB;
10100 llvm::PHINode *CurMapType =
10101 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10102 CurMapType->addIncoming(AllocMapType, AllocBB);
10103 CurMapType->addIncoming(ToMapType, ToBB);
10104 CurMapType->addIncoming(FromMapType, FromBB);
10105 CurMapType->addIncoming(MemberMapType, ToElseBB);
10106
10107 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
10108 CurSizeArg, CurMapType, CurNameArg};
10109 if (Info.Mappers[I]) {
10110 // Call the corresponding mapper function.
10111 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10112 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10113 assert(MapperFunc && "Expect a valid mapper function is available.")(static_cast <bool> (MapperFunc && "Expect a valid mapper function is available."
) ? void (0) : __assert_fail ("MapperFunc && \"Expect a valid mapper function is available.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10113, __extension__
__PRETTY_FUNCTION__))
;
10114 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10115 } else {
10116 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10117 // data structure.
10118 MapperCGF.EmitRuntimeCall(
10119 OMPBuilder.getOrCreateRuntimeFunction(
10120 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10121 OffloadingArgs);
10122 }
10123 }
10124
10125 // Update the pointer to point to the next element that needs to be mapped,
10126 // and check whether we have mapped all elements.
10127 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10128 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10129 PtrPHI->addIncoming(PtrNext, LastBB);
10130 llvm::Value *IsDone =
10131 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10132 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10133 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10134
10135 MapperCGF.EmitBlock(ExitBB);
10136 // Emit array deletion if this is an array section and \p MapType indicates
10137 // that deletion is required.
10138 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10139 MapName, ElementSize, DoneBB, /*IsInit=*/false);
10140
10141 // Emit the function exit block.
10142 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10143 MapperCGF.FinishFunction();
10144 UDMMap.try_emplace(D, Fn);
10145 if (CGF) {
10146 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10147 Decls.second.push_back(D);
10148 }
10149}
10150
10151/// Emit the array initialization or deletion portion for user-defined mapper
10152/// code generation. First, it evaluates whether an array section is mapped and
10153/// whether the \a MapType instructs to delete this section. If \a IsInit is
10154/// true, and \a MapType indicates to not delete this array, array
10155/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
10157void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10158 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10159 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10160 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10161 bool IsInit) {
10162 StringRef Prefix = IsInit ? ".init" : ".del";
10163
10164 // Evaluate if this is an array section.
10165 llvm::BasicBlock *BodyBB =
10166 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10167 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10168 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10169 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10170 MapType,
10171 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10172 llvm::Value *DeleteCond;
10173 llvm::Value *Cond;
10174 if (IsInit) {
10175 // base != begin?
10176 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10177 // IsPtrAndObj?
10178 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10179 MapType,
10180 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10181 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10182 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10183 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10184 DeleteCond = MapperCGF.Builder.CreateIsNull(
10185 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10186 } else {
10187 Cond = IsArray;
10188 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10189 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10190 }
10191 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10192 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10193
10194 MapperCGF.EmitBlock(BodyBB);
10195 // Get the array size by multiplying element size and element number (i.e., \p
10196 // Size).
10197 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10198 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10199 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
10200 // memory allocation/deletion purpose only.
10201 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10202 MapType,
10203 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10204 MappableExprsHandler::OMP_MAP_FROM)));
10205 MapTypeArg = MapperCGF.Builder.CreateOr(
10206 MapTypeArg,
10207 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10208
10209 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10210 // data structure.
10211 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
10212 ArraySize, MapTypeArg, MapName};
10213 MapperCGF.EmitRuntimeCall(
10214 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10215 OMPRTL___tgt_push_mapper_component),
10216 OffloadingArgs);
10217}
10218
10219llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10220 const OMPDeclareMapperDecl *D) {
10221 auto I = UDMMap.find(D);
10222 if (I != UDMMap.end())
10223 return I->second;
10224 emitUserDefinedMapper(D);
10225 return UDMMap.lookup(D);
10226}
10227
10228llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
10229 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10230 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10231 const OMPLoopDirective &D)>
10232 SizeEmitter) {
10233 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10234 const OMPExecutableDirective *TD = &D;
10235 // Get nested teams distribute kind directive, if any.
10236 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10237 TD = getNestedDistributeDirective(CGM.getContext(), D);
10238 if (!TD)
10239 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10240
10241 const auto *LD = cast<OMPLoopDirective>(TD);
10242 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
10243 return NumIterations;
10244 return llvm::ConstantInt::get(CGF.Int64Ty, 0);
10245}
10246
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // When offloading is mandatory on the host, a failed offload is emitted as
  // 'unreachable' instead of a host fallback call (see FallbackGen below).
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // These clauses force the target region to be wrapped in an outer task.
  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  // Emit the values captured by the target region into CapturedVars.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  // Filled in by TargetThenGen below and read (by reference) by ThenGen.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        // Re-capture inside the task region so the fallback call uses values
        // valid in the task's frame.
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call; written by
    // OMPBuilder.emitTargetKernel below before it is read.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Get tripcount for the target loop-based directive.
    llvm::Value *NumIterations =
        emitTargetNumIterationsCall(CGF, D, SizeEmitter);

    // Arguments for the target kernel.
    SmallVector<llvm::Value *> KernelArgs{
        CGF.Builder.getInt32(/* Version */ 1),
        PointerNum,
        InputInfo.BasePointersArray.getPointer(),
        InputInfo.PointersArray.getPointer(),
        InputInfo.SizesArray.getPointer(),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.getPointer(),
        NumIterations};

    // Arguments passed to the 'nowait' variant.
    SmallVector<llvm::Value *> NoWaitKernelArgs{
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
    };

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();

    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target_kernel().
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    // Check the error code and execute the host version if required.
    CGF.Builder.restoreIP(
        HasNoWait ? OMPBuilder.emitTargetKernel(
                        CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
                        NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
                  : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
                                                DeviceID, NumTeams, NumThreads,
                                                OutlinedFnID, KernelArgs));

    // A non-zero return value means the offload failed; branch to the host
    // fallback in that case.
    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    // Walk the captures, the captured-record fields and the captured values
    // in lock-step.
    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
                                MappableExprsHandler::OMP_MAP_TARGET_PARAM |
                                MappableExprsHandler::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          // 'this' is tracked with a null key in the mapped-variable set.
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    emitOffloadingArraysArgument(
        CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
        Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
        {/*ForEndCall=*/false});

    // Publish the generated arrays through the locals shared (by reference)
    // with ThenGen above.
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray =
        Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray =
        Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.MapTypesArray;
    MapNamesArray = Info.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
10533
10534void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10535 StringRef ParentName) {
10536 if (!S)
10537 return;
10538
10539 // Codegen OMP target directives that offload compute to the device.
10540 bool RequiresDeviceCodegen =
10541 isa<OMPExecutableDirective>(S) &&
10542 isOpenMPTargetExecutionDirective(
10543 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10544
10545 if (RequiresDeviceCodegen) {
10546 const auto &E = *cast<OMPExecutableDirective>(S);
10547 unsigned DeviceID;
10548 unsigned FileID;
10549 unsigned Line;
10550 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10551 FileID, Line);
10552
10553 // Is this a target region that should not be emitted as an entry point? If
10554 // so just signal we are done with this target region.
10555 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10556 ParentName, Line))
10557 return;
10558
10559 switch (E.getDirectiveKind()) {
10560 case OMPD_target:
10561 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10562 cast<OMPTargetDirective>(E));
10563 break;
10564 case OMPD_target_parallel:
10565 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10566 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10567 break;
10568 case OMPD_target_teams:
10569 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10570 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10571 break;
10572 case OMPD_target_teams_distribute:
10573 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10574 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10575 break;
10576 case OMPD_target_teams_distribute_simd:
10577 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10578 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10579 break;
10580 case OMPD_target_parallel_for:
10581 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10582 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10583 break;
10584 case OMPD_target_parallel_for_simd:
10585 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10586 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10587 break;
10588 case OMPD_target_simd:
10589 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10590 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10591 break;
10592 case OMPD_target_teams_distribute_parallel_for:
10593 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10594 CGM, ParentName,
10595 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10596 break;
10597 case OMPD_target_teams_distribute_parallel_for_simd:
10598 CodeGenFunction::
10599 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10600 CGM, ParentName,
10601 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10602 break;
10603 case OMPD_parallel:
10604 case OMPD_for:
10605 case OMPD_parallel_for:
10606 case OMPD_parallel_master:
10607 case OMPD_parallel_sections:
10608 case OMPD_for_simd:
10609 case OMPD_parallel_for_simd:
10610 case OMPD_cancel:
10611 case OMPD_cancellation_point:
10612 case OMPD_ordered:
10613 case OMPD_threadprivate:
10614 case OMPD_allocate:
10615 case OMPD_task:
10616 case OMPD_simd:
10617 case OMPD_tile:
10618 case OMPD_unroll:
10619 case OMPD_sections:
10620 case OMPD_section:
10621 case OMPD_single:
10622 case OMPD_master:
10623 case OMPD_critical:
10624 case OMPD_taskyield:
10625 case OMPD_barrier:
10626 case OMPD_taskwait:
10627 case OMPD_taskgroup:
10628 case OMPD_atomic:
10629 case OMPD_flush:
10630 case OMPD_depobj:
10631 case OMPD_scan:
10632 case OMPD_teams:
10633 case OMPD_target_data:
10634 case OMPD_target_exit_data:
10635 case OMPD_target_enter_data:
10636 case OMPD_distribute:
10637 case OMPD_distribute_simd:
10638 case OMPD_distribute_parallel_for:
10639 case OMPD_distribute_parallel_for_simd:
10640 case OMPD_teams_distribute:
10641 case OMPD_teams_distribute_simd:
10642 case OMPD_teams_distribute_parallel_for:
10643 case OMPD_teams_distribute_parallel_for_simd:
10644 case OMPD_target_update:
10645 case OMPD_declare_simd:
10646 case OMPD_declare_variant:
10647 case OMPD_begin_declare_variant:
10648 case OMPD_end_declare_variant:
10649 case OMPD_declare_target:
10650 case OMPD_end_declare_target:
10651 case OMPD_declare_reduction:
10652 case OMPD_declare_mapper:
10653 case OMPD_taskloop:
10654 case OMPD_taskloop_simd:
10655 case OMPD_master_taskloop:
10656 case OMPD_master_taskloop_simd:
10657 case OMPD_parallel_master_taskloop:
10658 case OMPD_parallel_master_taskloop_simd:
10659 case OMPD_requires:
10660 case OMPD_metadirective:
10661 case OMPD_unknown:
10662 default:
10663 llvm_unreachable("Unknown target directive for OpenMP device codegen.")::llvm::llvm_unreachable_internal("Unknown target directive for OpenMP device codegen."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10663)
;
10664 }
10665 return;
10666 }
10667
10668 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10669 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10670 return;
10671
10672 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10673 return;
10674 }
10675
10676 // If this is a lambda function, look into its body.
10677 if (const auto *L = dyn_cast<LambdaExpr>(S))
10678 S = L->getBody();
10679
10680 // Keep looking for target regions recursively.
10681 for (const Stmt *II : S->children())
10682 scanForTargetRegionsFunctions(II, ParentName);
10683}
10684
10685static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10686 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10687 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10688 if (!DevTy)
10689 return false;
10690 // Do not emit device_type(nohost) functions for the host.
10691 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10692 return true;
10693 // Do not emit device_type(host) functions for the device.
10694 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10695 return true;
10696 return false;
10697}
10698
10699bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10700 // If emitting code for the host, we do not process FD here. Instead we do
10701 // the normal code generation.
10702 if (!CGM.getLangOpts().OpenMPIsDevice) {
10703 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10704 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10705 CGM.getLangOpts().OpenMPIsDevice))
10706 return true;
10707 return false;
10708 }
10709
10710 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10711 // Try to detect target regions in the function.
10712 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10713 StringRef Name = CGM.getMangledName(GD);
10714 scanForTargetRegionsFunctions(FD->getBody(), Name);
10715 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10716 CGM.getLangOpts().OpenMPIsDevice))
10717 return true;
10718 }
10719
10720 // Do not to emit function if it is not marked as declare target.
10721 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10722 AlreadyEmittedTargetDecls.count(VD) == 0;
10723}
10724
10725bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10726 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10727 CGM.getLangOpts().OpenMPIsDevice))
10728 return true;
10729
10730 if (!CGM.getLangOpts().OpenMPIsDevice)
10731 return false;
10732
10733 // Check if there are Ctors/Dtors in this declaration and look for target
10734 // regions in it. We use the complete variant to produce the kernel name
10735 // mangling.
10736 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10737 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10738 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10739 StringRef ParentName =
10740 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10741 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10742 }
10743 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10744 StringRef ParentName =
10745 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10746 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10747 }
10748 }
10749
10750 // Do not to emit variable if it is not marked as declare target.
10751 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10752 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10753 cast<VarDecl>(GD.getDecl()));
10754 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10755 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10756 HasRequiresUnifiedSharedMemory)) {
10757 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10758 return true;
10759 }
10760 return false;
10761}
10762
10763void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10764 llvm::Constant *Addr) {
10765 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10766 !CGM.getLangOpts().OpenMPIsDevice)
10767 return;
10768
10769 // If we have host/nohost variables, they do not need to be registered.
10770 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10771 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10772 if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
10773 return;
10774
10775 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10776 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10777 if (!Res) {
10778 if (CGM.getLangOpts().OpenMPIsDevice) {
10779 // Register non-target variables being emitted in device code (debug info
10780 // may cause this).
10781 StringRef VarName = CGM.getMangledName(VD);
10782 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10783 }
10784 return;
10785 }
10786 // Register declare target variables.
10787 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10788 StringRef VarName;
10789 CharUnits VarSize;
10790 llvm::GlobalValue::LinkageTypes Linkage;
10791
10792 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10793 !HasRequiresUnifiedSharedMemory) {
10794 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10795 VarName = CGM.getMangledName(VD);
10796 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10797 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10798 assert(!VarSize.isZero() && "Expected non-zero size of the variable")(static_cast <bool> (!VarSize.isZero() && "Expected non-zero size of the variable"
) ? void (0) : __assert_fail ("!VarSize.isZero() && \"Expected non-zero size of the variable\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10798, __extension__
__PRETTY_FUNCTION__))
;
10799 } else {
10800 VarSize = CharUnits::Zero();
10801 }
10802 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10803 // Temp solution to prevent optimizations of the internal variables.
10804 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10805 // Do not create a "ref-variable" if the original is not also available
10806 // on the host.
10807 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10808 return;
10809 std::string RefName = getName({VarName, "ref"});
10810 if (!CGM.GetGlobalValue(RefName)) {
10811 llvm::Constant *AddrRef =
10812 getOrCreateInternalVariable(Addr->getType(), RefName);
10813 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10814 GVAddrRef->setConstant(/*Val=*/true);
10815 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10816 GVAddrRef->setInitializer(Addr);
10817 CGM.addCompilerUsedGlobal(GVAddrRef);
10818 }
10819 }
10820 } else {
10821 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10824, __extension__
__PRETTY_FUNCTION__))
10822 (*Res == OMPDeclareTargetDeclAttr::MT_To &&(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10824, __extension__
__PRETTY_FUNCTION__))
10823 HasRequiresUnifiedSharedMemory)) &&(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10824, __extension__
__PRETTY_FUNCTION__))
10824 "Declare target attribute must link or to with unified memory.")(static_cast <bool> (((*Res == OMPDeclareTargetDeclAttr
::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Declare target attribute must link or to with unified memory."
) ? void (0) : __assert_fail ("((*Res == OMPDeclareTargetDeclAttr::MT_Link) || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Declare target attribute must link or to with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10824, __extension__
__PRETTY_FUNCTION__))
;
10825 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10826 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10827 else
10828 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10829
10830 if (CGM.getLangOpts().OpenMPIsDevice) {
10831 VarName = Addr->getName();
10832 Addr = nullptr;
10833 } else {
10834 VarName = getAddrOfDeclareTargetVar(VD).getName();
10835 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10836 }
10837 VarSize = CGM.getPointerSize();
10838 Linkage = llvm::GlobalValue::WeakAnyLinkage;
10839 }
10840
10841 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10842 VarName, Addr, VarSize, Flags, Linkage);
10843}
10844
10845bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10846 if (isa<FunctionDecl>(GD.getDecl()) ||
10847 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10848 return emitTargetFunctions(GD);
10849
10850 return emitTargetGlobalVariable(GD);
10851}
10852
10853void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10854 for (const VarDecl *VD : DeferredGlobalVariables) {
10855 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10856 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10857 if (!Res)
10858 continue;
10859 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10860 !HasRequiresUnifiedSharedMemory) {
10861 CGM.EmitGlobal(VD);
10862 } else {
10863 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10866, __extension__
__PRETTY_FUNCTION__))
10864 (*Res == OMPDeclareTargetDeclAttr::MT_To &&(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10866, __extension__
__PRETTY_FUNCTION__))
10865 HasRequiresUnifiedSharedMemory)) &&(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10866, __extension__
__PRETTY_FUNCTION__))
10866 "Expected link clause or to clause with unified memory.")(static_cast <bool> ((*Res == OMPDeclareTargetDeclAttr::
MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To &&
HasRequiresUnifiedSharedMemory)) && "Expected link clause or to clause with unified memory."
) ? void (0) : __assert_fail ("(*Res == OMPDeclareTargetDeclAttr::MT_Link || (*Res == OMPDeclareTargetDeclAttr::MT_To && HasRequiresUnifiedSharedMemory)) && \"Expected link clause or to clause with unified memory.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10866, __extension__
__PRETTY_FUNCTION__))
;
10867 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10868 }
10869 }
10870}
10871
10872void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10873 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10874 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&(static_cast <bool> (isOpenMPTargetExecutionDirective(D
.getDirectiveKind()) && " Expected target-based directive."
) ? void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && \" Expected target-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10875, __extension__
__PRETTY_FUNCTION__))
10875 " Expected target-based directive.")(static_cast <bool> (isOpenMPTargetExecutionDirective(D
.getDirectiveKind()) && " Expected target-based directive."
) ? void (0) : __assert_fail ("isOpenMPTargetExecutionDirective(D.getDirectiveKind()) && \" Expected target-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10875, __extension__
__PRETTY_FUNCTION__))
;
10876}
10877
10878void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10879 for (const OMPClause *Clause : D->clauselists()) {
10880 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10881 HasRequiresUnifiedSharedMemory = true;
10882 } else if (const auto *AC =
10883 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10884 switch (AC->getAtomicDefaultMemOrderKind()) {
10885 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10886 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10887 break;
10888 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10889 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10890 break;
10891 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10892 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10893 break;
10894 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10895 break;
10896 }
10897 }
10898 }
10899}
10900
// Returns the default ordering for atomic operations, as recorded from a
// 'requires atomic_default_mem_order' clause by processRequiresDirective.
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10904
10905bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10906 LangAS &AS) {
10907 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10908 return false;
10909 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10910 switch(A->getAllocatorType()) {
10911 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10912 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10913 // Not supported, fallback to the default mem space.
10914 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10915 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10916 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10917 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10918 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10919 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10920 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10921 AS = LangAS::Default;
10922 return true;
10923 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10924 llvm_unreachable("Expected predefined allocator for the variables with the "::llvm::llvm_unreachable_internal("Expected predefined allocator for the variables with the "
"static storage.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10925
)
10925 "static storage.")::llvm::llvm_unreachable_internal("Expected predefined allocator for the variables with the "
"static storage.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10925
)
;
10926 }
10927 return false;
10928}
10929
// Returns true once a '#pragma omp requires unified_shared_memory' directive
// has been processed for this translation unit.
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10933
10934CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10935 CodeGenModule &CGM)
10936 : CGM(CGM) {
10937 if (CGM.getLangOpts().OpenMPIsDevice) {
10938 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10939 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10940 }
10941}
10942
// Restores the auto-marking flag saved by the constructor (device-side
// compilation only; the constructor did not touch it otherwise).
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
10947
10948bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10949 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
10950 return true;
10951
10952 const auto *D = cast<FunctionDecl>(GD.getDecl());
10953 // Do not to emit function if it is marked as declare target as it was already
10954 // emitted.
10955 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10956 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
10957 if (auto *F = dyn_cast_or_null<llvm::Function>(
10958 CGM.GetGlobalValue(CGM.getMangledName(GD))))
10959 return !F->isDeclaration();
10960 return false;
10961 }
10962 return true;
10963 }
10964
10965 return !AlreadyEmittedTargetDecls.insert(D).second;
10966}
10967
10968llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
10969 // If we don't have entries or if we are emitting code for the device, we
10970 // don't need to do anything.
10971 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
10972 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
10973 (OffloadEntriesInfoManager.empty() &&
10974 !HasEmittedDeclareTargetRegion &&
10975 !HasEmittedTargetRegion))
10976 return nullptr;
10977
10978 // Create and register the function that handles the requires directives.
10979 ASTContext &C = CGM.getContext();
10980
10981 llvm::Function *RequiresRegFn;
10982 {
10983 CodeGenFunction CGF(CGM);
10984 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
10985 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
10986 std::string ReqName = getName({"omp_offloading", "requires_reg"});
10987 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
10988 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
10989 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
10990 // TODO: check for other requires clauses.
10991 // The requires directive takes effect only when a target region is
10992 // present in the compilation unit. Otherwise it is ignored and not
10993 // passed to the runtime. This avoids the runtime from throwing an error
10994 // for mismatching requires clauses across compilation units that don't
10995 // contain at least 1 target region.
10996 assert((HasEmittedTargetRegion ||(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10999, __extension__
__PRETTY_FUNCTION__))
10997 HasEmittedDeclareTargetRegion ||(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10999, __extension__
__PRETTY_FUNCTION__))
10998 !OffloadEntriesInfoManager.empty()) &&(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10999, __extension__
__PRETTY_FUNCTION__))
10999 "Target or declare target region expected.")(static_cast <bool> ((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion
|| !OffloadEntriesInfoManager.empty()) && "Target or declare target region expected."
) ? void (0) : __assert_fail ("(HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || !OffloadEntriesInfoManager.empty()) && \"Target or declare target region expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 10999, __extension__
__PRETTY_FUNCTION__))
;
11000 if (HasRequiresUnifiedSharedMemory)
11001 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11002 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11003 CGM.getModule(), OMPRTL___tgt_register_requires),
11004 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11005 CGF.FinishFunction();
11006 }
11007 return RequiresRegFn;
11008}
11009
11010void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11011 const OMPExecutableDirective &D,
11012 SourceLocation Loc,
11013 llvm::Function *OutlinedFn,
11014 ArrayRef<llvm::Value *> CapturedVars) {
11015 if (!CGF.HaveInsertPoint())
11016 return;
11017
11018 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11019 CodeGenFunction::RunCleanupsScope Scope(CGF);
11020
11021 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11022 llvm::Value *Args[] = {
11023 RTLoc,
11024 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11025 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11026 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11027 RealArgs.append(std::begin(Args), std::end(Args));
11028 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11029
11030 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11031 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11032 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11033}
11034
11035void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11036 const Expr *NumTeams,
11037 const Expr *ThreadLimit,
11038 SourceLocation Loc) {
11039 if (!CGF.HaveInsertPoint())
11040 return;
11041
11042 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11043
11044 llvm::Value *NumTeamsVal =
11045 NumTeams
11046 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11047 CGF.CGM.Int32Ty, /* isSigned = */ true)
11048 : CGF.Builder.getInt32(0);
11049
11050 llvm::Value *ThreadLimitVal =
11051 ThreadLimit
11052 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11053 CGF.CGM.Int32Ty, /* isSigned = */ true)
11054 : CGF.Builder.getInt32(0);
11055
11056 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
11057 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11058 ThreadLimitVal};
11059 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11060 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11061 PushNumTeamsArgs);
11062}
11063
11064void CGOpenMPRuntime::emitTargetDataCalls(
11065 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11066 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11067 if (!CGF.HaveInsertPoint())
11068 return;
11069
11070 // Action used to replace the default codegen action and turn privatization
11071 // off.
11072 PrePostActionTy NoPrivAction;
11073
11074 // Generate the code for the opening of the data environment. Capture all the
11075 // arguments of the runtime call by reference because they are used in the
11076 // closing of the region.
11077 auto &&BeginThenGen = [this, &D, Device, &Info,
11078 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11079 // Fill up the arrays with all the mapped variables.
11080 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11081
11082 // Get map clause information.
11083 MappableExprsHandler MEHandler(D, CGF);
11084 MEHandler.generateAllInfo(CombinedInfo);
11085
11086 // Fill up the arrays and create the arguments.
11087 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11088 /*IsNonContiguous=*/true);
11089
11090 llvm::Value *BasePointersArrayArg = nullptr;
11091 llvm::Value *PointersArrayArg = nullptr;
11092 llvm::Value *SizesArrayArg = nullptr;
11093 llvm::Value *MapTypesArrayArg = nullptr;
11094 llvm::Value *MapNamesArrayArg = nullptr;
11095 llvm::Value *MappersArrayArg = nullptr;
11096 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11097 SizesArrayArg, MapTypesArrayArg,
11098 MapNamesArrayArg, MappersArrayArg, Info);
11099
11100 // Emit device ID if any.
11101 llvm::Value *DeviceID = nullptr;
11102 if (Device) {
11103 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11104 CGF.Int64Ty, /*isSigned=*/true);
11105 } else {
11106 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11107 }
11108
11109 // Emit the number of elements in the offloading arrays.
11110 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11111 //
11112 // Source location for the ident struct
11113 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11114
11115 llvm::Value *OffloadingArgs[] = {RTLoc,
11116 DeviceID,
11117 PointerNum,
11118 BasePointersArrayArg,
11119 PointersArrayArg,
11120 SizesArrayArg,
11121 MapTypesArrayArg,
11122 MapNamesArrayArg,
11123 MappersArrayArg};
11124 CGF.EmitRuntimeCall(
11125 OMPBuilder.getOrCreateRuntimeFunction(
11126 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11127 OffloadingArgs);
11128
11129 // If device pointer privatization is required, emit the body of the region
11130 // here. It will have to be duplicated: with and without privatization.
11131 if (!Info.CaptureDeviceAddrMap.empty())
11132 CodeGen(CGF);
11133 };
11134
11135 // Generate code for the closing of the data region.
11136 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11137 PrePostActionTy &) {
11138 assert(Info.isValid() && "Invalid data environment closing arguments.")(static_cast <bool> (Info.isValid() && "Invalid data environment closing arguments."
) ? void (0) : __assert_fail ("Info.isValid() && \"Invalid data environment closing arguments.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11138, __extension__
__PRETTY_FUNCTION__))
;
11139
11140 llvm::Value *BasePointersArrayArg = nullptr;
11141 llvm::Value *PointersArrayArg = nullptr;
11142 llvm::Value *SizesArrayArg = nullptr;
11143 llvm::Value *MapTypesArrayArg = nullptr;
11144 llvm::Value *MapNamesArrayArg = nullptr;
11145 llvm::Value *MappersArrayArg = nullptr;
11146 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11147 SizesArrayArg, MapTypesArrayArg,
11148 MapNamesArrayArg, MappersArrayArg, Info,
11149 {/*ForEndCall=*/true});
11150
11151 // Emit device ID if any.
11152 llvm::Value *DeviceID = nullptr;
11153 if (Device) {
11154 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11155 CGF.Int64Ty, /*isSigned=*/true);
11156 } else {
11157 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11158 }
11159
11160 // Emit the number of elements in the offloading arrays.
11161 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11162
11163 // Source location for the ident struct
11164 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11165
11166 llvm::Value *OffloadingArgs[] = {RTLoc,
11167 DeviceID,
11168 PointerNum,
11169 BasePointersArrayArg,
11170 PointersArrayArg,
11171 SizesArrayArg,
11172 MapTypesArrayArg,
11173 MapNamesArrayArg,
11174 MappersArrayArg};
11175 CGF.EmitRuntimeCall(
11176 OMPBuilder.getOrCreateRuntimeFunction(
11177 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11178 OffloadingArgs);
11179 };
11180
11181 // If we need device pointer privatization, we need to emit the body of the
11182 // region with no privatization in the 'else' branch of the conditional.
11183 // Otherwise, we don't have to do anything.
11184 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11185 PrePostActionTy &) {
11186 if (!Info.CaptureDeviceAddrMap.empty()) {
11187 CodeGen.setAction(NoPrivAction);
11188 CodeGen(CGF);
11189 }
11190 };
11191
11192 // We don't have to do anything to close the region if the if clause evaluates
11193 // to false.
11194 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11195
11196 if (IfCond) {
11197 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11198 } else {
11199 RegionCodeGenTy RCG(BeginThenGen);
11200 RCG(CGF);
11201 }
11202
11203 // If we don't require privatization of device pointers, we emit the body in
11204 // between the runtime calls. This avoids duplicating the body code.
11205 if (Info.CaptureDeviceAddrMap.empty()) {
11206 CodeGen.setAction(NoPrivAction);
11207 CodeGen(CGF);
11208 }
11209
11210 if (IfCond) {
11211 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11212 } else {
11213 RegionCodeGenTy RCG(EndThenGen);
11214 RCG(CGF);
11215 }
11216}
11217
// Emits the single runtime call implementing a standalone 'omp target enter
// data', 'omp target exit data' or 'omp target update' directive: it builds
// the offloading arrays for the map clauses, then calls the matching
// __tgt_target_data_{begin,end,update}[_nowait]_mapper entry point, guarded
// by the 'if' clause (when present) and wrapped in an outer task when a
// 'depend' or 'nowait' clause requires it.
11218 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11219 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11220 const Expr *Device) {
11221 if (!CGF.HaveInsertPoint())
11222 return;
11223
11224 assert((isa<OMPTargetEnterDataDirective>(D) ||(static_cast <bool> ((isa<OMPTargetEnterDataDirective
>(D) || isa<OMPTargetExitDataDirective>(D) || isa<
OMPTargetUpdateDirective>(D)) && "Expecting either target enter, exit data, or update directives."
) ? void (0) : __assert_fail ("(isa<OMPTargetEnterDataDirective>(D) || isa<OMPTargetExitDataDirective>(D) || isa<OMPTargetUpdateDirective>(D)) && \"Expecting either target enter, exit data, or update directives.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11227, __extension__
__PRETTY_FUNCTION__))
11225 isa<OMPTargetExitDataDirective>(D) ||(static_cast <bool> ((isa<OMPTargetEnterDataDirective
>(D) || isa<OMPTargetExitDataDirective>(D) || isa<
OMPTargetUpdateDirective>(D)) && "Expecting either target enter, exit data, or update directives."
) ? void (0) : __assert_fail ("(isa<OMPTargetEnterDataDirective>(D) || isa<OMPTargetExitDataDirective>(D) || isa<OMPTargetUpdateDirective>(D)) && \"Expecting either target enter, exit data, or update directives.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11227, __extension__
__PRETTY_FUNCTION__))
11226 isa<OMPTargetUpdateDirective>(D)) &&(static_cast <bool> ((isa<OMPTargetEnterDataDirective
>(D) || isa<OMPTargetExitDataDirective>(D) || isa<
OMPTargetUpdateDirective>(D)) && "Expecting either target enter, exit data, or update directives."
) ? void (0) : __assert_fail ("(isa<OMPTargetEnterDataDirective>(D) || isa<OMPTargetExitDataDirective>(D) || isa<OMPTargetUpdateDirective>(D)) && \"Expecting either target enter, exit data, or update directives.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11227, __extension__
__PRETTY_FUNCTION__))
11227 "Expecting either target enter, exit data, or update directives.")(static_cast <bool> ((isa<OMPTargetEnterDataDirective
>(D) || isa<OMPTargetExitDataDirective>(D) || isa<
OMPTargetUpdateDirective>(D)) && "Expecting either target enter, exit data, or update directives."
) ? void (0) : __assert_fail ("(isa<OMPTargetEnterDataDirective>(D) || isa<OMPTargetExitDataDirective>(D) || isa<OMPTargetUpdateDirective>(D)) && \"Expecting either target enter, exit data, or update directives.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11227, __extension__
__PRETTY_FUNCTION__))
;
11228
11229 CodeGenFunction::OMPTargetDataInfo InputInfo;
 // Captured by reference in both lambdas below; filled in by TargetThenGen
 // before it invokes ThenGen, so ThenGen sees the finished arrays.
11230 llvm::Value *MapTypesArray = nullptr;
11231 llvm::Value *MapNamesArray = nullptr;
11232 // Generate the code for the opening of the data environment.
11233 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11234 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11235 // Emit device ID if any.
11236 llvm::Value *DeviceID = nullptr;
11237 if (Device) {
11238 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11239 CGF.Int64Ty, /*isSigned=*/true);
11240 } else {
11241 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11242 }
11243
11244 // Emit the number of elements in the offloading arrays.
11245 llvm::Constant *PointerNum =
11246 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11247
11248 // Source location for the ident struct
11249 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11250
11251 llvm::Value *OffloadingArgs[] = {RTLoc,
11252 DeviceID,
11253 PointerNum,
11254 InputInfo.BasePointersArray.getPointer(),
11255 InputInfo.PointersArray.getPointer(),
11256 InputInfo.SizesArray.getPointer(),
11257 MapTypesArray,
11258 MapNamesArray,
11259 InputInfo.MappersArray.getPointer()};
11260
11261 // Select the right runtime function call for each standalone
11262 // directive.
11263 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11264 RuntimeFunction RTLFn;
11265 switch (D.getDirectiveKind()) {
11266 case OMPD_target_enter_data:
11267 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11268 : OMPRTL___tgt_target_data_begin_mapper;
11269 break;
11270 case OMPD_target_exit_data:
11271 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11272 : OMPRTL___tgt_target_data_end_mapper;
11273 break;
11274 case OMPD_target_update:
11275 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11276 : OMPRTL___tgt_target_data_update_mapper;
11277 break;
 // All remaining directive kinds are rejected by Sema/the caller; listed
 // explicitly (instead of relying on 'default' alone) so that adding a new
 // OpenMP directive kind triggers a -Wswitch warning here.
11278 case OMPD_parallel:
11279 case OMPD_for:
11280 case OMPD_parallel_for:
11281 case OMPD_parallel_master:
11282 case OMPD_parallel_sections:
11283 case OMPD_for_simd:
11284 case OMPD_parallel_for_simd:
11285 case OMPD_cancel:
11286 case OMPD_cancellation_point:
11287 case OMPD_ordered:
11288 case OMPD_threadprivate:
11289 case OMPD_allocate:
11290 case OMPD_task:
11291 case OMPD_simd:
11292 case OMPD_tile:
11293 case OMPD_unroll:
11294 case OMPD_sections:
11295 case OMPD_section:
11296 case OMPD_single:
11297 case OMPD_master:
11298 case OMPD_critical:
11299 case OMPD_taskyield:
11300 case OMPD_barrier:
11301 case OMPD_taskwait:
11302 case OMPD_taskgroup:
11303 case OMPD_atomic:
11304 case OMPD_flush:
11305 case OMPD_depobj:
11306 case OMPD_scan:
11307 case OMPD_teams:
11308 case OMPD_target_data:
11309 case OMPD_distribute:
11310 case OMPD_distribute_simd:
11311 case OMPD_distribute_parallel_for:
11312 case OMPD_distribute_parallel_for_simd:
11313 case OMPD_teams_distribute:
11314 case OMPD_teams_distribute_simd:
11315 case OMPD_teams_distribute_parallel_for:
11316 case OMPD_teams_distribute_parallel_for_simd:
11317 case OMPD_declare_simd:
11318 case OMPD_declare_variant:
11319 case OMPD_begin_declare_variant:
11320 case OMPD_end_declare_variant:
11321 case OMPD_declare_target:
11322 case OMPD_end_declare_target:
11323 case OMPD_declare_reduction:
11324 case OMPD_declare_mapper:
11325 case OMPD_taskloop:
11326 case OMPD_taskloop_simd:
11327 case OMPD_master_taskloop:
11328 case OMPD_master_taskloop_simd:
11329 case OMPD_parallel_master_taskloop:
11330 case OMPD_parallel_master_taskloop_simd:
11331 case OMPD_target:
11332 case OMPD_target_simd:
11333 case OMPD_target_teams_distribute:
11334 case OMPD_target_teams_distribute_simd:
11335 case OMPD_target_teams_distribute_parallel_for:
11336 case OMPD_target_teams_distribute_parallel_for_simd:
11337 case OMPD_target_teams:
11338 case OMPD_target_parallel:
11339 case OMPD_target_parallel_for:
11340 case OMPD_target_parallel_for_simd:
11341 case OMPD_requires:
11342 case OMPD_metadirective:
11343 case OMPD_unknown:
11344 default:
11345 llvm_unreachable("Unexpected standalone target data directive.")::llvm::llvm_unreachable_internal("Unexpected standalone target data directive."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11345)
;
11346 break;
11347 }
11348 CGF.EmitRuntimeCall(
11349 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11350 OffloadingArgs);
11351 };
11352
 // Wrapper that materializes the offloading arrays for the map clauses and
 // then emits ThenGen either inside an outer task (required by 'depend' or
 // 'nowait') or inline.
11353 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11354 &MapNamesArray](CodeGenFunction &CGF,
11355 PrePostActionTy &) {
11356 // Fill up the arrays with all the mapped variables.
11357 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11358
11359 // Get map clause information.
11360 MappableExprsHandler MEHandler(D, CGF);
11361 MEHandler.generateAllInfo(CombinedInfo);
11362
11363 TargetDataInfo Info;
11364 // Fill up the arrays and create the arguments.
11365 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11366 /*IsNonContiguous=*/true);
11367 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11368 D.hasClausesOfKind<OMPNowaitClause>();
11369 emitOffloadingArraysArgument(
11370 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11371 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11372 {/*ForEndCall=*/false});
 // Publish the results into the variables ThenGen captured by reference.
11373 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11374 InputInfo.BasePointersArray =
11375 Address(Info.BasePointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11376 InputInfo.PointersArray =
11377 Address(Info.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11378 InputInfo.SizesArray =
11379 Address(Info.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
11380 InputInfo.MappersArray =
11381 Address(Info.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
11382 MapTypesArray = Info.MapTypesArray;
11383 MapNamesArray = Info.MapNamesArray;
11384 if (RequiresOuterTask)
11385 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11386 else
11387 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11388 };
11389
 // Guard the whole thing behind the 'if' clause when present (nothing is
 // emitted in the else branch); otherwise emit unconditionally.
11390 if (IfCond) {
11391 emitIfClause(CGF, IfCond, TargetThenGen,
11392 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11393 } else {
11394 RegionCodeGenTy ThenRCG(TargetThenGen);
11395 ThenRCG(CGF);
11396 }
11397}
11398
11399namespace {
11400 /// Kind of parameter in a function with 'declare simd' directive.
11401enum ParamKindTy {
11402 Linear,     ///< 'linear' clause, default modifier; mangled as 'l'.
11403 LinearRef,  ///< 'linear(ref(...))'; mangled as 'R'.
11404 LinearUVal, ///< 'linear(uval(...))'; mangled as 'U'.
11405 LinearVal,  ///< 'linear(val(...))'; mangled as 'L'.
11406 Uniform,    ///< 'uniform' clause; mangled as 'u'.
11407 Vector,     ///< Default classification (no clause); mangled as 'v'.
11408};
11409/// Attribute set of the parameter.
11410struct ParamAttrTy {
 // Classification of the parameter; everything is Vector unless a clause
 // says otherwise.
11411 ParamKindTy Kind = Vector;
 // Linear step (constant) or, when HasVarStride is set, the value emitted
 // after 's' in the mangled name (see mangleVectorParameters).
11412 llvm::APSInt StrideOrArg;
 // Alignment from the 'aligned' clause; mangled as 'a<N>' when non-zero.
11413 llvm::APSInt Alignment;
 // True when the linear step is not a compile-time constant.
11414 bool HasVarStride = false;
11415};
11416} // namespace
11417
11418static unsigned evaluateCDTSize(const FunctionDecl *FD,
11419 ArrayRef<ParamAttrTy> ParamAttrs) {
11420 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11421 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
11422 // of that clause. The VLEN value must be power of 2.
11423 // In other case the notion of the function`s "characteristic data type" (CDT)
11424 // is used to compute the vector length.
11425 // CDT is defined in the following order:
11426 // a) For non-void function, the CDT is the return type.
11427 // b) If the function has any non-uniform, non-linear parameters, then the
11428 // CDT is the type of the first such parameter.
11429 // c) If the CDT determined by a) or b) above is struct, union, or class
11430 // type which is pass-by-value (except for the type that maps to the
11431 // built-in complex data type), the characteristic data type is int.
11432 // d) If none of the above three cases is applicable, the CDT is int.
11433 // The VLEN is then determined based on the CDT and the size of vector
11434 // register of that ISA for which current vector version is generated. The
11435 // VLEN is computed using the formula below:
11436 // VLEN = sizeof(vector_register) / sizeof(CDT),
11437 // where vector register size specified in section 3.2.1 Registers and the
11438 // Stack Frame of original AMD64 ABI document.
11439 QualType RetType = FD->getReturnType();
11440 if (RetType.isNull())
11441 return 0;
11442 ASTContext &C = FD->getASTContext();
11443 QualType CDT;
11444 if (!RetType.isNull() && !RetType->isVoidType()) {
11445 CDT = RetType;
11446 } else {
11447 unsigned Offset = 0;
11448 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11449 if (ParamAttrs[Offset].Kind == Vector)
11450 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11451 ++Offset;
11452 }
11453 if (CDT.isNull()) {
11454 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11455 if (ParamAttrs[I + Offset].Kind == Vector) {
11456 CDT = FD->getParamDecl(I)->getType();
11457 break;
11458 }
11459 }
11460 }
11461 }
11462 if (CDT.isNull())
11463 CDT = C.IntTy;
11464 CDT = CDT->getCanonicalTypeUnqualified();
11465 if (CDT->isRecordType() || CDT->isUnionType())
11466 CDT = C.IntTy;
11467 return C.getTypeSize(CDT);
11468}
11469
11470/// Mangle the parameter part of the vector function name according to
11471/// their OpenMP classification. The mangling function is defined in
11472/// section 4.5 of the AAVFABI(2021Q1).
11473static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11474 SmallString<256> Buffer;
11475 llvm::raw_svector_ostream Out(Buffer);
11476 for (const auto &ParamAttr : ParamAttrs) {
11477 switch (ParamAttr.Kind) {
11478 case Linear:
11479 Out << 'l';
11480 break;
11481 case LinearRef:
11482 Out << 'R';
11483 break;
11484 case LinearUVal:
11485 Out << 'U';
11486 break;
11487 case LinearVal:
11488 Out << 'L';
11489 break;
11490 case Uniform:
11491 Out << 'u';
11492 break;
11493 case Vector:
11494 Out << 'v';
11495 break;
11496 }
11497 if (ParamAttr.HasVarStride)
11498 Out << "s" << ParamAttr.StrideOrArg;
11499 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
11500 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
11501 // Don't print the step value if it is not present or if it is
11502 // equal to 1.
11503 if (ParamAttr.StrideOrArg < 0)
11504 Out << 'n' << -ParamAttr.StrideOrArg;
11505 else if (ParamAttr.StrideOrArg != 1)
11506 Out << ParamAttr.StrideOrArg;
11507 }
11508
11509 if (!!ParamAttr.Alignment)
11510 Out << 'a' << ParamAttr.Alignment;
11511 }
11512
11513 return std::string(Out.str());
11514}
11515
// Attach the x86 'declare simd' vector-variant attributes ("_ZGV..." names)
// to \p Fn: one attribute per (mask flavor, ISA) combination. The VLEN is
// either the user-provided simdlen value (VLENVal) or, when absent, derived
// from the ISA vector register width and the function's CDT size.
11516static void
11517emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11518 const llvm::APSInt &VLENVal,
11519 ArrayRef<ParamAttrTy> ParamAttrs,
11520 OMPDeclareSimdDeclAttr::BranchStateTy State) {
 // ISA mangling letter and vector register width in bits, per the x86
 // vector function ABI.
11521 struct ISADataTy {
11522 char ISA;
11523 unsigned VecRegSize;
11524 };
11525 ISADataTy ISAData[] = {
11526 {
11527 'b', 128
11528 }, // SSE
11529 {
11530 'c', 256
11531 }, // AVX
11532 {
11533 'd', 256
11534 }, // AVX2
11535 {
11536 'e', 512
11537 }, // AVX512
11538 };
 // 'N' = unmasked variant, 'M' = masked; the branch state of the
 // 'declare simd' clause decides which flavors are emitted.
11539 llvm::SmallVector<char, 2> Masked;
11540 switch (State) {
11541 case OMPDeclareSimdDeclAttr::BS_Undefined:
11542 Masked.push_back('N');
11543 Masked.push_back('M');
11544 break;
11545 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11546 Masked.push_back('N');
11547 break;
11548 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11549 Masked.push_back('M');
11550 break;
11551 }
11552 for (char Mask : Masked) {
11553 for (const ISADataTy &Data : ISAData) {
11554 SmallString<256> Buffer;
11555 llvm::raw_svector_ostream Out(Buffer);
11556 Out << "_ZGV" << Data.ISA << Mask;
11557 if (!VLENVal) {
 // No simdlen clause: VLEN = register width / CDT size.
11558 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11559 assert(NumElts && "Non-zero simdlen/cdtsize expected")(static_cast <bool> (NumElts && "Non-zero simdlen/cdtsize expected"
) ? void (0) : __assert_fail ("NumElts && \"Non-zero simdlen/cdtsize expected\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11559, __extension__
__PRETTY_FUNCTION__))
;
11560 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11561 } else {
11562 Out << VLENVal;
11563 }
11564 Out << mangleVectorParameters(ParamAttrs);
11565 Out << '_' << Fn->getName();
11566 Fn->addFnAttr(Out.str());
11567 }
11568 }
11569}
11570
11571// This are the Functions that are needed to mangle the name of the
11572// vector functions generated by the compiler, according to the rules
11573// defined in the "Vector Function ABI specifications for AArch64",
11574// available at
11575// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11576
11577/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
11578static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11579 QT = QT.getCanonicalType();
11580
11581 if (QT->isVoidType())
11582 return false;
11583
11584 if (Kind == ParamKindTy::Uniform)
11585 return false;
11586
11587 if (Kind == ParamKindTy::LinearUVal || ParamKindTy::LinearRef)
11588 return false;
11589
11590 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
11591 !QT->isReferenceType())
11592 return false;
11593
11594 return true;
11595}
11596
11597/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11598static bool getAArch64PBV(QualType QT, ASTContext &C) {
11599 QT = QT.getCanonicalType();
11600 unsigned Size = C.getTypeSize(QT);
11601
11602 // Only scalars and complex within 16 bytes wide set PVB to true.
11603 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11604 return false;
11605
11606 if (QT->isFloatingType())
11607 return true;
11608
11609 if (QT->isIntegerType())
11610 return true;
11611
11612 if (QT->isPointerType())
11613 return true;
11614
11615 // TODO: Add support for complex types (section 3.1.2, item 2).
11616
11617 return false;
11618}
11619
11620/// Computes the lane size (LS) of a return type or of an input parameter,
11621/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11622/// TODO: Add support for references, section 3.2.1, item 1.
11623static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11624 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11625 QualType PTy = QT.getCanonicalType()->getPointeeType();
11626 if (getAArch64PBV(PTy, C))
11627 return C.getTypeSize(PTy);
11628 }
11629 if (getAArch64PBV(QT, C))
11630 return C.getTypeSize(QT);
11631
11632 return C.getTypeSize(C.getUIntPtrType());
11633}
11634
11635// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11636// signature of the scalar function, as defined in 3.2.2 of the
11637// AAVFABI.
// Returns the tuple (NDS, WDS, OutputBecomesInput) where the last element is
// true when the return value is not pass-by-value but still maps to vector,
// so it is passed back as an extra input lane in the vector signature.
11638static std::tuple<unsigned, unsigned, bool>
11639getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11640 QualType RetType = FD->getReturnType().getCanonicalType();
11641
11642 ASTContext &C = FD->getASTContext();
11643
11644 bool OutputBecomesInput = false;
11645
 // Collect the lane size of the return value (if non-void) and of every
 // parameter; NDS/WDS are the min/max of this list.
11646 llvm::SmallVector<unsigned, 8> Sizes;
11647 if (!RetType->isVoidType()) {
11648 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11649 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11650 OutputBecomesInput = true;
11651 }
11652 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11653 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11654 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11655 }
11656
11657 assert(!Sizes.empty() && "Unable to determine NDS and WDS.")(static_cast <bool> (!Sizes.empty() && "Unable to determine NDS and WDS."
) ? void (0) : __assert_fail ("!Sizes.empty() && \"Unable to determine NDS and WDS.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11657, __extension__
__PRETTY_FUNCTION__))
;
11658 // The LS of a function parameter / return value can only be a power
11659 // of 2, starting from 8 bits, up to 128.
11660 assert(llvm::all_of(Sizes,(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
 || Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11665, __extension__
__PRETTY_FUNCTION__))
11661 [](unsigned Size) {(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
 || Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11665, __extension__
__PRETTY_FUNCTION__))
11662 return Size == 8 || Size == 16 || Size == 32 ||(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
 || Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11665, __extension__
__PRETTY_FUNCTION__))
11663 Size == 64 || Size == 128;(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
 || Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11665, __extension__
__PRETTY_FUNCTION__))
11664 }) &&(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
 || Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11665, __extension__
__PRETTY_FUNCTION__))
11665 "Invalid size")(static_cast <bool> (llvm::all_of(Sizes, [](unsigned Size
) { return Size == 8 || Size == 16 || Size == 32 || Size == 64
 || Size == 128; }) && "Invalid size") ? void (0) : __assert_fail
("llvm::all_of(Sizes, [](unsigned Size) { return Size == 8 || Size == 16 || Size == 32 || Size == 64 || Size == 128; }) && \"Invalid size\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11665, __extension__
__PRETTY_FUNCTION__))
;
11666
11667 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11668 *std::max_element(std::begin(Sizes), std::end(Sizes)),
11669 OutputBecomesInput);
11670}
11671
11672// Function used to add the attribute. The parameter `VLEN` is
11673// templated to allow the use of "x" when targeting scalable functions
11674// for SVE.
11675template <typename T>
11676static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11677 char ISA, StringRef ParSeq,
11678 StringRef MangledName, bool OutputBecomesInput,
11679 llvm::Function *Fn) {
11680 SmallString<256> Buffer;
11681 llvm::raw_svector_ostream Out(Buffer);
11682 Out << Prefix << ISA << LMask << VLEN;
11683 if (OutputBecomesInput)
11684 Out << "v";
11685 Out << ParSeq << "_" << MangledName;
11686 Fn->addFnAttr(Out.str());
11687}
11688
11689// Helper function to generate the Advanced SIMD names depending on
11690// the value of the NDS when simdlen is not present.
// For each NDS the emitted VLEN values are chosen so that NDS * VLEN covers
// the 64-bit and/or 128-bit Advanced SIMD register widths (e.g. NDS 8 ->
// VLEN 8 and 16; NDS 64/128 -> VLEN 2 only).
11691static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11692 StringRef Prefix, char ISA,
11693 StringRef ParSeq, StringRef MangledName,
11694 bool OutputBecomesInput,
11695 llvm::Function *Fn) {
11696 switch (NDS) {
11697 case 8:
11698 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11699 OutputBecomesInput, Fn);
11700 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11701 OutputBecomesInput, Fn);
11702 break;
11703 case 16:
11704 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11705 OutputBecomesInput, Fn);
11706 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11707 OutputBecomesInput, Fn);
11708 break;
11709 case 32:
11710 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11711 OutputBecomesInput, Fn);
11712 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11713 OutputBecomesInput, Fn);
11714 break;
11715 case 64:
11716 case 128:
11717 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11718 OutputBecomesInput, Fn);
11719 break;
11720 default:
11721 llvm_unreachable("Scalar type is too wide.")::llvm::llvm_unreachable_internal("Scalar type is too wide.",
"clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11721)
;
11722 }
11723}
11724
11725/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
// Validates any user-provided 'simdlen' (UserVLEN, 0 when absent) against the
// ISA ('n' = Advanced SIMD, 's' = SVE), emitting a warning and bailing out on
// invalid combinations, then attaches the "_ZGV..."-style vector-variant
// attributes to \p Fn.
// NOTE(review): the VecRegSize parameter is not referenced in this body --
// confirm with callers whether it is still needed.
11726static void emitAArch64DeclareSimdFunction(
11727 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11728 ArrayRef<ParamAttrTy> ParamAttrs,
11729 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11730 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11731
11732 // Get basic data for building the vector signature.
11733 const auto Data = getNDSWDS(FD, ParamAttrs);
11734 const unsigned NDS = std::get<0>(Data);
11735 const unsigned WDS = std::get<1>(Data);
11736 const bool OutputBecomesInput = std::get<2>(Data);
11737
11738 // Check the values provided via `simdlen` by the user.
11739 // 1. A `simdlen(1)` doesn't produce vector signatures,
11740 if (UserVLEN == 1) {
11741 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11742 DiagnosticsEngine::Warning,
11743 "The clause simdlen(1) has no effect when targeting aarch64.");
11744 CGM.getDiags().Report(SLoc, DiagID);
11745 return;
11746 }
11747
11748 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11749 // Advanced SIMD output.
11750 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11751 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11752 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11753 "power of 2 when targeting Advanced SIMD.");
11754 CGM.getDiags().Report(SLoc, DiagID);
11755 return;
11756 }
11757
11758 // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
11759 // limits.
11760 if (ISA == 's' && UserVLEN != 0) {
11761 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11762 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11763 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11764 "lanes in the architectural constraints "
11765 "for SVE (min is 128-bit, max is "
11766 "2048-bit, by steps of 128-bit)");
11767 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11768 return;
11769 }
11770 }
11771
11772 // Sort out parameter sequence.
11773 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11774 StringRef Prefix = "_ZGV";
11775 // Generate simdlen from user input (if any).
11776 if (UserVLEN) {
11777 if (ISA == 's') {
11778 // SVE generates only a masked function.
11779 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11780 OutputBecomesInput, Fn);
11781 } else {
11782 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.")(static_cast <bool> (ISA == 'n' && "Expected ISA either 's' or 'n'."
) ? void (0) : __assert_fail ("ISA == 'n' && \"Expected ISA either 's' or 'n'.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11782, __extension__
__PRETTY_FUNCTION__))
;
11783 // Advanced SIMD generates one or two functions, depending on
11784 // the `[not]inbranch` clause.
11785 switch (State) {
11786 case OMPDeclareSimdDeclAttr::BS_Undefined:
11787 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11788 OutputBecomesInput, Fn);
11789 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11790 OutputBecomesInput, Fn);
11791 break;
11792 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11793 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11794 OutputBecomesInput, Fn);
11795 break;
11796 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11797 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11798 OutputBecomesInput, Fn);
11799 break;
11800 }
11801 }
11802 } else {
11803 // If no user simdlen is provided, follow the AAVFABI rules for
11804 // generating the vector length.
11805 if (ISA == 's') {
11806 // SVE, section 3.4.1, item 1.
11807 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11808 OutputBecomesInput, Fn);
11809 } else {
11810 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.")(static_cast <bool> (ISA == 'n' && "Expected ISA either 's' or 'n'."
) ? void (0) : __assert_fail ("ISA == 'n' && \"Expected ISA either 's' or 'n'.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11810, __extension__
__PRETTY_FUNCTION__))
;
11811 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11812 // two vector names depending on the use of the clause
11813 // `[not]inbranch`.
11814 switch (State) {
11815 case OMPDeclareSimdDeclAttr::BS_Undefined:
11816 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11817 OutputBecomesInput, Fn);
11818 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11819 OutputBecomesInput, Fn);
11820 break;
11821 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11822 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11823 OutputBecomesInput, Fn);
11824 break;
11825 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11826 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11827 OutputBecomesInput, Fn);
11828 break;
11829 }
11830 }
11831 }
11832}
11833
11834void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11835 llvm::Function *Fn) {
11836 ASTContext &C = CGM.getContext();
11837 FD = FD->getMostRecentDecl();
11838 while (FD) {
11839 // Map params to their positions in function decl.
11840 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11841 if (isa<CXXMethodDecl>(FD))
11842 ParamPositions.try_emplace(FD, 0);
11843 unsigned ParamPos = ParamPositions.size();
11844 for (const ParmVarDecl *P : FD->parameters()) {
11845 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11846 ++ParamPos;
11847 }
11848 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11849 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11850 // Mark uniform parameters.
11851 for (const Expr *E : Attr->uniforms()) {
11852 E = E->IgnoreParenImpCasts();
11853 unsigned Pos;
11854 if (isa<CXXThisExpr>(E)) {
11855 Pos = ParamPositions[FD];
11856 } else {
11857 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11858 ->getCanonicalDecl();
11859 auto It = ParamPositions.find(PVD);
11860 assert(It != ParamPositions.end() && "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11860, __extension__
__PRETTY_FUNCTION__))
;
11861 Pos = It->second;
11862 }
11863 ParamAttrs[Pos].Kind = Uniform;
11864 }
11865 // Get alignment info.
11866 auto *NI = Attr->alignments_begin();
11867 for (const Expr *E : Attr->aligneds()) {
11868 E = E->IgnoreParenImpCasts();
11869 unsigned Pos;
11870 QualType ParmTy;
11871 if (isa<CXXThisExpr>(E)) {
11872 Pos = ParamPositions[FD];
11873 ParmTy = E->getType();
11874 } else {
11875 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11876 ->getCanonicalDecl();
11877 auto It = ParamPositions.find(PVD);
11878 assert(It != ParamPositions.end() && "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11878, __extension__
__PRETTY_FUNCTION__))
;
11879 Pos = It->second;
11880 ParmTy = PVD->getType();
11881 }
11882 ParamAttrs[Pos].Alignment =
11883 (*NI)
11884 ? (*NI)->EvaluateKnownConstInt(C)
11885 : llvm::APSInt::getUnsigned(
11886 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11887 .getQuantity());
11888 ++NI;
11889 }
11890 // Mark linear parameters.
11891 auto *SI = Attr->steps_begin();
11892 auto *MI = Attr->modifiers_begin();
11893 for (const Expr *E : Attr->linears()) {
11894 E = E->IgnoreParenImpCasts();
11895 unsigned Pos;
11896 bool IsReferenceType = false;
11897 // Rescaling factor needed to compute the linear parameter
11898 // value in the mangled name.
11899 unsigned PtrRescalingFactor = 1;
11900 if (isa<CXXThisExpr>(E)) {
11901 Pos = ParamPositions[FD];
11902 auto *P = cast<PointerType>(E->getType());
11903 PtrRescalingFactor = CGM.getContext()
11904 .getTypeSizeInChars(P->getPointeeType())
11905 .getQuantity();
11906 } else {
11907 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11908 ->getCanonicalDecl();
11909 auto It = ParamPositions.find(PVD);
11910 assert(It != ParamPositions.end() && "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11910, __extension__
__PRETTY_FUNCTION__))
;
11911 Pos = It->second;
11912 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11913 PtrRescalingFactor = CGM.getContext()
11914 .getTypeSizeInChars(P->getPointeeType())
11915 .getQuantity();
11916 else if (PVD->getType()->isReferenceType()) {
11917 IsReferenceType = true;
11918 PtrRescalingFactor =
11919 CGM.getContext()
11920 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11921 .getQuantity();
11922 }
11923 }
11924 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11925 if (*MI == OMPC_LINEAR_ref)
11926 ParamAttr.Kind = LinearRef;
11927 else if (*MI == OMPC_LINEAR_uval)
11928 ParamAttr.Kind = LinearUVal;
11929 else if (IsReferenceType)
11930 ParamAttr.Kind = LinearVal;
11931 else
11932 ParamAttr.Kind = Linear;
11933 // Assuming a stride of 1, for `linear` without modifiers.
11934 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11935 if (*SI) {
11936 Expr::EvalResult Result;
11937 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11938 if (const auto *DRE =
11939 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11940 if (const auto *StridePVD =
11941 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11942 ParamAttr.HasVarStride = true;
11943 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11944 assert(It != ParamPositions.end() &&(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11945, __extension__
__PRETTY_FUNCTION__))
11945 "Function parameter not found")(static_cast <bool> (It != ParamPositions.end() &&
"Function parameter not found") ? void (0) : __assert_fail (
"It != ParamPositions.end() && \"Function parameter not found\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 11945, __extension__
__PRETTY_FUNCTION__))
;
11946 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11947 }
11948 }
11949 } else {
11950 ParamAttr.StrideOrArg = Result.Val.getInt();
11951 }
11952 }
11953 // If we are using a linear clause on a pointer, we need to
11954 // rescale the value of linear_step with the byte size of the
11955 // pointee type.
11956 if (!ParamAttr.HasVarStride &&
11957 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11958 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11959 ++SI;
11960 ++MI;
11961 }
11962 llvm::APSInt VLENVal;
11963 SourceLocation ExprLoc;
11964 const Expr *VLENExpr = Attr->getSimdlen();
11965 if (VLENExpr) {
11966 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11967 ExprLoc = VLENExpr->getExprLoc();
11968 }
11969 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11970 if (CGM.getTriple().isX86()) {
11971 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11972 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11973 unsigned VLEN = VLENVal.getExtValue();
11974 StringRef MangledName = Fn->getName();
11975 if (CGM.getTarget().hasFeature("sve"))
11976 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11977 MangledName, 's', 128, Fn, ExprLoc);
11978 if (CGM.getTarget().hasFeature("neon"))
11979 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11980 MangledName, 'n', 128, Fn, ExprLoc);
11981 }
11982 }
11983 FD = FD->getPreviousDecl();
11984 }
11985}
11986
11987namespace {
11988/// Cleanup action for doacross support.
11989class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11990public:
11991 static const int DoacrossFinArgs = 2;
11992
11993private:
11994 llvm::FunctionCallee RTLFn;
11995 llvm::Value *Args[DoacrossFinArgs];
11996
11997public:
11998 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11999 ArrayRef<llvm::Value *> CallArgs)
12000 : RTLFn(RTLFn) {
12001 assert(CallArgs.size() == DoacrossFinArgs)(static_cast <bool> (CallArgs.size() == DoacrossFinArgs
) ? void (0) : __assert_fail ("CallArgs.size() == DoacrossFinArgs"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12001, __extension__
__PRETTY_FUNCTION__))
;
12002 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12003 }
12004 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12005 if (!CGF.HaveInsertPoint())
12006 return;
12007 CGF.EmitRuntimeCall(RTLFn, Args);
12008 }
12009};
12010} // namespace
12011
// Emits the doacross-loop initialization for an 'ordered(n)' loop directive:
// fills a local array of kmp_dim structs (one per loop dimension) and calls
// __kmpc_doacross_init, then pushes a cleanup that calls
// __kmpc_doacross_fini at region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  // Nothing to do if codegen for this block was already terminated.
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  // Lazily build (and cache in KmpDimTy) the kmp_dim record type on first use.
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  // One kmp_dim entry per loop dimension.
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  // Zero-initialize the whole array, then fill the fields that matter; the
  // 'lo' field is left at its zero-initialized value.
  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    // The iteration count is converted to kmp_int64 at the expression's
    // source location.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  // Register the matching __kmpc_doacross_fini call to run at region exit
  // (normal and EH paths), using the directive's end location.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
12082
12083void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12084 const OMPDependClause *C) {
12085 QualType Int64Ty =
12086 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12087 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12088 QualType ArrayTy = CGM.getContext().getConstantArrayType(
12089 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12090 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12091 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12092 const Expr *CounterVal = C->getLoopData(I);
12093 assert(CounterVal)(static_cast <bool> (CounterVal) ? void (0) : __assert_fail
("CounterVal", "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12093
, __extension__ __PRETTY_FUNCTION__))
;
12094 llvm::Value *CntVal = CGF.EmitScalarConversion(
12095 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12096 CounterVal->getExprLoc());
12097 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12098 /*Volatile=*/false, Int64Ty);
12099 }
12100 llvm::Value *Args[] = {
12101 emitUpdateLocation(CGF, C->getBeginLoc()),
12102 getThreadID(CGF, C->getBeginLoc()),
12103 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12104 llvm::FunctionCallee RTLFn;
12105 if (C->getDependencyKind() == OMPC_DEPEND_source) {
12106 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12107 OMPRTL___kmpc_doacross_post);
12108 } else {
12109 assert(C->getDependencyKind() == OMPC_DEPEND_sink)(static_cast <bool> (C->getDependencyKind() == OMPC_DEPEND_sink
) ? void (0) : __assert_fail ("C->getDependencyKind() == OMPC_DEPEND_sink"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12109, __extension__
__PRETTY_FUNCTION__))
;
12110 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12111 OMPRTL___kmpc_doacross_wait);
12112 }
12113 CGF.EmitRuntimeCall(RTLFn, Args);
12114}
12115
12116void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12117 llvm::FunctionCallee Callee,
12118 ArrayRef<llvm::Value *> Args) const {
12119 assert(Loc.isValid() && "Outlined function call location must be valid.")(static_cast <bool> (Loc.isValid() && "Outlined function call location must be valid."
) ? void (0) : __assert_fail ("Loc.isValid() && \"Outlined function call location must be valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12119, __extension__
__PRETTY_FUNCTION__))
;
12120 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12121
12122 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12123 if (Fn->doesNotThrow()) {
12124 CGF.EmitNounwindRuntimeCall(Fn, Args);
12125 return;
12126 }
12127 }
12128 CGF.EmitRuntimeCall(Callee, Args);
12129}
12130
// Default implementation: emitting a call to an outlined OpenMP function is
// the same as emitting any runtime call (see emitCall above). Target-specific
// runtimes can override this to remap arguments first.
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
12136
12137void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12138 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12139 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12140 HasEmittedDeclareTargetRegion = true;
12141}
12142
// Default implementation: no parameter remapping is performed, so the native
// parameter's own local storage is returned and \p TargetParam is unused.
// NOTE(review): overriding runtimes presumably remap TargetParam — confirm
// against the device-runtime subclasses.
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
12148
12149/// Return allocator value from expression, or return a null allocator (default
12150/// when no allocator specified).
12151static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12152 const Expr *Allocator) {
12153 llvm::Value *AllocVal;
12154 if (Allocator) {
12155 AllocVal = CGF.EmitScalarExpr(Allocator);
12156 // According to the standard, the original allocator type is a enum
12157 // (integer). Convert to pointer type, if required.
12158 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12159 CGF.getContext().VoidPtrTy,
12160 Allocator->getExprLoc());
12161 } else {
12162 // If no allocator specified, it defaults to the null allocator.
12163 AllocVal = llvm::Constant::getNullValue(
12164 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12165 }
12166 return AllocVal;
12167}
12168
12169/// Return the alignment from an allocate directive if present.
12170static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
12171 llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
12172
12173 if (!AllocateAlignment)
12174 return nullptr;
12175
12176 return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
12177}
12178
// Returns the address to use for local variable \p VD, handling two special
// cases: (1) variables of an untied task, whose addresses are looked up in
// UntiedLocalVarsStack, and (2) variables with an 'omp allocate' attribute,
// which are allocated via __kmpc_alloc/__kmpc_aligned_alloc and freed by a
// pushed cleanup calling __kmpc_free. Returns Address::invalid() when the
// variable needs no special handling.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, look up the variable's
  // (address, real address) pair recorded for it.
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA: size is a runtime value; round it up to the declared alignment.
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      // Fixed-size type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    // __kmpc_aligned_alloc takes an extra alignment argument; __kmpc_alloc
    // does not, so the argument list differs by one entry.
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    // Cast the raw void* returned by the runtime to a pointer to the
    // variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    // For untied tasks, publish the allocated pointer through the recorded
    // address slot so it survives task switches.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      // Source location is stored as its raw encoding because cleanups must
      // hold trivially-copyable state.
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        // __kmpc_free(gtid, ptr, allocator)
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    // Prefer the untied task's real address when available; otherwise wrap
    // the freshly allocated pointer.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
12278
12279bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12280 const VarDecl *VD) const {
12281 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12282 if (It == FunctionToUntiedTaskStackMap.end())
12283 return false;
12284 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12285}
12286
12287CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12288 CodeGenModule &CGM, const OMPLoopDirective &S)
12289 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12290 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12290, __extension__
__PRETTY_FUNCTION__))
;
12291 if (!NeedToPush)
12292 return;
12293 NontemporalDeclsSet &DS =
12294 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12295 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12296 for (const Stmt *Ref : C->private_refs()) {
12297 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12298 const ValueDecl *VD;
12299 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12300 VD = DRE->getDecl();
12301 } else {
12302 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12303 assert((ME->isImplicitCXXThis() ||(static_cast <bool> ((ME->isImplicitCXXThis() || isa
<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts(
))) && "Expected member of current class.") ? void (0
) : __assert_fail ("(ME->isImplicitCXXThis() || isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && \"Expected member of current class.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12305, __extension__
__PRETTY_FUNCTION__))
12304 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&(static_cast <bool> ((ME->isImplicitCXXThis() || isa
<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts(
))) && "Expected member of current class.") ? void (0
) : __assert_fail ("(ME->isImplicitCXXThis() || isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && \"Expected member of current class.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12305, __extension__
__PRETTY_FUNCTION__))
12305 "Expected member of current class.")(static_cast <bool> ((ME->isImplicitCXXThis() || isa
<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts(
))) && "Expected member of current class.") ? void (0
) : __assert_fail ("(ME->isImplicitCXXThis() || isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) && \"Expected member of current class.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12305, __extension__
__PRETTY_FUNCTION__))
;
12306 VD = ME->getMemberDecl();
12307 }
12308 DS.insert(VD);
12309 }
12310 }
12311}
12312
12313CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12314 if (!NeedToPush)
12315 return;
12316 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12317}
12318
12319CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12320 CodeGenFunction &CGF,
12321 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12322 std::pair<Address, Address>> &LocalVars)
12323 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12324 if (!NeedToPush)
12325 return;
12326 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12327 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12328 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12329}
12330
12331CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12332 if (!NeedToPush)
12333 return;
12334 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12335}
12336
12337bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12338 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12338, __extension__
__PRETTY_FUNCTION__))
;
12339
12340 return llvm::any_of(
12341 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12342 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12343}
12344
12345void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12346 const OMPExecutableDirective &S,
12347 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12348 const {
12349 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12350 // Vars in target/task regions must be excluded completely.
12351 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12352 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12353 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12354 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12355 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12356 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12357 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12358 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12359 }
12360 }
12361 // Exclude vars in private clauses.
12362 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12363 for (const Expr *Ref : C->varlists()) {
12364 if (!Ref->getType()->isScalarType())
12365 continue;
12366 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12367 if (!DRE)
12368 continue;
12369 NeedToCheckForLPCs.insert(DRE->getDecl());
12370 }
12371 }
12372 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12373 for (const Expr *Ref : C->varlists()) {
12374 if (!Ref->getType()->isScalarType())
12375 continue;
12376 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12377 if (!DRE)
12378 continue;
12379 NeedToCheckForLPCs.insert(DRE->getDecl());
12380 }
12381 }
12382 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12383 for (const Expr *Ref : C->varlists()) {
12384 if (!Ref->getType()->isScalarType())
12385 continue;
12386 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12387 if (!DRE)
12388 continue;
12389 NeedToCheckForLPCs.insert(DRE->getDecl());
12390 }
12391 }
12392 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12393 for (const Expr *Ref : C->varlists()) {
12394 if (!Ref->getType()->isScalarType())
12395 continue;
12396 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12397 if (!DRE)
12398 continue;
12399 NeedToCheckForLPCs.insert(DRE->getDecl());
12400 }
12401 }
12402 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12403 for (const Expr *Ref : C->varlists()) {
12404 if (!Ref->getType()->isScalarType())
12405 continue;
12406 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12407 if (!DRE)
12408 continue;
12409 NeedToCheckForLPCs.insert(DRE->getDecl());
12410 }
12411 }
12412 for (const Decl *VD : NeedToCheckForLPCs) {
12413 for (const LastprivateConditionalData &Data :
12414 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12415 if (Data.DeclToUniqueName.count(VD) > 0) {
12416 if (!Data.Disabled)
12417 NeedToAddForLPCsAsDisabled.insert(VD);
12418 break;
12419 }
12420 }
12421 }
12422}
12423
12424CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12425 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12426 : CGM(CGF.CGM),
12427 Action((CGM.getLangOpts().OpenMP >= 50 &&
12428 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12429 [](const OMPLastprivateClause *C) {
12430 return C->getKind() ==
12431 OMPC_LASTPRIVATE_conditional;
12432 }))
12433 ? ActionToDo::PushAsLastprivateConditional
12434 : ActionToDo::DoNotPush) {
12435 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12435, __extension__
__PRETTY_FUNCTION__))
;
12436 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12437 return;
12438 assert(Action == ActionToDo::PushAsLastprivateConditional &&(static_cast <bool> (Action == ActionToDo::PushAsLastprivateConditional
&& "Expected a push action.") ? void (0) : __assert_fail
("Action == ActionToDo::PushAsLastprivateConditional && \"Expected a push action.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12439, __extension__
__PRETTY_FUNCTION__))
12439 "Expected a push action.")(static_cast <bool> (Action == ActionToDo::PushAsLastprivateConditional
&& "Expected a push action.") ? void (0) : __assert_fail
("Action == ActionToDo::PushAsLastprivateConditional && \"Expected a push action.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12439, __extension__
__PRETTY_FUNCTION__))
;
12440 LastprivateConditionalData &Data =
12441 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12442 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12443 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12444 continue;
12445
12446 for (const Expr *Ref : C->varlists()) {
12447 Data.DeclToUniqueName.insert(std::make_pair(
12448 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12449 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12450 }
12451 }
12452 Data.IVLVal = IVLVal;
12453 Data.Fn = CGF.CurFn;
12454}
12455
12456CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12457 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12458 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12459 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.")(static_cast <bool> (CGM.getLangOpts().OpenMP &&
"Not in OpenMP mode.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMP && \"Not in OpenMP mode.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12459, __extension__
__PRETTY_FUNCTION__))
;
12460 if (CGM.getLangOpts().OpenMP < 50)
12461 return;
12462 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12463 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12464 if (!NeedToAddForLPCsAsDisabled.empty()) {
12465 Action = ActionToDo::DisableLastprivateConditional;
12466 LastprivateConditionalData &Data =
12467 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12468 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12469 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12470 Data.Fn = CGF.CurFn;
12471 Data.Disabled = true;
12472 }
12473}
12474
// Named factory for the "disable" constructor above: builds an RAII object
// that temporarily disables lastprivate-conditional analysis for the decls
// referenced by \p S.
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
12480
12481CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12482 if (CGM.getLangOpts().OpenMP < 50)
12483 return;
12484 if (Action == ActionToDo::DisableLastprivateConditional) {
12485 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&(static_cast <bool> (CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of disabled private vars."
) ? void (0) : __assert_fail ("CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of disabled private vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12486, __extension__
__PRETTY_FUNCTION__))
12486 "Expected list of disabled private vars.")(static_cast <bool> (CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of disabled private vars."
) ? void (0) : __assert_fail ("CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of disabled private vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12486, __extension__
__PRETTY_FUNCTION__))
;
12487 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12488 }
12489 if (Action == ActionToDo::PushAsLastprivateConditional) {
12490 assert((static_cast <bool> (!CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of lastprivate conditional vars."
) ? void (0) : __assert_fail ("!CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of lastprivate conditional vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12492, __extension__
__PRETTY_FUNCTION__))
12491 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&(static_cast <bool> (!CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of lastprivate conditional vars."
) ? void (0) : __assert_fail ("!CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of lastprivate conditional vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12492, __extension__
__PRETTY_FUNCTION__))
12492 "Expected list of lastprivate conditional vars.")(static_cast <bool> (!CGM.getOpenMPRuntime().LastprivateConditionalStack
.back().Disabled && "Expected list of lastprivate conditional vars."
) ? void (0) : __assert_fail ("!CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled && \"Expected list of lastprivate conditional vars.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12492, __extension__
__PRETTY_FUNCTION__))
;
12493 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12494 }
12495}
12496
// Creates (or reuses) the per-function private storage for a lastprivate
// conditional variable \p VD: a record { value, char fired } cached in
// LastprivateConditionalToTypes keyed by (function, decl). Resets the 'fired'
// flag to 0 and returns the address of the value field.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  // Per-function cache entry; created on first use.
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    // First time for this decl in this function: build the implicit record
    // and allocate a stack temporary for it.
    // NOTE(review): "lasprivate.conditional" looks like a typo of
    // "lastprivate" — it is an internal record name, so renaming is only
    // cosmetic, but confirm nothing keys on the string before changing it.
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    // Cached: unpack the (type, value field, fired field, base lvalue) tuple.
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  // Reset the 'fired' flag so the conditional store tracking starts clean.
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
12531
12532namespace {
12533/// Checks if the lastprivate conditional variable is referenced in LHS.
12534class LastprivateConditionalRefChecker final
12535 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12536 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12537 const Expr *FoundE = nullptr;
12538 const Decl *FoundD = nullptr;
12539 StringRef UniqueDeclName;
12540 LValue IVLVal;
12541 llvm::Function *FoundFn = nullptr;
12542 SourceLocation Loc;
12543
12544public:
12545 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12546 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12547 llvm::reverse(LPM)) {
12548 auto It = D.DeclToUniqueName.find(E->getDecl());
12549 if (It == D.DeclToUniqueName.end())
12550 continue;
12551 if (D.Disabled)
12552 return false;
12553 FoundE = E;
12554 FoundD = E->getDecl()->getCanonicalDecl();
12555 UniqueDeclName = It->second;
12556 IVLVal = D.IVLVal;
12557 FoundFn = D.Fn;
12558 break;
12559 }
12560 return FoundE == E;
12561 }
12562 bool VisitMemberExpr(const MemberExpr *E) {
12563 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12564 return false;
12565 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12566 llvm::reverse(LPM)) {
12567 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12568 if (It == D.DeclToUniqueName.end())
12569 continue;
12570 if (D.Disabled)
12571 return false;
12572 FoundE = E;
12573 FoundD = E->getMemberDecl()->getCanonicalDecl();
12574 UniqueDeclName = It->second;
12575 IVLVal = D.IVLVal;
12576 FoundFn = D.Fn;
12577 break;
12578 }
12579 return FoundE == E;
12580 }
12581 bool VisitStmt(const Stmt *S) {
12582 for (const Stmt *Child : S->children()) {
12583 if (!Child)
12584 continue;
12585 if (const auto *E = dyn_cast<Expr>(Child))
12586 if (!E->isGLValue())
12587 continue;
12588 if (Visit(Child))
12589 return true;
12590 }
12591 return false;
12592 }
12593 explicit LastprivateConditionalRefChecker(
12594 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12595 : LPM(LPM) {}
12596 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12597 getFoundData() const {
12598 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12599 }
12600};
12601} // namespace
12602
12603void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12604 LValue IVLVal,
12605 StringRef UniqueDeclName,
12606 LValue LVal,
12607 SourceLocation Loc) {
12608 // Last updated loop counter for the lastprivate conditional var.
12609 // int<xx> last_iv = 0;
12610 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12611 llvm::Constant *LastIV =
12612 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12613 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12614 IVLVal.getAlignment().getAsAlign());
12615 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12616
12617 // Last value of the lastprivate conditional.
12618 // decltype(priv_a) last_a;
12619 llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12620 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12621 Last->setAlignment(LVal.getAlignment().getAsAlign());
12622 LValue LastLVal = CGF.MakeAddrLValue(
12623 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12624
12625 // Global loop counter. Required to handle inner parallel-for regions.
12626 // iv
12627 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12628
12629 // #pragma omp critical(a)
12630 // if (last_iv <= iv) {
12631 // last_iv = iv;
12632 // last_a = priv_a;
12633 // }
12634 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12635 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12636 Action.Enter(CGF);
12637 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12638 // (last_iv <= iv) ? Check if the variable is updated and store new
12639 // value in global var.
12640 llvm::Value *CmpRes;
12641 if (IVLVal.getType()->isSignedIntegerType()) {
12642 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12643 } else {
12644 assert(IVLVal.getType()->isUnsignedIntegerType() &&(static_cast <bool> (IVLVal.getType()->isUnsignedIntegerType
() && "Loop iteration variable must be integer.") ? void
(0) : __assert_fail ("IVLVal.getType()->isUnsignedIntegerType() && \"Loop iteration variable must be integer.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12645, __extension__
__PRETTY_FUNCTION__))
12645 "Loop iteration variable must be integer.")(static_cast <bool> (IVLVal.getType()->isUnsignedIntegerType
() && "Loop iteration variable must be integer.") ? void
(0) : __assert_fail ("IVLVal.getType()->isUnsignedIntegerType() && \"Loop iteration variable must be integer.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12645, __extension__
__PRETTY_FUNCTION__))
;
12646 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12647 }
12648 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12649 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12650 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12651 // {
12652 CGF.EmitBlock(ThenBB);
12653
12654 // last_iv = iv;
12655 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12656
12657 // last_a = priv_a;
12658 switch (CGF.getEvaluationKind(LVal.getType())) {
12659 case TEK_Scalar: {
12660 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12661 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12662 break;
12663 }
12664 case TEK_Complex: {
12665 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12666 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12667 break;
12668 }
12669 case TEK_Aggregate:
12670 llvm_unreachable(::llvm::llvm_unreachable_internal("Aggregates are not supported in lastprivate conditional."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12671)
12671 "Aggregates are not supported in lastprivate conditional.")::llvm::llvm_unreachable_internal("Aggregates are not supported in lastprivate conditional."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12671)
;
12672 }
12673 // }
12674 CGF.EmitBranch(ExitBB);
12675 // There is no need to emit line number for unconditional branch.
12676 (void)ApplyDebugLocation::CreateEmpty(CGF);
12677 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12678 };
12679
12680 if (CGM.getLangOpts().OpenMPSimd) {
12681 // Do not emit as a critical region as no parallel region could be emitted.
12682 RegionCodeGenTy ThenRCG(CodeGen);
12683 ThenRCG(CGF);
12684 } else {
12685 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12686 }
12687}
12688
12689void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12690 const Expr *LHS) {
12691 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12692 return;
12693 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12694 if (!Checker.Visit(LHS))
12695 return;
12696 const Expr *FoundE;
12697 const Decl *FoundD;
12698 StringRef UniqueDeclName;
12699 LValue IVLVal;
12700 llvm::Function *FoundFn;
12701 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12702 Checker.getFoundData();
12703 if (FoundFn != CGF.CurFn) {
12704 // Special codegen for inner parallel regions.
12705 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12706 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12707 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&(static_cast <bool> (It != LastprivateConditionalToTypes
[FoundFn].end() && "Lastprivate conditional is not found in outer region."
) ? void (0) : __assert_fail ("It != LastprivateConditionalToTypes[FoundFn].end() && \"Lastprivate conditional is not found in outer region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12708, __extension__
__PRETTY_FUNCTION__))
12708 "Lastprivate conditional is not found in outer region.")(static_cast <bool> (It != LastprivateConditionalToTypes
[FoundFn].end() && "Lastprivate conditional is not found in outer region."
) ? void (0) : __assert_fail ("It != LastprivateConditionalToTypes[FoundFn].end() && \"Lastprivate conditional is not found in outer region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12708, __extension__
__PRETTY_FUNCTION__))
;
12709 QualType StructTy = std::get<0>(It->getSecond());
12710 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12711 LValue PrivLVal = CGF.EmitLValue(FoundE);
12712 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12713 PrivLVal.getAddress(CGF),
12714 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
12715 CGF.ConvertTypeForMem(StructTy));
12716 LValue BaseLVal =
12717 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12718 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12719 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12720 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12721 FiredLVal, llvm::AtomicOrdering::Unordered,
12722 /*IsVolatile=*/true, /*isInit=*/false);
12723 return;
12724 }
12725
12726 // Private address of the lastprivate conditional in the current context.
12727 // priv_a
12728 LValue LVal = CGF.EmitLValue(FoundE);
12729 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12730 FoundE->getExprLoc());
12731}
12732
12733void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12734 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12735 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12736 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12737 return;
12738 auto Range = llvm::reverse(LastprivateConditionalStack);
12739 auto It = llvm::find_if(
12740 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12741 if (It == Range.end() || It->Fn != CGF.CurFn)
12742 return;
12743 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12744 assert(LPCI != LastprivateConditionalToTypes.end() &&(static_cast <bool> (LPCI != LastprivateConditionalToTypes
.end() && "Lastprivates must be registered already.")
? void (0) : __assert_fail ("LPCI != LastprivateConditionalToTypes.end() && \"Lastprivates must be registered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12745, __extension__
__PRETTY_FUNCTION__))
12745 "Lastprivates must be registered already.")(static_cast <bool> (LPCI != LastprivateConditionalToTypes
.end() && "Lastprivates must be registered already.")
? void (0) : __assert_fail ("LPCI != LastprivateConditionalToTypes.end() && \"Lastprivates must be registered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12745, __extension__
__PRETTY_FUNCTION__))
;
12746 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12747 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12748 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12749 for (const auto &Pair : It->DeclToUniqueName) {
12750 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12751 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12752 continue;
12753 auto I = LPCI->getSecond().find(Pair.first);
12754 assert(I != LPCI->getSecond().end() &&(static_cast <bool> (I != LPCI->getSecond().end() &&
"Lastprivate must be rehistered already.") ? void (0) : __assert_fail
("I != LPCI->getSecond().end() && \"Lastprivate must be rehistered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12755, __extension__
__PRETTY_FUNCTION__))
12755 "Lastprivate must be rehistered already.")(static_cast <bool> (I != LPCI->getSecond().end() &&
"Lastprivate must be rehistered already.") ? void (0) : __assert_fail
("I != LPCI->getSecond().end() && \"Lastprivate must be rehistered already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12755, __extension__
__PRETTY_FUNCTION__))
;
12756 // bool Cmp = priv_a.Fired != 0;
12757 LValue BaseLVal = std::get<3>(I->getSecond());
12758 LValue FiredLVal =
12759 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12760 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12761 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12762 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12763 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12764 // if (Cmp) {
12765 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12766 CGF.EmitBlock(ThenBB);
12767 Address Addr = CGF.GetAddrOfLocalVar(VD);
12768 LValue LVal;
12769 if (VD->getType()->isReferenceType())
12770 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12771 AlignmentSource::Decl);
12772 else
12773 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12774 AlignmentSource::Decl);
12775 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12776 D.getBeginLoc());
12777 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12778 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12779 // }
12780 }
12781}
12782
12783void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12784 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12785 SourceLocation Loc) {
12786 if (CGF.getLangOpts().OpenMP < 50)
12787 return;
12788 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12789 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&(static_cast <bool> (It != LastprivateConditionalStack.
back().DeclToUniqueName.end() && "Unknown lastprivate conditional variable."
) ? void (0) : __assert_fail ("It != LastprivateConditionalStack.back().DeclToUniqueName.end() && \"Unknown lastprivate conditional variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12790, __extension__
__PRETTY_FUNCTION__))
12790 "Unknown lastprivate conditional variable.")(static_cast <bool> (It != LastprivateConditionalStack.
back().DeclToUniqueName.end() && "Unknown lastprivate conditional variable."
) ? void (0) : __assert_fail ("It != LastprivateConditionalStack.back().DeclToUniqueName.end() && \"Unknown lastprivate conditional variable.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12790, __extension__
__PRETTY_FUNCTION__))
;
12791 StringRef UniqueName = It->second;
12792 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12793 // The variable was not updated in the region - exit.
12794 if (!GV)
12795 return;
12796 LValue LPLVal = CGF.MakeAddrLValue(
12797 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12798 PrivLVal.getType().getNonReferenceType());
12799 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12800 CGF.EmitStoreOfScalar(Res, PrivLVal);
12801}
12802
12803llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12804 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12805 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12806 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12806)
;
12807}
12808
12809llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12810 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12811 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12812 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12812)
;
12813}
12814
12815llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12816 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12817 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12818 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12819 bool Tied, unsigned &NumberOfParts) {
12820 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12820)
;
12821}
12822
12823void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12824 SourceLocation Loc,
12825 llvm::Function *OutlinedFn,
12826 ArrayRef<llvm::Value *> CapturedVars,
12827 const Expr *IfCond,
12828 llvm::Value *NumThreads) {
12829 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12829)
;
12830}
12831
12832void CGOpenMPSIMDRuntime::emitCriticalRegion(
12833 CodeGenFunction &CGF, StringRef CriticalName,
12834 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12835 const Expr *Hint) {
12836 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12836)
;
12837}
12838
12839void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12840 const RegionCodeGenTy &MasterOpGen,
12841 SourceLocation Loc) {
12842 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12842)
;
12843}
12844
12845void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12846 const RegionCodeGenTy &MasterOpGen,
12847 SourceLocation Loc,
12848 const Expr *Filter) {
12849 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12849)
;
12850}
12851
12852void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12853 SourceLocation Loc) {
12854 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12854)
;
12855}
12856
12857void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12858 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12859 SourceLocation Loc) {
12860 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12860)
;
12861}
12862
12863void CGOpenMPSIMDRuntime::emitSingleRegion(
12864 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12865 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12866 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12867 ArrayRef<const Expr *> AssignmentOps) {
12868 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12868)
;
12869}
12870
12871void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12872 const RegionCodeGenTy &OrderedOpGen,
12873 SourceLocation Loc,
12874 bool IsThreads) {
12875 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12875)
;
12876}
12877
12878void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12879 SourceLocation Loc,
12880 OpenMPDirectiveKind Kind,
12881 bool EmitChecks,
12882 bool ForceSimpleCall) {
12883 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12883)
;
12884}
12885
12886void CGOpenMPSIMDRuntime::emitForDispatchInit(
12887 CodeGenFunction &CGF, SourceLocation Loc,
12888 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12889 bool Ordered, const DispatchRTInput &DispatchValues) {
12890 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12890)
;
12891}
12892
12893void CGOpenMPSIMDRuntime::emitForStaticInit(
12894 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12895 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12896 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12896)
;
12897}
12898
12899void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12900 CodeGenFunction &CGF, SourceLocation Loc,
12901 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12902 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12902)
;
12903}
12904
12905void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12906 SourceLocation Loc,
12907 unsigned IVSize,
12908 bool IVSigned) {
12909 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12909)
;
12910}
12911
12912void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12913 SourceLocation Loc,
12914 OpenMPDirectiveKind DKind) {
12915 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12915)
;
12916}
12917
12918llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12919 SourceLocation Loc,
12920 unsigned IVSize, bool IVSigned,
12921 Address IL, Address LB,
12922 Address UB, Address ST) {
12923 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12923)
;
12924}
12925
12926void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12927 llvm::Value *NumThreads,
12928 SourceLocation Loc) {
12929 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12929)
;
12930}
12931
12932void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12933 ProcBindKind ProcBind,
12934 SourceLocation Loc) {
12935 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12935)
;
12936}
12937
12938Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
12939 const VarDecl *VD,
12940 Address VDAddr,
12941 SourceLocation Loc) {
12942 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12942)
;
12943}
12944
12945llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
12946 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
12947 CodeGenFunction *CGF) {
12948 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12948)
;
12949}
12950
12951Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
12952 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
12953 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12953)
;
12954}
12955
12956void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
12957 ArrayRef<const Expr *> Vars,
12958 SourceLocation Loc,
12959 llvm::AtomicOrdering AO) {
12960 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12960)
;
12961}
12962
12963void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
12964 const OMPExecutableDirective &D,
12965 llvm::Function *TaskFunction,
12966 QualType SharedsTy, Address Shareds,
12967 const Expr *IfCond,
12968 const OMPTaskDataTy &Data) {
12969 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12969)
;
12970}
12971
12972void CGOpenMPSIMDRuntime::emitTaskLoopCall(
12973 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
12974 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
12975 const Expr *IfCond, const OMPTaskDataTy &Data) {
12976 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12976)
;
12977}
12978
12979void CGOpenMPSIMDRuntime::emitReduction(
12980 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
12981 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
12982 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
12983 assert(Options.SimpleReduction && "Only simple reduction is expected.")(static_cast <bool> (Options.SimpleReduction &&
"Only simple reduction is expected.") ? void (0) : __assert_fail
("Options.SimpleReduction && \"Only simple reduction is expected.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12983, __extension__
__PRETTY_FUNCTION__))
;
12984 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
12985 ReductionOps, Options);
12986}
12987
12988llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
12989 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
12990 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
12991 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12991)
;
12992}
12993
12994void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
12995 SourceLocation Loc,
12996 bool IsWorksharingReduction) {
12997 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 12997)
;
12998}
12999
13000void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13001 SourceLocation Loc,
13002 ReductionCodeGen &RCG,
13003 unsigned N) {
13004 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13004)
;
13005}
13006
13007Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13008 SourceLocation Loc,
13009 llvm::Value *ReductionsPtr,
13010 LValue SharedLVal) {
13011 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13011)
;
13012}
13013
13014void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13015 SourceLocation Loc,
13016 const OMPTaskDataTy &Data) {
13017 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13017)
;
13018}
13019
13020void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13021 CodeGenFunction &CGF, SourceLocation Loc,
13022 OpenMPDirectiveKind CancelRegion) {
13023 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13023)
;
13024}
13025
13026void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13027 SourceLocation Loc, const Expr *IfCond,
13028 OpenMPDirectiveKind CancelRegion) {
13029 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13029)
;
13030}
13031
13032void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13033 const OMPExecutableDirective &D, StringRef ParentName,
13034 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13035 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13036 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13036)
;
13037}
13038
13039void CGOpenMPSIMDRuntime::emitTargetCall(
13040 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13041 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13042 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13043 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13044 const OMPLoopDirective &D)>
13045 SizeEmitter) {
13046 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13046)
;
13047}
13048
13049bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13050 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13050)
;
13051}
13052
13053bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13054 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13054)
;
13055}
13056
13057bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13058 return false;
13059}
13060
13061void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13062 const OMPExecutableDirective &D,
13063 SourceLocation Loc,
13064 llvm::Function *OutlinedFn,
13065 ArrayRef<llvm::Value *> CapturedVars) {
13066 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13066)
;
13067}
13068
13069void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13070 const Expr *NumTeams,
13071 const Expr *ThreadLimit,
13072 SourceLocation Loc) {
13073 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13073)
;
13074}
13075
13076void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13077 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13078 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13079 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13079)
;
13080}
13081
13082void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13083 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13084 const Expr *Device) {
13085 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13085)
;
13086}
13087
13088void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13089 const OMPLoopDirective &D,
13090 ArrayRef<Expr *> NumIterations) {
13091 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13091)
;
13092}
13093
13094void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13095 const OMPDependClause *C) {
13096 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13096)
;
13097}
13098
13099const VarDecl *
13100CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13101 const VarDecl *NativeParam) const {
13102 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13102)
;
13103}
13104
13105Address
13106CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13107 const VarDecl *NativeParam,
13108 const VarDecl *TargetParam) const {
13109 llvm_unreachable("Not supported in SIMD-only mode")::llvm::llvm_unreachable_internal("Not supported in SIMD-only mode"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 13109)
;
13110}