Bug Summary

File: clang/lib/CodeGen/CGOpenMPRuntime.cpp
Warning: line 8332, column 9 — 2nd function call argument is an uninitialized value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CGOpenMPRuntime.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/clang/include -I tools/clang/include -I include -I /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-19-134126-35450-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/clang/lib/CodeGen/CGOpenMPRuntime.cpp

/build/llvm-toolchain-snapshot-14~++20220119111520+da61cb019eb2/clang/lib/CodeGen/CGOpenMPRuntime.cpp

1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
22#include "clang/AST/OpenMPClause.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/AST/StmtVisitor.h"
25#include "clang/Basic/BitmaskEnum.h"
26#include "clang/Basic/FileManager.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/SourceManager.h"
29#include "clang/CodeGen/ConstantInitBuilder.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/StringExtras.h"
33#include "llvm/Bitcode/BitcodeReader.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/GlobalValue.h"
37#include "llvm/IR/Value.h"
38#include "llvm/Support/AtomicOrdering.h"
39#include "llvm/Support/Format.h"
40#include "llvm/Support/raw_ostream.h"
41#include <cassert>
42#include <numeric>
43
44using namespace clang;
45using namespace CodeGen;
46using namespace llvm::omp;
47
48namespace {
49/// Base class for handling code generation inside OpenMP regions.
50class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
51public:
52 /// Kinds of OpenMP regions used in codegen.
53 enum CGOpenMPRegionKind {
54 /// Region with outlined function for standalone 'parallel'
55 /// directive.
56 ParallelOutlinedRegion,
57 /// Region with outlined function for standalone 'task' directive.
58 TaskOutlinedRegion,
59 /// Region for constructs that do not require function outlining,
60 /// like 'for', 'sections', 'atomic' etc. directives.
61 InlinedRegion,
62 /// Region with outlined function for standalone 'target' directive.
63 TargetRegion,
64 };
65
66 CGOpenMPRegionInfo(const CapturedStmt &CS,
67 const CGOpenMPRegionKind RegionKind,
68 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
69 bool HasCancel)
70 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
71 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
72
73 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
74 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
75 bool HasCancel)
76 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
77 Kind(Kind), HasCancel(HasCancel) {}
78
79 /// Get a variable or parameter for storing global thread id
80 /// inside OpenMP construct.
81 virtual const VarDecl *getThreadIDVariable() const = 0;
82
83 /// Emit the captured statement body.
84 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
85
86 /// Get an LValue for the current ThreadID variable.
87 /// \return LValue for thread id variable. This LValue always has type int32*.
88 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
89
90 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
91
92 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
93
94 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
95
96 bool hasCancel() const { return HasCancel; }
97
98 static bool classof(const CGCapturedStmtInfo *Info) {
99 return Info->getKind() == CR_OpenMP;
100 }
101
102 ~CGOpenMPRegionInfo() override = default;
103
104protected:
105 CGOpenMPRegionKind RegionKind;
106 RegionCodeGenTy CodeGen;
107 OpenMPDirectiveKind Kind;
108 bool HasCancel;
109};
110
111/// API for captured statement code generation in OpenMP constructs.
112class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
113public:
114 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
115 const RegionCodeGenTy &CodeGen,
116 OpenMPDirectiveKind Kind, bool HasCancel,
117 StringRef HelperName)
118 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
119 HasCancel),
120 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
121 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")(static_cast <bool> (ThreadIDVar != nullptr && "No ThreadID in OpenMP region."
) ? void (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 121, __extension__
__PRETTY_FUNCTION__))
;
122 }
123
124 /// Get a variable or parameter for storing global thread id
125 /// inside OpenMP construct.
126 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
127
128 /// Get the name of the capture helper.
129 StringRef getHelperName() const override { return HelperName; }
130
131 static bool classof(const CGCapturedStmtInfo *Info) {
132 return CGOpenMPRegionInfo::classof(Info) &&
133 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
134 ParallelOutlinedRegion;
135 }
136
137private:
138 /// A variable or parameter storing global thread id for OpenMP
139 /// constructs.
140 const VarDecl *ThreadIDVar;
141 StringRef HelperName;
142};
143
144/// API for captured statement code generation in OpenMP constructs.
145class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
146public:
147 class UntiedTaskActionTy final : public PrePostActionTy {
148 bool Untied;
149 const VarDecl *PartIDVar;
150 const RegionCodeGenTy UntiedCodeGen;
151 llvm::SwitchInst *UntiedSwitch = nullptr;
152
153 public:
154 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
155 const RegionCodeGenTy &UntiedCodeGen)
156 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
157 void Enter(CodeGenFunction &CGF) override {
158 if (Untied) {
159 // Emit task switching point.
160 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
161 CGF.GetAddrOfLocalVar(PartIDVar),
162 PartIDVar->getType()->castAs<PointerType>());
163 llvm::Value *Res =
164 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
165 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
166 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
167 CGF.EmitBlock(DoneBB);
168 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
169 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
170 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
171 CGF.Builder.GetInsertBlock());
172 emitUntiedSwitch(CGF);
173 }
174 }
175 void emitUntiedSwitch(CodeGenFunction &CGF) const {
176 if (Untied) {
177 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
178 CGF.GetAddrOfLocalVar(PartIDVar),
179 PartIDVar->getType()->castAs<PointerType>());
180 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
181 PartIdLVal);
182 UntiedCodeGen(CGF);
183 CodeGenFunction::JumpDest CurPoint =
184 CGF.getJumpDestInCurrentScope(".untied.next.");
185 CGF.EmitBranch(CGF.ReturnBlock.getBlock());
186 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
187 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
188 CGF.Builder.GetInsertBlock());
189 CGF.EmitBranchThroughCleanup(CurPoint);
190 CGF.EmitBlock(CurPoint.getBlock());
191 }
192 }
193 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
194 };
195 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
196 const VarDecl *ThreadIDVar,
197 const RegionCodeGenTy &CodeGen,
198 OpenMPDirectiveKind Kind, bool HasCancel,
199 const UntiedTaskActionTy &Action)
200 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
201 ThreadIDVar(ThreadIDVar), Action(Action) {
202 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")(static_cast <bool> (ThreadIDVar != nullptr && "No ThreadID in OpenMP region."
) ? void (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 202, __extension__
__PRETTY_FUNCTION__))
;
203 }
204
205 /// Get a variable or parameter for storing global thread id
206 /// inside OpenMP construct.
207 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
208
209 /// Get an LValue for the current ThreadID variable.
210 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
211
212 /// Get the name of the capture helper.
213 StringRef getHelperName() const override { return ".omp_outlined."; }
214
215 void emitUntiedSwitch(CodeGenFunction &CGF) override {
216 Action.emitUntiedSwitch(CGF);
217 }
218
219 static bool classof(const CGCapturedStmtInfo *Info) {
220 return CGOpenMPRegionInfo::classof(Info) &&
221 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
222 TaskOutlinedRegion;
223 }
224
225private:
226 /// A variable or parameter storing global thread id for OpenMP
227 /// constructs.
228 const VarDecl *ThreadIDVar;
229 /// Action for emitting code for untied tasks.
230 const UntiedTaskActionTy &Action;
231};
232
233/// API for inlined captured statement code generation in OpenMP
234/// constructs.
235class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
236public:
237 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
238 const RegionCodeGenTy &CodeGen,
239 OpenMPDirectiveKind Kind, bool HasCancel)
240 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
241 OldCSI(OldCSI),
242 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
243
244 // Retrieve the value of the context parameter.
245 llvm::Value *getContextValue() const override {
246 if (OuterRegionInfo)
247 return OuterRegionInfo->getContextValue();
248 llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 248)
;
249 }
250
251 void setContextValue(llvm::Value *V) override {
252 if (OuterRegionInfo) {
253 OuterRegionInfo->setContextValue(V);
254 return;
255 }
256 llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 256)
;
257 }
258
259 /// Lookup the captured field decl for a variable.
260 const FieldDecl *lookup(const VarDecl *VD) const override {
261 if (OuterRegionInfo)
262 return OuterRegionInfo->lookup(VD);
263 // If there is no outer outlined region,no need to lookup in a list of
264 // captured variables, we can use the original one.
265 return nullptr;
266 }
267
268 FieldDecl *getThisFieldDecl() const override {
269 if (OuterRegionInfo)
270 return OuterRegionInfo->getThisFieldDecl();
271 return nullptr;
272 }
273
274 /// Get a variable or parameter for storing global thread id
275 /// inside OpenMP construct.
276 const VarDecl *getThreadIDVariable() const override {
277 if (OuterRegionInfo)
278 return OuterRegionInfo->getThreadIDVariable();
279 return nullptr;
280 }
281
282 /// Get an LValue for the current ThreadID variable.
283 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
284 if (OuterRegionInfo)
285 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
286 llvm_unreachable("No LValue for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No LValue for inlined OpenMP construct"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 286)
;
287 }
288
289 /// Get the name of the capture helper.
290 StringRef getHelperName() const override {
291 if (auto *OuterRegionInfo = getOldCSI())
292 return OuterRegionInfo->getHelperName();
293 llvm_unreachable("No helper name for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No helper name for inlined OpenMP construct"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 293)
;
294 }
295
296 void emitUntiedSwitch(CodeGenFunction &CGF) override {
297 if (OuterRegionInfo)
298 OuterRegionInfo->emitUntiedSwitch(CGF);
299 }
300
301 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
302
303 static bool classof(const CGCapturedStmtInfo *Info) {
304 return CGOpenMPRegionInfo::classof(Info) &&
305 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
306 }
307
308 ~CGOpenMPInlinedRegionInfo() override = default;
309
310private:
311 /// CodeGen info about outer OpenMP region.
312 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
313 CGOpenMPRegionInfo *OuterRegionInfo;
314};
315
316/// API for captured statement code generation in OpenMP target
317/// constructs. For this captures, implicit parameters are used instead of the
318/// captured fields. The name of the target region has to be unique in a given
319/// application so it is provided by the client, because only the client has
320/// the information to generate that.
321class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
322public:
323 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
324 const RegionCodeGenTy &CodeGen, StringRef HelperName)
325 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
326 /*HasCancel=*/false),
327 HelperName(HelperName) {}
328
329 /// This is unused for target regions because each starts executing
330 /// with a single thread.
331 const VarDecl *getThreadIDVariable() const override { return nullptr; }
332
333 /// Get the name of the capture helper.
334 StringRef getHelperName() const override { return HelperName; }
335
336 static bool classof(const CGCapturedStmtInfo *Info) {
337 return CGOpenMPRegionInfo::classof(Info) &&
338 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
339 }
340
341private:
342 StringRef HelperName;
343};
344
345static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
346 llvm_unreachable("No codegen for expressions")::llvm::llvm_unreachable_internal("No codegen for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 346)
;
347}
348/// API for generation of expressions captured in a innermost OpenMP
349/// region.
350class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
351public:
352 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
353 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
354 OMPD_unknown,
355 /*HasCancel=*/false),
356 PrivScope(CGF) {
357 // Make sure the globals captured in the provided statement are local by
358 // using the privatization logic. We assume the same variable is not
359 // captured more than once.
360 for (const auto &C : CS.captures()) {
361 if (!C.capturesVariable() && !C.capturesVariableByCopy())
362 continue;
363
364 const VarDecl *VD = C.getCapturedVar();
365 if (VD->isLocalVarDeclOrParm())
366 continue;
367
368 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
369 /*RefersToEnclosingVariableOrCapture=*/false,
370 VD->getType().getNonReferenceType(), VK_LValue,
371 C.getLocation());
372 PrivScope.addPrivate(
373 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
374 }
375 (void)PrivScope.Privatize();
376 }
377
378 /// Lookup the captured field decl for a variable.
379 const FieldDecl *lookup(const VarDecl *VD) const override {
380 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
381 return FD;
382 return nullptr;
383 }
384
385 /// Emit the captured statement body.
386 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
387 llvm_unreachable("No body for expressions")::llvm::llvm_unreachable_internal("No body for expressions", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 387)
;
388 }
389
390 /// Get a variable or parameter for storing global thread id
391 /// inside OpenMP construct.
392 const VarDecl *getThreadIDVariable() const override {
393 llvm_unreachable("No thread id for expressions")::llvm::llvm_unreachable_internal("No thread id for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 393)
;
394 }
395
396 /// Get the name of the capture helper.
397 StringRef getHelperName() const override {
398 llvm_unreachable("No helper name for expressions")::llvm::llvm_unreachable_internal("No helper name for expressions"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 398)
;
399 }
400
401 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
402
403private:
404 /// Private scope to capture global variables.
405 CodeGenFunction::OMPPrivateScope PrivScope;
406};
407
408/// RAII for emitting code of OpenMP constructs.
409class InlinedOpenMPRegionRAII {
410 CodeGenFunction &CGF;
411 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
412 FieldDecl *LambdaThisCaptureField = nullptr;
413 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
414 bool NoInheritance = false;
415
416public:
417 /// Constructs region for combined constructs.
418 /// \param CodeGen Code generation sequence for combined directives. Includes
419 /// a list of functions used for code generation of implicitly inlined
420 /// regions.
421 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
422 OpenMPDirectiveKind Kind, bool HasCancel,
423 bool NoInheritance = true)
424 : CGF(CGF), NoInheritance(NoInheritance) {
425 // Start emission for the construct.
426 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
427 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
428 if (NoInheritance) {
429 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
430 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
431 CGF.LambdaThisCaptureField = nullptr;
432 BlockInfo = CGF.BlockInfo;
433 CGF.BlockInfo = nullptr;
434 }
435 }
436
437 ~InlinedOpenMPRegionRAII() {
438 // Restore original CapturedStmtInfo only if we're done with code emission.
439 auto *OldCSI =
440 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
441 delete CGF.CapturedStmtInfo;
442 CGF.CapturedStmtInfo = OldCSI;
443 if (NoInheritance) {
444 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
445 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
446 CGF.BlockInfo = BlockInfo;
447 }
448 }
449};
450
451/// Values for bit flags used in the ident_t to describe the fields.
452/// All enumeric elements are named and described in accordance with the code
453/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
454enum OpenMPLocationFlags : unsigned {
455 /// Use trampoline for internal microtask.
456 OMP_IDENT_IMD = 0x01,
457 /// Use c-style ident structure.
458 OMP_IDENT_KMPC = 0x02,
459 /// Atomic reduction option for kmpc_reduce.
460 OMP_ATOMIC_REDUCE = 0x10,
461 /// Explicit 'barrier' directive.
462 OMP_IDENT_BARRIER_EXPL = 0x20,
463 /// Implicit barrier in code.
464 OMP_IDENT_BARRIER_IMPL = 0x40,
465 /// Implicit barrier in 'for' directive.
466 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
467 /// Implicit barrier in 'sections' directive.
468 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
469 /// Implicit barrier in 'single' directive.
470 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
471 /// Call of __kmp_for_static_init for static loop.
472 OMP_IDENT_WORK_LOOP = 0x200,
473 /// Call of __kmp_for_static_init for sections.
474 OMP_IDENT_WORK_SECTIONS = 0x400,
475 /// Call of __kmp_for_static_init for distribute.
476 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
477 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_IDENT_WORK_DISTRIBUTE
478};
479
480namespace {
481LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()using ::llvm::BitmaskEnumDetail::operator~; using ::llvm::BitmaskEnumDetail
::operator|; using ::llvm::BitmaskEnumDetail::operator&; using
::llvm::BitmaskEnumDetail::operator^; using ::llvm::BitmaskEnumDetail
::operator|=; using ::llvm::BitmaskEnumDetail::operator&=
; using ::llvm::BitmaskEnumDetail::operator^=
;
482/// Values for bit flags for marking which requires clauses have been used.
483enum OpenMPOffloadingRequiresDirFlags : int64_t {
484 /// flag undefined.
485 OMP_REQ_UNDEFINED = 0x000,
486 /// no requires clause present.
487 OMP_REQ_NONE = 0x001,
488 /// reverse_offload clause.
489 OMP_REQ_REVERSE_OFFLOAD = 0x002,
490 /// unified_address clause.
491 OMP_REQ_UNIFIED_ADDRESS = 0x004,
492 /// unified_shared_memory clause.
493 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
494 /// dynamic_allocators clause.
495 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
496 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_REQ_DYNAMIC_ALLOCATORS
497};
498
499enum OpenMPOffloadingReservedDeviceIDs {
500 /// Device ID if the device was not defined, runtime should get it
501 /// from environment variables in the spec.
502 OMP_DEVICEID_UNDEF = -1,
503};
504} // anonymous namespace
505
506/// Describes ident structure that describes a source location.
507/// All descriptions are taken from
508/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
509/// Original structure:
510/// typedef struct ident {
511/// kmp_int32 reserved_1; /**< might be used in Fortran;
512/// see above */
513/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
514/// KMP_IDENT_KMPC identifies this union
515/// member */
516/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
517/// see above */
518///#if USE_ITT_BUILD
519/// /* but currently used for storing
520/// region-specific ITT */
521/// /* contextual information. */
522///#endif /* USE_ITT_BUILD */
523/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
524/// C++ */
525/// char const *psource; /**< String describing the source location.
526/// The string is composed of semi-colon separated
527// fields which describe the source file,
528/// the function and a pair of line numbers that
529/// delimit the construct.
530/// */
531/// } ident_t;
532enum IdentFieldIndex {
533 /// might be used in Fortran
534 IdentField_Reserved_1,
535 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
536 IdentField_Flags,
537 /// Not really used in Fortran any more
538 IdentField_Reserved_2,
539 /// Source[4] in Fortran, do not use for C++
540 IdentField_Reserved_3,
541 /// String describing the source location. The string is composed of
542 /// semi-colon separated fields which describe the source file, the function
543 /// and a pair of line numbers that delimit the construct.
544 IdentField_PSource
545};
546
547/// Schedule types for 'omp for' loops (these enumerators are taken from
548/// the enum sched_type in kmp.h).
549enum OpenMPSchedType {
550 /// Lower bound for default (unordered) versions.
551 OMP_sch_lower = 32,
552 OMP_sch_static_chunked = 33,
553 OMP_sch_static = 34,
554 OMP_sch_dynamic_chunked = 35,
555 OMP_sch_guided_chunked = 36,
556 OMP_sch_runtime = 37,
557 OMP_sch_auto = 38,
558 /// static with chunk adjustment (e.g., simd)
559 OMP_sch_static_balanced_chunked = 45,
560 /// Lower bound for 'ordered' versions.
561 OMP_ord_lower = 64,
562 OMP_ord_static_chunked = 65,
563 OMP_ord_static = 66,
564 OMP_ord_dynamic_chunked = 67,
565 OMP_ord_guided_chunked = 68,
566 OMP_ord_runtime = 69,
567 OMP_ord_auto = 70,
568 OMP_sch_default = OMP_sch_static,
569 /// dist_schedule types
570 OMP_dist_sch_static_chunked = 91,
571 OMP_dist_sch_static = 92,
572 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
573 /// Set if the monotonic schedule modifier was present.
574 OMP_sch_modifier_monotonic = (1 << 29),
575 /// Set if the nonmonotonic schedule modifier was present.
576 OMP_sch_modifier_nonmonotonic = (1 << 30),
577};
578
579/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580/// region.
581class CleanupTy final : public EHScopeStack::Cleanup {
582 PrePostActionTy *Action;
583
584public:
585 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587 if (!CGF.HaveInsertPoint())
588 return;
589 Action->Exit(CGF);
590 }
591};
592
593} // anonymous namespace
594
595void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596 CodeGenFunction::RunCleanupsScope Scope(CGF);
597 if (PrePostAction) {
598 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599 Callback(CodeGen, CGF, *PrePostAction);
600 } else {
601 PrePostActionTy Action;
602 Callback(CodeGen, CGF, Action);
603 }
604}
605
606/// Check if the combiner is a call to UDR combiner and if it is so return the
607/// UDR decl used for reduction.
608static const OMPDeclareReductionDecl *
609getReductionInit(const Expr *ReductionOp) {
610 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612 if (const auto *DRE =
613 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615 return DRD;
616 return nullptr;
617}
618
619static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
620 const OMPDeclareReductionDecl *DRD,
621 const Expr *InitOp,
622 Address Private, Address Original,
623 QualType Ty) {
624 if (DRD->getInitializer()) {
625 std::pair<llvm::Function *, llvm::Function *> Reduction =
626 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
627 const auto *CE = cast<CallExpr>(InitOp);
628 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
629 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
630 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
631 const auto *LHSDRE =
632 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
633 const auto *RHSDRE =
634 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
635 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
636 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
637 [=]() { return Private; });
638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
639 [=]() { return Original; });
640 (void)PrivateScope.Privatize();
641 RValue Func = RValue::get(Reduction.second);
642 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
643 CGF.EmitIgnoredExpr(InitOp);
644 } else {
645 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
646 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
647 auto *GV = new llvm::GlobalVariable(
648 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
649 llvm::GlobalValue::PrivateLinkage, Init, Name);
650 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
651 RValue InitRVal;
652 switch (CGF.getEvaluationKind(Ty)) {
653 case TEK_Scalar:
654 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
655 break;
656 case TEK_Complex:
657 InitRVal =
658 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
659 break;
660 case TEK_Aggregate: {
661 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
662 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
663 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
664 /*IsInitializer=*/false);
665 return;
666 }
667 }
668 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
669 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
670 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
671 /*IsInitializer=*/false);
672 }
673}
674
675/// Emit initialization of arrays of complex types.
676/// \param DestAddr Address of the array.
677/// \param Type Type of array.
678/// \param Init Initial expression of array.
679/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  // SrcAddr is only meaningful for declare-reduction initializers, which may
  // reference the original (source) array element.
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for zero-length arrays.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // For declare-reduction inits, walk the source array in lockstep with the
  // destination using a second PHI over the source element pointer.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI,
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI,
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    // Scope so that per-element temporaries are cleaned up before moving on
    // to the next element.
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
764
765LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
766 return CGF.EmitOMPSharedLValue(E);
767}
768
769LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
770 const Expr *E) {
771 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
772 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
773 return LValue();
774}
775
776void ReductionCodeGen::emitAggregateInitialization(
777 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
778 const OMPDeclareReductionDecl *DRD) {
779 // Emit VarDecl with copy init for arrays.
780 // Get the address of the original variable captured in current
781 // captured region.
782 const auto *PrivateVD =
783 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
784 bool EmitDeclareReductionInit =
785 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
786 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
787 EmitDeclareReductionInit,
788 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
789 : PrivateVD->getInit(),
790 DRD, SharedAddr);
791}
792
793ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
794 ArrayRef<const Expr *> Origs,
795 ArrayRef<const Expr *> Privates,
796 ArrayRef<const Expr *> ReductionOps) {
797 ClausesData.reserve(Shareds.size());
798 SharedAddresses.reserve(Shareds.size());
799 Sizes.reserve(Shareds.size());
800 BaseDecls.reserve(Shareds.size());
801 const auto *IOrig = Origs.begin();
802 const auto *IPriv = Privates.begin();
803 const auto *IRed = ReductionOps.begin();
804 for (const Expr *Ref : Shareds) {
805 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
806 std::advance(IOrig, 1);
807 std::advance(IPriv, 1);
808 std::advance(IRed, 1);
809 }
810}
811
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  // Lvalues are appended strictly in clause order, so item N must be emitted
  // exactly once and in sequence.
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  // First = address of the item (lower bound for array sections); Second =
  // upper-bound lvalue, only meaningful for array sections.
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    // The reduction item is the shared expression itself; reuse its lvalues.
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
826
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Constant-sized item: the byte size is known statically; no dynamic
    // element count is needed (second pair member stays null).
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  // Variably-modified item: compute both the size in chars and the element
  // count at runtime.
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType =
      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
          ->getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; byte size = count * sizeof(element).
    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    // Whole VLA: derive the element count from the total byte size.
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  // Bind the VLA size expression to the computed count so the private type
  // can be emitted with the correct dimensions.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
863
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  QualType PrivateType = PrivateVD->getType();
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the VLA size expression to the caller-provided element count before
  // emitting the variably-modified private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
882
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  // DRD is non-null for user-defined ('declare reduction') reductions.
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  QualType PrivateType = PrivateVD->getType();
  PrivateAddr = CGF.Builder.CreateElementBitCast(
      PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    // Arrays are initialized element by element.
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    // Scalar with a user-defined initializer (or no default one).
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    // Fall back to the private copy's own non-trivial initializer.
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
910
911bool ReductionCodeGen::needCleanups(unsigned N) {
912 const auto *PrivateVD =
913 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
914 QualType PrivateType = PrivateVD->getType();
915 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
916 return DTorKind != QualType::DK_none;
917}
918
919void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
920 Address PrivateAddr) {
921 const auto *PrivateVD =
922 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
923 QualType PrivateType = PrivateVD->getType();
924 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
925 if (needCleanups(N)) {
926 PrivateAddr = CGF.Builder.CreateElementBitCast(
927 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
928 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
929 }
930}
931
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  // Chase through pointers/references until the pointee type matches the
  // element type we are looking for.
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  // Return the final address reinterpreted as the element type, preserving
  // the original lvalue's base info and TBAA metadata.
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
951
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
                          llvm::Value *Addr) {
  // For multi-level pointer bases, materialize a chain of temporaries
  // (MostTopTmp -> ... -> Tmp) so that loading through the chain level by
  // level eventually yields Addr.
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;  // Remember the outermost temporary.
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }
  llvm::Type *Ty = BaseLVType;
  if (Tmp.isValid())
    Ty = Tmp.getElementType();
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
  if (Tmp.isValid()) {
    // Store the adjusted address at the innermost level and hand back the
    // outermost temporary.
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }
  // No indirection was needed; return the casted address directly.
  return Address(Addr, BaseLVAlignment);
}
979
980static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
981 const VarDecl *OrigVD = nullptr;
982 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
983 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
984 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
985 Base = TempOASE->getBase()->IgnoreParenImpCasts();
986 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
987 Base = TempASE->getBase()->IgnoreParenImpCasts();
988 DE = cast<DeclRefExpr>(Base);
989 OrigVD = cast<VarDecl>(DE->getDecl());
990 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
991 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
992 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
993 Base = TempASE->getBase()->IgnoreParenImpCasts();
994 DE = cast<DeclRefExpr>(Base);
995 OrigVD = cast<VarDecl>(DE->getDecl());
996 }
997 return OrigVD;
998}
999
1000Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1001 Address PrivateAddr) {
1002 const DeclRefExpr *DE;
1003 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1004 BaseDecls.emplace_back(OrigVD);
1005 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1006 LValue BaseLValue =
1007 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1008 OriginalBaseLValue);
1009 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1010 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1011 BaseLValue.getPointer(CGF), SharedAddr.getPointer());
1012 llvm::Value *PrivatePointer =
1013 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1014 PrivateAddr.getPointer(), SharedAddr.getType());
1015 llvm::Value *Ptr = CGF.Builder.CreateGEP(
1016 SharedAddr.getElementType(), PrivatePointer, Adjustment);
1017 return castToBase(CGF, OrigVD->getType(),
1018 SharedAddresses[N].first.getType(),
1019 OriginalBaseLValue.getAddress(CGF).getType(),
1020 OriginalBaseLValue.getAlignment(), Ptr);
1021 }
1022 BaseDecls.emplace_back(
1023 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1024 return PrivateAddr;
1025}
1026
1027bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1028 const OMPDeclareReductionDecl *DRD =
1029 getReductionInit(ClausesData[N].ReductionOp);
1030 return DRD && DRD->getInitializer();
1031}
1032
1033LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1034 return CGF.EmitLoadOfPointerLValue(
1035 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1036 getThreadIDVariable()->getType()->castAs<PointerType>());
1037}
1038
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  // A terminate scope enforces this: an exception escaping the region
  // terminates the program instead of unwinding out of it.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1053
1054LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1055 CodeGenFunction &CGF) {
1056 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1057 getThreadIDVariable()->getType(),
1058 AlignmentSource::Decl);
1059}
1060
1061static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1062 QualType FieldTy) {
1063 auto *Field = FieldDecl::Create(
1064 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1065 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1066 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1067 Field->setAccess(AS_public);
1068 DC->addDecl(Field);
1069 return Field;
1070}
1071
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
  // kmp_critical_name is an [8 x i32] array in the KMP runtime ABI.
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  // Pick up offload entry metadata recorded by a previous (host) compile.
  loadOffloadInfoMetadata();
}
1082
1083void CGOpenMPRuntime::clear() {
1084 InternalVars.clear();
1085 // Clean non-target variable declarations possibly used only in debug info.
1086 for (const auto &Data : EmittedNonTargetVariables) {
1087 if (!Data.getValue().pointsToAliveValue())
1088 continue;
1089 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1090 if (!GV)
1091 continue;
1092 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1093 continue;
1094 GV->eraseFromParent();
1095 }
1096}
1097
1098std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1099 SmallString<128> Buffer;
1100 llvm::raw_svector_ostream OS(Buffer);
1101 StringRef Sep = FirstSeparator;
1102 for (StringRef Part : Parts) {
1103 OS << Sep << Part;
1104 Sep = Separator;
1105 }
1106 return std::string(OS.str());
1107}
1108
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // These helpers are tiny; force-inline them in optimized builds.
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
    return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
        .getAddress(CGF);
  });
  (void)Scope.Privatize();
  // For initializers: if no explicit init expression is supplied, run the
  // 'out' variable's own non-trivial initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1165
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  // Emit each 'declare reduction' at most once.
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only pass the init expression for call-style initializers; otherwise
    // the priv variable's own initializer is used inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    // Remember which function referenced this UDR so the entry can be
    // cleaned up when that function's emission finishes.
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
1191
1192std::pair<llvm::Function *, llvm::Function *>
1193CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1194 auto I = UDRMap.find(D);
1195 if (I != UDRMap.end())
1196 return I->second;
1197 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1198 return UDRMap.lookup(D);
1199}
1200
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no IR builder is in use; then both push and pop are no-ops.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1245
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  // Determine whether this directive (or one of its combined variants) may
  // contain a 'cancel' construct; this influences finalization emission.
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
1282
1283llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1284 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1285 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1286 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1287 return emitParallelOrTeamsOutlinedFunction(
1288 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1289}
1290
1291llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1292 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1295 return emitParallelOrTeamsOutlinedFunction(
1296 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297}
1298
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // For untied tasks each part ends by re-enqueueing the task through
  // __kmpc_omp_task so the next part can be scheduled later.
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Task-like directives that may contain a 'cancel' construct.
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  // The number of parts is only meaningful for untied tasks, where the body
  // is split at task scheduling points.
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1345
1346static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1347 const RecordDecl *RD, const CGRecordLayout &RL,
1348 ArrayRef<llvm::Constant *> Data) {
1349 llvm::StructType *StructTy = RL.getLLVMType();
1350 unsigned PrevIdx = 0;
1351 ConstantInitBuilder CIBuilder(CGM);
1352 auto DI = Data.begin();
1353 for (const FieldDecl *FD : RD->fields()) {
1354 unsigned Idx = RL.getLLVMFieldNo(FD);
1355 // Fill the alignment.
1356 for (unsigned I = PrevIdx; I < Idx; ++I)
1357 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1358 PrevIdx = Idx + 1;
1359 Fields.add(*DI);
1360 ++DI;
1361 }
1362}
1363
1364template <class... As>
1365static llvm::GlobalVariable *
1366createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1367 ArrayRef<llvm::Constant *> Data, const Twine &Name,
1368 As &&... Args) {
1369 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1370 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1371 ConstantInitBuilder CIBuilder(CGM);
1372 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1373 buildStructValue(Fields, CGM, RD, RL, Data);
1374 return Fields.finishAndCreateGlobal(
1375 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1376 std::forward<As>(Args)...);
1377}
1378
1379template <typename T>
1380static void
1381createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1382 ArrayRef<llvm::Constant *> Data,
1383 T &Parent) {
1384 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1385 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1386 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1387 buildStructValue(Fields, CGM, RD, RL, Data);
1388 Fields.finishAndAddTo(Parent);
1389}
1390
1391void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1392 bool AtCurrentPoint) {
1393 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1394 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.")(static_cast <bool> (!Elem.second.ServiceInsertPt &&
"Insert point is set already.") ? void (0) : __assert_fail (
"!Elem.second.ServiceInsertPt && \"Insert point is set already.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1394, __extension__
__PRETTY_FUNCTION__))
;
1395
1396 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1397 if (AtCurrentPoint) {
1398 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1399 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1400 } else {
1401 Elem.second.ServiceInsertPt =
1402 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1403 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1404 }
1405}
1406
1407void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1408 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1409 if (Elem.second.ServiceInsertPt) {
1410 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1411 Elem.second.ServiceInsertPt = nullptr;
1412 Ptr->eraseFromParent();
1413 }
1414}
1415
1416static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1417 SourceLocation Loc,
1418 SmallString<128> &Buffer) {
1419 llvm::raw_svector_ostream OS(Buffer);
1420 // Build debug location
1421 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1422 OS << ";" << PLoc.getFilename() << ";";
1423 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1424 OS << FD->getQualifiedNameAsString();
1425 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1426 return OS.str();
1427}
1428
1429llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1430 SourceLocation Loc,
1431 unsigned Flags) {
1432 uint32_t SrcLocStrSize;
1433 llvm::Constant *SrcLocStr;
1434 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1435 Loc.isInvalid()) {
1436 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1437 } else {
1438 std::string FunctionName;
1439 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1440 FunctionName = FD->getQualifiedNameAsString();
1441 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1442 const char *FileName = PLoc.getFilename();
1443 unsigned Line = PLoc.getLine();
1444 unsigned Column = PLoc.getColumn();
1445 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1446 Column, SrcLocStrSize);
1447 }
1448 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1449 return OMPBuilder.getOrCreateIdent(
1450 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1451}
1452
1453llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1454 SourceLocation Loc) {
1455 assert(CGF.CurFn && "No function in current CodeGenFunction.")(static_cast <bool> (CGF.CurFn && "No function in current CodeGenFunction."
) ? void (0) : __assert_fail ("CGF.CurFn && \"No function in current CodeGenFunction.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1455, __extension__
__PRETTY_FUNCTION__))
;
1456 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1457 // the clang invariants used below might be broken.
1458 if (CGM.getLangOpts().OpenMPIRBuilder) {
1459 SmallString<128> Buffer;
1460 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1461 uint32_t SrcLocStrSize;
1462 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1463 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1464 return OMPBuilder.getOrCreateThreadID(
1465 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1466 }
1467
1468 llvm::Value *ThreadID = nullptr;
1469 // Check whether we've already cached a load of the thread id in this
1470 // function.
1471 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1472 if (I != OpenMPLocThreadIDMap.end()) {
1473 ThreadID = I->second.ThreadID;
1474 if (ThreadID != nullptr)
1475 return ThreadID;
1476 }
1477 // If exceptions are enabled, do not use parameter to avoid possible crash.
1478 if (auto *OMPRegionInfo =
1479 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1480 if (OMPRegionInfo->getThreadIDVariable()) {
1481 // Check if this an outlined function with thread id passed as argument.
1482 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1483 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1484 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1485 !CGF.getLangOpts().CXXExceptions ||
1486 CGF.Builder.GetInsertBlock() == TopBlock ||
1487 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1488 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1489 TopBlock ||
1490 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1491 CGF.Builder.GetInsertBlock()) {
1492 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1493 // If value loaded in entry block, cache it and use it everywhere in
1494 // function.
1495 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1496 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1497 Elem.second.ThreadID = ThreadID;
1498 }
1499 return ThreadID;
1500 }
1501 }
1502 }
1503
1504 // This is not an outlined function region - need to call __kmpc_int32
1505 // kmpc_global_thread_num(ident_t *loc).
1506 // Generate thread id value and cache this value for use across the
1507 // function.
1508 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1509 if (!Elem.second.ServiceInsertPt)
1510 setLocThreadIdInsertPt(CGF);
1511 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1512 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1513 llvm::CallInst *Call = CGF.Builder.CreateCall(
1514 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1515 OMPRTL___kmpc_global_thread_num),
1516 emitUpdateLocation(CGF, Loc));
1517 Call->setCallingConv(CGF.getRuntimeCC());
1518 Elem.second.ThreadID = Call;
1519 return Call;
1520}
1521
1522void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523 assert(CGF.CurFn && "No function in current CodeGenFunction.")(static_cast <bool> (CGF.CurFn && "No function in current CodeGenFunction."
) ? void (0) : __assert_fail ("CGF.CurFn && \"No function in current CodeGenFunction.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1523, __extension__
__PRETTY_FUNCTION__))
;
1524 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525 clearLocThreadIdInsertPt(CGF);
1526 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527 }
1528 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529 for(const auto *D : FunctionUDRMap[CGF.CurFn])
1530 UDRMap.erase(D);
1531 FunctionUDRMap.erase(CGF.CurFn);
1532 }
1533 auto I = FunctionUDMMap.find(CGF.CurFn);
1534 if (I != FunctionUDMMap.end()) {
1535 for(const auto *D : I->second)
1536 UDMMap.erase(D);
1537 FunctionUDMMap.erase(I);
1538 }
1539 LastprivateConditionalToTypes.erase(CGF.CurFn);
1540 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541}
1542
1543llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1544 return OMPBuilder.IdentPtr;
1545}
1546
1547llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1548 if (!Kmpc_MicroTy) {
1549 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1550 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1551 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1552 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1553 }
1554 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1555}
1556
1557llvm::FunctionCallee
1558CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1559 bool IsGPUDistribute) {
1560 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1561, __extension__
__PRETTY_FUNCTION__))
1561 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1561, __extension__
__PRETTY_FUNCTION__))
;
1562 StringRef Name;
1563 if (IsGPUDistribute)
1564 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1565 : "__kmpc_distribute_static_init_4u")
1566 : (IVSigned ? "__kmpc_distribute_static_init_8"
1567 : "__kmpc_distribute_static_init_8u");
1568 else
1569 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1570 : "__kmpc_for_static_init_4u")
1571 : (IVSigned ? "__kmpc_for_static_init_8"
1572 : "__kmpc_for_static_init_8u");
1573
1574 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1576 llvm::Type *TypeParams[] = {
1577 getIdentTyPointerTy(), // loc
1578 CGM.Int32Ty, // tid
1579 CGM.Int32Ty, // schedtype
1580 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1581 PtrTy, // p_lower
1582 PtrTy, // p_upper
1583 PtrTy, // p_stride
1584 ITy, // incr
1585 ITy // chunk
1586 };
1587 auto *FnTy =
1588 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1589 return CGM.CreateRuntimeFunction(FnTy, Name);
1590}
1591
1592llvm::FunctionCallee
1593CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1594 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1595, __extension__
__PRETTY_FUNCTION__))
1595 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1595, __extension__
__PRETTY_FUNCTION__))
;
1596 StringRef Name =
1597 IVSize == 32
1598 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1599 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1600 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1601 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1602 CGM.Int32Ty, // tid
1603 CGM.Int32Ty, // schedtype
1604 ITy, // lower
1605 ITy, // upper
1606 ITy, // stride
1607 ITy // chunk
1608 };
1609 auto *FnTy =
1610 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1611 return CGM.CreateRuntimeFunction(FnTy, Name);
1612}
1613
1614llvm::FunctionCallee
1615CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1616 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1617, __extension__
__PRETTY_FUNCTION__))
1617 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1617, __extension__
__PRETTY_FUNCTION__))
;
1618 StringRef Name =
1619 IVSize == 32
1620 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1621 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1622 llvm::Type *TypeParams[] = {
1623 getIdentTyPointerTy(), // loc
1624 CGM.Int32Ty, // tid
1625 };
1626 auto *FnTy =
1627 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1628 return CGM.CreateRuntimeFunction(FnTy, Name);
1629}
1630
1631llvm::FunctionCallee
1632CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1633 assert((IVSize == 32 || IVSize == 64) &&(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1634, __extension__
__PRETTY_FUNCTION__))
1634 "IV size is not compatible with the omp runtime")(static_cast <bool> ((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime") ? void (0)
: __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1634, __extension__
__PRETTY_FUNCTION__))
;
1635 StringRef Name =
1636 IVSize == 32
1637 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1638 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1639 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1640 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1641 llvm::Type *TypeParams[] = {
1642 getIdentTyPointerTy(), // loc
1643 CGM.Int32Ty, // tid
1644 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1645 PtrTy, // p_lower
1646 PtrTy, // p_upper
1647 PtrTy // p_stride
1648 };
1649 auto *FnTy =
1650 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1651 return CGM.CreateRuntimeFunction(FnTy, Name);
1652}
1653
1654/// Obtain information that uniquely identifies a target entry. This
1655/// consists of the file and device IDs as well as line number associated with
1656/// the relevant entry source location.
1657static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1658 unsigned &DeviceID, unsigned &FileID,
1659 unsigned &LineNum) {
1660 SourceManager &SM = C.getSourceManager();
1661
1662 // The loc should be always valid and have a file ID (the user cannot use
1663 // #pragma directives in macros)
1664
1665 assert(Loc.isValid() && "Source location is expected to be always valid.")(static_cast <bool> (Loc.isValid() && "Source location is expected to be always valid."
) ? void (0) : __assert_fail ("Loc.isValid() && \"Source location is expected to be always valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1665, __extension__
__PRETTY_FUNCTION__))
;
1666
1667 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1668 assert(PLoc.isValid() && "Source location is expected to be always valid.")(static_cast <bool> (PLoc.isValid() && "Source location is expected to be always valid."
) ? void (0) : __assert_fail ("PLoc.isValid() && \"Source location is expected to be always valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1668, __extension__
__PRETTY_FUNCTION__))
;
1669
1670 llvm::sys::fs::UniqueID ID;
1671 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1672 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1673 assert(PLoc.isValid() && "Source location is expected to be always valid.")(static_cast <bool> (PLoc.isValid() && "Source location is expected to be always valid."
) ? void (0) : __assert_fail ("PLoc.isValid() && \"Source location is expected to be always valid.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1673, __extension__
__PRETTY_FUNCTION__))
;
1674 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1675 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1676 << PLoc.getFilename() << EC.message();
1677 }
1678
1679 DeviceID = ID.getDevice();
1680 FileID = ID.getFile();
1681 LineNum = PLoc.getLine();
1682}
1683
1684Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1685 if (CGM.getLangOpts().OpenMPSimd)
1686 return Address::invalid();
1687 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1688 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1689 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1690 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1691 HasRequiresUnifiedSharedMemory))) {
1692 SmallString<64> PtrName;
1693 {
1694 llvm::raw_svector_ostream OS(PtrName);
1695 OS << CGM.getMangledName(GlobalDecl(VD));
1696 if (!VD->isExternallyVisible()) {
1697 unsigned DeviceID, FileID, Line;
1698 getTargetEntryUniqueInfo(CGM.getContext(),
1699 VD->getCanonicalDecl()->getBeginLoc(),
1700 DeviceID, FileID, Line);
1701 OS << llvm::format("_%x", FileID);
1702 }
1703 OS << "_decl_tgt_ref_ptr";
1704 }
1705 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1706 if (!Ptr) {
1707 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1708 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1709 PtrName);
1710
1711 auto *GV = cast<llvm::GlobalVariable>(Ptr);
1712 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1713
1714 if (!CGM.getLangOpts().OpenMPIsDevice)
1715 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1716 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1717 }
1718 return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1719 }
1720 return Address::invalid();
1721}
1722
1723llvm::Constant *
1724CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1725 assert(!CGM.getLangOpts().OpenMPUseTLS ||(static_cast <bool> (!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported()) ? void (
0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1726, __extension__
__PRETTY_FUNCTION__))
1726 !CGM.getContext().getTargetInfo().isTLSSupported())(static_cast <bool> (!CGM.getLangOpts().OpenMPUseTLS ||
!CGM.getContext().getTargetInfo().isTLSSupported()) ? void (
0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 1726, __extension__
__PRETTY_FUNCTION__))
;
1727 // Lookup the entry, lazily creating it if necessary.
1728 std::string Suffix = getName({"cache", ""});
1729 return getOrCreateInternalVariable(
1730 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1731}
1732
1733Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734 const VarDecl *VD,
1735 Address VDAddr,
1736 SourceLocation Loc) {
1737 if (CGM.getLangOpts().OpenMPUseTLS &&
1738 CGM.getContext().getTargetInfo().isTLSSupported())
1739 return VDAddr;
1740
1741 llvm::Type *VarTy = VDAddr.getElementType();
1742 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744 CGM.Int8PtrTy),
1745 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746 getOrCreateThreadPrivateCache(VD)};
1747 return Address(CGF.EmitRuntimeCall(
1748 OMPBuilder.getOrCreateRuntimeFunction(
1749 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1750 Args),
1751 VDAddr.getAlignment());
1752}
1753
1754void CGOpenMPRuntime::emitThreadPrivateVarInit(
1755 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1756 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1757 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1758 // library.
1759 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1760 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1761 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1762 OMPLoc);
1763 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1764 // to register constructor/destructor for variable.
1765 llvm::Value *Args[] = {
1766 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1767 Ctor, CopyCtor, Dtor};
1768 CGF.EmitRuntimeCall(
1769 OMPBuilder.getOrCreateRuntimeFunction(
1770 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1771 Args);
1772}
1773
1774llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1775 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1776 bool PerformInit, CodeGenFunction *CGF) {
1777 if (CGM.getLangOpts().OpenMPUseTLS &&
1778 CGM.getContext().getTargetInfo().isTLSSupported())
1779 return nullptr;
1780
1781 VD = VD->getDefinition(CGM.getContext());
1782 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1783 QualType ASTTy = VD->getType();
1784
1785 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1786 const Expr *Init = VD->getAnyInitializer();
1787 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1788 // Generate function that re-emits the declaration's initializer into the
1789 // threadprivate copy of the variable VD
1790 CodeGenFunction CtorCGF(CGM);
1791 FunctionArgList Args;
1792 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1793 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1794 ImplicitParamDecl::Other);
1795 Args.push_back(&Dst);
1796
1797 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1798 CGM.getContext().VoidPtrTy, Args);
1799 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1800 std::string Name = getName({"__kmpc_global_ctor_", ""});
1801 llvm::Function *Fn =
1802 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1803 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1804 Args, Loc, Loc);
1805 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1806 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1807 CGM.getContext().VoidPtrTy, Dst.getLocation());
1808 Address Arg = Address(ArgVal, VDAddr.getAlignment());
1809 Arg = CtorCGF.Builder.CreateElementBitCast(
1810 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1811 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1812 /*IsInitializer=*/true);
1813 ArgVal = CtorCGF.EmitLoadOfScalar(
1814 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1815 CGM.getContext().VoidPtrTy, Dst.getLocation());
1816 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1817 CtorCGF.FinishFunction();
1818 Ctor = Fn;
1819 }
1820 if (VD->getType().isDestructedType() != QualType::DK_none) {
1821 // Generate function that emits destructor call for the threadprivate copy
1822 // of the variable VD
1823 CodeGenFunction DtorCGF(CGM);
1824 FunctionArgList Args;
1825 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1826 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1827 ImplicitParamDecl::Other);
1828 Args.push_back(&Dst);
1829
1830 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1831 CGM.getContext().VoidTy, Args);
1832 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1833 std::string Name = getName({"__kmpc_global_dtor_", ""});
1834 llvm::Function *Fn =
1835 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1836 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1837 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1838 Loc, Loc);
1839 // Create a scope with an artificial location for the body of this function.
1840 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1841 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1842 DtorCGF.GetAddrOfLocalVar(&Dst),
1843 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1844 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1845 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1846 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1847 DtorCGF.FinishFunction();
1848 Dtor = Fn;
1849 }
1850 // Do not emit init function if it is not required.
1851 if (!Ctor && !Dtor)
1852 return nullptr;
1853
1854 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1855 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1856 /*isVarArg=*/false)
1857 ->getPointerTo();
1858 // Copying constructor for the threadprivate variable.
1859 // Must be NULL - reserved by runtime, but currently it requires that this
1860 // parameter is always NULL. Otherwise it fires assertion.
1861 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1862 if (Ctor == nullptr) {
1863 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1864 /*isVarArg=*/false)
1865 ->getPointerTo();
1866 Ctor = llvm::Constant::getNullValue(CtorTy);
1867 }
1868 if (Dtor == nullptr) {
1869 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1870 /*isVarArg=*/false)
1871 ->getPointerTo();
1872 Dtor = llvm::Constant::getNullValue(DtorTy);
1873 }
1874 if (!CGF) {
1875 auto *InitFunctionTy =
1876 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1877 std::string Name = getName({"__omp_threadprivate_init_", ""});
1878 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1879 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1880 CodeGenFunction InitCGF(CGM);
1881 FunctionArgList ArgList;
1882 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1883 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1884 Loc, Loc);
1885 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1886 InitCGF.FinishFunction();
1887 return InitFunction;
1888 }
1889 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1890 }
1891 return nullptr;
1892}
1893
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  // Only relevant when compiling for an offloading device, or for a host that
  // has device triples registered to offload to.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  // 'declare target link' variables, and 'to' variables under the
  // unified_shared_memory requirement, are handled through their link/ref
  // pointer instead; no ctor/dtor offload entries are produced for them here.
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  // Emit the ctor/dtor entries only once per mangled name.
  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  unsigned DeviceID;
  unsigned FileID;
  unsigned Line;
  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
  SmallString<128> Buffer, Out;
  {
    llvm::raw_svector_ostream OS(Buffer);
    OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
       << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
  }

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      // NOTE(review): Init is assumed non-null whenever PerformInit is set in
      // C++ mode - confirm against the caller before relying on it.
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(Addr, CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the ctor function from being discarded (llvm.used).
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
    } else {
      // Host side: emit a private placeholder global that serves as the
      // entry's unique ID (presumably matched against the device ctor entry).
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
      // Keep the dtor function from being discarded (llvm.used).
      CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
    } else {
      // Host side: private placeholder global acting as the entry's ID, as
      // for the ctor above.
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
        ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
2008
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  // Create (or reuse) the internal global "<Name>artificial." that backs the
  // compiler-generated threadprivate variable.
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  // If the target supports TLS and we are asked to use it, simply mark the
  // global thread-local and return it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  // Otherwise go through the runtime's threadprivate cache:
  //   __kmpc_threadprivate_cached(loc, gtid, &var, size, &cache)
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  // The runtime call yields a void* to this thread's copy; cast it back to a
  // pointer to the variable's memory type before wrapping it in an Address.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      CGM.getContext().getTypeAlignInChars(VarType));
}
2040
2041void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2042 const RegionCodeGenTy &ThenGen,
2043 const RegionCodeGenTy &ElseGen) {
2044 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2045
2046 // If the condition constant folds and can be elided, try to avoid emitting
2047 // the condition and the dead arm of the if/else.
2048 bool CondConstant;
2049 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2050 if (CondConstant)
2051 ThenGen(CGF);
2052 else
2053 ElseGen(CGF);
2054 return;
2055 }
2056
2057 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2058 // emit the conditional branch.
2059 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2061 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2062 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2063
2064 // Emit the 'then' code.
2065 CGF.EmitBlock(ThenBlock);
2066 ThenGen(CGF);
2067 CGF.EmitBranch(ContBlock);
2068 // Emit the 'else' code if present.
2069 // There is no need to emit line number for unconditional branch.
2070 (void)ApplyDebugLocation::CreateEmpty(CGF);
2071 CGF.EmitBlock(ElseBlock);
2072 ElseGen(CGF);
2073 // There is no need to emit line number for unconditional branch.
2074 (void)ApplyDebugLocation::CreateEmpty(CGF);
2075 CGF.EmitBranch(ContBlock);
2076 // Emit the continuation block for code after the if.
2077 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2078}
2079
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  // Parallel path: hand the outlined function and the captured variables to
  // the runtime, which forks the team and invokes the microtask on every
  // thread.
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  // Serialized path (if-clause false at runtime): run the outlined function
  // on the current thread between the (begin|end) serialized_parallel calls.
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  // With an if-clause, choose between the two paths at runtime; otherwise
  // always fork. NOTE(review): NumThreads is unused in this overload -
  // presumably the num_threads clause is emitted by the caller; confirm.
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
2151
2152// If we're inside an (outlined) parallel region, use the region info's
2153// thread-ID variable (it is passed in a first argument of the outlined function
2154// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
2155// regular serial code region, get thread ID by calling kmp_int32
2156// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
2157// return the address of that temp.
2158Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2159 SourceLocation Loc) {
2160 if (auto *OMPRegionInfo =
2161 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2162 if (OMPRegionInfo->getThreadIDVariable())
2163 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2164
2165 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2166 QualType Int32Ty =
2167 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2168 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2169 CGF.EmitStoreOfScalar(ThreadID,
2170 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2171
2172 return ThreadIDTemp;
2173}
2174
llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // Materialize the twine once; the InternalVars map keys on the resulting
  // string.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    // Cache hit: callers must always request the same type for a given name.
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  // Cache miss: create a zero-initialized, common-linkage global so that
  // multiple translation units share one definition.
  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
2194
2195llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2196 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2197 std::string Name = getName({Prefix, "var"});
2198 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2199}
2200
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits the "begin" runtime call and, when \p Conditional is set,
/// opens an if-structure guarded by that call's result; Exit() emits the
/// matching "end" call. Callers of a conditional action must also invoke
/// Done() after the region body to close the branch structure.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;      // "begin" entry point, e.g. __kmpc_single
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;       // matching __kmpc_end_* entry point
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;                      // guard region on EnterCallee's result?
  llvm::BasicBlock *ContBlock = nullptr; // set by Enter() when Conditional

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      // Branch into the region only when the runtime call returned nonzero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  // NOTE(review): only meaningful after a Conditional Enter(); ContBlock is
  // still null otherwise - confirm all Done() callers pass Conditional=true.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
2239
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // Emits:
  //   __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  //   CriticalOpGen();
  //   __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  // The lock is a named global shared by all critical regions with this name.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    // A hint() clause appends its value and selects the _with_hint entry
    // point below; the end call is unchanged.
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  // Bracket the region body with the enter/exit runtime calls.
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
2269
2270void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2271 const RegionCodeGenTy &MasterOpGen,
2272 SourceLocation Loc) {
2273 if (!CGF.HaveInsertPoint())
2274 return;
2275 // if(__kmpc_master(ident_t *, gtid)) {
2276 // MasterOpGen();
2277 // __kmpc_end_master(ident_t *, gtid);
2278 // }
2279 // Prepare arguments and build a call to __kmpc_master
2280 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2281 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2282 CGM.getModule(), OMPRTL___kmpc_master),
2283 Args,
2284 OMPBuilder.getOrCreateRuntimeFunction(
2285 CGM.getModule(), OMPRTL___kmpc_end_master),
2286 Args,
2287 /*Conditional=*/true);
2288 MasterOpGen.setAction(Action);
2289 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2290 Action.Done(CGF);
2291}
2292
2293void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2294 const RegionCodeGenTy &MaskedOpGen,
2295 SourceLocation Loc, const Expr *Filter) {
2296 if (!CGF.HaveInsertPoint())
2297 return;
2298 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2299 // MaskedOpGen();
2300 // __kmpc_end_masked(iden_t *, gtid);
2301 // }
2302 // Prepare arguments and build a call to __kmpc_masked
2303 llvm::Value *FilterVal = Filter
2304 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2305 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2306 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2307 FilterVal};
2308 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2309 getThreadID(CGF, Loc)};
2310 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2311 CGM.getModule(), OMPRTL___kmpc_masked),
2312 Args,
2313 OMPBuilder.getOrCreateRuntimeFunction(
2314 CGM.getModule(), OMPRTL___kmpc_end_masked),
2315 ArgsEnd,
2316 /*Conditional=*/true);
2317 MaskedOpGen.setAction(Action);
2318 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2319 Action.Done(CGF);
2320}
2321
2322void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2323 SourceLocation Loc) {
2324 if (!CGF.HaveInsertPoint())
2325 return;
2326 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2327 OMPBuilder.createTaskyield(CGF.Builder);
2328 } else {
2329 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330 llvm::Value *Args[] = {
2331 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2334 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335 Args);
2336 }
2337
2338 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339 Region->emitUntiedSwitch(CGF);
2340}
2341
2342void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343 const RegionCodeGenTy &TaskgroupOpGen,
2344 SourceLocation Loc) {
2345 if (!CGF.HaveInsertPoint())
2346 return;
2347 // __kmpc_taskgroup(ident_t *, gtid);
2348 // TaskgroupOpGen();
2349 // __kmpc_end_taskgroup(ident_t *, gtid);
2350 // Prepare arguments and build a call to __kmpc_taskgroup
2351 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2353 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354 Args,
2355 OMPBuilder.getOrCreateRuntimeFunction(
2356 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357 Args);
2358 TaskgroupOpGen.setAction(Action);
2359 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360}
2361
2362/// Given an array of pointers to variables, project the address of a
2363/// given variable.
2364static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365 unsigned Index, const VarDecl *Var) {
2366 // Pull out the pointer to the variable.
2367 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369
2370 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371 Addr = CGF.Builder.CreateElementBitCast(
2372 Addr, CGF.ConvertTypeForMem(Var->getType()));
2373 return Addr;
2374}
2375
// Emits an internal helper
//   void copy_func(void *LHSArg, void *RHSArg);
// where both arguments really point to arrays of void*, one slot per
// copyprivate variable. The helper assigns each destination variable from the
// corresponding source variable using the AST-provided assignment expression.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    // The copy uses the AST-level assignment op so user-defined copy
    // assignment operators are honored.
    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2429
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  // The four copyprivate arrays are parallel: slot I of each describes the
  // same variable.
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // Emits:
  //   int32 did_it = 0;
  //   if(__kmpc_single(ident_t *, gtid)) {
  //     SingleOpGen();
  //     __kmpc_end_single(ident_t *, gtid);
  //     did_it = 1;
  //   }
  //   call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //   <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    // Records whether this thread executed the single region; the runtime
    // uses it to identify the broadcast source for copyprivate.
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;  -- still inside the guarded 'then' block, so only the
    // thread that ran the single region stores 1.
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
        CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
                                                        CGF.VoidPtrTy);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
2517
2518void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519 const RegionCodeGenTy &OrderedOpGen,
2520 SourceLocation Loc, bool IsThreads) {
2521 if (!CGF.HaveInsertPoint())
2522 return;
2523 // __kmpc_ordered(ident_t *, gtid);
2524 // OrderedOpGen();
2525 // __kmpc_end_ordered(ident_t *, gtid);
2526 // Prepare arguments and build a call to __kmpc_ordered
2527 if (IsThreads) {
2528 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2530 CGM.getModule(), OMPRTL___kmpc_ordered),
2531 Args,
2532 OMPBuilder.getOrCreateRuntimeFunction(
2533 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534 Args);
2535 OrderedOpGen.setAction(Action);
2536 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537 return;
2538 }
2539 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540}
2541
2542unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543 unsigned Flags;
2544 if (Kind == OMPD_for)
2545 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546 else if (Kind == OMPD_sections)
2547 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548 else if (Kind == OMPD_single)
2549 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550 else if (Kind == OMPD_barrier)
2551 Flags = OMP_IDENT_BARRIER_EXPL;
2552 else
2553 Flags = OMP_IDENT_BARRIER_IMPL;
2554 return Flags;
2555}
2556
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
  // Otherwise the out-parameters are deliberately left untouched so the
  // caller's defaults stay in effect.
}
2574
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  // The barrier-kind flags are encoded into the ident_t location argument.
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    // Inside a cancellable region (and unless a simple barrier is forced) use
    // the cancellation-aware barrier so cancelled teams can leave the
    // construct.
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Plain (non-cancellable) barrier otherwise.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2624
/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  // Each clause kind selects between the ordered/unordered and (for static)
  // chunked/unchunked runtime schedule constants.
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    // No schedule clause: default to static.
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
2646
2647/// Map the OpenMP distribute schedule to the runtime enumeration.
2648static OpenMPSchedType
2649getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2650 // only static is allowed for dist_schedule
2651 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2652}
2653
2654bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2655 bool Chunked) const {
2656 OpenMPSchedType Schedule =
2657 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2658 return Schedule == OMP_sch_static;
2659}
2660
2661bool CGOpenMPRuntime::isStaticNonchunked(
2662 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2663 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2664 return Schedule == OMP_dist_sch_static;
2665}
2666
2667bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2668 bool Chunked) const {
2669 OpenMPSchedType Schedule =
2670 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2671 return Schedule == OMP_sch_static_chunked;
2672}
2673
2674bool CGOpenMPRuntime::isStaticChunked(
2675 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2676 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2677 return Schedule == OMP_dist_sch_static_chunked;
2678}
2679
2680bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2681 OpenMPSchedType Schedule =
2682 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2683 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here")(static_cast <bool> (Schedule != OMP_sch_static_chunked
&& "cannot be chunked here") ? void (0) : __assert_fail
("Schedule != OMP_sch_static_chunked && \"cannot be chunked here\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2683, __extension__
__PRETTY_FUNCTION__))
;
2684 return Schedule != OMP_sch_static;
2685}
2686
2687static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2688 OpenMPScheduleClauseModifier M1,
2689 OpenMPScheduleClauseModifier M2) {
2690 int Modifier = 0;
2691 switch (M1) {
2692 case OMPC_SCHEDULE_MODIFIER_monotonic:
2693 Modifier = OMP_sch_modifier_monotonic;
2694 break;
2695 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2696 Modifier = OMP_sch_modifier_nonmonotonic;
2697 break;
2698 case OMPC_SCHEDULE_MODIFIER_simd:
2699 if (Schedule == OMP_sch_static_chunked)
2700 Schedule = OMP_sch_static_balanced_chunked;
2701 break;
2702 case OMPC_SCHEDULE_MODIFIER_last:
2703 case OMPC_SCHEDULE_MODIFIER_unknown:
2704 break;
2705 }
2706 switch (M2) {
2707 case OMPC_SCHEDULE_MODIFIER_monotonic:
2708 Modifier = OMP_sch_modifier_monotonic;
2709 break;
2710 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2711 Modifier = OMP_sch_modifier_nonmonotonic;
2712 break;
2713 case OMPC_SCHEDULE_MODIFIER_simd:
2714 if (Schedule == OMP_sch_static_chunked)
2715 Schedule = OMP_sch_static_balanced_chunked;
2716 break;
2717 case OMPC_SCHEDULE_MODIFIER_last:
2718 case OMPC_SCHEDULE_MODIFIER_unknown:
2719 break;
2720 }
2721 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2722 // If the static schedule kind is specified or if the ordered clause is
2723 // specified, and if the nonmonotonic modifier is not specified, the effect is
2724 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2725 // modifier is specified, the effect is as if the nonmonotonic modifier is
2726 // specified.
2727 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2728 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2729 Schedule == OMP_sch_static_balanced_chunked ||
2730 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2731 Schedule == OMP_dist_sch_static_chunked ||
2732 Schedule == OMP_dist_sch_static))
2733 Modifier = OMP_sch_modifier_nonmonotonic;
2734 }
2735 return Schedule | Modifier;
2736}
2737
2738void CGOpenMPRuntime::emitForDispatchInit(
2739 CodeGenFunction &CGF, SourceLocation Loc,
2740 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2741 bool Ordered, const DispatchRTInput &DispatchValues) {
2742 if (!CGF.HaveInsertPoint())
2743 return;
2744 OpenMPSchedType Schedule = getRuntimeSchedule(
2745 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2746 assert(Ordered ||(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2749, __extension__
__PRETTY_FUNCTION__))
2747 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2749, __extension__
__PRETTY_FUNCTION__))
2748 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2749, __extension__
__PRETTY_FUNCTION__))
2749 Schedule != OMP_sch_static_balanced_chunked))(static_cast <bool> (Ordered || (Schedule != OMP_sch_static
&& Schedule != OMP_sch_static_chunked && Schedule
!= OMP_ord_static && Schedule != OMP_ord_static_chunked
&& Schedule != OMP_sch_static_balanced_chunked)) ? void
(0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2749, __extension__
__PRETTY_FUNCTION__))
;
2750 // Call __kmpc_dispatch_init(
2751 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2752 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2753 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2754
2755 // If the Chunk was not specified in the clause - use default value 1.
2756 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2757 : CGF.Builder.getIntN(IVSize, 1);
2758 llvm::Value *Args[] = {
2759 emitUpdateLocation(CGF, Loc),
2760 getThreadID(CGF, Loc),
2761 CGF.Builder.getInt32(addMonoNonMonoModifier(
2762 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2763 DispatchValues.LB, // Lower
2764 DispatchValues.UB, // Upper
2765 CGF.Builder.getIntN(IVSize, 1), // Stride
2766 Chunk // Chunk
2767 };
2768 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2769}
2770
2771static void emitForStaticInitCall(
2772 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2773 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2774 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2775 const CGOpenMPRuntime::StaticRTInput &Values) {
2776 if (!CGF.HaveInsertPoint())
2777 return;
2778
2779 assert(!Values.Ordered)(static_cast <bool> (!Values.Ordered) ? void (0) : __assert_fail
("!Values.Ordered", "clang/lib/CodeGen/CGOpenMPRuntime.cpp",
2779, __extension__ __PRETTY_FUNCTION__))
;
2780 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2784, __extension__
__PRETTY_FUNCTION__))
2781 Schedule == OMP_sch_static_balanced_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2784, __extension__
__PRETTY_FUNCTION__))
2782 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2784, __extension__
__PRETTY_FUNCTION__))
2783 Schedule == OMP_dist_sch_static ||(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2784, __extension__
__PRETTY_FUNCTION__))
2784 Schedule == OMP_dist_sch_static_chunked)(static_cast <bool> (Schedule == OMP_sch_static || Schedule
== OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked
|| Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked
|| Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked
) ? void (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2784, __extension__
__PRETTY_FUNCTION__))
;
2785
2786 // Call __kmpc_for_static_init(
2787 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2788 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2789 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2790 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2791 llvm::Value *Chunk = Values.Chunk;
2792 if (Chunk == nullptr) {
2793 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2795, __extension__
__PRETTY_FUNCTION__))
2794 Schedule == OMP_dist_sch_static) &&(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2795, __extension__
__PRETTY_FUNCTION__))
2795 "expected static non-chunked schedule")(static_cast <bool> ((Schedule == OMP_sch_static || Schedule
== OMP_ord_static || Schedule == OMP_dist_sch_static) &&
"expected static non-chunked schedule") ? void (0) : __assert_fail
("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2795, __extension__
__PRETTY_FUNCTION__))
;
2796 // If the Chunk was not specified in the clause - use default value 1.
2797 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2798 } else {
2799 assert((Schedule == OMP_sch_static_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2803, __extension__
__PRETTY_FUNCTION__))
2800 Schedule == OMP_sch_static_balanced_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2803, __extension__
__PRETTY_FUNCTION__))
2801 Schedule == OMP_ord_static_chunked ||(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2803, __extension__
__PRETTY_FUNCTION__))
2802 Schedule == OMP_dist_sch_static_chunked) &&(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2803, __extension__
__PRETTY_FUNCTION__))
2803 "expected static chunked schedule")(static_cast <bool> ((Schedule == OMP_sch_static_chunked
|| Schedule == OMP_sch_static_balanced_chunked || Schedule ==
OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked
) && "expected static chunked schedule") ? void (0) :
__assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2803, __extension__
__PRETTY_FUNCTION__))
;
2804 }
2805 llvm::Value *Args[] = {
2806 UpdateLocation,
2807 ThreadId,
2808 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2809 M2)), // Schedule type
2810 Values.IL.getPointer(), // &isLastIter
2811 Values.LB.getPointer(), // &LB
2812 Values.UB.getPointer(), // &UB
2813 Values.ST.getPointer(), // &Stride
2814 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2815 Chunk // Chunk
2816 };
2817 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2818}
2819
2820void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2821 SourceLocation Loc,
2822 OpenMPDirectiveKind DKind,
2823 const OpenMPScheduleTy &ScheduleKind,
2824 const StaticRTInput &Values) {
2825 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2826 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2827 assert(isOpenMPWorksharingDirective(DKind) &&(static_cast <bool> (isOpenMPWorksharingDirective(DKind
) && "Expected loop-based or sections-based directive."
) ? void (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2828, __extension__
__PRETTY_FUNCTION__))
2828 "Expected loop-based or sections-based directive.")(static_cast <bool> (isOpenMPWorksharingDirective(DKind
) && "Expected loop-based or sections-based directive."
) ? void (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2828, __extension__
__PRETTY_FUNCTION__))
;
2829 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2830 isOpenMPLoopDirective(DKind)
2831 ? OMP_IDENT_WORK_LOOP
2832 : OMP_IDENT_WORK_SECTIONS);
2833 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2834 llvm::FunctionCallee StaticInitFunction =
2835 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2836 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2837 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2838 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2839}
2840
2841void CGOpenMPRuntime::emitDistributeStaticInit(
2842 CodeGenFunction &CGF, SourceLocation Loc,
2843 OpenMPDistScheduleClauseKind SchedKind,
2844 const CGOpenMPRuntime::StaticRTInput &Values) {
2845 OpenMPSchedType ScheduleNum =
2846 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2847 llvm::Value *UpdatedLocation =
2848 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2849 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2850 llvm::FunctionCallee StaticInitFunction;
2851 bool isGPUDistribute =
2852 CGM.getLangOpts().OpenMPIsDevice &&
2853 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2854 StaticInitFunction = createForStaticInitFunction(
2855 Values.IVSize, Values.IVSigned, isGPUDistribute);
2856
2857 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2858 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2859 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2860}
2861
2862void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2863 SourceLocation Loc,
2864 OpenMPDirectiveKind DKind) {
2865 if (!CGF.HaveInsertPoint())
2866 return;
2867 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2868 llvm::Value *Args[] = {
2869 emitUpdateLocation(CGF, Loc,
2870 isOpenMPDistributeDirective(DKind)
2871 ? OMP_IDENT_WORK_DISTRIBUTE
2872 : isOpenMPLoopDirective(DKind)
2873 ? OMP_IDENT_WORK_LOOP
2874 : OMP_IDENT_WORK_SECTIONS),
2875 getThreadID(CGF, Loc)};
2876 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2877 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2878 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2879 CGF.EmitRuntimeCall(
2880 OMPBuilder.getOrCreateRuntimeFunction(
2881 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2882 Args);
2883 else
2884 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2886 Args);
2887}
2888
2889void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2890 SourceLocation Loc,
2891 unsigned IVSize,
2892 bool IVSigned) {
2893 if (!CGF.HaveInsertPoint())
2894 return;
2895 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2896 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2897 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2898}
2899
2900llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2901 SourceLocation Loc, unsigned IVSize,
2902 bool IVSigned, Address IL,
2903 Address LB, Address UB,
2904 Address ST) {
2905 // Call __kmpc_dispatch_next(
2906 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2907 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2908 // kmp_int[32|64] *p_stride);
2909 llvm::Value *Args[] = {
2910 emitUpdateLocation(CGF, Loc),
2911 getThreadID(CGF, Loc),
2912 IL.getPointer(), // &isLastIter
2913 LB.getPointer(), // &Lower
2914 UB.getPointer(), // &Upper
2915 ST.getPointer() // &Stride
2916 };
2917 llvm::Value *Call =
2918 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2919 return CGF.EmitScalarConversion(
2920 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2921 CGF.getContext().BoolTy, Loc);
2922}
2923
2924void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2925 llvm::Value *NumThreads,
2926 SourceLocation Loc) {
2927 if (!CGF.HaveInsertPoint())
2928 return;
2929 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2930 llvm::Value *Args[] = {
2931 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2932 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2933 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2934 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2935 Args);
2936}
2937
2938void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2939 ProcBindKind ProcBind,
2940 SourceLocation Loc) {
2941 if (!CGF.HaveInsertPoint())
2942 return;
2943 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.")(static_cast <bool> (ProcBind != OMP_PROC_BIND_unknown &&
"Unsupported proc_bind value.") ? void (0) : __assert_fail (
"ProcBind != OMP_PROC_BIND_unknown && \"Unsupported proc_bind value.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 2943, __extension__
__PRETTY_FUNCTION__))
;
2944 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2945 llvm::Value *Args[] = {
2946 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2947 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2948 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2949 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2950 Args);
2951}
2952
2953void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2954 SourceLocation Loc, llvm::AtomicOrdering AO) {
2955 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2956 OMPBuilder.createFlush(CGF.Builder);
2957 } else {
2958 if (!CGF.HaveInsertPoint())
2959 return;
2960 // Build call void __kmpc_flush(ident_t *loc)
2961 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2962 CGM.getModule(), OMPRTL___kmpc_flush),
2963 emitUpdateLocation(CGF, Loc));
2964 }
2965}
2966
namespace {
/// Indexes of fields for type kmp_task_t. The enumerator order mirrors the
/// runtime's struct layout, so it must not be changed.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2992
2993bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2994 return OffloadEntriesTargetRegion.empty() &&
2995 OffloadEntriesDeviceGlobalVar.empty();
2996}
2997
2998/// Initialize target region entry.
2999void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3001 StringRef ParentName, unsigned LineNum,
3002 unsigned Order) {
3003 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3005, __extension__
__PRETTY_FUNCTION__))
3004 "only required for the device "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3005, __extension__
__PRETTY_FUNCTION__))
3005 "code generation.")(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3005, __extension__
__PRETTY_FUNCTION__))
;
3006 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3007 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3008 OMPTargetRegionEntryTargetRegion);
3009 ++OffloadingEntriesNum;
3010}
3011
3012void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3013 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3014 StringRef ParentName, unsigned LineNum,
3015 llvm::Constant *Addr, llvm::Constant *ID,
3016 OMPTargetRegionEntryKind Flags) {
3017 // If we are emitting code for a target, the entry is already initialized,
3018 // only has to be registered.
3019 if (CGM.getLangOpts().OpenMPIsDevice) {
3020 // This could happen if the device compilation is invoked standalone.
3021 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3022 return;
3023 auto &Entry =
3024 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3025 Entry.setAddress(Addr);
3026 Entry.setID(ID);
3027 Entry.setFlags(Flags);
3028 } else {
3029 if (Flags ==
3030 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3031 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3032 /*IgnoreAddressId*/ true))
3033 return;
3034 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&(static_cast <bool> (!hasTargetRegionEntryInfo(DeviceID
, FileID, ParentName, LineNum) && "Target region entry already registered!"
) ? void (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3035, __extension__
__PRETTY_FUNCTION__))
3035 "Target region entry already registered!")(static_cast <bool> (!hasTargetRegionEntryInfo(DeviceID
, FileID, ParentName, LineNum) && "Target region entry already registered!"
) ? void (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3035, __extension__
__PRETTY_FUNCTION__))
;
3036 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3037 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3038 ++OffloadingEntriesNum;
3039 }
3040}
3041
3042bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3043 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3044 bool IgnoreAddressId) const {
3045 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3046 if (PerDevice == OffloadEntriesTargetRegion.end())
3047 return false;
3048 auto PerFile = PerDevice->second.find(FileID);
3049 if (PerFile == PerDevice->second.end())
3050 return false;
3051 auto PerParentName = PerFile->second.find(ParentName);
3052 if (PerParentName == PerFile->second.end())
3053 return false;
3054 auto PerLine = PerParentName->second.find(LineNum);
3055 if (PerLine == PerParentName->second.end())
3056 return false;
3057 // Fail if this entry is already registered.
3058 if (!IgnoreAddressId &&
3059 (PerLine->second.getAddress() || PerLine->second.getID()))
3060 return false;
3061 return true;
3062}
3063
3064void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3065 const OffloadTargetRegionEntryInfoActTy &Action) {
3066 // Scan all target region entries and perform the provided action.
3067 for (const auto &D : OffloadEntriesTargetRegion)
3068 for (const auto &F : D.second)
3069 for (const auto &P : F.second)
3070 for (const auto &L : P.second)
3071 Action(D.first, F.first, P.first(), L.first, L.second);
3072}
3073
3074void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3075 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3076 OMPTargetGlobalVarEntryKind Flags,
3077 unsigned Order) {
3078 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3080, __extension__
__PRETTY_FUNCTION__))
3079 "only required for the device "(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3080, __extension__
__PRETTY_FUNCTION__))
3080 "code generation.")(static_cast <bool> (CGM.getLangOpts().OpenMPIsDevice &&
"Initialization of entries is " "only required for the device "
"code generation.") ? void (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3080, __extension__
__PRETTY_FUNCTION__))
;
3081 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3082 ++OffloadingEntriesNum;
3083}
3084
3085void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3086 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3087 CharUnits VarSize,
3088 OMPTargetGlobalVarEntryKind Flags,
3089 llvm::GlobalValue::LinkageTypes Linkage) {
3090 if (CGM.getLangOpts().OpenMPIsDevice) {
3091 // This could happen if the device compilation is invoked standalone.
3092 if (!hasDeviceGlobalVarEntryInfo(VarName))
3093 return;
3094 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3095 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3096 if (Entry.getVarSize().isZero()) {
3097 Entry.setVarSize(VarSize);
3098 Entry.setLinkage(Linkage);
3099 }
3100 return;
3101 }
3102 Entry.setVarSize(VarSize);
3103 Entry.setLinkage(Linkage);
3104 Entry.setAddress(Addr);
3105 } else {
3106 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3107 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3108 assert(Entry.isValid() && Entry.getFlags() == Flags &&(static_cast <bool> (Entry.isValid() && Entry.getFlags
() == Flags && "Entry not initialized!") ? void (0) :
__assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3109, __extension__
__PRETTY_FUNCTION__))
3109 "Entry not initialized!")(static_cast <bool> (Entry.isValid() && Entry.getFlags
() == Flags && "Entry not initialized!") ? void (0) :
__assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3109, __extension__
__PRETTY_FUNCTION__))
;
3110 if (Entry.getVarSize().isZero()) {
3111 Entry.setVarSize(VarSize);
3112 Entry.setLinkage(Linkage);
3113 }
3114 return;
3115 }
3116 OffloadEntriesDeviceGlobalVar.try_emplace(
3117 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3118 ++OffloadingEntriesNum;
3119 }
3120}
3121
3122void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3123 actOnDeviceGlobalVarEntriesInfo(
3124 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3125 // Scan all target region entries and perform the provided action.
3126 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3127 Action(E.getKey(), E.getValue());
3128}
3129
3130void CGOpenMPRuntime::createOffloadEntry(
3131 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3132 llvm::GlobalValue::LinkageTypes Linkage) {
3133 StringRef Name = Addr->getName();
3134 llvm::Module &M = CGM.getModule();
3135 llvm::LLVMContext &C = M.getContext();
3136
3137 // Create constant string with the name.
3138 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3139
3140 std::string StringName = getName({"omp_offloading", "entry_name"});
3141 auto *Str = new llvm::GlobalVariable(
3142 M, StrPtrInit->getType(), /*isConstant=*/true,
3143 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3144 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3145
3146 llvm::Constant *Data[] = {
3147 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3148 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3149 llvm::ConstantInt::get(CGM.SizeTy, Size),
3150 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3151 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3152 std::string EntryName = getName({"omp_offloading", "entry", ""});
3153 llvm::GlobalVariable *Entry = createGlobalStruct(
3154 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3155 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3156
3157 // The entry has to be created in the section the linker expects it to be.
3158 Entry->setSection("omp_offloading_entries");
3159}
3160
3161void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3162 // Emit the offloading entries and metadata so that the device codegen side
3163 // can easily figure out what to emit. The produced metadata looks like
3164 // this:
3165 //
3166 // !omp_offload.info = !{!1, ...}
3167 //
3168 // Right now we only generate metadata for function that contain target
3169 // regions.
3170
3171 // If we are in simd mode or there are no entries, we don't need to do
3172 // anything.
3173 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3174 return;
3175
3176 llvm::Module &M = CGM.getModule();
3177 llvm::LLVMContext &C = M.getContext();
3178 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3179 SourceLocation, StringRef>,
3180 16>
3181 OrderedEntries(OffloadEntriesInfoManager.size());
3182 llvm::SmallVector<StringRef, 16> ParentFunctions(
3183 OffloadEntriesInfoManager.size());
3184
3185 // Auxiliary methods to create metadata values and strings.
3186 auto &&GetMDInt = [this](unsigned V) {
3187 return llvm::ConstantAsMetadata::get(
3188 llvm::ConstantInt::get(CGM.Int32Ty, V));
3189 };
3190
3191 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3192
3193 // Create the offloading info metadata node.
3194 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3195
3196 // Create function that emits metadata for each target region entry;
3197 auto &&TargetRegionMetadataEmitter =
3198 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3199 &GetMDString](
3200 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3201 unsigned Line,
3202 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3203 // Generate metadata for target regions. Each entry of this metadata
3204 // contains:
3205 // - Entry 0 -> Kind of this type of metadata (0).
3206 // - Entry 1 -> Device ID of the file where the entry was identified.
3207 // - Entry 2 -> File ID of the file where the entry was identified.
3208 // - Entry 3 -> Mangled name of the function where the entry was
3209 // identified.
3210 // - Entry 4 -> Line in the file where the entry was identified.
3211 // - Entry 5 -> Order the entry was created.
3212 // The first element of the metadata node is the kind.
3213 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3214 GetMDInt(FileID), GetMDString(ParentName),
3215 GetMDInt(Line), GetMDInt(E.getOrder())};
3216
3217 SourceLocation Loc;
3218 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3219 E = CGM.getContext().getSourceManager().fileinfo_end();
3220 I != E; ++I) {
3221 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3222 I->getFirst()->getUniqueID().getFile() == FileID) {
3223 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3224 I->getFirst(), Line, 1);
3225 break;
3226 }
3227 }
3228 // Save this entry in the right position of the ordered entries array.
3229 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3230 ParentFunctions[E.getOrder()] = ParentName;
3231
3232 // Add metadata to the named metadata node.
3233 MD->addOperand(llvm::MDNode::get(C, Ops));
3234 };
3235
3236 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3237 TargetRegionMetadataEmitter);
3238
3239 // Create function that emits metadata for each device global variable entry;
3240 auto &&DeviceGlobalVarMetadataEmitter =
3241 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3242 MD](StringRef MangledName,
3243 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3244 &E) {
3245 // Generate metadata for global variables. Each entry of this metadata
3246 // contains:
3247 // - Entry 0 -> Kind of this type of metadata (1).
3248 // - Entry 1 -> Mangled name of the variable.
3249 // - Entry 2 -> Declare target kind.
3250 // - Entry 3 -> Order the entry was created.
3251 // The first element of the metadata node is the kind.
3252 llvm::Metadata *Ops[] = {
3253 GetMDInt(E.getKind()), GetMDString(MangledName),
3254 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3255
3256 // Save this entry in the right position of the ordered entries array.
3257 OrderedEntries[E.getOrder()] =
3258 std::make_tuple(&E, SourceLocation(), MangledName);
3259
3260 // Add metadata to the named metadata node.
3261 MD->addOperand(llvm::MDNode::get(C, Ops));
3262 };
3263
3264 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3265 DeviceGlobalVarMetadataEmitter);
3266
3267 for (const auto &E : OrderedEntries) {
3268 assert(std::get<0>(E) && "All ordered entries must exist!")(static_cast <bool> (std::get<0>(E) && "All ordered entries must exist!"
) ? void (0) : __assert_fail ("std::get<0>(E) && \"All ordered entries must exist!\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3268, __extension__
__PRETTY_FUNCTION__))
;
3269 if (const auto *CE =
3270 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3271 std::get<0>(E))) {
3272 if (!CE->getID() || !CE->getAddress()) {
3273 // Do not blame the entry if the parent funtion is not emitted.
3274 StringRef FnName = ParentFunctions[CE->getOrder()];
3275 if (!CGM.GetGlobalValue(FnName))
3276 continue;
3277 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3278 DiagnosticsEngine::Error,
3279 "Offloading entry for target region in %0 is incorrect: either the "
3280 "address or the ID is invalid.");
3281 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3282 continue;
3283 }
3284 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3285 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3286 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3287 OffloadEntryInfoDeviceGlobalVar>(
3288 std::get<0>(E))) {
3289 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3290 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3291 CE->getFlags());
3292 switch (Flags) {
3293 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3294 if (CGM.getLangOpts().OpenMPIsDevice &&
3295 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3296 continue;
3297 if (!CE->getAddress()) {
3298 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3299 DiagnosticsEngine::Error, "Offloading entry for declare target "
3300 "variable %0 is incorrect: the "
3301 "address is invalid.");
3302 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3303 continue;
3304 }
3305 // The vaiable has no definition - no need to add the entry.
3306 if (CE->getVarSize().isZero())
3307 continue;
3308 break;
3309 }
3310 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3311 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3313, __extension__
__PRETTY_FUNCTION__))
3312 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3313, __extension__
__PRETTY_FUNCTION__))
3313 "Declaret target link address is set.")(static_cast <bool> (((CGM.getLangOpts().OpenMPIsDevice
&& !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice
&& CE->getAddress())) && "Declaret target link address is set."
) ? void (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 3313, __extension__
__PRETTY_FUNCTION__))
;
3314 if (CGM.getLangOpts().OpenMPIsDevice)
3315 continue;
3316 if (!CE->getAddress()) {
3317 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3318 DiagnosticsEngine::Error,
3319 "Offloading entry for declare target variable is incorrect: the "
3320 "address is invalid.");
3321 CGM.getDiags().Report(DiagID);
3322 continue;
3323 }
3324 break;
3325 }
3326 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3327 CE->getVarSize().getQuantity(), Flags,
3328 CE->getLinkage());
3329 } else {
3330 llvm_unreachable("Unsupported entry kind.")::llvm::llvm_unreachable_internal("Unsupported entry kind.", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 3330)
;
3331 }
3332 }
3333}
3334
3335/// Loads all the offload entries information from the host IR
3336/// metadata.
3337void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3338 // If we are in target mode, load the metadata from the host IR. This code has
3339 // to match the metadaata creation in createOffloadEntriesAndInfoMetadata().
3340
3341 if (!CGM.getLangOpts().OpenMPIsDevice)
3342 return;
3343
3344 if (CGM.getLangOpts().OMPHostIRFile.empty())
3345 return;
3346
3347 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3348 if (auto EC = Buf.getError()) {
3349 CGM.getDiags().Report(diag::err_cannot_open_file)
3350 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3351 return;
3352 }
3353
3354 llvm::LLVMContext C;
3355 auto ME = expectedToErrorOrAndEmitErrors(
3356 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3357
3358 if (auto EC = ME.getError()) {
3359 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3360 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3361 CGM.getDiags().Report(DiagID)
3362 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3363 return;
3364 }
3365
3366 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3367 if (!MD)
3368 return;
3369
3370 for (llvm::MDNode *MN : MD->operands()) {
3371 auto &&GetMDInt = [MN](unsigned Idx) {
3372 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3373 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3374 };
3375
3376 auto &&GetMDString = [MN](unsigned Idx) {
3377 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3378 return V->getString();
3379 };
3380
3381 switch (GetMDInt(0)) {
3382 default:
3383 llvm_unreachable("Unexpected metadata!")::llvm::llvm_unreachable_internal("Unexpected metadata!", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 3383)
;
3384 break;
3385 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3386 OffloadingEntryInfoTargetRegion:
3387 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3388 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3389 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3390 /*Order=*/GetMDInt(5));
3391 break;
3392 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3393 OffloadingEntryInfoDeviceGlobalVar:
3394 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3395 /*MangledName=*/GetMDString(1),
3396 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3397 /*Flags=*/GetMDInt(2)),
3398 /*Order=*/GetMDInt(3));
3399 break;
3400 }
3401 }
3402}
3403
3404void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3405 if (!KmpRoutineEntryPtrTy) {
3406 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3407 ASTContext &C = CGM.getContext();
3408 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3409 FunctionProtoType::ExtProtoInfo EPI;
3410 KmpRoutineEntryPtrQTy = C.getPointerType(
3411 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3412 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3413 }
3414}
3415
3416QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3417 // Make sure the type of the entry is already created. This is the type we
3418 // have to create:
3419 // struct __tgt_offload_entry{
3420 // void *addr; // Pointer to the offload entry info.
3421 // // (function or global)
3422 // char *name; // Name of the function or global.
3423 // size_t size; // Size of the entry info (0 if it a function).
3424 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3425 // int32_t reserved; // Reserved, to use by the runtime library.
3426 // };
3427 if (TgtOffloadEntryQTy.isNull()) {
3428 ASTContext &C = CGM.getContext();
3429 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3430 RD->startDefinition();
3431 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3432 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3433 addFieldToRecordDecl(C, RD, C.getSizeType());
3434 addFieldToRecordDecl(
3435 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3436 addFieldToRecordDecl(
3437 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3438 RD->completeDefinition();
3439 RD->addAttr(PackedAttr::CreateImplicit(C));
3440 TgtOffloadEntryQTy = C.getRecordType(RD);
3441 }
3442 return TgtOffloadEntryQTy;
3443}
3444
3445namespace {
3446struct PrivateHelpersTy {
3447 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3448 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3449 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3450 PrivateElemInit(PrivateElemInit) {}
3451 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3452 const Expr *OriginalRef = nullptr;
3453 const VarDecl *Original = nullptr;
3454 const VarDecl *PrivateCopy = nullptr;
3455 const VarDecl *PrivateElemInit = nullptr;
3456 bool isLocalPrivate() const {
3457 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3458 }
3459};
3460typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3461} // anonymous namespace
3462
3463static bool isAllocatableDecl(const VarDecl *VD) {
3464 const VarDecl *CVD = VD->getCanonicalDecl();
3465 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3466 return false;
3467 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3468 // Use the default allocation.
3469 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3470 !AA->getAllocator());
3471}
3472
3473static RecordDecl *
3474createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3475 if (!Privates.empty()) {
3476 ASTContext &C = CGM.getContext();
3477 // Build struct .kmp_privates_t. {
3478 // /* private vars */
3479 // };
3480 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3481 RD->startDefinition();
3482 for (const auto &Pair : Privates) {
3483 const VarDecl *VD = Pair.second.Original;
3484 QualType Type = VD->getType().getNonReferenceType();
3485 // If the private variable is a local variable with lvalue ref type,
3486 // allocate the pointer instead of the pointee type.
3487 if (Pair.second.isLocalPrivate()) {
3488 if (VD->getType()->isLValueReferenceType())
3489 Type = C.getPointerType(Type);
3490 if (isAllocatableDecl(VD))
3491 Type = C.getPointerType(Type);
3492 }
3493 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3494 if (VD->hasAttrs()) {
3495 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3496 E(VD->getAttrs().end());
3497 I != E; ++I)
3498 FD->addAttr(*I);
3499 }
3500 }
3501 RD->completeDefinition();
3502 return RD;
3503 }
3504 return nullptr;
3505}
3506
3507static RecordDecl *
3508createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3509 QualType KmpInt32Ty,
3510 QualType KmpRoutineEntryPointerQTy) {
3511 ASTContext &C = CGM.getContext();
3512 // Build struct kmp_task_t {
3513 // void * shareds;
3514 // kmp_routine_entry_t routine;
3515 // kmp_int32 part_id;
3516 // kmp_cmplrdata_t data1;
3517 // kmp_cmplrdata_t data2;
3518 // For taskloops additional fields:
3519 // kmp_uint64 lb;
3520 // kmp_uint64 ub;
3521 // kmp_int64 st;
3522 // kmp_int32 liter;
3523 // void * reductions;
3524 // };
3525 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3526 UD->startDefinition();
3527 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3528 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3529 UD->completeDefinition();
3530 QualType KmpCmplrdataTy = C.getRecordType(UD);
3531 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3532 RD->startDefinition();
3533 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3534 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3535 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3536 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3537 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3538 if (isOpenMPTaskLoopDirective(Kind)) {
3539 QualType KmpUInt64Ty =
3540 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3541 QualType KmpInt64Ty =
3542 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3543 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3544 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3545 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3546 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3547 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3548 }
3549 RD->completeDefinition();
3550 return RD;
3551}
3552
3553static RecordDecl *
3554createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3555 ArrayRef<PrivateDataTy> Privates) {
3556 ASTContext &C = CGM.getContext();
3557 // Build struct kmp_task_t_with_privates {
3558 // kmp_task_t task_data;
3559 // .kmp_privates_t. privates;
3560 // };
3561 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3562 RD->startDefinition();
3563 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3564 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3565 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3566 RD->completeDefinition();
3567 return RD;
3568}
3569
3570/// Emit a proxy function which accepts kmp_task_t as the second
3571/// argument.
3572/// \code
3573/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3574/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3575/// For taskloops:
3576/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3577/// tt->reductions, tt->shareds);
3578/// return 0;
3579/// }
3580/// \endcode
3581static llvm::Function *
3582emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3583 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3584 QualType KmpTaskTWithPrivatesPtrQTy,
3585 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3586 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3587 llvm::Value *TaskPrivatesMap) {
3588 ASTContext &C = CGM.getContext();
3589 FunctionArgList Args;
3590 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3591 ImplicitParamDecl::Other);
3592 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3593 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3594 ImplicitParamDecl::Other);
3595 Args.push_back(&GtidArg);
3596 Args.push_back(&TaskTypeArg);
3597 const auto &TaskEntryFnInfo =
3598 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3599 llvm::FunctionType *TaskEntryTy =
3600 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3601 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3602 auto *TaskEntry = llvm::Function::Create(
3603 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3604 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3605 TaskEntry->setDoesNotRecurse();
3606 CodeGenFunction CGF(CGM);
3607 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3608 Loc, Loc);
3609
3610 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3611 // tt,
3612 // For taskloops:
3613 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3614 // tt->task_data.shareds);
3615 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3616 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3617 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3618 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3619 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3620 const auto *KmpTaskTWithPrivatesQTyRD =
3621 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3622 LValue Base =
3623 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3624 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3625 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3626 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3627 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3628
3629 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3630 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3631 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3632 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3633 CGF.ConvertTypeForMem(SharedsPtrTy));
3634
3635 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3636 llvm::Value *PrivatesParam;
3637 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3638 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3639 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3640 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3641 } else {
3642 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3643 }
3644
3645 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3646 TaskPrivatesMap,
3647 CGF.Builder
3648 .CreatePointerBitCastOrAddrSpaceCast(
3649 TDBase.getAddress(CGF), CGF.VoidPtrTy)
3650 .getPointer()};
3651 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3652 std::end(CommonArgs));
3653 if (isOpenMPTaskLoopDirective(Kind)) {
3654 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3655 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3656 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3657 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3658 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3659 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3660 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3661 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3662 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3663 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3664 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3665 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3666 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3667 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3668 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3669 CallArgs.push_back(LBParam);
3670 CallArgs.push_back(UBParam);
3671 CallArgs.push_back(StParam);
3672 CallArgs.push_back(LIParam);
3673 CallArgs.push_back(RParam);
3674 }
3675 CallArgs.push_back(SharedsParam);
3676
3677 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3678 CallArgs);
3679 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3680 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3681 CGF.FinishFunction();
3682 return TaskEntry;
3683}
3684
3685static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3686 SourceLocation Loc,
3687 QualType KmpInt32Ty,
3688 QualType KmpTaskTWithPrivatesPtrQTy,
3689 QualType KmpTaskTWithPrivatesQTy) {
3690 ASTContext &C = CGM.getContext();
3691 FunctionArgList Args;
3692 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3693 ImplicitParamDecl::Other);
3694 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3695 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3696 ImplicitParamDecl::Other);
3697 Args.push_back(&GtidArg);
3698 Args.push_back(&TaskTypeArg);
3699 const auto &DestructorFnInfo =
3700 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3701 llvm::FunctionType *DestructorFnTy =
3702 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3703 std::string Name =
3704 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3705 auto *DestructorFn =
3706 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3707 Name, &CGM.getModule());
3708 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3709 DestructorFnInfo);
3710 DestructorFn->setDoesNotRecurse();
3711 CodeGenFunction CGF(CGM);
3712 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3713 Args, Loc, Loc);
3714
3715 LValue Base = CGF.EmitLoadOfPointerLValue(
3716 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3717 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3718 const auto *KmpTaskTWithPrivatesQTyRD =
3719 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3720 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3721 Base = CGF.EmitLValueForField(Base, *FI);
3722 for (const auto *Field :
3723 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3724 if (QualType::DestructionKind DtorKind =
3725 Field->getType().isDestructedType()) {
3726 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3727 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3728 }
3729 }
3730 CGF.FinishFunction();
3731 return DestructorFn;
3732}
3733
3734/// Emit a privates mapping function for correct handling of private and
3735/// firstprivate variables.
3736/// \code
3737/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3738/// **noalias priv1,..., <tyn> **noalias privn) {
3739/// *priv1 = &.privates.priv1;
3740/// ...;
3741/// *privn = &.privates.privn;
3742/// }
3743/// \endcode
3744static llvm::Value *
3745emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3746 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3747 ArrayRef<PrivateDataTy> Privates) {
3748 ASTContext &C = CGM.getContext();
3749 FunctionArgList Args;
3750 ImplicitParamDecl TaskPrivatesArg(
3751 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3753 ImplicitParamDecl::Other);
3754 Args.push_back(&TaskPrivatesArg);
3755 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3756 unsigned Counter = 1;
3757 for (const Expr *E : Data.PrivateVars) {
3758 Args.push_back(ImplicitParamDecl::Create(
3759 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760 C.getPointerType(C.getPointerType(E->getType()))
3761 .withConst()
3762 .withRestrict(),
3763 ImplicitParamDecl::Other));
3764 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765 PrivateVarsPos[VD] = Counter;
3766 ++Counter;
3767 }
3768 for (const Expr *E : Data.FirstprivateVars) {
3769 Args.push_back(ImplicitParamDecl::Create(
3770 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3771 C.getPointerType(C.getPointerType(E->getType()))
3772 .withConst()
3773 .withRestrict(),
3774 ImplicitParamDecl::Other));
3775 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3776 PrivateVarsPos[VD] = Counter;
3777 ++Counter;
3778 }
3779 for (const Expr *E : Data.LastprivateVars) {
3780 Args.push_back(ImplicitParamDecl::Create(
3781 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3782 C.getPointerType(C.getPointerType(E->getType()))
3783 .withConst()
3784 .withRestrict(),
3785 ImplicitParamDecl::Other));
3786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3787 PrivateVarsPos[VD] = Counter;
3788 ++Counter;
3789 }
3790 for (const VarDecl *VD : Data.PrivateLocals) {
3791 QualType Ty = VD->getType().getNonReferenceType();
3792 if (VD->getType()->isLValueReferenceType())
3793 Ty = C.getPointerType(Ty);
3794 if (isAllocatableDecl(VD))
3795 Ty = C.getPointerType(Ty);
3796 Args.push_back(ImplicitParamDecl::Create(
3797 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3798 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3799 ImplicitParamDecl::Other));
3800 PrivateVarsPos[VD] = Counter;
3801 ++Counter;
3802 }
3803 const auto &TaskPrivatesMapFnInfo =
3804 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3805 llvm::FunctionType *TaskPrivatesMapTy =
3806 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3807 std::string Name =
3808 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3809 auto *TaskPrivatesMap = llvm::Function::Create(
3810 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3811 &CGM.getModule());
3812 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3813 TaskPrivatesMapFnInfo);
3814 if (CGM.getLangOpts().Optimize) {
3815 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3816 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3817 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3818 }
3819 CodeGenFunction CGF(CGM);
3820 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3821 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3822
3823 // *privi = &.privates.privi;
3824 LValue Base = CGF.EmitLoadOfPointerLValue(
3825 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3826 TaskPrivatesArg.getType()->castAs<PointerType>());
3827 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3828 Counter = 0;
3829 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3830 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3831 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3832 LValue RefLVal =
3833 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3834 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3835 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3836 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3837 ++Counter;
3838 }
3839 CGF.FinishFunction();
3840 return TaskPrivatesMap;
3841}
3842
/// Emit initialization for private variables in task-based directives.
///
/// \param KmpTaskSharedsPtr Pointer to the captured shareds block of the
///        source task; may be Address::invalid() when there is nothing to
///        copy from (checked before use below).
/// \param TDBase Base lvalue of the kmp_task_t-with-privates record being
///        initialized.
/// \param KmpTaskTWithPrivatesQTyRD Record whose second field is the
///        privates block.
/// \param ForDup true when called from the task duplication function
///        (taskloop path), false when called at initial task allocation.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // The privates block is the second field of kmp_task_t-with-privates.
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  // NOTE(review): SrcBase is only assigned under the guard below; the reads
  // further down (the ForDup branch and the target-task branch) appear to be
  // reachable only when that same guard held — confirm if either condition
  // is ever changed independently.
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
        SharedsTy);
  }
  // Walk the fields of the privates record in lock-step with the (alignment
  // sorted) Privates list; FI tracks the current field.
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating (ForDup), only non-trivial constructor initializers
    // must be re-run; everything else was handled at task allocation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: locate the shared source value to copy/construct
        // from.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          // Read the source value out of the shareds block of the source
          // task (SrcBase was initialized above in this path).
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Address(SharedRefLValue.getPointer(CGF),
                      C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          // Variable captured by a lambda or a block: emit through the
          // original reference expression in the current context.
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(
                      Elem, [SrcElement]() -> Address { return SrcElement; });
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Scalar/record firstprivate: run the initializer with Elem bound
          // to the shared source value.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
            return SharedRefLValue.getAddress(CGF);
          });
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate copy: just run its default initializer.
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3964
3965/// Check if duplication function is required for taskloops.
3966static bool checkInitIsRequired(CodeGenFunction &CGF,
3967 ArrayRef<PrivateDataTy> Privates) {
3968 bool InitRequired = false;
3969 for (const PrivateDataTy &Pair : Privates) {
3970 if (Pair.second.isLocalPrivate())
3971 continue;
3972 const VarDecl *VD = Pair.second.PrivateCopy;
3973 const Expr *Init = VD->getAnyInitializer();
3974 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3975 !CGF.isTrivialInitializer(Init));
3976 if (InitRequired)
3977 break;
3978 }
3979 return InitRequired;
3980}
3981
3982
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
///
/// \param WithLastIter true when the kmp_task_t record carries a last-iter
///        field that must be copied from the \c lastpriv argument.
/// \return The generated internal-linkage llvm::Function.
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  // Formal parameters of the generated function: destination task, source
  // task and the lastprivate flag.
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    // Load the shareds pointer out of the *source* task so firstprivates can
    // be copied from it by emitPrivatesInit.
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
4061
4062/// Checks if destructor function is required to be generated.
4063/// \return true if cleanups are required, false otherwise.
4064static bool
4065checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4066 ArrayRef<PrivateDataTy> Privates) {
4067 for (const PrivateDataTy &P : Privates) {
4068 if (P.second.isLocalPrivate())
4069 continue;
4070 QualType Ty = P.second.Original->getType().getNonReferenceType();
4071 if (Ty.isDestructedType())
4072 return true;
4073 }
4074 return false;
4075}
4076
namespace {
/// Loop generator for OpenMP iterator expression.
///
/// RAII scope: the constructor privatizes each iterator variable and its
/// counter, then opens one loop nest level per iterator (emitting the
/// "cont"/"body" blocks); the destructor closes the levels in reverse order
/// (counter increment, back-branch, "exit" block). Code emitted between
/// construction and destruction therefore runs once per iteration of the
/// full iterator space.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  // Iterator expression being expanded; nullptr makes the scope a no-op.
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit jump destinations, filled by the constructor
  // and consumed (in reverse) by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  /// Privatize iterator/counter storage and emit the loop headers.
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    // Evaluate all upper bounds up front, before the loops are opened.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, [&CGF, VD]() {
        return CGF.CreateMemTemp(VD->getType(), VD->getName());
      });
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
        return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
                                 "counter.addr");
      });
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      // Signed vs. unsigned compare depends on the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  /// Close the loop nest opened by the constructor, innermost first.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
4155
4156static std::pair<llvm::Value *, llvm::Value *>
4157getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4158 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4159 llvm::Value *Addr;
4160 if (OASE) {
4161 const Expr *Base = OASE->getBase();
4162 Addr = CGF.EmitScalarExpr(Base);
4163 } else {
4164 Addr = CGF.EmitLValue(E).getPointer(CGF);
4165 }
4166 llvm::Value *SizeVal;
4167 QualType Ty = E->getType();
4168 if (OASE) {
4169 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4170 for (const Expr *SE : OASE->getDimensions()) {
4171 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4172 Sz = CGF.EmitScalarConversion(
4173 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4174 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4175 }
4176 } else if (const auto *ASE =
4177 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4178 LValue UpAddrLVal =
4179 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4180 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4181 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4182 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4183 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4184 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4185 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4186 } else {
4187 SizeVal = CGF.getTypeSize(Ty);
4188 }
4189 return std::make_pair(Addr, SizeVal);
4190}
4191
4192/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4193static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4194 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4195 if (KmpTaskAffinityInfoTy.isNull()) {
4196 RecordDecl *KmpAffinityInfoRD =
4197 C.buildImplicitRecord("kmp_task_affinity_info_t");
4198 KmpAffinityInfoRD->startDefinition();
4199 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4200 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4201 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4202 KmpAffinityInfoRD->completeDefinition();
4203 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4204 }
4205}
4206
4207CGOpenMPRuntime::TaskResultTy
4208CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4209 const OMPExecutableDirective &D,
4210 llvm::Function *TaskFunction, QualType SharedsTy,
4211 Address Shareds, const OMPTaskDataTy &Data) {
4212 ASTContext &C = CGM.getContext();
4213 llvm::SmallVector<PrivateDataTy, 4> Privates;
4214 // Aggregate privates and sort them by the alignment.
4215 const auto *I = Data.PrivateCopies.begin();
4216 for (const Expr *E : Data.PrivateVars) {
4217 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4218 Privates.emplace_back(
4219 C.getDeclAlign(VD),
4220 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4221 /*PrivateElemInit=*/nullptr));
4222 ++I;
4223 }
4224 I = Data.FirstprivateCopies.begin();
4225 const auto *IElemInitRef = Data.FirstprivateInits.begin();
4226 for (const Expr *E : Data.FirstprivateVars) {
4227 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4228 Privates.emplace_back(
4229 C.getDeclAlign(VD),
4230 PrivateHelpersTy(
4231 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4232 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4233 ++I;
4234 ++IElemInitRef;
4235 }
4236 I = Data.LastprivateCopies.begin();
4237 for (const Expr *E : Data.LastprivateVars) {
4238 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4239 Privates.emplace_back(
4240 C.getDeclAlign(VD),
4241 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4242 /*PrivateElemInit=*/nullptr));
4243 ++I;
4244 }
4245 for (const VarDecl *VD : Data.PrivateLocals) {
4246 if (isAllocatableDecl(VD))
4247 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4248 else
4249 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4250 }
4251 llvm::stable_sort(Privates,
4252 [](const PrivateDataTy &L, const PrivateDataTy &R) {
4253 return L.first > R.first;
4254 });
4255 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4256 // Build type kmp_routine_entry_t (if not built yet).
4257 emitKmpRoutineEntryT(KmpInt32Ty);
4258 // Build type kmp_task_t (if not built yet).
4259 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4260 if (SavedKmpTaskloopTQTy.isNull()) {
4261 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4262 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4263 }
4264 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4265 } else {
4266 assert((D.getDirectiveKind() == OMPD_task ||(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4269, __extension__
__PRETTY_FUNCTION__))
4267 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4269, __extension__
__PRETTY_FUNCTION__))
4268 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4269, __extension__
__PRETTY_FUNCTION__))
4269 "Expected taskloop, task or target directive")(static_cast <bool> ((D.getDirectiveKind() == OMPD_task
|| isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
isOpenMPTargetDataManagementDirective(D.getDirectiveKind()))
&& "Expected taskloop, task or target directive") ? void
(0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4269, __extension__
__PRETTY_FUNCTION__))
;
4270 if (SavedKmpTaskTQTy.isNull()) {
4271 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4272 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4273 }
4274 KmpTaskTQTy = SavedKmpTaskTQTy;
4275 }
4276 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4277 // Build particular struct kmp_task_t for the given task.
4278 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4279 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4280 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4281 QualType KmpTaskTWithPrivatesPtrQTy =
4282 C.getPointerType(KmpTaskTWithPrivatesQTy);
4283 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4284 llvm::Type *KmpTaskTWithPrivatesPtrTy =
4285 KmpTaskTWithPrivatesTy->getPointerTo();
4286 llvm::Value *KmpTaskTWithPrivatesTySize =
4287 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4288 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4289
4290 // Emit initial values for private copies (if any).
4291 llvm::Value *TaskPrivatesMap = nullptr;
4292 llvm::Type *TaskPrivatesMapTy =
4293 std::next(TaskFunction->arg_begin(), 3)->getType();
4294 if (!Privates.empty()) {
4295 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4296 TaskPrivatesMap =
4297 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4298 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4299 TaskPrivatesMap, TaskPrivatesMapTy);
4300 } else {
4301 TaskPrivatesMap = llvm::ConstantPointerNull::get(
4302 cast<llvm::PointerType>(TaskPrivatesMapTy));
4303 }
4304 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4305 // kmp_task_t *tt);
4306 llvm::Function *TaskEntry = emitProxyTaskFunction(
4307 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4308 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4309 TaskPrivatesMap);
4310
4311 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4312 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4313 // kmp_routine_entry_t *task_entry);
4314 // Task flags. Format is taken from
4315 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4316 // description of kmp_tasking_flags struct.
4317 enum {
4318 TiedFlag = 0x1,
4319 FinalFlag = 0x2,
4320 DestructorsFlag = 0x8,
4321 PriorityFlag = 0x20,
4322 DetachableFlag = 0x40,
4323 };
4324 unsigned Flags = Data.Tied ? TiedFlag : 0;
4325 bool NeedsCleanup = false;
4326 if (!Privates.empty()) {
4327 NeedsCleanup =
4328 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4329 if (NeedsCleanup)
4330 Flags = Flags | DestructorsFlag;
4331 }
4332 if (Data.Priority.getInt())
4333 Flags = Flags | PriorityFlag;
4334 if (D.hasClausesOfKind<OMPDetachClause>())
4335 Flags = Flags | DetachableFlag;
4336 llvm::Value *TaskFlags =
4337 Data.Final.getPointer()
4338 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4339 CGF.Builder.getInt32(FinalFlag),
4340 CGF.Builder.getInt32(/*C=*/0))
4341 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4342 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4343 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4344 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4345 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4346 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4347 TaskEntry, KmpRoutineEntryPtrTy)};
4348 llvm::Value *NewTask;
4349 if (D.hasClausesOfKind<OMPNowaitClause>()) {
4350 // Check if we have any device clause associated with the directive.
4351 const Expr *Device = nullptr;
4352 if (auto *C = D.getSingleClause<OMPDeviceClause>())
4353 Device = C->getDevice();
4354 // Emit device ID if any otherwise use default value.
4355 llvm::Value *DeviceID;
4356 if (Device)
4357 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4358 CGF.Int64Ty, /*isSigned=*/true);
4359 else
4360 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4361 AllocArgs.push_back(DeviceID);
4362 NewTask = CGF.EmitRuntimeCall(
4363 OMPBuilder.getOrCreateRuntimeFunction(
4364 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4365 AllocArgs);
4366 } else {
4367 NewTask =
4368 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4369 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4370 AllocArgs);
4371 }
4372 // Emit detach clause initialization.
4373 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4374 // task_descriptor);
4375 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4376 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4377 LValue EvtLVal = CGF.EmitLValue(Evt);
4378
4379 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4380 // int gtid, kmp_task_t *task);
4381 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4382 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4383 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4384 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4385 OMPBuilder.getOrCreateRuntimeFunction(
4386 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4387 {Loc, Tid, NewTask});
4388 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4389 Evt->getExprLoc());
4390 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4391 }
4392 // Process affinity clauses.
4393 if (D.hasClausesOfKind<OMPAffinityClause>()) {
4394 // Process list of affinity data.
4395 ASTContext &C = CGM.getContext();
4396 Address AffinitiesArray = Address::invalid();
4397 // Calculate number of elements to form the array of affinity data.
4398 llvm::Value *NumOfElements = nullptr;
4399 unsigned NumAffinities = 0;
4400 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4401 if (const Expr *Modifier = C->getModifier()) {
4402 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4403 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4404 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4405 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4406 NumOfElements =
4407 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4408 }
4409 } else {
4410 NumAffinities += C->varlist_size();
4411 }
4412 }
4413 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4414 // Fields ids in kmp_task_affinity_info record.
4415 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4416
4417 QualType KmpTaskAffinityInfoArrayTy;
4418 if (NumOfElements) {
4419 NumOfElements = CGF.Builder.CreateNUWAdd(
4420 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4421 auto *OVE = new (C) OpaqueValueExpr(
4422 Loc,
4423 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4424 VK_PRValue);
4425 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4426 RValue::get(NumOfElements));
4427 KmpTaskAffinityInfoArrayTy =
4428 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4429 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4430 // Properly emit variable-sized array.
4431 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4432 ImplicitParamDecl::Other);
4433 CGF.EmitVarDecl(*PD);
4434 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4435 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4436 /*isSigned=*/false);
4437 } else {
4438 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4439 KmpTaskAffinityInfoTy,
4440 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4441 ArrayType::Normal, /*IndexTypeQuals=*/0);
4442 AffinitiesArray =
4443 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4444 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4445 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4446 /*isSigned=*/false);
4447 }
4448
4449 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4450 // Fill array by elements without iterators.
4451 unsigned Pos = 0;
4452 bool HasIterator = false;
4453 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4454 if (C->getModifier()) {
4455 HasIterator = true;
4456 continue;
4457 }
4458 for (const Expr *E : C->varlists()) {
4459 llvm::Value *Addr;
4460 llvm::Value *Size;
4461 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4462 LValue Base =
4463 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4464 KmpTaskAffinityInfoTy);
4465 // affs[i].base_addr = &<Affinities[i].second>;
4466 LValue BaseAddrLVal = CGF.EmitLValueForField(
4467 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4468 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4469 BaseAddrLVal);
4470 // affs[i].len = sizeof(<Affinities[i].second>);
4471 LValue LenLVal = CGF.EmitLValueForField(
4472 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4473 CGF.EmitStoreOfScalar(Size, LenLVal);
4474 ++Pos;
4475 }
4476 }
4477 LValue PosLVal;
4478 if (HasIterator) {
4479 PosLVal = CGF.MakeAddrLValue(
4480 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4481 C.getSizeType());
4482 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4483 }
4484 // Process elements with iterators.
4485 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4486 const Expr *Modifier = C->getModifier();
4487 if (!Modifier)
4488 continue;
4489 OMPIteratorGeneratorScope IteratorScope(
4490 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4491 for (const Expr *E : C->varlists()) {
4492 llvm::Value *Addr;
4493 llvm::Value *Size;
4494 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4495 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4496 LValue Base = CGF.MakeAddrLValue(
4497 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4498 // affs[i].base_addr = &<Affinities[i].second>;
4499 LValue BaseAddrLVal = CGF.EmitLValueForField(
4500 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4501 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4502 BaseAddrLVal);
4503 // affs[i].len = sizeof(<Affinities[i].second>);
4504 LValue LenLVal = CGF.EmitLValueForField(
4505 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4506 CGF.EmitStoreOfScalar(Size, LenLVal);
4507 Idx = CGF.Builder.CreateNUWAdd(
4508 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4509 CGF.EmitStoreOfScalar(Idx, PosLVal);
4510 }
4511 }
4512 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4513 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4514 // naffins, kmp_task_affinity_info_t *affin_list);
4515 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4516 llvm::Value *GTid = getThreadID(CGF, Loc);
4517 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4518 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4519 // FIXME: Emit the function and ignore its result for now unless the
4520 // runtime function is properly implemented.
4521 (void)CGF.EmitRuntimeCall(
4522 OMPBuilder.getOrCreateRuntimeFunction(
4523 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4524 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4525 }
4526 llvm::Value *NewTaskNewTaskTTy =
4527 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4528 NewTask, KmpTaskTWithPrivatesPtrTy);
4529 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4530 KmpTaskTWithPrivatesQTy);
4531 LValue TDBase =
4532 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4533 // Fill the data in the resulting kmp_task_t record.
4534 // Copy shareds if there are any.
4535 Address KmpTaskSharedsPtr = Address::invalid();
4536 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4537 KmpTaskSharedsPtr =
4538 Address(CGF.EmitLoadOfScalar(
4539 CGF.EmitLValueForField(
4540 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4541 KmpTaskTShareds)),
4542 Loc),
4543 CGM.getNaturalTypeAlignment(SharedsTy));
4544 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4545 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4546 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4547 }
4548 // Emit initial values for private copies (if any).
4549 TaskResultTy Result;
4550 if (!Privates.empty()) {
4551 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4552 SharedsTy, SharedsPtrTy, Data, Privates,
4553 /*ForDup=*/false);
4554 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4555 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4556 Result.TaskDupFn = emitTaskDupFunction(
4557 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4558 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4559 /*WithLastIter=*/!Data.LastprivateVars.empty());
4560 }
4561 }
4562 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4563 enum { Priority = 0, Destructors = 1 };
4564 // Provide pointer to function with destructors for privates.
4565 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4566 const RecordDecl *KmpCmplrdataUD =
4567 (*FI)->getType()->getAsUnionType()->getDecl();
4568 if (NeedsCleanup) {
4569 llvm::Value *DestructorFn = emitDestructorsFunction(
4570 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4571 KmpTaskTWithPrivatesQTy);
4572 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4573 LValue DestructorsLV = CGF.EmitLValueForField(
4574 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4575 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4576 DestructorFn, KmpRoutineEntryPtrTy),
4577 DestructorsLV);
4578 }
4579 // Set priority.
4580 if (Data.Priority.getInt()) {
4581 LValue Data2LV = CGF.EmitLValueForField(
4582 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4583 LValue PriorityLV = CGF.EmitLValueForField(
4584 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4585 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4586 }
4587 Result.NewTask = NewTask;
4588 Result.TaskEntry = TaskEntry;
4589 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4590 Result.TDBase = TDBase;
4591 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4592 return Result;
4593}
4594
namespace {
/// Dependence kind for RTL. Encoded as the flag bits stored into the
/// kmp_depend_info `flags` field (presumably matching the runtime's
/// kmp_depend_info layout in openmp/runtime/src/kmp.h — confirm there).
enum RTLDependenceKindTy {
  DepIn = 0x01,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4
};
/// Fields ids in kmp_depend_info record; order matches the field order
/// built by getDependTypes (base_addr, len, flags).
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
} // namespace
4605
4606/// Translates internal dependency kind into the runtime kind.
4607static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4608 RTLDependenceKindTy DepKind;
4609 switch (K) {
4610 case OMPC_DEPEND_in:
4611 DepKind = DepIn;
4612 break;
4613 // Out and InOut dependencies must use the same code.
4614 case OMPC_DEPEND_out:
4615 case OMPC_DEPEND_inout:
4616 DepKind = DepInOut;
4617 break;
4618 case OMPC_DEPEND_mutexinoutset:
4619 DepKind = DepMutexInOutSet;
4620 break;
4621 case OMPC_DEPEND_source:
4622 case OMPC_DEPEND_sink:
4623 case OMPC_DEPEND_depobj:
4624 case OMPC_DEPEND_unknown:
4625 llvm_unreachable("Unknown task dependence type")::llvm::llvm_unreachable_internal("Unknown task dependence type"
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4625)
;
4626 }
4627 return DepKind;
4628}
4629
4630/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4631static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4632 QualType &FlagsTy) {
4633 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4634 if (KmpDependInfoTy.isNull()) {
4635 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4636 KmpDependInfoRD->startDefinition();
4637 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4638 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4639 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4640 KmpDependInfoRD->completeDefinition();
4641 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4642 }
4643}
4644
/// Returns the number of dependency records held by the depobj referenced by
/// \p DepobjLVal together with an lvalue for the first record.
///
/// A depobj variable stores a pointer into a heap-allocated kmp_depend_info
/// array; the element at index -1 keeps the number of elements in its
/// base_addr field (see emitDepobjDependClause, which writes that layout).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Ensure KmpDependInfoTy is built; FlagsTy itself is not used here.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  // Load the void* held by the depobj variable.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // Step back one element to the slot that stores the element count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
4671
/// Fills \p DependenciesArray with one kmp_depend_info record per dependency
/// expression in \p Data (base_addr, len, flags).
///
/// \param Pos Either a pointer to a compile-time index that is incremented in
///        place, or a pointer to an LValue holding a runtime counter that is
///        loaded and stored back per element — the latter is used when the
///        array slot is not known statically (e.g. under an iterator
///        modifier).
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  // Bring the iterator variables (if any) into scope so the dependency
  // expressions can reference them; the scope closes when this function
  // returns.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;
    std::tie(Addr, Size) = getPointerAndSize(CGF, E);
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      // Statically known slot.
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      // Slot index lives in memory; load it.
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                          BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Len));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                          FlagsLVal);
    // Advance the position for the next record.
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
4728
4729static SmallVector<llvm::Value *, 4>
4730emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4731 const OMPTaskDataTy::DependData &Data) {
4732 assert(Data.DepKind == OMPC_DEPEND_depobj &&(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4733, __extension__
__PRETTY_FUNCTION__))
4733 "Expected depobj dependecy kind.")(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4733, __extension__
__PRETTY_FUNCTION__))
;
4734 SmallVector<llvm::Value *, 4> Sizes;
4735 SmallVector<LValue, 4> SizeLVals;
4736 ASTContext &C = CGF.getContext();
4737 QualType FlagsTy;
4738 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4739 RecordDecl *KmpDependInfoRD =
4740 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4741 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4742 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4743 {
4744 OMPIteratorGeneratorScope IteratorScope(
4745 CGF, cast_or_null<OMPIteratorExpr>(
4746 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4747 : nullptr));
4748 for (const Expr *E : Data.DepExprs) {
4749 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4750 LValue Base = CGF.EmitLoadOfPointerLValue(
4751 DepobjLVal.getAddress(CGF),
4752 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4753 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4754 Base.getAddress(CGF), KmpDependInfoPtrT);
4755 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4756 Base.getTBAAInfo());
4757 Address DepObjAddr = CGF.Builder.CreateGEP(
4758 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4759 LValue NumDepsBase = CGF.MakeAddrLValue(
4760 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4761 // NumDeps = deps[i].base_addr;
4762 LValue BaseAddrLVal = CGF.EmitLValueForField(
4763 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4764 llvm::Value *NumDeps =
4765 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4766 LValue NumLVal = CGF.MakeAddrLValue(
4767 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4768 C.getUIntPtrType());
4769 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4770 NumLVal.getAddress(CGF));
4771 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4772 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4773 CGF.EmitStoreOfScalar(Add, NumLVal);
4774 SizeLVals.push_back(NumLVal);
4775 }
4776 }
4777 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4778 llvm::Value *Size =
4779 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4780 Sizes.push_back(Size);
4781 }
4782 return Sizes;
4783}
4784
4785static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4786 LValue PosLVal,
4787 const OMPTaskDataTy::DependData &Data,
4788 Address DependenciesArray) {
4789 assert(Data.DepKind == OMPC_DEPEND_depobj &&(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4790, __extension__
__PRETTY_FUNCTION__))
4790 "Expected depobj dependecy kind.")(static_cast <bool> (Data.DepKind == OMPC_DEPEND_depobj
&& "Expected depobj dependecy kind.") ? void (0) : __assert_fail
("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\""
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4790, __extension__
__PRETTY_FUNCTION__))
;
4791 ASTContext &C = CGF.getContext();
4792 QualType FlagsTy;
4793 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4794 RecordDecl *KmpDependInfoRD =
4795 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4796 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4797 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4798 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4799 {
4800 OMPIteratorGeneratorScope IteratorScope(
4801 CGF, cast_or_null<OMPIteratorExpr>(
4802 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4803 : nullptr));
4804 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4805 const Expr *E = Data.DepExprs[I];
4806 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4807 LValue Base = CGF.EmitLoadOfPointerLValue(
4808 DepobjLVal.getAddress(CGF),
4809 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4810 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4811 Base.getAddress(CGF), KmpDependInfoPtrT);
4812 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4813 Base.getTBAAInfo());
4814
4815 // Get number of elements in a single depobj.
4816 Address DepObjAddr = CGF.Builder.CreateGEP(
4817 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4818 LValue NumDepsBase = CGF.MakeAddrLValue(
4819 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4820 // NumDeps = deps[i].base_addr;
4821 LValue BaseAddrLVal = CGF.EmitLValueForField(
4822 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4823 llvm::Value *NumDeps =
4824 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4825
4826 // memcopy dependency data.
4827 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4828 ElSize,
4829 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4830 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4831 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4832 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4833
4834 // Increase pos.
4835 // pos += size;
4836 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4837 CGF.EmitStoreOfScalar(Add, PosLVal);
4838 }
4839 }
4840}
4841
/// Emits the combined kmp_depend_info array for all dependency clauses of a
/// task-like construct.
///
/// Returns the total number of records (as an i32 value) and the array
/// address cast to void*, or {nullptr, invalid} when no clause carries any
/// dependency. A VLA is used whenever the total is only known at runtime
/// (depobj contents or iterator modifiers); otherwise a fixed-size stack
/// array is emitted.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Compile-time count of regular dependencies (no depobj, no iterator).
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any: every iteration of the iterator
    // produces one record per listed dependency expression.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total number of records is a runtime value - emit a VLA.
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count - emit a fixed-size local array.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Phase 1: regular dependencies without iterators - slots are known
  // statically, so use a compile-time position.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Phase 2: copy regular dependencies with iterators, tracking the position
  // in a runtime counter seeded with the static position reached above.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Phase 3: copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy);
  return std::make_pair(NumOfElements, DependenciesArray);
}
4965
/// Allocates and fills the heap-backed kmp_depend_info array for a `depobj`
/// construct.
///
/// The allocation reserves one extra leading record whose base_addr field
/// stores the number of dependency records; the returned address points one
/// record past that count slot, so consumers (and emitDestroyClause /
/// getDepobjElements, which step back by -1) see only the records.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    // Runtime count: product of all iterator trip counts.
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    // +1 for the leading count slot.
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Static count: size the array (records + count slot) at compile time.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
  DependenciesArray = Address(Addr, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  // Fill the records starting at index 1 (index 0 is the count slot); with
  // an iterator modifier the position must live in memory.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  // Return a pointer one record past the count slot.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
  return DependenciesArray;
}
5048
// Emits the runtime call that releases the storage backing a depobj: loads
// the kmp_depend_info array pointer out of the depobj variable, steps back
// one element to the allocation base (the array is handed out shifted one
// element past a leading size record — see the matching alloc path), and
// frees it with __kmpc_free using the default allocator.
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Ensure KmpDependInfoTy/FlagsTy are built for this module.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  // DepobjLVal holds a void* to the dependency array; load it.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF),
      C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
  // GEP by -1 element: recover the original allocation base that precedes
  // the user-visible first dependency record.
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
5075
// Emits an in-place update of a depobj: iterates over every kmp_depend_info
// record stored in the depobj array and rewrites its 'flags' field to the
// runtime encoding of NewDepKind. The iteration is emitted as a hand-built
// do-while loop over the records (count comes from getDepobjElements).
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  // NumDeps = number of records; Base = lvalue of the first record.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  // PHI carries the current element pointer across loop iterations; the
  // second incoming value is added after the increment below.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Address(ElementPHI, Begin.getAlignment());
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
                        FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  // Loop back until the advanced pointer reaches End.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5122
// Emits code for the OpenMP 'task' construct: initializes the task object
// via emitTaskInit, then either enqueues it with
// __kmpc_omp_task[_with_deps] (then-branch), or — when the if-clause is
// false — waits on any dependences and runs the task body inline between
// __kmpc_omp_task_begin_if0 / __kmpc_omp_task_complete_if0 (else-branch).
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // Only filled (and only read, inside ThenCodeGen) when there are
  // dependences.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  // Then-branch: enqueue the (possibly dependent) task with the runtime.
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      // Untied tasks start with part_id = 0.
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Only filled (and only read, inside ElseCodeGen) when there are
  // dependences.
  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  // Else-branch (if-clause false): run the task undeferred on this thread.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    // Dynamic dispatch between deferred and undeferred execution.
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    // No if-clause: always enqueue.
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
5240
// Emits code for the 'taskloop' construct: initializes the task object,
// stores the loop bounds/stride (and reductions pointer) into the task
// descriptor, and invokes __kmpc_taskloop with the schedule encoded from
// Data.Schedule (grainsize vs num_tasks vs none).
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val: evaluated if-clause condition, or constant 1 when absent.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower bound field from the LB variable's initializer.
  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the upper bound field.
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Initialize the stride field.
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  // Runtime 'sched' argument encoding.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      // Data.Schedule: pointer is the grainsize/num_tasks expression, the
      // int discriminates num_tasks (true) from grainsize (false).
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
5326
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// Emits a hand-built while-do IR loop: computes the flattened element
/// count, then per element privatizes LHSVar/RHSVar to the current element
/// addresses and invokes \p RedOpGen.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by the atomic-reduction path; all null otherwise).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  // Guard against a zero-length array: skip the body entirely.
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  // PHIs carry the current source/destination element pointers; the
  // back-edge incoming values are added after the increment below.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent =
      Address(RHSElementPHI,
              RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent =
      Address(LHSElementPHI,
              LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  // Remap LHSVar/RHSVar to the current element so RedOpGen's expressions
  // (which reference the variables) operate element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
5409
5410/// Emit reduction combiner. If the combiner is a simple expression emit it as
5411/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5412/// UDR combiner function.
5413static void emitReductionCombiner(CodeGenFunction &CGF,
5414 const Expr *ReductionOp) {
5415 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5416 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5417 if (const auto *DRE =
5418 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5419 if (const auto *DRD =
5420 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5421 std::pair<llvm::Function *, llvm::Function *> Reduction =
5422 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5423 RValue Func = RValue::get(Reduction.first);
5424 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5425 CGF.EmitIgnoredExpr(ReductionOp);
5426 return;
5427 }
5428 CGF.EmitIgnoredExpr(ReductionOp);
5429}
5430
// Builds the reduce_func outlined function used by __kmpc_reduce:
//   void reduction_func(void *LHSArg, void *RHSArg);
// Both arguments are arrays of void* (one slot per reduction variable,
// plus an extra size slot after each VLA). The body privatizes each
// LHS/RHS variable to the corresponding array slot and emits every
// combiner in ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
      ArgsType), CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
      ArgsType), CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable onto its slot in the argument arrays. Idx can
  // run ahead of I because VLAs consume an extra slot for their size.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  auto IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
      return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
    });
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
      return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
    });
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      // The next slot holds the element count encoded as a pointer; bind it
      // to the VLA's size expression before emitting the VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  // Emit one combiner per reduction; arrays go element-by-element.
  IPriv = Privates.begin();
  auto ILHS = LHSExprs.begin();
  auto IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
5522
5523void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5524 const Expr *ReductionOp,
5525 const Expr *PrivateRef,
5526 const DeclRefExpr *LHS,
5527 const DeclRefExpr *RHS) {
5528 if (PrivateRef->getType()->isArrayType()) {
5529 // Emit reduction for array section.
5530 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5531 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5532 EmitOMPAggregateReduction(
5533 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5534 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5535 emitReductionCombiner(CGF, ReductionOp);
5536 });
5537 } else {
5538 // Emit reduction for array subscript or single variable.
5539 emitReductionCombiner(CGF, ReductionOp);
5540 }
5541}
5542
5543void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5544 ArrayRef<const Expr *> Privates,
5545 ArrayRef<const Expr *> LHSExprs,
5546 ArrayRef<const Expr *> RHSExprs,
5547 ArrayRef<const Expr *> ReductionOps,
5548 ReductionOptionsTy Options) {
5549 if (!CGF.HaveInsertPoint())
5550 return;
5551
5552 bool WithNowait = Options.WithNowait;
5553 bool SimpleReduction = Options.SimpleReduction;
5554
5555 // Next code should be emitted for reduction:
5556 //
5557 // static kmp_critical_name lock = { 0 };
5558 //
5559 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5560 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5561 // ...
5562 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5563 // *(Type<n>-1*)rhs[<n>-1]);
5564 // }
5565 //
5566 // ...
5567 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5568 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5569 // RedList, reduce_func, &<lock>)) {
5570 // case 1:
5571 // ...
5572 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5573 // ...
5574 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5575 // break;
5576 // case 2:
5577 // ...
5578 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5579 // ...
5580 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5581 // break;
5582 // default:;
5583 // }
5584 //
5585 // if SimpleReduction is true, only the next code is generated:
5586 // ...
5587 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5588 // ...
5589
5590 ASTContext &C = CGM.getContext();
5591
5592 if (SimpleReduction) {
5593 CodeGenFunction::RunCleanupsScope Scope(CGF);
5594 auto IPriv = Privates.begin();
5595 auto ILHS = LHSExprs.begin();
5596 auto IRHS = RHSExprs.begin();
5597 for (const Expr *E : ReductionOps) {
5598 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5599 cast<DeclRefExpr>(*IRHS));
5600 ++IPriv;
5601 ++ILHS;
5602 ++IRHS;
5603 }
5604 return;
5605 }
5606
5607 // 1. Build a list of reduction variables.
5608 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5609 auto Size = RHSExprs.size();
5610 for (const Expr *E : Privates) {
5611 if (E->getType()->isVariablyModifiedType())
5612 // Reserve place for array size.
5613 ++Size;
5614 }
5615 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5616 QualType ReductionArrayTy =
5617 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5618 /*IndexTypeQuals=*/0);
5619 Address ReductionList =
5620 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5621 auto IPriv = Privates.begin();
5622 unsigned Idx = 0;
5623 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5624 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5625 CGF.Builder.CreateStore(
5626 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5627 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5628 Elem);
5629 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5630 // Store array size.
5631 ++Idx;
5632 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5633 llvm::Value *Size = CGF.Builder.CreateIntCast(
5634 CGF.getVLASize(
5635 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5636 .NumElts,
5637 CGF.SizeTy, /*isSigned=*/false);
5638 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5639 Elem);
5640 }
5641 }
5642
5643 // 2. Emit reduce_func().
5644 llvm::Function *ReductionFn = emitReductionFunction(
5645 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5646 LHSExprs, RHSExprs, ReductionOps);
5647
5648 // 3. Create static kmp_critical_name lock = { 0 };
5649 std::string Name = getName({"reduction"});
5650 llvm::Value *Lock = getCriticalRegionLock(Name);
5651
5652 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5653 // RedList, reduce_func, &<lock>);
5654 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5655 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5656 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5657 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5658 ReductionList.getPointer(), CGF.VoidPtrTy);
5659 llvm::Value *Args[] = {
5660 IdentTLoc, // ident_t *<loc>
5661 ThreadId, // i32 <gtid>
5662 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5663 ReductionArrayTySize, // size_type sizeof(RedList)
5664 RL, // void *RedList
5665 ReductionFn, // void (*) (void *, void *) <reduce_func>
5666 Lock // kmp_critical_name *&<lock>
5667 };
5668 llvm::Value *Res = CGF.EmitRuntimeCall(
5669 OMPBuilder.getOrCreateRuntimeFunction(
5670 CGM.getModule(),
5671 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5672 Args);
5673
5674 // 5. Build switch(res)
5675 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5676 llvm::SwitchInst *SwInst =
5677 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5678
5679 // 6. Build case 1:
5680 // ...
5681 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5682 // ...
5683 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5684 // break;
5685 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5686 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5687 CGF.EmitBlock(Case1BB);
5688
5689 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5690 llvm::Value *EndArgs[] = {
5691 IdentTLoc, // ident_t *<loc>
5692 ThreadId, // i32 <gtid>
5693 Lock // kmp_critical_name *&<lock>
5694 };
5695 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5696 CodeGenFunction &CGF, PrePostActionTy &Action) {
5697 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5698 auto IPriv = Privates.begin();
5699 auto ILHS = LHSExprs.begin();
5700 auto IRHS = RHSExprs.begin();
5701 for (const Expr *E : ReductionOps) {
5702 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5703 cast<DeclRefExpr>(*IRHS));
5704 ++IPriv;
5705 ++ILHS;
5706 ++IRHS;
5707 }
5708 };
5709 RegionCodeGenTy RCG(CodeGen);
5710 CommonActionTy Action(
5711 nullptr, llvm::None,
5712 OMPBuilder.getOrCreateRuntimeFunction(
5713 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5714 : OMPRTL___kmpc_end_reduce),
5715 EndArgs);
5716 RCG.setAction(Action);
5717 RCG(CGF);
5718
5719 CGF.EmitBranch(DefaultBB);
5720
5721 // 7. Build case 2:
5722 // ...
5723 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5724 // ...
5725 // break;
5726 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5727 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5728 CGF.EmitBlock(Case2BB);
5729
5730 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5731 CodeGenFunction &CGF, PrePostActionTy &Action) {
5732 auto ILHS = LHSExprs.begin();
5733 auto IRHS = RHSExprs.begin();
5734 auto IPriv = Privates.begin();
5735 for (const Expr *E : ReductionOps) {
5736 const Expr *XExpr = nullptr;
5737 const Expr *EExpr = nullptr;
5738 const Expr *UpExpr = nullptr;
5739 BinaryOperatorKind BO = BO_Comma;
5740 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5741 if (BO->getOpcode() == BO_Assign) {
5742 XExpr = BO->getLHS();
5743 UpExpr = BO->getRHS();
5744 }
5745 }
5746 // Try to emit update expression as a simple atomic.
5747 const Expr *RHSExpr = UpExpr;
5748 if (RHSExpr) {
5749 // Analyze RHS part of the whole expression.
5750 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5751 RHSExpr->IgnoreParenImpCasts())) {
5752 // If this is a conditional operator, analyze its condition for
5753 // min/max reduction operator.
5754 RHSExpr = ACO->getCond();
5755 }
5756 if (const auto *BORHS =
5757 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5758 EExpr = BORHS->getRHS();
5759 BO = BORHS->getOpcode();
5760 }
5761 }
5762 if (XExpr) {
5763 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5764 auto &&AtomicRedGen = [BO, VD,
5765 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5766 const Expr *EExpr, const Expr *UpExpr) {
5767 LValue X = CGF.EmitLValue(XExpr);
5768 RValue E;
5769 if (EExpr)
5770 E = CGF.EmitAnyExpr(EExpr);
5771 CGF.EmitOMPAtomicSimpleUpdateExpr(
5772 X, E, BO, /*IsXLHSInRHSPart=*/true,
5773 llvm::AtomicOrdering::Monotonic, Loc,
5774 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5775 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5776 PrivateScope.addPrivate(
5777 VD, [&CGF, VD, XRValue, Loc]() {
5778 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5779 CGF.emitOMPSimpleStore(
5780 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5781 VD->getType().getNonReferenceType(), Loc);
5782 return LHSTemp;
5783 });
5784 (void)PrivateScope.Privatize();
5785 return CGF.EmitAnyExpr(UpExpr);
5786