Bug Summary

File: build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Warning: line 8336, column 9
2nd function call argument is an uninitialized value
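
This warning flags a call site where the second argument is read from a variable that may never have been written on some execution path. As a minimal sketch of the defect pattern (hypothetical code, not taken from CGOpenMPRuntime.cpp):

    void consume(int a, int b) { (void)a; (void)b; }

    void example(bool cond) {
      int b;            // 'b' stays uninitialized when 'cond' is false
      if (cond)
        b = 42;
      consume(0, b);    // analyzer: 2nd function call argument is an uninitialized value
    }

The usual fix is to ensure the variable is assigned on every path before it reaches the call.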

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name CGOpenMPRuntime.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -relaxed-aliasing -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/build-llvm -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I tools/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/clang/lib/CodeGen -I /build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/clang/include -I tools/clang/include -I include -I /build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/build-llvm=build-llvm -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/build-llvm=build-llvm -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/build-llvm=build-llvm -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/= -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-02-16-181416-145369-1 -x c++ 
/build/llvm-toolchain-snapshot-15~++20220216111134+dda3c14fd7db/clang/lib/CodeGen/CGOpenMPRuntime.cpp
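
For context, the line above is the full -cc1 analyzer invocation recorded by scan-build, followed by the input file. A roughly equivalent driver-level command is sketched below; the include paths are simplified assumptions that would need to match the actual build tree:

    clang --analyze -Xclang -analyzer-checker=core \
        -I clang/include -I llvm/include -std=c++14 \
        clang/lib/CodeGen/CGOpenMPRuntime.cpp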

1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "CGCXXABI.h"
15#include "CGCleanup.h"
16#include "CGRecordLayout.h"
17#include "CodeGenFunction.h"
18#include "TargetInfo.h"
19#include "clang/AST/APValue.h"
20#include "clang/AST/Attr.h"
21#include "clang/AST/Decl.h"
22#include "clang/AST/OpenMPClause.h"
23#include "clang/AST/StmtOpenMP.h"
24#include "clang/AST/StmtVisitor.h"
25#include "clang/Basic/BitmaskEnum.h"
26#include "clang/Basic/FileManager.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/SourceManager.h"
29#include "clang/CodeGen/ConstantInitBuilder.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SetOperations.h"
32#include "llvm/ADT/StringExtras.h"
33#include "llvm/Bitcode/BitcodeReader.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/GlobalValue.h"
37#include "llvm/IR/Value.h"
38#include "llvm/Support/AtomicOrdering.h"
39#include "llvm/Support/Format.h"
40#include "llvm/Support/raw_ostream.h"
41#include <cassert>
42#include <numeric>
43
44using namespace clang;
45using namespace CodeGen;
46using namespace llvm::omp;
47
48namespace {
49/// Base class for handling code generation inside OpenMP regions.
50class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
51public:
52 /// Kinds of OpenMP regions used in codegen.
53 enum CGOpenMPRegionKind {
54 /// Region with outlined function for standalone 'parallel'
55 /// directive.
56 ParallelOutlinedRegion,
57 /// Region with outlined function for standalone 'task' directive.
58 TaskOutlinedRegion,
59 /// Region for constructs that do not require function outlining,
60 /// like 'for', 'sections', 'atomic' etc. directives.
61 InlinedRegion,
62 /// Region with outlined function for standalone 'target' directive.
63 TargetRegion,
64 };
65
66 CGOpenMPRegionInfo(const CapturedStmt &CS,
67 const CGOpenMPRegionKind RegionKind,
68 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
69 bool HasCancel)
70 : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
71 CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
72
73 CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
74 const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
75 bool HasCancel)
76 : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
77 Kind(Kind), HasCancel(HasCancel) {}
78
79 /// Get a variable or parameter for storing global thread id
80 /// inside OpenMP construct.
81 virtual const VarDecl *getThreadIDVariable() const = 0;
82
83 /// Emit the captured statement body.
84 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
85
86 /// Get an LValue for the current ThreadID variable.
87 /// \return LValue for thread id variable. This LValue always has type int32*.
88 virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
89
90 virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
91
92 CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
93
94 OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
95
96 bool hasCancel() const { return HasCancel; }
97
98 static bool classof(const CGCapturedStmtInfo *Info) {
99 return Info->getKind() == CR_OpenMP;
100 }
101
102 ~CGOpenMPRegionInfo() override = default;
103
104protected:
105 CGOpenMPRegionKind RegionKind;
106 RegionCodeGenTy CodeGen;
107 OpenMPDirectiveKind Kind;
108 bool HasCancel;
109};
110
111/// API for captured statement code generation in OpenMP constructs.
112class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
113public:
114 CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
115 const RegionCodeGenTy &CodeGen,
116 OpenMPDirectiveKind Kind, bool HasCancel,
117 StringRef HelperName)
118 : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
119 HasCancel),
120 ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
121 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
122 }
123
124 /// Get a variable or parameter for storing global thread id
125 /// inside OpenMP construct.
126 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
127
128 /// Get the name of the capture helper.
129 StringRef getHelperName() const override { return HelperName; }
130
131 static bool classof(const CGCapturedStmtInfo *Info) {
132 return CGOpenMPRegionInfo::classof(Info) &&
133 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
134 ParallelOutlinedRegion;
135 }
136
137private:
138 /// A variable or parameter storing global thread id for OpenMP
139 /// constructs.
140 const VarDecl *ThreadIDVar;
141 StringRef HelperName;
142};
143
144/// API for captured statement code generation in OpenMP constructs.
145class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
146public:
147 class UntiedTaskActionTy final : public PrePostActionTy {
148 bool Untied;
149 const VarDecl *PartIDVar;
150 const RegionCodeGenTy UntiedCodeGen;
151 llvm::SwitchInst *UntiedSwitch = nullptr;
152
153 public:
154 UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
155 const RegionCodeGenTy &UntiedCodeGen)
156 : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
157 void Enter(CodeGenFunction &CGF) override {
158 if (Untied) {
159 // Emit task switching point.
160 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
161 CGF.GetAddrOfLocalVar(PartIDVar),
162 PartIDVar->getType()->castAs<PointerType>());
163 llvm::Value *Res =
164 CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
165 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
166 UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
167 CGF.EmitBlock(DoneBB);
168 CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
169 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
170 UntiedSwitch->addCase(CGF.Builder.getInt32(0),
171 CGF.Builder.GetInsertBlock());
172 emitUntiedSwitch(CGF);
173 }
174 }
175 void emitUntiedSwitch(CodeGenFunction &CGF) const {
176 if (Untied) {
177 LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
178 CGF.GetAddrOfLocalVar(PartIDVar),
179 PartIDVar->getType()->castAs<PointerType>());
180 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
181 PartIdLVal);
182 UntiedCodeGen(CGF);
183 CodeGenFunction::JumpDest CurPoint =
184 CGF.getJumpDestInCurrentScope(".untied.next.");
185 CGF.EmitBranch(CGF.ReturnBlock.getBlock());
186 CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
187 UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
188 CGF.Builder.GetInsertBlock());
189 CGF.EmitBranchThroughCleanup(CurPoint);
190 CGF.EmitBlock(CurPoint.getBlock());
191 }
192 }
193 unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
194 };
195 CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
196 const VarDecl *ThreadIDVar,
197 const RegionCodeGenTy &CodeGen,
198 OpenMPDirectiveKind Kind, bool HasCancel,
199 const UntiedTaskActionTy &Action)
200 : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
201 ThreadIDVar(ThreadIDVar), Action(Action) {
202 assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
203 }
204
205 /// Get a variable or parameter for storing global thread id
206 /// inside OpenMP construct.
207 const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
208
209 /// Get an LValue for the current ThreadID variable.
210 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
211
212 /// Get the name of the capture helper.
213 StringRef getHelperName() const override { return ".omp_outlined."; }
214
215 void emitUntiedSwitch(CodeGenFunction &CGF) override {
216 Action.emitUntiedSwitch(CGF);
217 }
218
219 static bool classof(const CGCapturedStmtInfo *Info) {
220 return CGOpenMPRegionInfo::classof(Info) &&
221 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
222 TaskOutlinedRegion;
223 }
224
225private:
226 /// A variable or parameter storing global thread id for OpenMP
227 /// constructs.
228 const VarDecl *ThreadIDVar;
229 /// Action for emitting code for untied tasks.
230 const UntiedTaskActionTy &Action;
231};
232
233/// API for inlined captured statement code generation in OpenMP
234/// constructs.
235class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
236public:
237 CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
238 const RegionCodeGenTy &CodeGen,
239 OpenMPDirectiveKind Kind, bool HasCancel)
240 : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
241 OldCSI(OldCSI),
242 OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
243
244 // Retrieve the value of the context parameter.
245 llvm::Value *getContextValue() const override {
246 if (OuterRegionInfo)
247 return OuterRegionInfo->getContextValue();
248 llvm_unreachable("No context value for inlined OpenMP region");
249 }
250
251 void setContextValue(llvm::Value *V) override {
252 if (OuterRegionInfo) {
253 OuterRegionInfo->setContextValue(V);
254 return;
255 }
256 llvm_unreachable("No context value for inlined OpenMP region");
257 }
258
259 /// Lookup the captured field decl for a variable.
260 const FieldDecl *lookup(const VarDecl *VD) const override {
261 if (OuterRegionInfo)
262 return OuterRegionInfo->lookup(VD);
263 // If there is no outer outlined region, no need to look up in the list of
264 // captured variables; we can use the original one.
265 return nullptr;
266 }
267
268 FieldDecl *getThisFieldDecl() const override {
269 if (OuterRegionInfo)
270 return OuterRegionInfo->getThisFieldDecl();
271 return nullptr;
272 }
273
274 /// Get a variable or parameter for storing global thread id
275 /// inside OpenMP construct.
276 const VarDecl *getThreadIDVariable() const override {
277 if (OuterRegionInfo)
278 return OuterRegionInfo->getThreadIDVariable();
279 return nullptr;
280 }
281
282 /// Get an LValue for the current ThreadID variable.
283 LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
284 if (OuterRegionInfo)
285 return OuterRegionInfo->getThreadIDVariableLValue(CGF);
286 llvm_unreachable("No LValue for inlined OpenMP construct");
287 }
288
289 /// Get the name of the capture helper.
290 StringRef getHelperName() const override {
291 if (auto *OuterRegionInfo = getOldCSI())
292 return OuterRegionInfo->getHelperName();
293 llvm_unreachable("No helper name for inlined OpenMP construct");
294 }
295
296 void emitUntiedSwitch(CodeGenFunction &CGF) override {
297 if (OuterRegionInfo)
298 OuterRegionInfo->emitUntiedSwitch(CGF);
299 }
300
301 CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
302
303 static bool classof(const CGCapturedStmtInfo *Info) {
304 return CGOpenMPRegionInfo::classof(Info) &&
305 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
306 }
307
308 ~CGOpenMPInlinedRegionInfo() override = default;
309
310private:
311 /// CodeGen info about outer OpenMP region.
312 CodeGenFunction::CGCapturedStmtInfo *OldCSI;
313 CGOpenMPRegionInfo *OuterRegionInfo;
314};
315
316/// API for captured statement code generation in OpenMP target
317/// constructs. For these captures, implicit parameters are used instead of the
318/// captured fields. The name of the target region has to be unique in a given
319/// application so it is provided by the client, because only the client has
320/// the information to generate that.
321class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
322public:
323 CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
324 const RegionCodeGenTy &CodeGen, StringRef HelperName)
325 : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
326 /*HasCancel=*/false),
327 HelperName(HelperName) {}
328
329 /// This is unused for target regions because each starts executing
330 /// with a single thread.
331 const VarDecl *getThreadIDVariable() const override { return nullptr; }
332
333 /// Get the name of the capture helper.
334 StringRef getHelperName() const override { return HelperName; }
335
336 static bool classof(const CGCapturedStmtInfo *Info) {
337 return CGOpenMPRegionInfo::classof(Info) &&
338 cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
339 }
340
341private:
342 StringRef HelperName;
343};
344
345static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
346 llvm_unreachable("No codegen for expressions");
347}
348/// API for generation of expressions captured in an innermost OpenMP
349/// region.
350class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
351public:
352 CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
353 : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
354 OMPD_unknown,
355 /*HasCancel=*/false),
356 PrivScope(CGF) {
357 // Make sure the globals captured in the provided statement are local by
358 // using the privatization logic. We assume the same variable is not
359 // captured more than once.
360 for (const auto &C : CS.captures()) {
361 if (!C.capturesVariable() && !C.capturesVariableByCopy())
362 continue;
363
364 const VarDecl *VD = C.getCapturedVar();
365 if (VD->isLocalVarDeclOrParm())
366 continue;
367
368 DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
369 /*RefersToEnclosingVariableOrCapture=*/false,
370 VD->getType().getNonReferenceType(), VK_LValue,
371 C.getLocation());
372 PrivScope.addPrivate(
373 VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); });
374 }
375 (void)PrivScope.Privatize();
376 }
377
378 /// Lookup the captured field decl for a variable.
379 const FieldDecl *lookup(const VarDecl *VD) const override {
380 if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
381 return FD;
382 return nullptr;
383 }
384
385 /// Emit the captured statement body.
386 void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
387 llvm_unreachable("No body for expressions");
388 }
389
390 /// Get a variable or parameter for storing global thread id
391 /// inside OpenMP construct.
392 const VarDecl *getThreadIDVariable() const override {
393 llvm_unreachable("No thread id for expressions");
394 }
395
396 /// Get the name of the capture helper.
397 StringRef getHelperName() const override {
398 llvm_unreachable("No helper name for expressions");
399 }
400
401 static bool classof(const CGCapturedStmtInfo *Info) { return false; }
402
403private:
404 /// Private scope to capture global variables.
405 CodeGenFunction::OMPPrivateScope PrivScope;
406};
407
408/// RAII for emitting code of OpenMP constructs.
409class InlinedOpenMPRegionRAII {
410 CodeGenFunction &CGF;
411 llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
412 FieldDecl *LambdaThisCaptureField = nullptr;
413 const CodeGen::CGBlockInfo *BlockInfo = nullptr;
414 bool NoInheritance = false;
415
416public:
417 /// Constructs region for combined constructs.
418 /// \param CodeGen Code generation sequence for combined directives. Includes
419 /// a list of functions used for code generation of implicitly inlined
420 /// regions.
421 InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
422 OpenMPDirectiveKind Kind, bool HasCancel,
423 bool NoInheritance = true)
424 : CGF(CGF), NoInheritance(NoInheritance) {
425 // Start emission for the construct.
426 CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
427 CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
428 if (NoInheritance) {
429 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
430 LambdaThisCaptureField = CGF.LambdaThisCaptureField;
431 CGF.LambdaThisCaptureField = nullptr;
432 BlockInfo = CGF.BlockInfo;
433 CGF.BlockInfo = nullptr;
434 }
435 }
436
437 ~InlinedOpenMPRegionRAII() {
438 // Restore original CapturedStmtInfo only if we're done with code emission.
439 auto *OldCSI =
440 cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
441 delete CGF.CapturedStmtInfo;
442 CGF.CapturedStmtInfo = OldCSI;
443 if (NoInheritance) {
444 std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
445 CGF.LambdaThisCaptureField = LambdaThisCaptureField;
446 CGF.BlockInfo = BlockInfo;
447 }
448 }
449};
450
451/// Values for bit flags used in the ident_t to describe the fields.
452/// All enum elements are named and described in accordance with the code
453/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
454enum OpenMPLocationFlags : unsigned {
455 /// Use trampoline for internal microtask.
456 OMP_IDENT_IMD = 0x01,
457 /// Use c-style ident structure.
458 OMP_IDENT_KMPC = 0x02,
459 /// Atomic reduction option for kmpc_reduce.
460 OMP_ATOMIC_REDUCE = 0x10,
461 /// Explicit 'barrier' directive.
462 OMP_IDENT_BARRIER_EXPL = 0x20,
463 /// Implicit barrier in code.
464 OMP_IDENT_BARRIER_IMPL = 0x40,
465 /// Implicit barrier in 'for' directive.
466 OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
467 /// Implicit barrier in 'sections' directive.
468 OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
469 /// Implicit barrier in 'single' directive.
470 OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
471 /// Call of __kmp_for_static_init for static loop.
472 OMP_IDENT_WORK_LOOP = 0x200,
473 /// Call of __kmp_for_static_init for sections.
474 OMP_IDENT_WORK_SECTIONS = 0x400,
475 /// Call of __kmp_for_static_init for distribute.
476 OMP_IDENT_WORK_DISTRIBUTE = 0x800,
477 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
478};
479
480namespace {
481LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
482/// Values for bit flags for marking which requires clauses have been used.
483enum OpenMPOffloadingRequiresDirFlags : int64_t {
484 /// flag undefined.
485 OMP_REQ_UNDEFINED = 0x000,
486 /// no requires clause present.
487 OMP_REQ_NONE = 0x001,
488 /// reverse_offload clause.
489 OMP_REQ_REVERSE_OFFLOAD = 0x002,
490 /// unified_address clause.
491 OMP_REQ_UNIFIED_ADDRESS = 0x004,
492 /// unified_shared_memory clause.
493 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
494 /// dynamic_allocators clause.
495 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
496 LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
497};
498
499enum OpenMPOffloadingReservedDeviceIDs {
500 /// Device ID used if the device was not defined; the runtime should get it
501 /// from environment variables, as described in the spec.
502 OMP_DEVICEID_UNDEF = -1,
503};
504} // anonymous namespace
505
506/// Describes ident structure that describes a source location.
507/// All descriptions are taken from
508/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
509/// Original structure:
510/// typedef struct ident {
511/// kmp_int32 reserved_1; /**< might be used in Fortran;
512/// see above */
513/// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
514/// KMP_IDENT_KMPC identifies this union
515/// member */
516/// kmp_int32 reserved_2; /**< not really used in Fortran any more;
517/// see above */
518///#if USE_ITT_BUILD
519/// /* but currently used for storing
520/// region-specific ITT */
521/// /* contextual information. */
522///#endif /* USE_ITT_BUILD */
523/// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
524/// C++ */
525/// char const *psource; /**< String describing the source location.
526/// The string is composed of semi-colon separated
527/// fields which describe the source file,
528/// the function and a pair of line numbers that
529/// delimit the construct.
530/// */
531/// } ident_t;
532enum IdentFieldIndex {
533 /// might be used in Fortran
534 IdentField_Reserved_1,
535 /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
536 IdentField_Flags,
537 /// Not really used in Fortran any more
538 IdentField_Reserved_2,
539 /// Source[4] in Fortran, do not use for C++
540 IdentField_Reserved_3,
541 /// String describing the source location. The string is composed of
542 /// semi-colon separated fields which describe the source file, the function
543 /// and a pair of line numbers that delimit the construct.
544 IdentField_PSource
545};
546
547/// Schedule types for 'omp for' loops (these enumerators are taken from
548/// the enum sched_type in kmp.h).
549enum OpenMPSchedType {
550 /// Lower bound for default (unordered) versions.
551 OMP_sch_lower = 32,
552 OMP_sch_static_chunked = 33,
553 OMP_sch_static = 34,
554 OMP_sch_dynamic_chunked = 35,
555 OMP_sch_guided_chunked = 36,
556 OMP_sch_runtime = 37,
557 OMP_sch_auto = 38,
558 /// static with chunk adjustment (e.g., simd)
559 OMP_sch_static_balanced_chunked = 45,
560 /// Lower bound for 'ordered' versions.
561 OMP_ord_lower = 64,
562 OMP_ord_static_chunked = 65,
563 OMP_ord_static = 66,
564 OMP_ord_dynamic_chunked = 67,
565 OMP_ord_guided_chunked = 68,
566 OMP_ord_runtime = 69,
567 OMP_ord_auto = 70,
568 OMP_sch_default = OMP_sch_static,
569 /// dist_schedule types
570 OMP_dist_sch_static_chunked = 91,
571 OMP_dist_sch_static = 92,
572 /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
573 /// Set if the monotonic schedule modifier was present.
574 OMP_sch_modifier_monotonic = (1 << 29),
575 /// Set if the nonmonotonic schedule modifier was present.
576 OMP_sch_modifier_nonmonotonic = (1 << 30),
577};
578
579/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
580/// region.
581class CleanupTy final : public EHScopeStack::Cleanup {
582 PrePostActionTy *Action;
583
584public:
585 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
586 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
587 if (!CGF.HaveInsertPoint())
588 return;
589 Action->Exit(CGF);
590 }
591};
592
593} // anonymous namespace
594
595void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
596 CodeGenFunction::RunCleanupsScope Scope(CGF);
597 if (PrePostAction) {
598 CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
599 Callback(CodeGen, CGF, *PrePostAction);
600 } else {
601 PrePostActionTy Action;
602 Callback(CodeGen, CGF, Action);
603 }
604}
605
606/// Check if the combiner is a call to a UDR combiner and, if so, return the
607/// UDR decl used for reduction.
608static const OMPDeclareReductionDecl *
609getReductionInit(const Expr *ReductionOp) {
610 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
611 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
612 if (const auto *DRE =
613 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
614 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
615 return DRD;
616 return nullptr;
617}
618
619static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
620 const OMPDeclareReductionDecl *DRD,
621 const Expr *InitOp,
622 Address Private, Address Original,
623 QualType Ty) {
624 if (DRD->getInitializer()) {
625 std::pair<llvm::Function *, llvm::Function *> Reduction =
626 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
627 const auto *CE = cast<CallExpr>(InitOp);
628 const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
629 const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
630 const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
631 const auto *LHSDRE =
632 cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
633 const auto *RHSDRE =
634 cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
635 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
636 PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
637 [=]() { return Private; });
638 PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
639 [=]() { return Original; });
640 (void)PrivateScope.Privatize();
641 RValue Func = RValue::get(Reduction.second);
642 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
643 CGF.EmitIgnoredExpr(InitOp);
644 } else {
645 llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
646 std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
647 auto *GV = new llvm::GlobalVariable(
648 CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
649 llvm::GlobalValue::PrivateLinkage, Init, Name);
650 LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
651 RValue InitRVal;
652 switch (CGF.getEvaluationKind(Ty)) {
653 case TEK_Scalar:
654 InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
655 break;
656 case TEK_Complex:
657 InitRVal =
658 RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
659 break;
660 case TEK_Aggregate: {
661 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
662 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
663 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
664 /*IsInitializer=*/false);
665 return;
666 }
667 }
668 OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
669 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
670 CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
671 /*IsInitializer=*/false);
672 }
673}
674
675/// Emit initialization of arrays of complex types.
676/// \param DestAddr Address of the array.
677/// \param Type Type of array.
678/// \param Init Initial expression of array.
679/// \param SrcAddr Address of the original array.
680static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
681 QualType Type, bool EmitDeclareReductionInit,
682 const Expr *Init,
683 const OMPDeclareReductionDecl *DRD,
684 Address SrcAddr = Address::invalid()) {
685 // Perform element-by-element initialization.
686 QualType ElementTy;
687
688 // Drill down to the base element type on both arrays.
689 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
690 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
691 if (DRD)
692 SrcAddr =
693 CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
694
695 llvm::Value *SrcBegin = nullptr;
696 if (DRD)
697 SrcBegin = SrcAddr.getPointer();
698 llvm::Value *DestBegin = DestAddr.getPointer();
699 // Cast from pointer to array type to pointer to single element.
700 llvm::Value *DestEnd =
701 CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
702 // The basic structure here is a while-do loop.
703 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
704 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
705 llvm::Value *IsEmpty =
706 CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
707 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
708
709 // Enter the loop body, making that address the current address.
710 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
711 CGF.EmitBlock(BodyBB);
712
713 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
714
715 llvm::PHINode *SrcElementPHI = nullptr;
716 Address SrcElementCurrent = Address::invalid();
717 if (DRD) {
718 SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
719 "omp.arraycpy.srcElementPast");
720 SrcElementPHI->addIncoming(SrcBegin, EntryBB);
721 SrcElementCurrent =
722 Address(SrcElementPHI,
723 SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
724 }
725 llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
726 DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
727 DestElementPHI->addIncoming(DestBegin, EntryBB);
728 Address DestElementCurrent =
729 Address(DestElementPHI,
730 DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
731
732 // Emit copy.
733 {
734 CodeGenFunction::RunCleanupsScope InitScope(CGF);
735 if (EmitDeclareReductionInit) {
736 emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
737 SrcElementCurrent, ElementTy);
738 } else
739 CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
740 /*IsInitializer=*/false);
741 }
742
743 if (DRD) {
744 // Shift the address forward by one element.
745 llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
746 SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
747 "omp.arraycpy.dest.element");
748 SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
749 }
750
751 // Shift the address forward by one element.
752 llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
753 DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
754 "omp.arraycpy.dest.element");
755 // Check whether we've reached the end.
756 llvm::Value *Done =
757 CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
758 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
759 DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
760
761 // Done.
762 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
763}
764
765LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
766 return CGF.EmitOMPSharedLValue(E);
767}
768
769LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
770 const Expr *E) {
771 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
772 return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
773 return LValue();
774}
775
776void ReductionCodeGen::emitAggregateInitialization(
777 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
778 const OMPDeclareReductionDecl *DRD) {
779 // Emit VarDecl with copy init for arrays.
780 // Get the address of the original variable captured in current
781 // captured region.
782 const auto *PrivateVD =
783 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
784 bool EmitDeclareReductionInit =
785 DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
786 EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
787 EmitDeclareReductionInit,
788 EmitDeclareReductionInit ? ClausesData[N].ReductionOp
789 : PrivateVD->getInit(),
790 DRD, SharedAddr);
791}
792
793ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
794 ArrayRef<const Expr *> Origs,
795 ArrayRef<const Expr *> Privates,
796 ArrayRef<const Expr *> ReductionOps) {
797 ClausesData.reserve(Shareds.size());
798 SharedAddresses.reserve(Shareds.size());
799 Sizes.reserve(Shareds.size());
800 BaseDecls.reserve(Shareds.size());
801 const auto *IOrig = Origs.begin();
802 const auto *IPriv = Privates.begin();
803 const auto *IRed = ReductionOps.begin();
804 for (const Expr *Ref : Shareds) {
805 ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
806 std::advance(IOrig, 1);
807 std::advance(IPriv, 1);
808 std::advance(IRed, 1);
809 }
810}
811
812void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
813 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
814 "Number of generated lvalues must be exactly N.");
815 LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
816 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
817 SharedAddresses.emplace_back(First, Second);
818 if (ClausesData[N].Shared == ClausesData[N].Ref) {
819 OrigAddresses.emplace_back(First, Second);
820 } else {
821 LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
822 LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
823 OrigAddresses.emplace_back(First, Second);
824 }
825}
826
827void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
828 const auto *PrivateVD =
829 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
830 QualType PrivateType = PrivateVD->getType();
831 bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
832 if (!PrivateType->isVariablyModifiedType()) {
833 Sizes.emplace_back(
834 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
835 nullptr);
836 return;
837 }
838 llvm::Value *Size;
839 llvm::Value *SizeInChars;
840 auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
841 auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
842 if (AsArraySection) {
843 Size = CGF.Builder.CreatePtrDiff(ElemType,
844 OrigAddresses[N].second.getPointer(CGF),
845 OrigAddresses[N].first.getPointer(CGF));
846 Size = CGF.Builder.CreateNUWAdd(
847 Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
848 SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
849 } else {
850 SizeInChars =
851 CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
852 Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
853 }
854 Sizes.emplace_back(SizeInChars, Size);
855 CodeGenFunction::OpaqueValueMapping OpaqueMap(
856 CGF,
857 cast<OpaqueValueExpr>(
858 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
859 RValue::get(Size));
860 CGF.EmitVariablyModifiedType(PrivateType);
861}
862
863void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
864 llvm::Value *Size) {
865 const auto *PrivateVD =
866 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
867 QualType PrivateType = PrivateVD->getType();
868 if (!PrivateType->isVariablyModifiedType()) {
869 assert(!Size && !Sizes[N].second &&
870 "Size should be nullptr for non-variably modified reduction "
871 "items.");
873 }
874 CodeGenFunction::OpaqueValueMapping OpaqueMap(
875 CGF,
876 cast<OpaqueValueExpr>(
877 CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
878 RValue::get(Size));
879 CGF.EmitVariablyModifiedType(PrivateType);
880}
881
882void ReductionCodeGen::emitInitialization(
883 CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
884 llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
885 assert(SharedAddresses.size() > N && "No variable was generated");
886 const auto *PrivateVD =
887 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
888 const OMPDeclareReductionDecl *DRD =
889 getReductionInit(ClausesData[N].ReductionOp);
890 QualType PrivateType = PrivateVD->getType();
891 PrivateAddr = CGF.Builder.CreateElementBitCast(
892 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
893 if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
894 if (DRD && DRD->getInitializer())
895 (void)DefaultInit(CGF);
896 emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
897 } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
898 (void)DefaultInit(CGF);
899 QualType SharedType = SharedAddresses[N].first.getType();
900 emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
901 PrivateAddr, SharedAddr, SharedType);
902 } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
903 !CGF.isTrivialInitializer(PrivateVD->getInit())) {
904 CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
905 PrivateVD->getType().getQualifiers(),
906 /*IsInitializer=*/false);
907 }
908}
909
910bool ReductionCodeGen::needCleanups(unsigned N) {
911 const auto *PrivateVD =
912 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
913 QualType PrivateType = PrivateVD->getType();
914 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
915 return DTorKind != QualType::DK_none;
916}
917
918void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
919 Address PrivateAddr) {
920 const auto *PrivateVD =
921 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
922 QualType PrivateType = PrivateVD->getType();
923 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
924 if (needCleanups(N)) {
925 PrivateAddr = CGF.Builder.CreateElementBitCast(
926 PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
927 CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
928 }
929}
930
931static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
932 LValue BaseLV) {
933 BaseTy = BaseTy.getNonReferenceType();
934 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
935 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
936 if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
937 BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
938 } else {
939 LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
940 BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
941 }
942 BaseTy = BaseTy->getPointeeType();
943 }
944 return CGF.MakeAddrLValue(
945 CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
946 CGF.ConvertTypeForMem(ElTy)),
947 BaseLV.getType(), BaseLV.getBaseInfo(),
948 CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
949}
950
951static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
952 llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
953 llvm::Value *Addr) {
954 Address Tmp = Address::invalid();
955 Address TopTmp = Address::invalid();
956 Address MostTopTmp = Address::invalid();
957 BaseTy = BaseTy.getNonReferenceType();
958 while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
959 !CGF.getContext().hasSameType(BaseTy, ElTy)) {
960 Tmp = CGF.CreateMemTemp(BaseTy);
961 if (TopTmp.isValid())
962 CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
963 else
964 MostTopTmp = Tmp;
965 TopTmp = Tmp;
966 BaseTy = BaseTy->getPointeeType();
967 }
968 llvm::Type *Ty = BaseLVType;
969 if (Tmp.isValid())
970 Ty = Tmp.getElementType();
971 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
972 if (Tmp.isValid()) {
973 CGF.Builder.CreateStore(Addr, Tmp);
974 return MostTopTmp;
975 }
976 return Address(Addr, BaseLVAlignment);
977}
978
979static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
980 const VarDecl *OrigVD = nullptr;
981 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
982 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
983 while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
984 Base = TempOASE->getBase()->IgnoreParenImpCasts();
985 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
986 Base = TempASE->getBase()->IgnoreParenImpCasts();
987 DE = cast<DeclRefExpr>(Base);
988 OrigVD = cast<VarDecl>(DE->getDecl());
989 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
990 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
991 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
992 Base = TempASE->getBase()->IgnoreParenImpCasts();
993 DE = cast<DeclRefExpr>(Base);
994 OrigVD = cast<VarDecl>(DE->getDecl());
995 }
996 return OrigVD;
997}
998
999Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1000 Address PrivateAddr) {
1001 const DeclRefExpr *DE;
1002 if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1003 BaseDecls.emplace_back(OrigVD);
1004 LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1005 LValue BaseLValue =
1006 loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1007 OriginalBaseLValue);
1008 Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
1009 llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1010 SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
1011 SharedAddr.getPointer());
1012 llvm::Value *PrivatePointer =
1013 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1014 PrivateAddr.getPointer(), SharedAddr.getType());
1015 llvm::Value *Ptr = CGF.Builder.CreateGEP(
1016 SharedAddr.getElementType(), PrivatePointer, Adjustment);
1017 return castToBase(CGF, OrigVD->getType(),
1018 SharedAddresses[N].first.getType(),
1019 OriginalBaseLValue.getAddress(CGF).getType(),
1020 OriginalBaseLValue.getAlignment(), Ptr);
1021 }
1022 BaseDecls.emplace_back(
1023 cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1024 return PrivateAddr;
1025}
1026
1027bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1028 const OMPDeclareReductionDecl *DRD =
1029 getReductionInit(ClausesData[N].ReductionOp);
1030 return DRD && DRD->getInitializer();
1031}
1032
1033LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1034 return CGF.EmitLoadOfPointerLValue(
1035 CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1036 getThreadIDVariable()->getType()->castAs<PointerType>());
1037}
1038
1039void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
1040 if (!CGF.HaveInsertPoint())
1041 return;
1042 // 1.2.2 OpenMP Language Terminology
1043 // Structured block - An executable statement with a single entry at the
1044 // top and a single exit at the bottom.
1045 // The point of exit cannot be a branch out of the structured block.
1046 // longjmp() and throw() must not violate the entry/exit criteria.
1047 CGF.EHStack.pushTerminate();
1048 if (S)
1049 CGF.incrementProfileCounter(S);
1050 CodeGen(CGF);
1051 CGF.EHStack.popTerminate();
1052}
1053
1054LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1055 CodeGenFunction &CGF) {
1056 return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1057 getThreadIDVariable()->getType(),
1058 AlignmentSource::Decl);
1059}
1060
1061static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1062 QualType FieldTy) {
1063 auto *Field = FieldDecl::Create(
1064 C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
1065 C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
1066 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1067 Field->setAccess(AS_public);
1068 DC->addDecl(Field);
1069 return Field;
1070}
1071
1072CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1073 StringRef Separator)
1074 : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
1075 OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) {
1076 KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1077
1078 // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
1079 OMPBuilder.initialize();
1080 loadOffloadInfoMetadata();
1081}
1082
1083void CGOpenMPRuntime::clear() {
1084 InternalVars.clear();
1085 // Clean non-target variable declarations possibly used only in debug info.
1086 for (const auto &Data : EmittedNonTargetVariables) {
1087 if (!Data.getValue().pointsToAliveValue())
1088 continue;
1089 auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1090 if (!GV)
1091 continue;
1092 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1093 continue;
1094 GV->eraseFromParent();
1095 }
1096}
1097
1098std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1099 SmallString<128> Buffer;
1100 llvm::raw_svector_ostream OS(Buffer);
1101 StringRef Sep = FirstSeparator;
1102 for (StringRef Part : Parts) {
1103 OS << Sep << Part;
1104 Sep = Separator;
1105 }
1106 return std::string(OS.str());
1107}
1108
1109static llvm::Function *
1110emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1111 const Expr *CombinerInitializer, const VarDecl *In,
1112 const VarDecl *Out, bool IsCombiner) {
1113 // void .omp_combiner.(Ty *in, Ty *out);
1114 ASTContext &C = CGM.getContext();
1115 QualType PtrTy = C.getPointerType(Ty).withRestrict();
1116 FunctionArgList Args;
1117 ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1118 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1119 ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1120 /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1121 Args.push_back(&OmpOutParm);
1122 Args.push_back(&OmpInParm);
1123 const CGFunctionInfo &FnInfo =
1124 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1125 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1126 std::string Name = CGM.getOpenMPRuntime().getName(
1127 {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
1128 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
1129 Name, &CGM.getModule());
1130 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
1131 if (CGM.getLangOpts().Optimize) {
1132 Fn->removeFnAttr(llvm::Attribute::NoInline);
1133 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1134 Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1135 }
1136 CodeGenFunction CGF(CGM);
1137 // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1138 // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1139 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1140 Out->getLocation());
1141 CodeGenFunction::OMPPrivateScope Scope(CGF);
1142 Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1143 Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1144 return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1145 .getAddress(CGF);
1146 });
1147 Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1148 Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1149 return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1150 .getAddress(CGF);
1151 });
1152 (void)Scope.Privatize();
1153 if (!IsCombiner && Out->hasInit() &&
1154 !CGF.isTrivialInitializer(Out->getInit())) {
1155 CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1156 Out->getType().getQualifiers(),
1157 /*IsInitializer=*/true);
1158 }
1159 if (CombinerInitializer)
1160 CGF.EmitIgnoredExpr(CombinerInitializer);
1161 Scope.ForceCleanup();
1162 CGF.FinishFunction();
1163 return Fn;
1164}
1165
1166void CGOpenMPRuntime::emitUserDefinedReduction(
1167 CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1168 if (UDRMap.count(D) > 0)
1169 return;
1170 llvm::Function *Combiner = emitCombinerOrInitializer(
1171 CGM, D->getType(), D->getCombiner(),
1172 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1173 cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1174 /*IsCombiner=*/true);
1175 llvm::Function *Initializer = nullptr;
1176 if (const Expr *Init = D->getInitializer()) {
1177 Initializer = emitCombinerOrInitializer(
1178 CGM, D->getType(),
1179 D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
1180 : nullptr,
1181 cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1182 cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1183 /*IsCombiner=*/false);
1184 }
1185 UDRMap.try_emplace(D, Combiner, Initializer);
1186 if (CGF) {
1187 auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1188 Decls.second.push_back(D);
1189 }
1190}
1191
1192std::pair<llvm::Function *, llvm::Function *>
1193CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1194 auto I = UDRMap.find(D);
1195 if (I != UDRMap.end())
1196 return I->second;
1197 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1198 return UDRMap.lookup(D);
1199}
1200
1201namespace {
1202// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
1203// Builder if one is present.
1204struct PushAndPopStackRAII {
1205 PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
1206 bool HasCancel, llvm::omp::Directive Kind)
1207 : OMPBuilder(OMPBuilder) {
1208 if (!OMPBuilder)
1209 return;
1210
1211 // The following callback is the crucial part of clang's cleanup process.
1212 //
1213 // NOTE:
1214 // Once the OpenMPIRBuilder is used to create parallel regions (and
1215 // similar), the cancellation destination (Dest below) is determined via
1216 // IP. That means if we have variables to finalize we split the block at IP,
1217 // use the new block (=BB) as destination to build a JumpDest (via
1218 // getJumpDestInCurrentScope(BB)) which then is fed to
1219 // EmitBranchThroughCleanup. Furthermore, there will not be the need
1220 // to push & pop a FinalizationInfo object.
1221 // The FiniCB will still be needed but at the point where the
1222 // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
1223 auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
1224 assert(IP.getBlock()->end() == IP.getPoint() &&
1225 "Clang CG should cause non-terminated block!");
1226 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1227 CGF.Builder.restoreIP(IP);
1228 CodeGenFunction::JumpDest Dest =
1229 CGF.getOMPCancelDestination(OMPD_parallel);
1230 CGF.EmitBranchThroughCleanup(Dest);
1231 };
1232
1233 // TODO: Remove this once we emit parallel regions through the
1234 // OpenMPIRBuilder as it can do this setup internally.
1235 llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
1236 OMPBuilder->pushFinalizationCB(std::move(FI));
1237 }
1238 ~PushAndPopStackRAII() {
1239 if (OMPBuilder)
1240 OMPBuilder->popFinalizationCB();
1241 }
1242 llvm::OpenMPIRBuilder *OMPBuilder;
1243};
1244} // namespace
1245
1246static llvm::Function *emitParallelOrTeamsOutlinedFunction(
1247 CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1248 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1249 const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1250 assert(ThreadIDVar->getType()->isPointerType() &&
1251 "thread id variable must be of type kmp_int32 *");
1252 CodeGenFunction CGF(CGM, true);
1253 bool HasCancel = false;
1254 if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1255 HasCancel = OPD->hasCancel();
1256 else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
1257 HasCancel = OPD->hasCancel();
1258 else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1259 HasCancel = OPSD->hasCancel();
1260 else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1261 HasCancel = OPFD->hasCancel();
1262 else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1263 HasCancel = OPFD->hasCancel();
1264 else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1265 HasCancel = OPFD->hasCancel();
1266 else if (const auto *OPFD =
1267 dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1268 HasCancel = OPFD->hasCancel();
1269 else if (const auto *OPFD =
1270 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1271 HasCancel = OPFD->hasCancel();
1272
1273 // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
1274 // parallel region to make cancellation barriers work properly.
1275 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1276 PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
1277 CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1278 HasCancel, OutlinedHelperName);
1279 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1280 return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
1281}
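// Illustrative example (not part of this file): one of the directive kinds
// probed above, where HasCancel becomes true because the region contains a
// matching cancel construct:
//
//   #pragma omp parallel
//   {
//     if (err) {
//       #pragma omp cancel parallel
//     }
//     work();
//   }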
1282
1283llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1284 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1285 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1286 const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1287 return emitParallelOrTeamsOutlinedFunction(
1288 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1289}
1290
1291llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1292 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1293 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1294 const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1295 return emitParallelOrTeamsOutlinedFunction(
1296 CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1297}
1298
1299llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
1300 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1301 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1302 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1303 bool Tied, unsigned &NumberOfParts) {
1304 auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1305 PrePostActionTy &) {
1306 llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1307 llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1308 llvm::Value *TaskArgs[] = {
1309 UpLoc, ThreadID,
1310 CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1311 TaskTVar->getType()->castAs<PointerType>())
1312 .getPointer(CGF)};
1313 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1314 CGM.getModule(), OMPRTL___kmpc_omp_task),
1315 TaskArgs);
1316 };
1317 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1318 UntiedCodeGen);
1319 CodeGen.setAction(Action);
1320 assert(!ThreadIDVar->getType()->isPointerType() &&
1321 "thread id variable must be of type kmp_int32 for tasks");
1322 const OpenMPDirectiveKind Region =
1323 isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1324 : OMPD_task;
1325 const CapturedStmt *CS = D.getCapturedStmt(Region);
1326 bool HasCancel = false;
1327 if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
1328 HasCancel = TD->hasCancel();
1329 else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
1330 HasCancel = TD->hasCancel();
1331 else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
1332 HasCancel = TD->hasCancel();
1333 else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
1334 HasCancel = TD->hasCancel();
1335
1336 CodeGenFunction CGF(CGM, true);
1337 CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1338 InnermostKind, HasCancel, Action);
1339 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1340 llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1341 if (!Tied)
1342 NumberOfParts = Action.getNumberOfParts();
1343 return Res;
1344}
1345
1346static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM,
1347 const RecordDecl *RD, const CGRecordLayout &RL,
1348 ArrayRef<llvm::Constant *> Data) {
1349 llvm::StructType *StructTy = RL.getLLVMType();
1350 unsigned PrevIdx = 0;
1351 ConstantInitBuilder CIBuilder(CGM);
1352 auto DI = Data.begin();
1353 for (const FieldDecl *FD : RD->fields()) {
1354 unsigned Idx = RL.getLLVMFieldNo(FD);
1355 // Fill any padding elements (inserted for alignment) with null values.
1356 for (unsigned I = PrevIdx; I < Idx; ++I)
1357 Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1358 PrevIdx = Idx + 1;
1359 Fields.add(*DI);
1360 ++DI;
1361 }
1362}
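// Illustrative example (not part of this file): if the CGRecordLayout maps a
// record's two declared fields to LLVM struct indices 0 and 2 (index 1 being
// a padding element), the loop above emits Data[0], then a null value for the
// padding slot, then Data[1].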
1363
1364template <class... As>
1365static llvm::GlobalVariable *
1366createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant,
1367 ArrayRef<llvm::Constant *> Data, const Twine &Name,
1368 As &&... Args) {
1369 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1370 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1371 ConstantInitBuilder CIBuilder(CGM);
1372 ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1373 buildStructValue(Fields, CGM, RD, RL, Data);
1374 return Fields.finishAndCreateGlobal(
1375 Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1376 std::forward<As>(Args)...);
1377}
1378
1379template <typename T>
1380static void
1381createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty,
1382 ArrayRef<llvm::Constant *> Data,
1383 T &Parent) {
1384 const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1385 const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1386 ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1387 buildStructValue(Fields, CGM, RD, RL, Data);
1388 Fields.finishAndAddTo(Parent);
1389}
1390
1391void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
1392 bool AtCurrentPoint) {
1393 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1394 assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1395
1396 llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1397 if (AtCurrentPoint) {
1398 Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1399 Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1400 } else {
1401 Elem.second.ServiceInsertPt =
1402 new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1403 Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1404 }
1405}
1406
1407void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1408 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1409 if (Elem.second.ServiceInsertPt) {
1410 llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1411 Elem.second.ServiceInsertPt = nullptr;
1412 Ptr->eraseFromParent();
1413 }
1414}
1415
1416static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1417 SourceLocation Loc,
1418 SmallString<128> &Buffer) {
1419 llvm::raw_svector_ostream OS(Buffer);
1420 // Build debug location
1421 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1422 OS << ";" << PLoc.getFilename() << ";";
1423 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1424 OS << FD->getQualifiedNameAsString();
1425 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1426 return OS.str();
1427}
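// Illustrative example (not part of this file): for a directive at line 7,
// column 3 of t.c inside the function foo, the buffer built above reads
//
//   ";t.c;foo;7;3;;"
//
// i.e. the ";file;function;line;column;;" layout assumed by the callers below.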
1428
1429llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1430 SourceLocation Loc,
1431 unsigned Flags) {
1432 uint32_t SrcLocStrSize;
1433 llvm::Constant *SrcLocStr;
1434 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1435 Loc.isInvalid()) {
1436 SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1437 } else {
1438 std::string FunctionName;
1439 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1440 FunctionName = FD->getQualifiedNameAsString();
1441 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1442 const char *FileName = PLoc.getFilename();
1443 unsigned Line = PLoc.getLine();
1444 unsigned Column = PLoc.getColumn();
1445 SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
1446 Column, SrcLocStrSize);
1447 }
1448 unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1449 return OMPBuilder.getOrCreateIdent(
1450 SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
1451}
1452
1453llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1454 SourceLocation Loc) {
1455 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1456 // If the OpenMPIRBuilder is used, we need to use it for all thread id calls,
1457 // as the clang invariants used below might be broken.
1458 if (CGM.getLangOpts().OpenMPIRBuilder) {
1459 SmallString<128> Buffer;
1460 OMPBuilder.updateToLocation(CGF.Builder.saveIP());
1461 uint32_t SrcLocStrSize;
1462 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1463 getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1464 return OMPBuilder.getOrCreateThreadID(
1465 OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1466 }
1467
1468 llvm::Value *ThreadID = nullptr;
1469 // Check whether we've already cached a load of the thread id in this
1470 // function.
1471 auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1472 if (I != OpenMPLocThreadIDMap.end()) {
1473 ThreadID = I->second.ThreadID;
1474 if (ThreadID != nullptr)
1475 return ThreadID;
1476 }
1477 // If exceptions are enabled, do not use the parameter, to avoid a possible crash.
1478 if (auto *OMPRegionInfo =
1479 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1480 if (OMPRegionInfo->getThreadIDVariable()) {
1481 // Check if this is an outlined function with the thread id passed as argument.
1482 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1483 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1484 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1485 !CGF.getLangOpts().CXXExceptions ||
1486 CGF.Builder.GetInsertBlock() == TopBlock ||
1487 !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
1488 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1489 TopBlock ||
1490 cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
1491 CGF.Builder.GetInsertBlock()) {
1492 ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1493 // If the value was loaded in the entry block, cache it and use it
1494 // everywhere in the function.
1495 if (CGF.Builder.GetInsertBlock() == TopBlock) {
1496 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1497 Elem.second.ThreadID = ThreadID;
1498 }
1499 return ThreadID;
1500 }
1501 }
1502 }
1503
1504 // This is not an outlined function region - need to call kmp_int32
1505 // __kmpc_global_thread_num(ident_t *loc).
1506 // Generate thread id value and cache this value for use across the
1507 // function.
1508 auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1509 if (!Elem.second.ServiceInsertPt)
1510 setLocThreadIdInsertPt(CGF);
1511 CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1512 CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1513 llvm::CallInst *Call = CGF.Builder.CreateCall(
1514 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
1515 OMPRTL___kmpc_global_thread_num),
1516 emitUpdateLocation(CGF, Loc));
1517 Call->setCallingConv(CGF.getRuntimeCC());
1518 Elem.second.ThreadID = Call;
1519 return Call;
1520}
1521
1522void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1523 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1524 if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1525 clearLocThreadIdInsertPt(CGF);
1526 OpenMPLocThreadIDMap.erase(CGF.CurFn);
1527 }
1528 if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1529 for (const auto *D : FunctionUDRMap[CGF.CurFn])
1530 UDRMap.erase(D);
1531 FunctionUDRMap.erase(CGF.CurFn);
1532 }
1533 auto I = FunctionUDMMap.find(CGF.CurFn);
1534 if (I != FunctionUDMMap.end()) {
1535 for (const auto *D : I->second)
1536 UDMMap.erase(D);
1537 FunctionUDMMap.erase(I);
1538 }
1539 LastprivateConditionalToTypes.erase(CGF.CurFn);
1540 FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
1541}
1542
1543llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1544 return OMPBuilder.IdentPtr;
1545}
1546
1547llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1548 if (!Kmpc_MicroTy) {
1549 // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1550 llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1551 llvm::PointerType::getUnqual(CGM.Int32Ty)};
1552 Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1553 }
1554 return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1555}
1556
1557llvm::FunctionCallee
1558CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
1559 bool IsGPUDistribute) {
1560 assert((IVSize == 32 || IVSize == 64) &&
1561 "IV size is not compatible with the omp runtime");
1562 StringRef Name;
1563 if (IsGPUDistribute)
1564 Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
1565 : "__kmpc_distribute_static_init_4u")
1566 : (IVSigned ? "__kmpc_distribute_static_init_8"
1567 : "__kmpc_distribute_static_init_8u");
1568 else
1569 Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
1570 : "__kmpc_for_static_init_4u")
1571 : (IVSigned ? "__kmpc_for_static_init_8"
1572 : "__kmpc_for_static_init_8u");
1573
1574 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1575 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1576 llvm::Type *TypeParams[] = {
1577 getIdentTyPointerTy(), // loc
1578 CGM.Int32Ty, // tid
1579 CGM.Int32Ty, // schedtype
1580 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1581 PtrTy, // p_lower
1582 PtrTy, // p_upper
1583 PtrTy, // p_stride
1584 ITy, // incr
1585 ITy // chunk
1586 };
1587 auto *FnTy =
1588 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1589 return CGM.CreateRuntimeFunction(FnTy, Name);
1590}
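// Illustrative C prototype (not part of this file) of the callee created
// above, assuming IVSize == 32 and IVSigned == true:
//
//   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 tid,
//                                 kmp_int32 schedtype, kmp_int32 *p_lastiter,
//                                 kmp_int32 *p_lower, kmp_int32 *p_upper,
//                                 kmp_int32 *p_stride, kmp_int32 incr,
//                                 kmp_int32 chunk);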
1591
1592llvm::FunctionCallee
1593CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
1594 assert((IVSize == 32 || IVSize == 64) &&
1595 "IV size is not compatible with the omp runtime");
1596 StringRef Name =
1597 IVSize == 32
1598 ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
1599 : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
1600 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1601 llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
1602 CGM.Int32Ty, // tid
1603 CGM.Int32Ty, // schedtype
1604 ITy, // lower
1605 ITy, // upper
1606 ITy, // stride
1607 ITy // chunk
1608 };
1609 auto *FnTy =
1610 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1611 return CGM.CreateRuntimeFunction(FnTy, Name);
1612}
1613
1614llvm::FunctionCallee
1615CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
1616 assert((IVSize == 32 || IVSize == 64) &&
1617 "IV size is not compatible with the omp runtime");
1618 StringRef Name =
1619 IVSize == 32
1620 ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
1621 : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
1622 llvm::Type *TypeParams[] = {
1623 getIdentTyPointerTy(), // loc
1624 CGM.Int32Ty, // tid
1625 };
1626 auto *FnTy =
1627 llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1628 return CGM.CreateRuntimeFunction(FnTy, Name);
1629}
1630
1631llvm::FunctionCallee
1632CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
1633 assert((IVSize == 32 || IVSize == 64) &&
1634 "IV size is not compatible with the omp runtime");
1635 StringRef Name =
1636 IVSize == 32
1637 ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
1638 : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
1639 llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
1640 auto *PtrTy = llvm::PointerType::getUnqual(ITy);
1641 llvm::Type *TypeParams[] = {
1642 getIdentTyPointerTy(), // loc
1643 CGM.Int32Ty, // tid
1644 llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
1645 PtrTy, // p_lower
1646 PtrTy, // p_upper
1647 PtrTy // p_stride
1648 };
1649 auto *FnTy =
1650 llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1651 return CGM.CreateRuntimeFunction(FnTy, Name);
1652}
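// Illustrative C prototype (not part of this file), again assuming
// IVSize == 32 and IVSigned == true; note the kmp_int32 return value, which
// is nonzero while chunks remain to be scheduled:
//
//   kmp_int32 __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 tid,
//                                    kmp_int32 *p_lastiter,
//                                    kmp_int32 *p_lower, kmp_int32 *p_upper,
//                                    kmp_int32 *p_stride);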
1653
1654/// Obtain information that uniquely identifies a target entry. This
1655/// consists of the file and device IDs as well as line number associated with
1656/// the relevant entry source location.
1657static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
1658 unsigned &DeviceID, unsigned &FileID,
1659 unsigned &LineNum) {
1660 SourceManager &SM = C.getSourceManager();
1661
1662 // The loc should always be valid and have a file ID (the user cannot use
1663 // #pragma directives in macros).
1664
1665 assert(Loc.isValid() && "Source location is expected to be always valid.");
1666
1667 PresumedLoc PLoc = SM.getPresumedLoc(Loc);
1668 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1669
1670 llvm::sys::fs::UniqueID ID;
1671 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
1672 PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
1673 assert(PLoc.isValid() && "Source location is expected to be always valid.");
1674 if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
1675 SM.getDiagnostics().Report(diag::err_cannot_open_file)
1676 << PLoc.getFilename() << EC.message();
1677 }
1678
1679 DeviceID = ID.getDevice();
1680 FileID = ID.getFile();
1681 LineNum = PLoc.getLine();
1682}
1683
1684Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
1685 if (CGM.getLangOpts().OpenMPSimd)
1686 return Address::invalid();
1687 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1688 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1689 if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
1690 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1691 HasRequiresUnifiedSharedMemory))) {
1692 SmallString<64> PtrName;
1693 {
1694 llvm::raw_svector_ostream OS(PtrName);
1695 OS << CGM.getMangledName(GlobalDecl(VD));
1696 if (!VD->isExternallyVisible()) {
1697 unsigned DeviceID, FileID, Line;
1698 getTargetEntryUniqueInfo(CGM.getContext(),
1699 VD->getCanonicalDecl()->getBeginLoc(),
1700 DeviceID, FileID, Line);
1701 OS << llvm::format("_%x", FileID);
1702 }
1703 OS << "_decl_tgt_ref_ptr";
1704 }
1705 llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
1706 if (!Ptr) {
1707 QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
1708 Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy),
1709 PtrName);
1710
1711 auto *GV = cast<llvm::GlobalVariable>(Ptr);
1712 GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
1713
1714 if (!CGM.getLangOpts().OpenMPIsDevice)
1715 GV->setInitializer(CGM.GetAddrOfGlobal(VD));
1716 registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
1717 }
1718 return Address(Ptr, CGM.getContext().getDeclAlign(VD));
1719 }
1720 return Address::invalid();
1721}
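// Illustrative example (not part of this file): for a variable
//
//   int G;
//   #pragma omp declare target link(G)
//
// the code above creates a weak global pointer named "G_decl_tgt_ref_ptr"
// (mangled name plus suffix; internal-linkage variables additionally get a
// "_%x" file-ID infix), initialized on the host with the address of G.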
1722
1723llvm::Constant *
1724CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1725 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1726 !CGM.getContext().getTargetInfo().isTLSSupported());
1727 // Lookup the entry, lazily creating it if necessary.
1728 std::string Suffix = getName({"cache", ""});
1729 return getOrCreateInternalVariable(
1730 CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
1731}
1732
1733Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
1734 const VarDecl *VD,
1735 Address VDAddr,
1736 SourceLocation Loc) {
1737 if (CGM.getLangOpts().OpenMPUseTLS &&
1738 CGM.getContext().getTargetInfo().isTLSSupported())
1739 return VDAddr;
1740
1741 llvm::Type *VarTy = VDAddr.getElementType();
1742 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
1743 CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
1744 CGM.Int8PtrTy),
1745 CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
1746 getOrCreateThreadPrivateCache(VD)};
1747 return Address(CGF.EmitRuntimeCall(
1748 OMPBuilder.getOrCreateRuntimeFunction(
1749 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
1750 Args),
1751 VDAddr.getAlignment());
1752}
1753
1754void CGOpenMPRuntime::emitThreadPrivateVarInit(
1755 CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
1756 llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
1757 // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
1758 // library.
1759 llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
1760 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
1761 CGM.getModule(), OMPRTL___kmpc_global_thread_num),
1762 OMPLoc);
1763 // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
1764 // to register constructor/destructor for variable.
1765 llvm::Value *Args[] = {
1766 OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
1767 Ctor, CopyCtor, Dtor};
1768 CGF.EmitRuntimeCall(
1769 OMPBuilder.getOrCreateRuntimeFunction(
1770 CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
1771 Args);
1772}
1773
1774llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
1775 const VarDecl *VD, Address VDAddr, SourceLocation Loc,
1776 bool PerformInit, CodeGenFunction *CGF) {
1777 if (CGM.getLangOpts().OpenMPUseTLS &&
1778 CGM.getContext().getTargetInfo().isTLSSupported())
1779 return nullptr;
1780
1781 VD = VD->getDefinition(CGM.getContext());
1782 if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
1783 QualType ASTTy = VD->getType();
1784
1785 llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
1786 const Expr *Init = VD->getAnyInitializer();
1787 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1788 // Generate function that re-emits the declaration's initializer into the
1789 // threadprivate copy of the variable VD
1790 CodeGenFunction CtorCGF(CGM);
1791 FunctionArgList Args;
1792 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1793 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1794 ImplicitParamDecl::Other);
1795 Args.push_back(&Dst);
1796
1797 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1798 CGM.getContext().VoidPtrTy, Args);
1799 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1800 std::string Name = getName({"__kmpc_global_ctor_", ""});
1801 llvm::Function *Fn =
1802 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1803 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
1804 Args, Loc, Loc);
1805 llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
1806 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1807 CGM.getContext().VoidPtrTy, Dst.getLocation());
1808 Address Arg = Address(ArgVal, VDAddr.getAlignment());
1809 Arg = CtorCGF.Builder.CreateElementBitCast(
1810 Arg, CtorCGF.ConvertTypeForMem(ASTTy));
1811 CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
1812 /*IsInitializer=*/true);
1813 ArgVal = CtorCGF.EmitLoadOfScalar(
1814 CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
1815 CGM.getContext().VoidPtrTy, Dst.getLocation());
1816 CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
1817 CtorCGF.FinishFunction();
1818 Ctor = Fn;
1819 }
1820 if (VD->getType().isDestructedType() != QualType::DK_none) {
1821 // Generate function that emits destructor call for the threadprivate copy
1822 // of the variable VD
1823 CodeGenFunction DtorCGF(CGM);
1824 FunctionArgList Args;
1825 ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
1826 /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
1827 ImplicitParamDecl::Other);
1828 Args.push_back(&Dst);
1829
1830 const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
1831 CGM.getContext().VoidTy, Args);
1832 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1833 std::string Name = getName({"__kmpc_global_dtor_", ""});
1834 llvm::Function *Fn =
1835 CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
1836 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1837 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
1838 Loc, Loc);
1839 // Create a scope with an artificial location for the body of this function.
1840 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1841 llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
1842 DtorCGF.GetAddrOfLocalVar(&Dst),
1843 /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
1844 DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
1845 DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1846 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1847 DtorCGF.FinishFunction();
1848 Dtor = Fn;
1849 }
1850 // Do not emit init function if it is not required.
1851 if (!Ctor && !Dtor)
1852 return nullptr;
1853
1854 llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1855 auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
1856 /*isVarArg=*/false)
1857 ->getPointerTo();
1858 // Copying constructor for the threadprivate variable.
1859 // Must be NULL - reserved by the runtime, which currently requires this
1860 // parameter to always be NULL; otherwise it fires an assertion.
1861 CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
1862 if (Ctor == nullptr) {
1863 auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1864 /*isVarArg=*/false)
1865 ->getPointerTo();
1866 Ctor = llvm::Constant::getNullValue(CtorTy);
1867 }
1868 if (Dtor == nullptr) {
1869 auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
1870 /*isVarArg=*/false)
1871 ->getPointerTo();
1872 Dtor = llvm::Constant::getNullValue(DtorTy);
1873 }
1874 if (!CGF) {
1875 auto *InitFunctionTy =
1876 llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
1877 std::string Name = getName({"__omp_threadprivate_init_", ""});
1878 llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
1879 InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
1880 CodeGenFunction InitCGF(CGM);
1881 FunctionArgList ArgList;
1882 InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
1883 CGM.getTypes().arrangeNullaryFunction(), ArgList,
1884 Loc, Loc);
1885 emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1886 InitCGF.FinishFunction();
1887 return InitFunction;
1888 }
1889 emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
1890 }
1891 return nullptr;
1892}
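// Illustrative example (not part of this file) of a declaration handled by
// emitThreadPrivateVarDefinition:
//
//   static std::string S("init");
//   #pragma omp threadprivate(S)
//
// With C++ initialization and a non-trivial destructor, the code above
// synthesizes __kmpc_global_ctor_/__kmpc_global_dtor_ helpers and registers
// them through __kmpc_threadprivate_register.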
1893
1894bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
1895 llvm::GlobalVariable *Addr,
1896 bool PerformInit) {
1897 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
1898 !CGM.getLangOpts().OpenMPIsDevice)
1899 return false;
1900 Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
1901 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1902 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
1903 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
1904 HasRequiresUnifiedSharedMemory))
1905 return CGM.getLangOpts().OpenMPIsDevice;
1906 VD = VD->getDefinition(CGM.getContext());
1907 assert(VD && "Unknown VarDecl");
1908
1909 if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
1910 return CGM.getLangOpts().OpenMPIsDevice;
1911
1912 QualType ASTTy = VD->getType();
1913 SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();
1914
1915 // Produce the unique prefix to identify the new target regions. We use
1916 // the source location of the variable declaration which we know to not
1917 // conflict with any target region.
1918 unsigned DeviceID;
1919 unsigned FileID;
1920 unsigned Line;
1921 getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
1922 SmallString<128> Buffer, Out;
1923 {
1924 llvm::raw_svector_ostream OS(Buffer);
1925 OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
1926 << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
1927 }
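// Illustrative example (not part of this file): with DeviceID 0x2a, FileID
// 0x1f, a variable named Gbl, and Line 12, Buffer holds
// "__omp_offloading__2a_1f_Gbl_l12"; the registrations below append "_ctor"
// and "_dtor" to this prefix.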
1928
1929 const Expr *Init = VD->getAnyInitializer();
1930 if (CGM.getLangOpts().CPlusPlus && PerformInit) {
1931 llvm::Constant *Ctor;
1932 llvm::Constant *ID;
1933 if (CGM.getLangOpts().OpenMPIsDevice) {
1934 // Generate function that re-emits the declaration's initializer into
1935 // the threadprivate copy of the variable VD
1936 CodeGenFunction CtorCGF(CGM);
1937
1938 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1939 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1940 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1941 FTy, Twine(Buffer, "_ctor"), FI, Loc);
1942 auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
1943 CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1944 FunctionArgList(), Loc, Loc);
1945 auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
1946 CtorCGF.EmitAnyExprToMem(Init,
1947 Address(Addr, CGM.getContext().getDeclAlign(VD)),
1948 Init->getType().getQualifiers(),
1949 /*IsInitializer=*/true);
1950 CtorCGF.FinishFunction();
1951 Ctor = Fn;
1952 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1953 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
1954 } else {
1955 Ctor = new llvm::GlobalVariable(
1956 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1957 llvm::GlobalValue::PrivateLinkage,
1958 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
1959 ID = Ctor;
1960 }
1961
1962 // Register the information for the entry associated with the constructor.
1963 Out.clear();
1964 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
1965 DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
1966 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor);
1967 }
1968 if (VD->getType().isDestructedType() != QualType::DK_none) {
1969 llvm::Constant *Dtor;
1970 llvm::Constant *ID;
1971 if (CGM.getLangOpts().OpenMPIsDevice) {
1972 // Generate function that emits destructor call for the threadprivate
1973 // copy of the variable VD
1974 CodeGenFunction DtorCGF(CGM);
1975
1976 const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
1977 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
1978 llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
1979 FTy, Twine(Buffer, "_dtor"), FI, Loc);
1980 auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
1981 DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
1982 FunctionArgList(), Loc, Loc);
1983 // Create a scope with an artificial location for the body of this
1984 // function.
1985 auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
1986 DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
1987 ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
1988 DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
1989 DtorCGF.FinishFunction();
1990 Dtor = Fn;
1991 ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
1992 CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
1993 } else {
1994 Dtor = new llvm::GlobalVariable(
1995 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
1996 llvm::GlobalValue::PrivateLinkage,
1997 llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
1998 ID = Dtor;
1999 }
2000 // Register the information for the entry associated with the destructor.
2001 Out.clear();
2002 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
2003 DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2004 ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor);
2005 }
2006 return CGM.getLangOpts().OpenMPIsDevice;
2007}
2008
2009Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2010 QualType VarType,
2011 StringRef Name) {
2012 std::string Suffix = getName({"artificial", ""});
2013 llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2014 llvm::GlobalVariable *GAddr =
2015 getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2016 if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
2017 CGM.getTarget().isTLSSupported()) {
2018 GAddr->setThreadLocal(/*Val=*/true);
2019 return Address(GAddr, GAddr->getValueType(),
2020 CGM.getContext().getTypeAlignInChars(VarType));
2021 }
2022 std::string CacheSuffix = getName({"cache", ""});
2023 llvm::Value *Args[] = {
2024 emitUpdateLocation(CGF, SourceLocation()),
2025 getThreadID(CGF, SourceLocation()),
2026 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2027 CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2028 /*isSigned=*/false),
2029 getOrCreateInternalVariable(
2030 CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2031 return Address(
2032 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2033 CGF.EmitRuntimeCall(
2034 OMPBuilder.getOrCreateRuntimeFunction(
2035 CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
2036 Args),
2037 VarLVType->getPointerTo(/*AddrSpace=*/0)),
2038 CGM.getContext().getTypeAlignInChars(VarType));
2039}
2040
2041void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
2042 const RegionCodeGenTy &ThenGen,
2043 const RegionCodeGenTy &ElseGen) {
2044 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2045
2046 // If the condition constant folds and can be elided, try to avoid emitting
2047 // the condition and the dead arm of the if/else.
2048 bool CondConstant;
2049 if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2050 if (CondConstant)
2051 ThenGen(CGF);
2052 else
2053 ElseGen(CGF);
2054 return;
2055 }
2056
2057 // Otherwise, the condition did not fold, or we couldn't elide it. Just
2058 // emit the conditional branch.
2059 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2060 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2061 llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2062 CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2063
2064 // Emit the 'then' code.
2065 CGF.EmitBlock(ThenBlock);
2066 ThenGen(CGF);
2067 CGF.EmitBranch(ContBlock);
2068 // Emit the 'else' code if present.
2069 // There is no need to emit a line number for the unconditional branch.
2070 (void)ApplyDebugLocation::CreateEmpty(CGF);
2071 CGF.EmitBlock(ElseBlock);
2072 ElseGen(CGF);
2073 // There is no need to emit a line number for the unconditional branch.
2074 (void)ApplyDebugLocation::CreateEmpty(CGF);
2075 CGF.EmitBranch(ContBlock);
2076 // Emit the continuation block for code after the if.
2077 CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2078}
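// Illustrative shape (not part of this file) of the IR emitted above when the
// condition does not constant-fold:
//
//   br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then:                  ; ThenGen(CGF)
//     br label %omp_if.end
//   omp_if.else:                  ; ElseGen(CGF)
//     br label %omp_if.end
//   omp_if.end: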
2079
2080void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2081 llvm::Function *OutlinedFn,
2082 ArrayRef<llvm::Value *> CapturedVars,
2083 const Expr *IfCond,
2084 llvm::Value *NumThreads) {
2085 if (!CGF.HaveInsertPoint())
2086 return;
2087 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2088 auto &M = CGM.getModule();
2089 auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
2090 this](CodeGenFunction &CGF, PrePostActionTy &) {
2091 // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2092 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2093 llvm::Value *Args[] = {
2094 RTLoc,
2095 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2096 CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2097 llvm::SmallVector<llvm::Value *, 16> RealArgs;
2098 RealArgs.append(std::begin(Args), std::end(Args));
2099 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2100
2101 llvm::FunctionCallee RTLFn =
2102 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
2103 CGF.EmitRuntimeCall(RTLFn, RealArgs);
2104 };
2105 auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
2106 this](CodeGenFunction &CGF, PrePostActionTy &) {
2107 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2108 llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2109 // Build calls:
2110 // __kmpc_serialized_parallel(&Loc, GTid);
2111 llvm::Value *Args[] = {RTLoc, ThreadID};
2112 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2113 M, OMPRTL___kmpc_serialized_parallel),
2114 Args);
2115
2116 // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
2117 Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2118 Address ZeroAddrBound =
2119 CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2120 /*Name=*/".bound.zero.addr");
2121 CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
2122 llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2123 // ThreadId for serialized parallels is 0.
2124 OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2125 OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
2126 OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2127
2128 // Ensure we do not inline the function. This is trivially true for the ones
2129 // passed to __kmpc_fork_call, but the ones called in serialized regions
2130 // could be inlined. This is not perfect, but it is closer to the invariant
2131 // we want, namely, every data environment starts with a new function.
2132 // TODO: We should pass the if condition to the runtime function and do the
2133 // handling there. Much cleaner code.
2134 OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
2135 OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
2136 RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2137
2138 // __kmpc_end_serialized_parallel(&Loc, GTid);
2139 llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2140 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2141 M, OMPRTL___kmpc_end_serialized_parallel),
2142 EndArgs);
2143 };
2144 if (IfCond) {
2145 emitIfClause(CGF, IfCond, ThenGen, ElseGen);
2146 } else {
2147 RegionCodeGenTy ThenRCG(ThenGen);
2148 ThenRCG(CGF);
2149 }
2150}
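// Illustrative example (not part of this file) of source that exercises both
// code-generation arms above:
//
//   #pragma omp parallel if(n > 1000)
//   work(n);
//
// When the condition holds, ThenGen emits __kmpc_fork_call; otherwise ElseGen
// calls the outlined function directly between __kmpc_serialized_parallel and
// __kmpc_end_serialized_parallel, with a zero bound thread id.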
2151
2152 // If we're inside an (outlined) parallel region, use the region info's
2153 // thread-ID variable (it is passed as the first argument of the outlined
2154 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2155 // region but in a regular serial code region, get the thread ID by calling
2156 // kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2157 // temporary and return the address of that temp.
2158Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2159 SourceLocation Loc) {
2160 if (auto *OMPRegionInfo =
2161 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2162 if (OMPRegionInfo->getThreadIDVariable())
2163 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);
2164
2165 llvm::Value *ThreadID = getThreadID(CGF, Loc);
2166 QualType Int32Ty =
2167 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2168 Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2169 CGF.EmitStoreOfScalar(ThreadID,
2170 CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2171
2172 return ThreadIDTemp;
2173}
2174
2175llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
2176 llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
2177 SmallString<256> Buffer;
2178 llvm::raw_svector_ostream Out(Buffer);
2179 Out << Name;
2180 StringRef RuntimeName = Out.str();
2181 auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
2182 if (Elem.second) {
2183 assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
2184 "OMP internal variable has different type than requested");
2185 return &*Elem.second;
2186 }
2187
2188 return Elem.second = new llvm::GlobalVariable(
2189 CGM.getModule(), Ty, /*IsConstant*/ false,
2190 llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2191 Elem.first(), /*InsertBefore=*/nullptr,
2192 llvm::GlobalValue::NotThreadLocal, AddressSpace);
2193}
2194
2195llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2196 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
2197 std::string Name = getName({Prefix, "var"});
2198 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
2199}
2200
2201namespace {
2202/// Common pre(post)-action for different OpenMP constructs.
2203class CommonActionTy final : public PrePostActionTy {
2204 llvm::FunctionCallee EnterCallee;
2205 ArrayRef<llvm::Value *> EnterArgs;
2206 llvm::FunctionCallee ExitCallee;
2207 ArrayRef<llvm::Value *> ExitArgs;
2208 bool Conditional;
2209 llvm::BasicBlock *ContBlock = nullptr;
2210
2211public:
2212 CommonActionTy(llvm::FunctionCallee EnterCallee,
2213 ArrayRef<llvm::Value *> EnterArgs,
2214 llvm::FunctionCallee ExitCallee,
2215 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
2216 : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2217 ExitArgs(ExitArgs), Conditional(Conditional) {}
2218 void Enter(CodeGenFunction &CGF) override {
2219 llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2220 if (Conditional) {
2221 llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2222 auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2223 ContBlock = CGF.createBasicBlock("omp_if.end");
2224 // Generate the branch (If-stmt)
2225 CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2226 CGF.EmitBlock(ThenBlock);
2227 }
2228 }
2229 void Done(CodeGenFunction &CGF) {
2230 // Emit the rest of the blocks/branches
2231 CGF.EmitBranch(ContBlock);
2232 CGF.EmitBlock(ContBlock, true);
2233 }
2234 void Exit(CodeGenFunction &CGF) override {
2235 CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2236 }
2237};
2238} // anonymous namespace
2239
2240void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2241 StringRef CriticalName,
2242 const RegionCodeGenTy &CriticalOpGen,
2243 SourceLocation Loc, const Expr *Hint) {
2244 // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2245 // CriticalOpGen();
2246 // __kmpc_end_critical(ident_t *, gtid, Lock);
2247 // Prepare arguments and build a call to __kmpc_critical
2248 if (!CGF.HaveInsertPoint())
2249 return;
2250 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2251 getCriticalRegionLock(CriticalName)};
2252 llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2253 std::end(Args));
2254 if (Hint) {
2255 EnterArgs.push_back(CGF.Builder.CreateIntCast(
2256 CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
2257 }
2258 CommonActionTy Action(
2259 OMPBuilder.getOrCreateRuntimeFunction(
2260 CGM.getModule(),
2261 Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
2262 EnterArgs,
2263 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2264 OMPRTL___kmpc_end_critical),
2265 Args);
2266 CriticalOpGen.setAction(Action);
2267 emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2268}
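// Illustrative example (not part of this file):
//
//   #pragma omp critical(io) hint(omp_sync_hint_contended)
//   { /* ... */ }
//
// lowers to __kmpc_critical_with_hint(loc, gtid, lock, hint) ...
// __kmpc_end_critical(loc, gtid, lock), where lock is the internal variable
// produced by getCriticalRegionLock for the name "io".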
2269
2270void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2271 const RegionCodeGenTy &MasterOpGen,
2272 SourceLocation Loc) {
2273 if (!CGF.HaveInsertPoint())
2274 return;
2275 // if(__kmpc_master(ident_t *, gtid)) {
2276 // MasterOpGen();
2277 // __kmpc_end_master(ident_t *, gtid);
2278 // }
2279 // Prepare arguments and build a call to __kmpc_master
2280 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2281 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2282 CGM.getModule(), OMPRTL___kmpc_master),
2283 Args,
2284 OMPBuilder.getOrCreateRuntimeFunction(
2285 CGM.getModule(), OMPRTL___kmpc_end_master),
2286 Args,
2287 /*Conditional=*/true);
2288 MasterOpGen.setAction(Action);
2289 emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2290 Action.Done(CGF);
2291}
2292
2293void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
2294 const RegionCodeGenTy &MaskedOpGen,
2295 SourceLocation Loc, const Expr *Filter) {
2296 if (!CGF.HaveInsertPoint())
2297 return;
2298 // if(__kmpc_masked(ident_t *, gtid, filter)) {
2299 // MaskedOpGen();
2300 // __kmpc_end_masked(ident_t *, gtid);
2301 // }
2302 // Prepare arguments and build a call to __kmpc_masked
2303 llvm::Value *FilterVal = Filter
2304 ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
2305 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
2306 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2307 FilterVal};
2308 llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
2309 getThreadID(CGF, Loc)};
2310 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2311 CGM.getModule(), OMPRTL___kmpc_masked),
2312 Args,
2313 OMPBuilder.getOrCreateRuntimeFunction(
2314 CGM.getModule(), OMPRTL___kmpc_end_masked),
2315 ArgsEnd,
2316 /*Conditional=*/true);
2317 MaskedOpGen.setAction(Action);
2318 emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
2319 Action.Done(CGF);
2320}
2321
2322void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2323 SourceLocation Loc) {
2324 if (!CGF.HaveInsertPoint())
2325 return;
2326 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2327 OMPBuilder.createTaskyield(CGF.Builder);
2328 } else {
2329 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2330 llvm::Value *Args[] = {
2331 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2332 llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2333 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2334 CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
2335 Args);
2336 }
2337
2338 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2339 Region->emitUntiedSwitch(CGF);
2340}
2341
2342void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2343 const RegionCodeGenTy &TaskgroupOpGen,
2344 SourceLocation Loc) {
2345 if (!CGF.HaveInsertPoint())
2346 return;
2347 // __kmpc_taskgroup(ident_t *, gtid);
2348 // TaskgroupOpGen();
2349 // __kmpc_end_taskgroup(ident_t *, gtid);
2350 // Prepare arguments and build a call to __kmpc_taskgroup
2351 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2352 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2353 CGM.getModule(), OMPRTL___kmpc_taskgroup),
2354 Args,
2355 OMPBuilder.getOrCreateRuntimeFunction(
2356 CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
2357 Args);
2358 TaskgroupOpGen.setAction(Action);
2359 emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2360}
2361
2362/// Given an array of pointers to variables, project the address of a
2363/// given variable.
2364static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2365 unsigned Index, const VarDecl *Var) {
2366 // Pull out the pointer to the variable.
2367 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
2368 llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2369
2370 Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2371 Addr = CGF.Builder.CreateElementBitCast(
2372 Addr, CGF.ConvertTypeForMem(Var->getType()));
2373 return Addr;
2374}
2375
2376static llvm::Value *emitCopyprivateCopyFunction(
2377 CodeGenModule &CGM, llvm::Type *ArgsType,
2378 ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2379 ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
2380 SourceLocation Loc) {
2381 ASTContext &C = CGM.getContext();
2382 // void copy_func(void *LHSArg, void *RHSArg);
2383 FunctionArgList Args;
2384 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2385 ImplicitParamDecl::Other);
2386 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
2387 ImplicitParamDecl::Other);
2388 Args.push_back(&LHSArg);
2389 Args.push_back(&RHSArg);
2390 const auto &CGFI =
2391 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2392 std::string Name =
2393 CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
2394 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
2395 llvm::GlobalValue::InternalLinkage, Name,
2396 &CGM.getModule());
2397 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
2398 Fn->setDoesNotRecurse();
2399 CodeGenFunction CGF(CGM);
2400 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
2401 // Dest = (void*[n])(LHSArg);
2402 // Src = (void*[n])(RHSArg);
2403 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2404 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2405 ArgsType), CGF.getPointerAlign());
2406 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2407 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2408 ArgsType), CGF.getPointerAlign());
2409 // *(Type0*)Dst[0] = *(Type0*)Src[0];
2410 // *(Type1*)Dst[1] = *(Type1*)Src[1];
2411 // ...
2412 // *(Typen*)Dst[n] = *(Typen*)Src[n];
2413 for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2414 const auto *DestVar =
2415 cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2416 Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2417
2418 const auto *SrcVar =
2419 cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2420 Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2421
2422 const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2423 QualType Type = VD->getType();
2424 CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2425 }
2426 CGF.FinishFunction();
2427 return Fn;
2428}
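// Editorial sketch: for copyprivate(a, b) the generated copy helper is,
// morally (TypeA/TypeB stand in for the real variable types),
//   void copy_func(void *LHSArg, void *RHSArg) {
//     void **Dst = (void **)LHSArg, **Src = (void **)RHSArg;
//     *(TypeA *)Dst[0] = *(TypeA *)Src[0];   // a
//     *(TypeB *)Dst[1] = *(TypeB *)Src[1];   // b
//   }
// with each assignment routed through EmitOMPCopy so that user-defined copy
// assignment operators are honored.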
2429
2430void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2431 const RegionCodeGenTy &SingleOpGen,
2432 SourceLocation Loc,
2433 ArrayRef<const Expr *> CopyprivateVars,
2434 ArrayRef<const Expr *> SrcExprs,
2435 ArrayRef<const Expr *> DstExprs,
2436 ArrayRef<const Expr *> AssignmentOps) {
2437 if (!CGF.HaveInsertPoint())
2438 return;
2439 assert(CopyprivateVars.size() == SrcExprs.size() &&
2440 CopyprivateVars.size() == DstExprs.size() &&
2441 CopyprivateVars.size() == AssignmentOps.size());
2442 ASTContext &C = CGM.getContext();
2443 // int32 did_it = 0;
2444 // if(__kmpc_single(ident_t *, gtid)) {
2445 // SingleOpGen();
2446 // __kmpc_end_single(ident_t *, gtid);
2447 // did_it = 1;
2448 // }
2449 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2450 // <copy_func>, did_it);
2451
2452 Address DidIt = Address::invalid();
2453 if (!CopyprivateVars.empty()) {
2454 // int32 did_it = 0;
2455 QualType KmpInt32Ty =
2456 C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2457 DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2458 CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2459 }
2460 // Prepare arguments and build a call to __kmpc_single
2461 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2462 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2463 CGM.getModule(), OMPRTL___kmpc_single),
2464 Args,
2465 OMPBuilder.getOrCreateRuntimeFunction(
2466 CGM.getModule(), OMPRTL___kmpc_end_single),
2467 Args,
2468 /*Conditional=*/true);
2469 SingleOpGen.setAction(Action);
2470 emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2471 if (DidIt.isValid()) {
2472 // did_it = 1;
2473 CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2474 }
2475 Action.Done(CGF);
2476 // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2477 // <copy_func>, did_it);
2478 if (DidIt.isValid()) {
2479 llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2480 QualType CopyprivateArrayTy = C.getConstantArrayType(
2481 C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
2482 /*IndexTypeQuals=*/0);
2483 // Create a list of all private variables for copyprivate.
2484 Address CopyprivateList =
2485 CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2486 for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2487 Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
2488 CGF.Builder.CreateStore(
2489 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2490 CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
2491 CGF.VoidPtrTy),
2492 Elem);
2493 }
2494 // Build a function that copies private values from the single region to
2495 // all other threads in the corresponding parallel region.
2496 llvm::Value *CpyFn = emitCopyprivateCopyFunction(
2497 CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2498 CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
2499 llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2500 Address CL =
2501 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2502 CGF.VoidPtrTy);
2503 llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
2504 llvm::Value *Args[] = {
2505 emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2506 getThreadID(CGF, Loc), // i32 <gtid>
2507 BufSize, // size_t <buf_size>
2508 CL.getPointer(), // void *<copyprivate list>
2509 CpyFn, // void (*) (void *, void *) <copy_func>
2510 DidItVal // i32 did_it
2511 };
2512 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2513 CGM.getModule(), OMPRTL___kmpc_copyprivate),
2514 Args);
2515 }
2516}
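// Editorial sketch: for
//   #pragma omp single copyprivate(x)
//   x = init();
// the code above emits, roughly,
//   int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     x = init();
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   void *list[1] = { &x };
//   __kmpc_copyprivate(&loc, gtid, sizeof(list), list, copy_func, did_it);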
2517
2518void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2519 const RegionCodeGenTy &OrderedOpGen,
2520 SourceLocation Loc, bool IsThreads) {
2521 if (!CGF.HaveInsertPoint())
2522 return;
2523 // __kmpc_ordered(ident_t *, gtid);
2524 // OrderedOpGen();
2525 // __kmpc_end_ordered(ident_t *, gtid);
2526 // Prepare arguments and build a call to __kmpc_ordered
2527 if (IsThreads) {
2528 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2529 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2530 CGM.getModule(), OMPRTL___kmpc_ordered),
2531 Args,
2532 OMPBuilder.getOrCreateRuntimeFunction(
2533 CGM.getModule(), OMPRTL___kmpc_end_ordered),
2534 Args);
2535 OrderedOpGen.setAction(Action);
2536 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2537 return;
2538 }
2539 emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2540}
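// Editorial sketch: for the threads form, `#pragma omp ordered` becomes
//   __kmpc_ordered(&loc, gtid); body(); __kmpc_end_ordered(&loc, gtid);
// whereas `ordered simd` (IsThreads == false) just inlines the body with no
// runtime calls.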
2541
2542unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2543 unsigned Flags;
2544 if (Kind == OMPD_for)
2545 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2546 else if (Kind == OMPD_sections)
2547 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2548 else if (Kind == OMPD_single)
2549 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2550 else if (Kind == OMPD_barrier)
2551 Flags = OMP_IDENT_BARRIER_EXPL;
2552 else
2553 Flags = OMP_IDENT_BARRIER_IMPL;
2554 return Flags;
2555}
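// Editorial note: e.g. the implicit barrier closing `#pragma omp for` is
// tagged OMP_IDENT_BARRIER_IMPL_FOR, while an explicit `#pragma omp barrier`
// is tagged OMP_IDENT_BARRIER_EXPL in the ident_t flags.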
2556
2557void CGOpenMPRuntime::getDefaultScheduleAndChunk(
2558 CodeGenFunction &CGF, const OMPLoopDirective &S,
2559 OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
2560 // Check if the loop directive is actually a doacross loop directive. In
2561 // that case, choose the static schedule with chunk size 1.
2562 if (llvm::any_of(
2563 S.getClausesOfKind<OMPOrderedClause>(),
2564 [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
2565 ScheduleKind = OMPC_SCHEDULE_static;
2566 // Chunk size is 1 in this case.
2567 llvm::APInt ChunkSize(32, 1);
2568 ChunkExpr = IntegerLiteral::Create(
2569 CGF.getContext(), ChunkSize,
2570 CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
2571 SourceLocation());
2572 }
2573}
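// Editorial note: e.g. a doacross loop such as
//   #pragma omp for ordered(2)
// with no schedule clause therefore defaults to schedule(static, 1).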
2574
2575void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2576 OpenMPDirectiveKind Kind, bool EmitChecks,
2577 bool ForceSimpleCall) {
2578 // Check if we should use the OMPBuilder
2579 auto *OMPRegionInfo =
2580 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
2581 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2582 CGF.Builder.restoreIP(OMPBuilder.createBarrier(
2583 CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
2584 return;
2585 }
2586
2587 if (!CGF.HaveInsertPoint())
2588 return;
2591 unsigned Flags = getDefaultFlagsForBarriers(Kind);
2592 // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2593 // thread_id);
2594 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2595 getThreadID(CGF, Loc)};
2596 if (OMPRegionInfo) {
2597 if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2598 llvm::Value *Result = CGF.EmitRuntimeCall(
2599 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
2600 OMPRTL___kmpc_cancel_barrier),
2601 Args);
2602 if (EmitChecks) {
2603 // if (__kmpc_cancel_barrier()) {
2604 // exit from construct;
2605 // }
2606 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
2607 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
2608 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
2609 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2610 CGF.EmitBlock(ExitBB);
2611 // exit from construct;
2612 CodeGenFunction::JumpDest CancelDestination =
2613 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2614 CGF.EmitBranchThroughCleanup(CancelDestination);
2615 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2616 }
2617 return;
2618 }
2619 }
2620 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2621 CGM.getModule(), OMPRTL___kmpc_barrier),
2622 Args);
2623}
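// Editorial sketch: inside a cancellable region the barrier doubles as a
// cancellation check point, roughly
//   if (__kmpc_cancel_barrier(&loc, gtid) != 0)
//     goto cancel_exit;    // branch through cleanups to the cancel target
//   cancel_continue: ...
// otherwise a plain __kmpc_barrier(&loc, gtid) is emitted.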
2624
2625/// Map the OpenMP loop schedule to the runtime enumeration.
2626static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2627 bool Chunked, bool Ordered) {
2628 switch (ScheduleKind) {
2629 case OMPC_SCHEDULE_static:
2630 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2631 : (Ordered ? OMP_ord_static : OMP_sch_static);
2632 case OMPC_SCHEDULE_dynamic:
2633 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2634 case OMPC_SCHEDULE_guided:
2635 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2636 case OMPC_SCHEDULE_runtime:
2637 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2638 case OMPC_SCHEDULE_auto:
2639 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2640 case OMPC_SCHEDULE_unknown:
2641 assert(!Chunked && "chunk was specified but schedule kind not known");
2642 return Ordered ? OMP_ord_static : OMP_sch_static;
2643 }
2644 llvm_unreachable("Unexpected runtime schedule");
2645}
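// Editorial note: e.g. schedule(dynamic, 4) maps to OMP_sch_dynamic_chunked,
// schedule(static, N) maps to OMP_sch_static_chunked, and schedule(static)
// combined with an ordered clause maps to OMP_ord_static.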
2646
2647/// Map the OpenMP distribute schedule to the runtime enumeration.
2648static OpenMPSchedType
2649getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2650 // Only static is allowed for dist_schedule.
2651 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2652}
2653
2654bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2655 bool Chunked) const {
2656 OpenMPSchedType Schedule =
2657 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2658 return Schedule == OMP_sch_static;
2659}
2660
2661bool CGOpenMPRuntime::isStaticNonchunked(
2662 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2663 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2664 return Schedule == OMP_dist_sch_static;
2665}
2666
2667bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2668 bool Chunked) const {
2669 OpenMPSchedType Schedule =
2670 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2671 return Schedule == OMP_sch_static_chunked;
2672}
2673
2674bool CGOpenMPRuntime::isStaticChunked(
2675 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2676 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2677 return Schedule == OMP_dist_sch_static_chunked;
2678}
2679
2680bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2681 OpenMPSchedType Schedule =
2682 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2683 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2684 return Schedule != OMP_sch_static;
2685}
2686
2687static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2688 OpenMPScheduleClauseModifier M1,
2689 OpenMPScheduleClauseModifier M2) {
2690 int Modifier = 0;
2691 switch (M1) {
2692 case OMPC_SCHEDULE_MODIFIER_monotonic:
2693 Modifier = OMP_sch_modifier_monotonic;
2694 break;
2695 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2696 Modifier = OMP_sch_modifier_nonmonotonic;
2697 break;
2698 case OMPC_SCHEDULE_MODIFIER_simd:
2699 if (Schedule == OMP_sch_static_chunked)
2700 Schedule = OMP_sch_static_balanced_chunked;
2701 break;
2702 case OMPC_SCHEDULE_MODIFIER_last:
2703 case OMPC_SCHEDULE_MODIFIER_unknown:
2704 break;
2705 }
2706 switch (M2) {
2707 case OMPC_SCHEDULE_MODIFIER_monotonic:
2708 Modifier = OMP_sch_modifier_monotonic;
2709 break;
2710 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2711 Modifier = OMP_sch_modifier_nonmonotonic;
2712 break;
2713 case OMPC_SCHEDULE_MODIFIER_simd:
2714 if (Schedule == OMP_sch_static_chunked)
2715 Schedule = OMP_sch_static_balanced_chunked;
2716 break;
2717 case OMPC_SCHEDULE_MODIFIER_last:
2718 case OMPC_SCHEDULE_MODIFIER_unknown:
2719 break;
2720 }
2721 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2722 // If the static schedule kind is specified or if the ordered clause is
2723 // specified, and if the nonmonotonic modifier is not specified, the effect is
2724 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2725 // modifier is specified, the effect is as if the nonmonotonic modifier is
2726 // specified.
2727 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2728 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2729 Schedule == OMP_sch_static_balanced_chunked ||
2730 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2731 Schedule == OMP_dist_sch_static_chunked ||
2732 Schedule == OMP_dist_sch_static))
2733 Modifier = OMP_sch_modifier_nonmonotonic;
2734 }
2735 return Schedule | Modifier;
2736}
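// Editorial note: e.g. under OpenMP 5.0, schedule(dynamic) with no modifier
// yields OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, whereas the
// static and ordered schedules are left unmodified per the rule quoted above.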
2737
2738void CGOpenMPRuntime::emitForDispatchInit(
2739 CodeGenFunction &CGF, SourceLocation Loc,
2740 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2741 bool Ordered, const DispatchRTInput &DispatchValues) {
2742 if (!CGF.HaveInsertPoint())
2743 return;
2744 OpenMPSchedType Schedule = getRuntimeSchedule(
2745 ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2746 assert(Ordered ||
2747 (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2748 Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2749 Schedule != OMP_sch_static_balanced_chunked));
2750 // Call __kmpc_dispatch_init(
2751 // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2752 // kmp_int[32|64] lower, kmp_int[32|64] upper,
2753 // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2754
2755 // If the Chunk was not specified in the clause - use default value 1.
2756 llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2757 : CGF.Builder.getIntN(IVSize, 1);
2758 llvm::Value *Args[] = {
2759 emitUpdateLocation(CGF, Loc),
2760 getThreadID(CGF, Loc),
2761 CGF.Builder.getInt32(addMonoNonMonoModifier(
2762 CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2763 DispatchValues.LB, // Lower
2764 DispatchValues.UB, // Upper
2765 CGF.Builder.getIntN(IVSize, 1), // Stride
2766 Chunk // Chunk
2767 };
2768 CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2769}
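// Editorial sketch: for `#pragma omp for schedule(dynamic, 4)` with a 32-bit
// signed induction variable this emits, roughly,
//   __kmpc_dispatch_init_4(&loc, gtid, sched, lb, ub, /*stride=*/1,
//                          /*chunk=*/4);
// where sched packs OMP_sch_dynamic_chunked plus any monotonicity modifier.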
2770
2771static void emitForStaticInitCall(
2772 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2773 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2774 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2775 const CGOpenMPRuntime::StaticRTInput &Values) {
2776 if (!CGF.HaveInsertPoint())
2777 return;
2778
2779 assert(!Values.Ordered);
2780 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2781 Schedule == OMP_sch_static_balanced_chunked ||
2782 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2783 Schedule == OMP_dist_sch_static ||
2784 Schedule == OMP_dist_sch_static_chunked);
2785
2786 // Call __kmpc_for_static_init(
2787 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2788 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2789 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2790 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2791 llvm::Value *Chunk = Values.Chunk;
2792 if (Chunk == nullptr) {
2793 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2794 Schedule == OMP_dist_sch_static) &&
2795 "expected static non-chunked schedule");
2796 // If the Chunk was not specified in the clause - use default value 1.
2797 Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
2798 } else {
2799 assert((Schedule == OMP_sch_static_chunked ||
2800 Schedule == OMP_sch_static_balanced_chunked ||
2801 Schedule == OMP_ord_static_chunked ||
2802 Schedule == OMP_dist_sch_static_chunked) &&
2803 "expected static chunked schedule");
2804 }
2805 llvm::Value *Args[] = {
2806 UpdateLocation,
2807 ThreadId,
2808 CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
2809 M2)), // Schedule type
2810 Values.IL.getPointer(), // &isLastIter
2811 Values.LB.getPointer(), // &LB
2812 Values.UB.getPointer(), // &UB
2813 Values.ST.getPointer(), // &Stride
2814 CGF.Builder.getIntN(Values.IVSize, 1), // Incr
2815 Chunk // Chunk
2816 };
2817 CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
2818}
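// Editorial sketch: a non-chunked static worksharing loop boils down to
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &isLast, &LB, &UB,
//                            &Stride, /*incr=*/1, /*chunk=*/1);
// after which each thread runs its adjusted [LB, UB] subrange.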
2819
2820void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2821 SourceLocation Loc,
2822 OpenMPDirectiveKind DKind,
2823 const OpenMPScheduleTy &ScheduleKind,
2824 const StaticRTInput &Values) {
2825 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2826 ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
2827 assert(isOpenMPWorksharingDirective(DKind) &&
2828 "Expected loop-based or sections-based directive.");
2829 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2830 isOpenMPLoopDirective(DKind)
2831 ? OMP_IDENT_WORK_LOOP
2832 : OMP_IDENT_WORK_SECTIONS);
2833 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2834 llvm::FunctionCallee StaticInitFunction =
2835 createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
2836 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2837 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2838 ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
2839}
2840
2841void CGOpenMPRuntime::emitDistributeStaticInit(
2842 CodeGenFunction &CGF, SourceLocation Loc,
2843 OpenMPDistScheduleClauseKind SchedKind,
2844 const CGOpenMPRuntime::StaticRTInput &Values) {
2845 OpenMPSchedType ScheduleNum =
2846 getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
2847 llvm::Value *UpdatedLocation =
2848 emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
2849 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2850 llvm::FunctionCallee StaticInitFunction;
2851 bool isGPUDistribute =
2852 CGM.getLangOpts().OpenMPIsDevice &&
2853 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2854 StaticInitFunction = createForStaticInitFunction(
2855 Values.IVSize, Values.IVSigned, isGPUDistribute);
2856
2857 emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
2858 ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
2859 OMPC_SCHEDULE_MODIFIER_unknown, Values);
2860}
2861
2862void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2863 SourceLocation Loc,
2864 OpenMPDirectiveKind DKind) {
2865 if (!CGF.HaveInsertPoint())
2866 return;
2867 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2868 llvm::Value *Args[] = {
2869 emitUpdateLocation(CGF, Loc,
2870 isOpenMPDistributeDirective(DKind)
2871 ? OMP_IDENT_WORK_DISTRIBUTE
2872 : isOpenMPLoopDirective(DKind)
2873 ? OMP_IDENT_WORK_LOOP
2874 : OMP_IDENT_WORK_SECTIONS),
2875 getThreadID(CGF, Loc)};
2876 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
2877 if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
2878 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
2879 CGF.EmitRuntimeCall(
2880 OMPBuilder.getOrCreateRuntimeFunction(
2881 CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
2882 Args);
2883 else
2884 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2885 CGM.getModule(), OMPRTL___kmpc_for_static_fini),
2886 Args);
2887}
2888
2889void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2890 SourceLocation Loc,
2891 unsigned IVSize,
2892 bool IVSigned) {
2893 if (!CGF.HaveInsertPoint())
2894 return;
2895 // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2896 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2897 CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
2898}
2899
2900llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
2901 SourceLocation Loc, unsigned IVSize,
2902 bool IVSigned, Address IL,
2903 Address LB, Address UB,
2904 Address ST) {
2905 // Call __kmpc_dispatch_next(
2906 // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
2907 // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
2908 // kmp_int[32|64] *p_stride);
2909 llvm::Value *Args[] = {
2910 emitUpdateLocation(CGF, Loc),
2911 getThreadID(CGF, Loc),
2912 IL.getPointer(), // &isLastIter
2913 LB.getPointer(), // &Lower
2914 UB.getPointer(), // &Upper
2915 ST.getPointer() // &Stride
2916 };
2917 llvm::Value *Call =
2918 CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
2919 return CGF.EmitScalarConversion(
2920 Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
2921 CGF.getContext().BoolTy, Loc);
2922}
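// Editorial sketch: dynamically scheduled loops consume chunks with,
// roughly,
//   while (__kmpc_dispatch_next_4(&loc, gtid, &isLast, &LB, &UB, &Stride)) {
//     for (i = LB; i <= UB; ++i) body(i);
//   }
// emitForNext converts the runtime's kmp_int32 result to a bool.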
2923
2924void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2925 llvm::Value *NumThreads,
2926 SourceLocation Loc) {
2927 if (!CGF.HaveInsertPoint())
2928 return;
2929 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2930 llvm::Value *Args[] = {
2931 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2932 CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
2933 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2934 CGM.getModule(), OMPRTL___kmpc_push_num_threads),
2935 Args);
2936}
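// Editorial note: `#pragma omp parallel num_threads(n)` emits
//   __kmpc_push_num_threads(&loc, gtid, (kmp_int32)n);
// before the parallel region is entered.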
2937
2938void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2939 ProcBindKind ProcBind,
2940 SourceLocation Loc) {
2941 if (!CGF.HaveInsertPoint())
2942 return;
2943 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2944 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2945 llvm::Value *Args[] = {
2946 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2947 llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
2948 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2949 CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
2950 Args);
2951}
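// Editorial note: proc_bind(close) likewise lowers to
//   __kmpc_push_proc_bind(&loc, gtid, (kmp_int32)OMP_PROC_BIND_close);
// ahead of the enclosing parallel region.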
2952
2953void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2954 SourceLocation Loc, llvm::AtomicOrdering AO) {
2955 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2956 OMPBuilder.createFlush(CGF.Builder);
2957 } else {
2958 if (!CGF.HaveInsertPoint())
2959 return;
2960 // Build call void __kmpc_flush(ident_t *loc)
2961 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
2962 CGM.getModule(), OMPRTL___kmpc_flush),
2963 emitUpdateLocation(CGF, Loc));
2964 }
2965}
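// Editorial note: `#pragma omp flush` lowers to a single
//   __kmpc_flush(&loc);
// call; with the OpenMPIRBuilder enabled, createFlush emits the equivalent.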
2966
2967namespace {
2968/// Indexes of fields for type kmp_task_t.
2969enum KmpTaskTFields {
2970 /// List of shared variables.
2971 KmpTaskTShareds,
2972 /// Task routine.
2973 KmpTaskTRoutine,
2974 /// Partition id for the untied tasks.
2975 KmpTaskTPartId,
2976 /// Function with call of destructors for private variables.
2977 Data1,
2978 /// Task priority.
2979 Data2,
2980 /// (Taskloops only) Lower bound.
2981 KmpTaskTLowerBound,
2982 /// (Taskloops only) Upper bound.
2983 KmpTaskTUpperBound,
2984 /// (Taskloops only) Stride.
2985 KmpTaskTStride,
2986 /// (Taskloops only) Is last iteration flag.
2987 KmpTaskTLastIter,
2988 /// (Taskloops only) Reduction data.
2989 KmpTaskTReductions,
2990};
2991} // anonymous namespace
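// Editorial sketch: the indices above address a task descriptor of roughly
// this shape (Data1/Data2 are really unions of destructors and priority):
//   struct kmp_task_t {
//     void               *shareds;   // KmpTaskTShareds
//     kmp_routine_entry_t routine;   // KmpTaskTRoutine
//     kmp_int32           part_id;   // KmpTaskTPartId
//     kmp_cmplrdata_t     data1;     // Data1 (destructors)
//     kmp_cmplrdata_t     data2;     // Data2 (priority)
//     // Taskloops only:
//     kmp_uint64 lb, ub;             // KmpTaskTLowerBound/KmpTaskTUpperBound
//     kmp_int64  st;                 // KmpTaskTStride
//     kmp_int32  liter;              // KmpTaskTLastIter
//     void      *reductions;         // KmpTaskTReductions
//   };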
2992
2993bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
2994 return OffloadEntriesTargetRegion.empty() &&
2995 OffloadEntriesDeviceGlobalVar.empty();
2996}
2997
2998/// Initialize target region entry.
2999void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3000 initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3001 StringRef ParentName, unsigned LineNum,
3002 unsigned Order) {
3003 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3004 "only required for the device "
3005 "code generation.");
3006 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3007 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3008 OMPTargetRegionEntryTargetRegion);
3009 ++OffloadingEntriesNum;
3010}
3011
3012void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3013 registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3014 StringRef ParentName, unsigned LineNum,
3015 llvm::Constant *Addr, llvm::Constant *ID,
3016 OMPTargetRegionEntryKind Flags) {
3017 // If we are emitting code for a target, the entry is already initialized;
3018 // it only has to be registered.
3019 if (CGM.getLangOpts().OpenMPIsDevice) {
3020 // This could happen if the device compilation is invoked standalone.
3021 if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum))
3022 return;
3023 auto &Entry =
3024 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3025 Entry.setAddress(Addr);
3026 Entry.setID(ID);
3027 Entry.setFlags(Flags);
3028 } else {
3029 if (Flags ==
3030 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion &&
3031 hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum,
3032 /*IgnoreAddressId*/ true))
3033 return;
3034 assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3035 "Target region entry already registered!");
3036 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3037 OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3038 ++OffloadingEntriesNum;
3039 }
3040}
3041
3042bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3043 unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum,
3044 bool IgnoreAddressId) const {
3045 auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3046 if (PerDevice == OffloadEntriesTargetRegion.end())
3047 return false;
3048 auto PerFile = PerDevice->second.find(FileID);
3049 if (PerFile == PerDevice->second.end())
3050 return false;
3051 auto PerParentName = PerFile->second.find(ParentName);
3052 if (PerParentName == PerFile->second.end())
3053 return false;
3054 auto PerLine = PerParentName->second.find(LineNum);
3055 if (PerLine == PerParentName->second.end())
3056 return false;
3057 // Fail if this entry is already registered.
3058 if (!IgnoreAddressId &&
3059 (PerLine->second.getAddress() || PerLine->second.getID()))
3060 return false;
3061 return true;
3062}
3063
3064void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3065 const OffloadTargetRegionEntryInfoActTy &Action) {
3066 // Scan all target region entries and perform the provided action.
3067 for (const auto &D : OffloadEntriesTargetRegion)
3068 for (const auto &F : D.second)
3069 for (const auto &P : F.second)
3070 for (const auto &L : P.second)
3071 Action(D.first, F.first, P.first(), L.first, L.second);
3072}
3073
3074void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3075 initializeDeviceGlobalVarEntryInfo(StringRef Name,
3076 OMPTargetGlobalVarEntryKind Flags,
3077 unsigned Order) {
3078 assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3079 "only required for the device "
3080 "code generation.");
3081 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3082 ++OffloadingEntriesNum;
3083}
3084
3085void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3086 registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3087 CharUnits VarSize,
3088 OMPTargetGlobalVarEntryKind Flags,
3089 llvm::GlobalValue::LinkageTypes Linkage) {
3090 if (CGM.getLangOpts().OpenMPIsDevice) {
3091 // This could happen if the device compilation is invoked standalone.
3092 if (!hasDeviceGlobalVarEntryInfo(VarName))
3093 return;
3094 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3095 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3096 if (Entry.getVarSize().isZero()) {
3097 Entry.setVarSize(VarSize);
3098 Entry.setLinkage(Linkage);
3099 }
3100 return;
3101 }
3102 Entry.setVarSize(VarSize);
3103 Entry.setLinkage(Linkage);
3104 Entry.setAddress(Addr);
3105 } else {
3106 if (hasDeviceGlobalVarEntryInfo(VarName)) {
3107 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3108 assert(Entry.isValid() && Entry.getFlags() == Flags &&
3109 "Entry not initialized!");
3110 if (Entry.getVarSize().isZero()) {
3111 Entry.setVarSize(VarSize);
3112 Entry.setLinkage(Linkage);
3113 }
3114 return;
3115 }
3116 OffloadEntriesDeviceGlobalVar.try_emplace(
3117 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3118 ++OffloadingEntriesNum;
3119 }
3120}
3121
3122void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3123 actOnDeviceGlobalVarEntriesInfo(
3124 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3125 // Scan all device global variable entries and perform the provided action.
3126 for (const auto &E : OffloadEntriesDeviceGlobalVar)
3127 Action(E.getKey(), E.getValue());
3128}
3129
3130void CGOpenMPRuntime::createOffloadEntry(
3131 llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
3132 llvm::GlobalValue::LinkageTypes Linkage) {
3133 StringRef Name = Addr->getName();
3134 llvm::Module &M = CGM.getModule();
3135 llvm::LLVMContext &C = M.getContext();
3136
3137 // Create constant string with the name.
3138 llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3139
3140 std::string StringName = getName({"omp_offloading", "entry_name"});
3141 auto *Str = new llvm::GlobalVariable(
3142 M, StrPtrInit->getType(), /*isConstant=*/true,
3143 llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
3144 Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3145
3146 llvm::Constant *Data[] = {
3147 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy),
3148 llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy),
3149 llvm::ConstantInt::get(CGM.SizeTy, Size),
3150 llvm::ConstantInt::get(CGM.Int32Ty, Flags),
3151 llvm::ConstantInt::get(CGM.Int32Ty, 0)};
3152 std::string EntryName = getName({"omp_offloading", "entry", ""});
3153 llvm::GlobalVariable *Entry = createGlobalStruct(
3154 CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
3155 Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
3156
3157 // The entry has to be created in the section the linker expects it to be in.
3158 Entry->setSection("omp_offloading_entries");
3159}
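// Editorial sketch: conceptually this materializes
//   static const __tgt_offload_entry entry = {
//     /*addr=*/ID, /*name=*/<entry-name string>, /*size=*/Size,
//     /*flags=*/Flags, /*reserved=*/0 };
// pinned to the "omp_offloading_entries" section so the linker can assemble
// the offload entry table the runtime walks at registration time.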
3160
3161void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3162 // Emit the offloading entries and metadata so that the device codegen side
3163 // can easily figure out what to emit. The produced metadata looks like
3164 // this:
3165 //
3166 // !omp_offload.info = !{!1, ...}
3167 //
3168 // Right now we only generate metadata for functions that contain target
3169 // regions.
3170
3171 // If we are in simd mode or there are no entries, we don't need to do
3172 // anything.
3173 if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
3174 return;
3175
3176 llvm::Module &M = CGM.getModule();
3177 llvm::LLVMContext &C = M.getContext();
3178 SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *,
3179 SourceLocation, StringRef>,
3180 16>
3181 OrderedEntries(OffloadEntriesInfoManager.size());
3182 llvm::SmallVector<StringRef, 16> ParentFunctions(
3183 OffloadEntriesInfoManager.size());
3184
3185 // Auxiliary methods to create metadata values and strings.
3186 auto &&GetMDInt = [this](unsigned V) {
3187 return llvm::ConstantAsMetadata::get(
3188 llvm::ConstantInt::get(CGM.Int32Ty, V));
3189 };
3190
3191 auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
3192
3193 // Create the offloading info metadata node.
3194 llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3195
3196 // Create a function that emits metadata for each target region entry.
3197 auto &&TargetRegionMetadataEmitter =
3198 [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt,
3199 &GetMDString](
3200 unsigned DeviceID, unsigned FileID, StringRef ParentName,
3201 unsigned Line,
3202 const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) {
3203 // Generate metadata for target regions. Each entry of this metadata
3204 // contains:
3205 // - Entry 0 -> Kind of this type of metadata (0).
3206 // - Entry 1 -> Device ID of the file where the entry was identified.
3207 // - Entry 2 -> File ID of the file where the entry was identified.
3208 // - Entry 3 -> Mangled name of the function where the entry was
3209 // identified.
3210 // - Entry 4 -> Line in the file where the entry was identified.
3211 // - Entry 5 -> Order the entry was created.
3212 // The first element of the metadata node is the kind.
3213 llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
3214 GetMDInt(FileID), GetMDString(ParentName),
3215 GetMDInt(Line), GetMDInt(E.getOrder())};
3216
3217 SourceLocation Loc;
3218 for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
3219 E = CGM.getContext().getSourceManager().fileinfo_end();
3220 I != E; ++I) {
3221 if (I->getFirst()->getUniqueID().getDevice() == DeviceID &&
3222 I->getFirst()->getUniqueID().getFile() == FileID) {
3223 Loc = CGM.getContext().getSourceManager().translateFileLineCol(
3224 I->getFirst(), Line, 1);
3225 break;
3226 }
3227 }
3228 // Save this entry in the right position of the ordered entries array.
3229 OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName);
3230 ParentFunctions[E.getOrder()] = ParentName;
3231
3232 // Add metadata to the named metadata node.
3233 MD->addOperand(llvm::MDNode::get(C, Ops));
3234 };
3235
3236 OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
3237 TargetRegionMetadataEmitter);
3238
3239 // Create a function that emits metadata for each device global variable entry.
3240 auto &&DeviceGlobalVarMetadataEmitter =
3241 [&C, &OrderedEntries, &GetMDInt, &GetMDString,
3242 MD](StringRef MangledName,
3243 const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar
3244 &E) {
3245 // Generate metadata for global variables. Each entry of this metadata
3246 // contains:
3247 // - Entry 0 -> Kind of this type of metadata (1).
3248 // - Entry 1 -> Mangled name of the variable.
3249 // - Entry 2 -> Declare target kind.
3250 // - Entry 3 -> Order the entry was created.
3251 // The first element of the metadata node is the kind.
3252 llvm::Metadata *Ops[] = {
3253 GetMDInt(E.getKind()), GetMDString(MangledName),
3254 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
3255
3256 // Save this entry in the right position of the ordered entries array.
3257 OrderedEntries[E.getOrder()] =
3258 std::make_tuple(&E, SourceLocation(), MangledName);
3259
3260 // Add metadata to the named metadata node.
3261 MD->addOperand(llvm::MDNode::get(C, Ops));
3262 };
3263
3264 OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
3265 DeviceGlobalVarMetadataEmitter);
3266
3267 for (const auto &E : OrderedEntries) {
3268 assert(std::get<0>(E) && "All ordered entries must exist!");
3269 if (const auto *CE =
3270 dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3271 std::get<0>(E))) {
3272 if (!CE->getID() || !CE->getAddress()) {
3273 // Do not blame the entry if the parent function is not emitted.
3274 StringRef FnName = ParentFunctions[CE->getOrder()];
3275 if (!CGM.GetGlobalValue(FnName))
3276 continue;
3277 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3278 DiagnosticsEngine::Error,
3279 "Offloading entry for target region in %0 is incorrect: either the "
3280 "address or the ID is invalid.");
3281 CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName;
3282 continue;
3283 }
3284 createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
3285 CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
3286 } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy::
3287 OffloadEntryInfoDeviceGlobalVar>(
3288 std::get<0>(E))) {
3289 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags =
3290 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3291 CE->getFlags());
3292 switch (Flags) {
3293 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: {
3294 if (CGM.getLangOpts().OpenMPIsDevice &&
3295 CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
3296 continue;
3297 if (!CE->getAddress()) {
3298 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3299 DiagnosticsEngine::Error, "Offloading entry for declare target "
3300 "variable %0 is incorrect: the "
3301 "address is invalid.");
3302 CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E);
3303 continue;
3304 }
3305 // The variable has no definition - no need to add the entry.
3306 if (CE->getVarSize().isZero())
3307 continue;
3308 break;
3309 }
3310 case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink:
3311 assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
3312 (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
3313 "Declare target link address is set.");
3314 if (CGM.getLangOpts().OpenMPIsDevice)
3315 continue;
3316 if (!CE->getAddress()) {
3317 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3318 DiagnosticsEngine::Error,
3319 "Offloading entry for declare target variable is incorrect: the "
3320 "address is invalid.");
3321 CGM.getDiags().Report(DiagID);
3322 continue;
3323 }
3324 break;
3325 }
3326 createOffloadEntry(CE->getAddress(), CE->getAddress(),
3327 CE->getVarSize().getQuantity(), Flags,
3328 CE->getLinkage());
3329 } else {
3330 llvm_unreachable("Unsupported entry kind.");
3331 }
3332 }
3333}
3334
3335/// Loads all the offload entries information from the host IR
3336/// metadata.
3337void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3338 // If we are in target mode, load the metadata from the host IR. This code
3339 // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3340
3341 if (!CGM.getLangOpts().OpenMPIsDevice)
3342 return;
3343
3344 if (CGM.getLangOpts().OMPHostIRFile.empty())
3345 return;
3346
3347 auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3348 if (auto EC = Buf.getError()) {
3349 CGM.getDiags().Report(diag::err_cannot_open_file)
3350 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3351 return;
3352 }
3353
3354 llvm::LLVMContext C;
3355 auto ME = expectedToErrorOrAndEmitErrors(
3356 C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3357
3358 if (auto EC = ME.getError()) {
3359 unsigned DiagID = CGM.getDiags().getCustomDiagID(
3360 DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
3361 CGM.getDiags().Report(DiagID)
3362 << CGM.getLangOpts().OMPHostIRFile << EC.message();
3363 return;
3364 }
3365
3366 llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3367 if (!MD)
3368 return;
3369
3370 for (llvm::MDNode *MN : MD->operands()) {
3371 auto &&GetMDInt = [MN](unsigned Idx) {
3372 auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3373 return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3374 };
3375
3376 auto &&GetMDString = [MN](unsigned Idx) {
3377 auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
3378 return V->getString();
3379 };
3380
3381 switch (GetMDInt(0)) {
3382 default:
3383 llvm_unreachable("Unexpected metadata!");
3384 break;
3385 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3386 OffloadingEntryInfoTargetRegion:
3387 OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
3388 /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
3389 /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
3390 /*Order=*/GetMDInt(5));
3391 break;
3392 case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3393 OffloadingEntryInfoDeviceGlobalVar:
3394 OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
3395 /*MangledName=*/GetMDString(1),
3396 static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
3397 /*Flags=*/GetMDInt(2)),
3398 /*Order=*/GetMDInt(3));
3399 break;
3400 }
3401 }
3402}
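// Editorial note: each !omp_offload.info operand read above is a tuple such
// as {0, DeviceID, FileID, "parent_fn", Line, Order} for a target region, or
// {1, "mangled_var", Flags, Order} for a declare-target global, mirroring
// what createOffloadEntriesAndInfoMetadata() emitted on the host side.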
3403
3404void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3405 if (!KmpRoutineEntryPtrTy) {
3406 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3407 ASTContext &C = CGM.getContext();
3408 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3409 FunctionProtoType::ExtProtoInfo EPI;
3410 KmpRoutineEntryPtrQTy = C.getPointerType(
3411 C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3412 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3413 }
3414}
3415
3416QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3417 // Make sure the type of the entry is already created. This is the type we
3418 // have to create:
3419 // struct __tgt_offload_entry{
3420 // void *addr; // Pointer to the offload entry info.
3421 // // (function or global)
3422 // char *name; // Name of the function or global.
3423 // size_t size; // Size of the entry info (0 if it is a function).
3424 // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3425 // int32_t reserved; // Reserved, for use by the runtime library.
3426 // };
3427 if (TgtOffloadEntryQTy.isNull()) {
3428 ASTContext &C = CGM.getContext();
3429 RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
3430 RD->startDefinition();
3431 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3432 addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy));
3433 addFieldToRecordDecl(C, RD, C.getSizeType());
3434 addFieldToRecordDecl(
3435 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3436 addFieldToRecordDecl(
3437 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3438 RD->completeDefinition();
3439 RD->addAttr(PackedAttr::CreateImplicit(C));
3440 TgtOffloadEntryQTy = C.getRecordType(RD);
3441 }
3442 return TgtOffloadEntryQTy;
3443}
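// Editor's note: a plain C++ mirror of the packed record built above, written
// out by hand for reference. The name TgtOffloadEntrySketch is hypothetical;
// the authoritative layout is the one constructed in the function.
#include <cstddef>
#include <cstdint>

struct __attribute__((packed)) TgtOffloadEntrySketch {
  void *Addr;       // Pointer to the offload entry info (function or global).
  char *Name;       // Name of the function or global.
  size_t Size;      // Size of the entry info (0 if it is a function).
  int32_t Flags;    // Flags associated with the entry, e.g. 'link'.
  int32_t Reserved; // Reserved, for use by the runtime library.
};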
3444
3445namespace {
3446struct PrivateHelpersTy {
3447 PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
3448 const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
3449 : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
3450 PrivateElemInit(PrivateElemInit) {}
3451 PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
3452 const Expr *OriginalRef = nullptr;
3453 const VarDecl *Original = nullptr;
3454 const VarDecl *PrivateCopy = nullptr;
3455 const VarDecl *PrivateElemInit = nullptr;
3456 bool isLocalPrivate() const {
3457 return !OriginalRef && !PrivateCopy && !PrivateElemInit;
3458 }
3459};
3460typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3461} // anonymous namespace
3462
3463static bool isAllocatableDecl(const VarDecl *VD) {
3464 const VarDecl *CVD = VD->getCanonicalDecl();
3465 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
3466 return false;
3467 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
3468 // Use the default allocation.
3469 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
3470 !AA->getAllocator());
3471}
3472
3473static RecordDecl *
3474createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3475 if (!Privates.empty()) {
3476 ASTContext &C = CGM.getContext();
3477 // Build struct .kmp_privates_t. {
3478 // /* private vars */
3479 // };
3480 RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
3481 RD->startDefinition();
3482 for (const auto &Pair : Privates) {
3483 const VarDecl *VD = Pair.second.Original;
3484 QualType Type = VD->getType().getNonReferenceType();
3485 // If the private variable is a local variable with lvalue ref type,
3486 // allocate the pointer instead of the pointee type.
3487 if (Pair.second.isLocalPrivate()) {
3488 if (VD->getType()->isLValueReferenceType())
3489 Type = C.getPointerType(Type);
3490 if (isAllocatableDecl(VD))
3491 Type = C.getPointerType(Type);
3492 }
3493 FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
3494 if (VD->hasAttrs()) {
3495 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3496 E(VD->getAttrs().end());
3497 I != E; ++I)
3498 FD->addAttr(*I);
3499 }
3500 }
3501 RD->completeDefinition();
3502 return RD;
3503 }
3504 return nullptr;
3505}
3506
3507static RecordDecl *
3508createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
3509 QualType KmpInt32Ty,
3510 QualType KmpRoutineEntryPointerQTy) {
3511 ASTContext &C = CGM.getContext();
3512 // Build struct kmp_task_t {
3513 // void * shareds;
3514 // kmp_routine_entry_t routine;
3515 // kmp_int32 part_id;
3516 // kmp_cmplrdata_t data1;
3517 // kmp_cmplrdata_t data2;
3518 // For taskloops additional fields:
3519 // kmp_uint64 lb;
3520 // kmp_uint64 ub;
3521 // kmp_int64 st;
3522 // kmp_int32 liter;
3523 // void * reductions;
3524 // };
3525 RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3526 UD->startDefinition();
3527 addFieldToRecordDecl(C, UD, KmpInt32Ty);
3528 addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3529 UD->completeDefinition();
3530 QualType KmpCmplrdataTy = C.getRecordType(UD);
3531 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
3532 RD->startDefinition();
3533 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3534 addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3535 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3536 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3537 addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3538 if (isOpenMPTaskLoopDirective(Kind)) {
3539 QualType KmpUInt64Ty =
3540 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3541 QualType KmpInt64Ty =
3542 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3543 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3544 addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3545 addFieldToRecordDecl(C, RD, KmpInt64Ty);
3546 addFieldToRecordDecl(C, RD, KmpInt32Ty);
3547 addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3548 }
3549 RD->completeDefinition();
3550 return RD;
3551}
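// Editor's note: a hand-written C++ sketch of the records built above, under
// the assumption of the usual kmp integer widths; every name with a *Sketch
// suffix is hypothetical. The authoritative definition is kmp_task_t in
// openmp/runtime/src/kmp.h.
#include <cstdint>

typedef int32_t kmp_int32_sketch;
typedef kmp_int32_sketch (*kmp_routine_entry_sketch)(kmp_int32_sketch, void *);

union KmpCmplrdataSketch {
  kmp_int32_sketch priority;            // used for the priority clause
  kmp_routine_entry_sketch destructors; // used when privates need cleanup
};

struct KmpTaskTSketch {
  void *shareds;
  kmp_routine_entry_sketch routine;
  kmp_int32_sketch part_id;
  KmpCmplrdataSketch data1;
  KmpCmplrdataSketch data2;
  // Fields appended only for taskloop directives:
  uint64_t lb;
  uint64_t ub;
  int64_t st;
  kmp_int32_sketch liter;
  void *reductions;
};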
3552
3553static RecordDecl *
3554createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
3555 ArrayRef<PrivateDataTy> Privates) {
3556 ASTContext &C = CGM.getContext();
3557 // Build struct kmp_task_t_with_privates {
3558 // kmp_task_t task_data;
3559 // .kmp_privates_t. privates;
3560 // };
3561 RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3562 RD->startDefinition();
3563 addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3564 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
3565 addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3566 RD->completeDefinition();
3567 return RD;
3568}
3569
3570/// Emit a proxy function which accepts kmp_task_t as the second
3571/// argument.
3572/// \code
3573/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3574/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3575/// For taskloops:
3576/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3577/// tt->reductions, tt->shareds);
3578/// return 0;
3579/// }
3580/// \endcode
3581static llvm::Function *
3582emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
3583 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3584 QualType KmpTaskTWithPrivatesPtrQTy,
3585 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3586 QualType SharedsPtrTy, llvm::Function *TaskFunction,
3587 llvm::Value *TaskPrivatesMap) {
3588 ASTContext &C = CGM.getContext();
3589 FunctionArgList Args;
3590 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3591 ImplicitParamDecl::Other);
3592 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3593 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3594 ImplicitParamDecl::Other);
3595 Args.push_back(&GtidArg);
3596 Args.push_back(&TaskTypeArg);
3597 const auto &TaskEntryFnInfo =
3598 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3599 llvm::FunctionType *TaskEntryTy =
3600 CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3601 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
3602 auto *TaskEntry = llvm::Function::Create(
3603 TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
3604 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
3605 TaskEntry->setDoesNotRecurse();
3606 CodeGenFunction CGF(CGM);
3607 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
3608 Loc, Loc);
3609
3610 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3611 // tt,
3612 // For taskloops:
3613 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3614 // tt->task_data.shareds);
3615 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3616 CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3617 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3618 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3619 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3620 const auto *KmpTaskTWithPrivatesQTyRD =
3621 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3622 LValue Base =
3623 CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3624 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3625 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3626 LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3627 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3628
3629 auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3630 LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3631 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3632 CGF.EmitLoadOfScalar(SharedsLVal, Loc),
3633 CGF.ConvertTypeForMem(SharedsPtrTy));
3634
3635 auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3636 llvm::Value *PrivatesParam;
3637 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3638 LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3639 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3640 PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
3641 } else {
3642 PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3643 }
3644
3645 llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3646 TaskPrivatesMap,
3647 CGF.Builder
3648 .CreatePointerBitCastOrAddrSpaceCast(
3649 TDBase.getAddress(CGF), CGF.VoidPtrTy)
3650 .getPointer()};
3651 SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3652 std::end(CommonArgs));
3653 if (isOpenMPTaskLoopDirective(Kind)) {
3654 auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3655 LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3656 llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
3657 auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3658 LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3659 llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
3660 auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3661 LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
3662 llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
3663 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3664 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
3665 llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
3666 auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3667 LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
3668 llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
3669 CallArgs.push_back(LBParam);
3670 CallArgs.push_back(UBParam);
3671 CallArgs.push_back(StParam);
3672 CallArgs.push_back(LIParam);
3673 CallArgs.push_back(RParam);
3674 }
3675 CallArgs.push_back(SharedsParam);
3676
3677 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
3678 CallArgs);
3679 CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3680 CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3681 CGF.FinishFunction();
3682 return TaskEntry;
3683}
3684
3685static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
3686 SourceLocation Loc,
3687 QualType KmpInt32Ty,
3688 QualType KmpTaskTWithPrivatesPtrQTy,
3689 QualType KmpTaskTWithPrivatesQTy) {
3690 ASTContext &C = CGM.getContext();
3691 FunctionArgList Args;
3692 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3693 ImplicitParamDecl::Other);
3694 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3695 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3696 ImplicitParamDecl::Other);
3697 Args.push_back(&GtidArg);
3698 Args.push_back(&TaskTypeArg);
3699 const auto &DestructorFnInfo =
3700 CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3701 llvm::FunctionType *DestructorFnTy =
3702 CGM.getTypes().GetFunctionType(DestructorFnInfo);
3703 std::string Name =
3704 CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
3705 auto *DestructorFn =
3706 llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
3707 Name, &CGM.getModule());
3708 CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
3709 DestructorFnInfo);
3710 DestructorFn->setDoesNotRecurse();
3711 CodeGenFunction CGF(CGM);
3712 CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3713 Args, Loc, Loc);
3714
3715 LValue Base = CGF.EmitLoadOfPointerLValue(
3716 CGF.GetAddrOfLocalVar(&TaskTypeArg),
3717 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3718 const auto *KmpTaskTWithPrivatesQTyRD =
3719 cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3720 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3721 Base = CGF.EmitLValueForField(Base, *FI);
3722 for (const auto *Field :
3723 cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3724 if (QualType::DestructionKind DtorKind =
3725 Field->getType().isDestructedType()) {
3726 LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
3727 CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
3728 }
3729 }
3730 CGF.FinishFunction();
3731 return DestructorFn;
3732}
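// Editor's note: a minimal sketch of what the generated destructor amounts to
// for a task whose privates record holds one std::string; all names here are
// hypothetical. pushDestroy on each destructible field reduces to an ordinary
// destructor call at runtime.
#include <memory> // std::destroy_at (C++17)
#include <string>

struct PrivatesSketch { std::string Priv1; };
struct TaskWithPrivatesSketch { /* kmp_task_t task_data; */ PrivatesSketch privates; };

static int taskDestructorSketch(int /*gtid*/, TaskWithPrivatesSketch *TT) {
  std::destroy_at(&TT->privates.Priv1); // one destroy per destructible field
  return 0;
}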
3733
3734/// Emit a privates mapping function for correct handling of private and
3735/// firstprivate variables.
3736/// \code
3737/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3738/// **noalias priv1,..., <tyn> **noalias privn) {
3739/// *priv1 = &.privates.priv1;
3740/// ...;
3741/// *privn = &.privates.privn;
3742/// }
3743/// \endcode
3744static llvm::Value *
3745emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
3746 const OMPTaskDataTy &Data, QualType PrivatesQTy,
3747 ArrayRef<PrivateDataTy> Privates) {
3748 ASTContext &C = CGM.getContext();
3749 FunctionArgList Args;
3750 ImplicitParamDecl TaskPrivatesArg(
3751 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3752 C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3753 ImplicitParamDecl::Other);
3754 Args.push_back(&TaskPrivatesArg);
3755 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
3756 unsigned Counter = 1;
3757 for (const Expr *E : Data.PrivateVars) {
3758 Args.push_back(ImplicitParamDecl::Create(
3759 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3760 C.getPointerType(C.getPointerType(E->getType()))
3761 .withConst()
3762 .withRestrict(),
3763 ImplicitParamDecl::Other));
3764 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3765 PrivateVarsPos[VD] = Counter;
3766 ++Counter;
3767 }
3768 for (const Expr *E : Data.FirstprivateVars) {
3769 Args.push_back(ImplicitParamDecl::Create(
3770 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3771 C.getPointerType(C.getPointerType(E->getType()))
3772 .withConst()
3773 .withRestrict(),
3774 ImplicitParamDecl::Other));
3775 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3776 PrivateVarsPos[VD] = Counter;
3777 ++Counter;
3778 }
3779 for (const Expr *E : Data.LastprivateVars) {
3780 Args.push_back(ImplicitParamDecl::Create(
3781 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3782 C.getPointerType(C.getPointerType(E->getType()))
3783 .withConst()
3784 .withRestrict(),
3785 ImplicitParamDecl::Other));
3786 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3787 PrivateVarsPos[VD] = Counter;
3788 ++Counter;
3789 }
3790 for (const VarDecl *VD : Data.PrivateLocals) {
3791 QualType Ty = VD->getType().getNonReferenceType();
3792 if (VD->getType()->isLValueReferenceType())
3793 Ty = C.getPointerType(Ty);
3794 if (isAllocatableDecl(VD))
3795 Ty = C.getPointerType(Ty);
3796 Args.push_back(ImplicitParamDecl::Create(
3797 C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3798 C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
3799 ImplicitParamDecl::Other));
3800 PrivateVarsPos[VD] = Counter;
3801 ++Counter;
3802 }
3803 const auto &TaskPrivatesMapFnInfo =
3804 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3805 llvm::FunctionType *TaskPrivatesMapTy =
3806 CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3807 std::string Name =
3808 CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
3809 auto *TaskPrivatesMap = llvm::Function::Create(
3810 TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
3811 &CGM.getModule());
3812 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
3813 TaskPrivatesMapFnInfo);
3814 if (CGM.getLangOpts().Optimize) {
3815 TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3816 TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3817 TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3818 }
3819 CodeGenFunction CGF(CGM);
3820 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3821 TaskPrivatesMapFnInfo, Args, Loc, Loc);
3822
3823 // *privi = &.privates.privi;
3824 LValue Base = CGF.EmitLoadOfPointerLValue(
3825 CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3826 TaskPrivatesArg.getType()->castAs<PointerType>());
3827 const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3828 Counter = 0;
3829 for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
3830 LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
3831 const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
3832 LValue RefLVal =
3833 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
3834 LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
3835 RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
3836 CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
3837 ++Counter;
3838 }
3839 CGF.FinishFunction();
3840 return TaskPrivatesMap;
3841}
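// Editor's note: a hand-written equivalent of the mapping function emitted
// above for a task with two privates `int a; double b`; all names here are
// hypothetical. The privates record is laid out in decreasing alignment order
// (see the stable_sort in emitTaskInit below), while the pointer parameters
// follow clause order, so the two orders can differ.
struct PrivatesSketch2 {
  double b; // 8-byte alignment, placed first
  int a;    // 4-byte alignment, placed second
};

static void taskPrivatesMapSketch(const PrivatesSketch2 *privs, int **a_ptr,
                                  double **b_ptr) {
  *a_ptr = const_cast<int *>(&privs->a);    // *priv1 = &.privates.priv1;
  *b_ptr = const_cast<double *>(&privs->b); // *priv2 = &.privates.priv2;
}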
3842
3843/// Emit initialization for private variables in task-based directives.
3844static void emitPrivatesInit(CodeGenFunction &CGF,
3845 const OMPExecutableDirective &D,
3846 Address KmpTaskSharedsPtr, LValue TDBase,
3847 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3848 QualType SharedsTy, QualType SharedsPtrTy,
3849 const OMPTaskDataTy &Data,
3850 ArrayRef<PrivateDataTy> Privates, bool ForDup) {
3851 ASTContext &C = CGF.getContext();
3852 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3853 LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
3854 OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
3855 ? OMPD_taskloop
3856 : OMPD_task;
3857 const CapturedStmt &CS = *D.getCapturedStmt(Kind);
3858 CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
3859 LValue SrcBase;
3860 bool IsTargetTask =
3861 isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
3862 isOpenMPTargetExecutionDirective(D.getDirectiveKind());
3863 // For target-based directives skip 4 firstprivate arrays BasePointersArray,
3864 // PointersArray, SizesArray, and MappersArray. The original variables for
3865 // these arrays are not captured and we get their addresses explicitly.
3866 if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
3867 (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
3868 SrcBase = CGF.MakeAddrLValue(
3869 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3870 KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
3871 SharedsTy);
3872 }
3873 FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
3874 for (const PrivateDataTy &Pair : Privates) {
3875 // Do not initialize private locals.
3876 if (Pair.second.isLocalPrivate()) {
3877 ++FI;
3878 continue;
3879 }
3880 const VarDecl *VD = Pair.second.PrivateCopy;
3881 const Expr *Init = VD->getAnyInitializer();
3882 if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
3883 !CGF.isTrivialInitializer(Init)))) {
3884 LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
3885 if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
3886 const VarDecl *OriginalVD = Pair.second.Original;
3887 // Check if the variable is the target-based BasePointersArray,
3888 // PointersArray, SizesArray, or MappersArray.
3889 LValue SharedRefLValue;
3890 QualType Type = PrivateLValue.getType();
3891 const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
3892 if (IsTargetTask && !SharedField) {
3893 assert(isa<ImplicitParamDecl>(OriginalVD) &&
3894 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3895 cast<CapturedDecl>(OriginalVD->getDeclContext())
3896 ->getNumParams() == 0 &&
3897 isa<TranslationUnitDecl>(
3898 cast<CapturedDecl>(OriginalVD->getDeclContext())
3899 ->getDeclContext()) &&
3900 "Expected artificial target data variable.");
3901 SharedRefLValue =
3902 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
3903 } else if (ForDup) {
3904 SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
3905 SharedRefLValue = CGF.MakeAddrLValue(
3906 Address(SharedRefLValue.getPointer(CGF),
3907 C.getDeclAlign(OriginalVD)),
3908 SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
3909 SharedRefLValue.getTBAAInfo());
3910 } else if (CGF.LambdaCaptureFields.count(
3911 Pair.second.Original->getCanonicalDecl()) > 0 ||
3912 isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
3913 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3914 } else {
3915 // Processing for implicitly captured variables.
3916 InlinedOpenMPRegionRAII Region(
3917 CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
3918 /*HasCancel=*/false, /*NoInheritance=*/true);
3919 SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
3920 }
3921 if (Type->isArrayType()) {
3922 // Initialize firstprivate array.
3923 if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
3924 // Perform simple memcpy.
3925 CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
3926 } else {
3927 // Initialize firstprivate array using element-by-element
3928 // initialization.
3929 CGF.EmitOMPAggregateAssign(
3930 PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
3931 Type,
3932 [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
3933 Address SrcElement) {
3934 // Clean up any temporaries needed by the initialization.
3935 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3936 InitScope.addPrivate(
3937 Elem, [SrcElement]() -> Address { return SrcElement; });
3938 (void)InitScope.Privatize();
3939 // Emit initialization for single element.
3940 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
3941 CGF, &CapturesInfo);
3942 CGF.EmitAnyExprToMem(Init, DestElement,
3943 Init->getType().getQualifiers(),
3944 /*IsInitializer=*/false);
3945 });
3946 }
3947 } else {
3948 CodeGenFunction::OMPPrivateScope InitScope(CGF);
3949 InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address {
3950 return SharedRefLValue.getAddress(CGF);
3951 });
3952 (void)InitScope.Privatize();
3953 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
3954 CGF.EmitExprAsInit(Init, VD, PrivateLValue,
3955 /*capturedByInit=*/false);
3956 }
3957 } else {
3958 CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
3959 }
3960 }
3961 ++FI;
3962 }
3963}
3964
3965/// Check if a duplication function is required for taskloops.
3966static bool checkInitIsRequired(CodeGenFunction &CGF,
3967 ArrayRef<PrivateDataTy> Privates) {
3968 bool InitRequired = false;
3969 for (const PrivateDataTy &Pair : Privates) {
3970 if (Pair.second.isLocalPrivate())
3971 continue;
3972 const VarDecl *VD = Pair.second.PrivateCopy;
3973 const Expr *Init = VD->getAnyInitializer();
3974 InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
3975 !CGF.isTrivialInitializer(Init));
3976 if (InitRequired)
3977 break;
3978 }
3979 return InitRequired;
3980}
3981
3982
3983/// Emit task_dup function (for initialization of
3984/// private/firstprivate/lastprivate vars and last_iter flag)
3985/// \code
3986/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
3987/// lastpriv) {
3988/// // setup lastprivate flag
3989/// task_dst->last = lastpriv;
3990/// // could be constructor calls here...
3991/// }
3992/// \endcode
3993static llvm::Value *
3994emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
3995 const OMPExecutableDirective &D,
3996 QualType KmpTaskTWithPrivatesPtrQTy,
3997 const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3998 const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
3999 QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4000 ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4001 ASTContext &C = CGM.getContext();
4002 FunctionArgList Args;
4003 ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4004 KmpTaskTWithPrivatesPtrQTy,
4005 ImplicitParamDecl::Other);
4006 ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4007 KmpTaskTWithPrivatesPtrQTy,
4008 ImplicitParamDecl::Other);
4009 ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4010 ImplicitParamDecl::Other);
4011 Args.push_back(&DstArg);
4012 Args.push_back(&SrcArg);
4013 Args.push_back(&LastprivArg);
4014 const auto &TaskDupFnInfo =
4015 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4016 llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4017 std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4018 auto *TaskDup = llvm::Function::Create(
4019 TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4020 CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4021 TaskDup->setDoesNotRecurse();
4022 CodeGenFunction CGF(CGM);
4023 CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4024 Loc);
4025
4026 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4027 CGF.GetAddrOfLocalVar(&DstArg),
4028 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4029 // task_dst->liter = lastpriv;
4030 if (WithLastIter) {
4031 auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4032 LValue Base = CGF.EmitLValueForField(
4033 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4034 LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4035 llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4036 CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4037 CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4038 }
4039
4040 // Emit initial values for private copies (if any).
4041 assert(!Privates.empty());
4042 Address KmpTaskSharedsPtr = Address::invalid();
4043 if (!Data.FirstprivateVars.empty()) {
4044 LValue TDBase = CGF.EmitLoadOfPointerLValue(
4045 CGF.GetAddrOfLocalVar(&SrcArg),
4046 KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4047 LValue Base = CGF.EmitLValueForField(
4048 TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4049 KmpTaskSharedsPtr = Address(
4050 CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
4051 Base, *std::next(KmpTaskTQTyRD->field_begin(),
4052 KmpTaskTShareds)),
4053 Loc),
4054 CGM.getNaturalTypeAlignment(SharedsTy));
4055 }
4056 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4057 SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4058 CGF.FinishFunction();
4059 return TaskDup;
4060}
4061
4062/// Checks if a destructor function is required to be generated.
4063/// \return true if cleanups are required, false otherwise.
4064static bool
4065checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4066 ArrayRef<PrivateDataTy> Privates) {
4067 for (const PrivateDataTy &P : Privates) {
4068 if (P.second.isLocalPrivate())
4069 continue;
4070 QualType Ty = P.second.Original->getType().getNonReferenceType();
4071 if (Ty.isDestructedType())
4072 return true;
4073 }
4074 return false;
4075}
4076
4077namespace {
4078/// Loop generator for OpenMP iterator expression.
4079class OMPIteratorGeneratorScope final
4080 : public CodeGenFunction::OMPPrivateScope {
4081 CodeGenFunction &CGF;
4082 const OMPIteratorExpr *E = nullptr;
4083 SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
4084 SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
4085 OMPIteratorGeneratorScope() = delete;
4086 OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;
4087
4088public:
4089 OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
4090 : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
4091 if (!E)
4092 return;
4093 SmallVector<llvm::Value *, 4> Uppers;
4094 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4095 Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
4096 const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
4097 addPrivate(VD, [&CGF, VD]() {
4098 return CGF.CreateMemTemp(VD->getType(), VD->getName());
4099 });
4100 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4101 addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() {
4102 return CGF.CreateMemTemp(HelperData.CounterVD->getType(),
4103 "counter.addr");
4104 });
4105 }
4106 Privatize();
4107
4108 for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
4109 const OMPIteratorHelperData &HelperData = E->getHelper(I);
4110 LValue CLVal =
4111 CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
4112 HelperData.CounterVD->getType());
4113 // Counter = 0;
4114 CGF.EmitStoreOfScalar(
4115 llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
4116 CLVal);
4117 CodeGenFunction::JumpDest &ContDest =
4118 ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
4119 CodeGenFunction::JumpDest &ExitDest =
4120 ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
4121 // N = <number-of-iterations>;
4122 llvm::Value *N = Uppers[I];
4123 // cont:
4124 // if (Counter < N) goto body; else goto exit;
4125 CGF.EmitBlock(ContDest.getBlock());
4126 auto *CVal =
4127 CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
4128 llvm::Value *Cmp =
4129 HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
4130 ? CGF.Builder.CreateICmpSLT(CVal, N)
4131 : CGF.Builder.CreateICmpULT(CVal, N);
4132 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
4133 CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
4134 // body:
4135 CGF.EmitBlock(BodyBB);
4136 // Iteri = Begini + Counter * Stepi;
4137 CGF.EmitIgnoredExpr(HelperData.Update);
4138 }
4139 }
4140 ~OMPIteratorGeneratorScope() {
4141 if (!E)
4142 return;
4143 for (unsigned I = E->numOfIterators(); I > 0; --I) {
4144 // Counter = Counter + 1;
4145 const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
4146 CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
4147 // goto cont;
4148 CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
4149 // exit:
4150 CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
4151 }
4152 }
4153};
4154} // namespace
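// Editor's note: a control-flow sketch of what OMPIteratorGeneratorScope
// emits for a single `iterator(i = begin:end:step)` dimension; names are
// hypothetical, and the real code builds basic blocks rather than gotos.
static void iteratorScopeSketch(unsigned long N) {
  unsigned long Counter = 0; // Counter = 0;
iter_cont:                   // cont:
  if (Counter < N) {         // if (Counter < N) goto body; else goto exit;
    // body: Iter_i = Begin_i + Counter * Step_i; ...uses of the iterator...
    ++Counter;               // Counter = Counter + 1; (emitted in the dtor)
    goto iter_cont;          // goto cont;
  }
  // exit:
}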
4155
4156static std::pair<llvm::Value *, llvm::Value *>
4157getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
4158 const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
4159 llvm::Value *Addr;
4160 if (OASE) {
4161 const Expr *Base = OASE->getBase();
4162 Addr = CGF.EmitScalarExpr(Base);
4163 } else {
4164 Addr = CGF.EmitLValue(E).getPointer(CGF);
4165 }
4166 llvm::Value *SizeVal;
4167 QualType Ty = E->getType();
4168 if (OASE) {
4169 SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
4170 for (const Expr *SE : OASE->getDimensions()) {
4171 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
4172 Sz = CGF.EmitScalarConversion(
4173 Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
4174 SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
4175 }
4176 } else if (const auto *ASE =
4177 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4178 LValue UpAddrLVal =
4179 CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
4180 Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
4181 llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
4182 UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
4183 llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
4184 llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
4185 SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4186 } else {
4187 SizeVal = CGF.getTypeSize(Ty);
4188 }
4189 return std::make_pair(Addr, SizeVal);
4190}
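// Editor's note: concretely, for an array-shaping expression `([2][n])p` the
// size computed above is sizeof(*p) * 2 * n (an NUW-multiply chain over the
// dimensions); for an array section `a[lo:len]` it is the byte distance from
// &a[lo] to one past the section's last element; otherwise it is simply the
// size of the expression's type.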
4191
4192/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4193static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
4194 QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
4195 if (KmpTaskAffinityInfoTy.isNull()) {
4196 RecordDecl *KmpAffinityInfoRD =
4197 C.buildImplicitRecord("kmp_task_affinity_info_t");
4198 KmpAffinityInfoRD->startDefinition();
4199 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
4200 addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
4201 addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
4202 KmpAffinityInfoRD->completeDefinition();
4203 KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
4204 }
4205}
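// Editor's note: a C++ mirror of the implicit record built above, for
// reference; the struct name is hypothetical.
#include <cstddef>
#include <cstdint>

struct KmpTaskAffinityInfoSketch {
  intptr_t base_addr; // C.getIntPtrType()
  size_t len;         // C.getSizeType()
  uint32_t flags;     // 32-bit unsigned flags type
};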
4206
4207CGOpenMPRuntime::TaskResultTy
4208CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
4209 const OMPExecutableDirective &D,
4210 llvm::Function *TaskFunction, QualType SharedsTy,
4211 Address Shareds, const OMPTaskDataTy &Data) {
4212 ASTContext &C = CGM.getContext();
4213 llvm::SmallVector<PrivateDataTy, 4> Privates;
4214 // Aggregate privates and sort them by the alignment.
4215 const auto *I = Data.PrivateCopies.begin();
4216 for (const Expr *E : Data.PrivateVars) {
4217 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4218 Privates.emplace_back(
4219 C.getDeclAlign(VD),
4220 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4221 /*PrivateElemInit=*/nullptr));
4222 ++I;
4223 }
4224 I = Data.FirstprivateCopies.begin();
4225 const auto *IElemInitRef = Data.FirstprivateInits.begin();
4226 for (const Expr *E : Data.FirstprivateVars) {
4227 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4228 Privates.emplace_back(
4229 C.getDeclAlign(VD),
4230 PrivateHelpersTy(
4231 E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4232 cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
4233 ++I;
4234 ++IElemInitRef;
4235 }
4236 I = Data.LastprivateCopies.begin();
4237 for (const Expr *E : Data.LastprivateVars) {
4238 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4239 Privates.emplace_back(
4240 C.getDeclAlign(VD),
4241 PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4242 /*PrivateElemInit=*/nullptr));
4243 ++I;
4244 }
4245 for (const VarDecl *VD : Data.PrivateLocals) {
4246 if (isAllocatableDecl(VD))
4247 Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
4248 else
4249 Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
4250 }
4251 llvm::stable_sort(Privates,
4252 [](const PrivateDataTy &L, const PrivateDataTy &R) {
4253 return L.first > R.first;
4254 });
4255 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4256 // Build type kmp_routine_entry_t (if not built yet).
4257 emitKmpRoutineEntryT(KmpInt32Ty);
4258 // Build type kmp_task_t (if not built yet).
4259 if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
4260 if (SavedKmpTaskloopTQTy.isNull()) {
4261 SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4262 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4263 }
4264 KmpTaskTQTy = SavedKmpTaskloopTQTy;
4265 } else {
4266 assert((D.getDirectiveKind() == OMPD_task ||
4267 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
4268 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
4269 "Expected taskloop, task or target directive");
4270 if (SavedKmpTaskTQTy.isNull()) {
4271 SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4272 CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4273 }
4274 KmpTaskTQTy = SavedKmpTaskTQTy;
4275 }
4276 const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4277 // Build particular struct kmp_task_t for the given task.
4278 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
4279 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
4280 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4281 QualType KmpTaskTWithPrivatesPtrQTy =
4282 C.getPointerType(KmpTaskTWithPrivatesQTy);
4283 llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4284 llvm::Type *KmpTaskTWithPrivatesPtrTy =
4285 KmpTaskTWithPrivatesTy->getPointerTo();
4286 llvm::Value *KmpTaskTWithPrivatesTySize =
4287 CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4288 QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4289
4290 // Emit initial values for private copies (if any).
4291 llvm::Value *TaskPrivatesMap = nullptr;
4292 llvm::Type *TaskPrivatesMapTy =
4293 std::next(TaskFunction->arg_begin(), 3)->getType();
4294 if (!Privates.empty()) {
4295 auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4296 TaskPrivatesMap =
4297 emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
4298 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4299 TaskPrivatesMap, TaskPrivatesMapTy);
4300 } else {
4301 TaskPrivatesMap = llvm::ConstantPointerNull::get(
4302 cast<llvm::PointerType>(TaskPrivatesMapTy));
4303 }
4304 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4305 // kmp_task_t *tt);
4306 llvm::Function *TaskEntry = emitProxyTaskFunction(
4307 CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4308 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4309 TaskPrivatesMap);
4310
4311 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4312 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4313 // kmp_routine_entry_t *task_entry);
4314 // Task flags. Format is taken from
4315 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
4316 // description of kmp_tasking_flags struct.
4317 enum {
4318 TiedFlag = 0x1,
4319 FinalFlag = 0x2,
4320 DestructorsFlag = 0x8,
4321 PriorityFlag = 0x20,
4322 DetachableFlag = 0x40,
4323 };
4324 unsigned Flags = Data.Tied ? TiedFlag : 0;
4325 bool NeedsCleanup = false;
4326 if (!Privates.empty()) {
4327 NeedsCleanup =
4328 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
4329 if (NeedsCleanup)
4330 Flags = Flags | DestructorsFlag;
4331 }
4332 if (Data.Priority.getInt())
4333 Flags = Flags | PriorityFlag;
4334 if (D.hasClausesOfKind<OMPDetachClause>())
4335 Flags = Flags | DetachableFlag;
4336 llvm::Value *TaskFlags =
4337 Data.Final.getPointer()
4338 ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4339 CGF.Builder.getInt32(FinalFlag),
4340 CGF.Builder.getInt32(/*C=*/0))
4341 : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4342 TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4343 llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4344 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
4345 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
4346 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4347 TaskEntry, KmpRoutineEntryPtrTy)};
4348 llvm::Value *NewTask;
4349 if (D.hasClausesOfKind<OMPNowaitClause>()) {
4350 // Check if we have any device clause associated with the directive.
4351 const Expr *Device = nullptr;
4352 if (auto *C = D.getSingleClause<OMPDeviceClause>())
4353 Device = C->getDevice();
4354 // Emit the device ID if present; otherwise use the default value.
4355 llvm::Value *DeviceID;
4356 if (Device)
4357 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
4358 CGF.Int64Ty, /*isSigned=*/true);
4359 else
4360 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
4361 AllocArgs.push_back(DeviceID);
4362 NewTask = CGF.EmitRuntimeCall(
4363 OMPBuilder.getOrCreateRuntimeFunction(
4364 CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
4365 AllocArgs);
4366 } else {
4367 NewTask =
4368 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
4369 CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
4370 AllocArgs);
4371 }
4372 // Emit detach clause initialization.
4373 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
4374 // task_descriptor);
4375 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
4376 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
4377 LValue EvtLVal = CGF.EmitLValue(Evt);
4378
4379 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
4380 // int gtid, kmp_task_t *task);
4381 llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
4382 llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
4383 Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
4384 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
4385 OMPBuilder.getOrCreateRuntimeFunction(
4386 CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
4387 {Loc, Tid, NewTask});
4388 EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
4389 Evt->getExprLoc());
4390 CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
4391 }
4392 // Process affinity clauses.
4393 if (D.hasClausesOfKind<OMPAffinityClause>()) {
4394 // Process list of affinity data.
4395 ASTContext &C = CGM.getContext();
4396 Address AffinitiesArray = Address::invalid();
4397 // Calculate number of elements to form the array of affinity data.
4398 llvm::Value *NumOfElements = nullptr;
4399 unsigned NumAffinities = 0;
4400 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4401 if (const Expr *Modifier = C->getModifier()) {
4402 const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
4403 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4404 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4405 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4406 NumOfElements =
4407 NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
4408 }
4409 } else {
4410 NumAffinities += C->varlist_size();
4411 }
4412 }
4413 getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
4414 // Field ids in the kmp_task_affinity_info record.
4415 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
4416
4417 QualType KmpTaskAffinityInfoArrayTy;
4418 if (NumOfElements) {
4419 NumOfElements = CGF.Builder.CreateNUWAdd(
4420 llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
4421 auto *OVE = new (C) OpaqueValueExpr(
4422 Loc,
4423 C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
4424 VK_PRValue);
4425 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4426 RValue::get(NumOfElements));
4427 KmpTaskAffinityInfoArrayTy =
4428 C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
4429 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4430 // Properly emit variable-sized array.
4431 auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
4432 ImplicitParamDecl::Other);
4433 CGF.EmitVarDecl(*PD);
4434 AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
4435 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4436 /*isSigned=*/false);
4437 } else {
4438 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
4439 KmpTaskAffinityInfoTy,
4440 llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
4441 ArrayType::Normal, /*IndexTypeQuals=*/0);
4442 AffinitiesArray =
4443 CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
4444 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
4445 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
4446 /*isSigned=*/false);
4447 }
4448
4449 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
4450 // Fill the array with elements from clauses that have no iterator modifier.
4451 unsigned Pos = 0;
4452 bool HasIterator = false;
4453 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4454 if (C->getModifier()) {
4455 HasIterator = true;
4456 continue;
4457 }
4458 for (const Expr *E : C->varlists()) {
4459 llvm::Value *Addr;
4460 llvm::Value *Size;
4461 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4462 LValue Base =
4463 CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
4464 KmpTaskAffinityInfoTy);
4465 // affs[i].base_addr = &<Affinities[i].second>;
4466 LValue BaseAddrLVal = CGF.EmitLValueForField(
4467 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4468 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4469 BaseAddrLVal);
4470 // affs[i].len = sizeof(<Affinities[i].second>);
4471 LValue LenLVal = CGF.EmitLValueForField(
4472 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4473 CGF.EmitStoreOfScalar(Size, LenLVal);
4474 ++Pos;
4475 }
4476 }
4477 LValue PosLVal;
4478 if (HasIterator) {
4479 PosLVal = CGF.MakeAddrLValue(
4480 CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
4481 C.getSizeType());
4482 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4483 }
4484 // Process elements with iterators.
4485 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
4486 const Expr *Modifier = C->getModifier();
4487 if (!Modifier)
4488 continue;
4489 OMPIteratorGeneratorScope IteratorScope(
4490 CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
4491 for (const Expr *E : C->varlists()) {
4492 llvm::Value *Addr;
4493 llvm::Value *Size;
4494 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4495 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4496 LValue Base = CGF.MakeAddrLValue(
4497 CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
4498 // affs[i].base_addr = &<Affinities[i].second>;
4499 LValue BaseAddrLVal = CGF.EmitLValueForField(
4500 Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
4501 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4502 BaseAddrLVal);
4503 // affs[i].len = sizeof(<Affinities[i].second>);
4504 LValue LenLVal = CGF.EmitLValueForField(
4505 Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
4506 CGF.EmitStoreOfScalar(Size, LenLVal);
4507 Idx = CGF.Builder.CreateNUWAdd(
4508 Idx, llvm::ConstantInt::get(Idx->getType(), 1));
4509 CGF.EmitStoreOfScalar(Idx, PosLVal);
4510 }
4511 }
4512 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
4513 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
4514 // naffins, kmp_task_affinity_info_t *affin_list);
4515 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
4516 llvm::Value *GTid = getThreadID(CGF, Loc);
4517 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4518 AffinitiesArray.getPointer(), CGM.VoidPtrTy);
4519 // FIXME: Emit the function and ignore its result for now unless the
4520 // runtime function is properly implemented.
4521 (void)CGF.EmitRuntimeCall(
4522 OMPBuilder.getOrCreateRuntimeFunction(
4523 CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
4524 {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
4525 }
4526 llvm::Value *NewTaskNewTaskTTy =
4527 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4528 NewTask, KmpTaskTWithPrivatesPtrTy);
4529 LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4530 KmpTaskTWithPrivatesQTy);
4531 LValue TDBase =
4532 CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4533 // Fill the data in the resulting kmp_task_t record.
4534 // Copy shareds if there are any.
4535 Address KmpTaskSharedsPtr = Address::invalid();
4536 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4537 KmpTaskSharedsPtr =
4538 Address(CGF.EmitLoadOfScalar(
4539 CGF.EmitLValueForField(
4540 TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4541 KmpTaskTShareds)),
4542 Loc),
4543 CGM.getNaturalTypeAlignment(SharedsTy));
4544 LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
4545 LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
4546 CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
4547 }
4548 // Emit initial values for private copies (if any).
4549 TaskResultTy Result;
4550 if (!Privates.empty()) {
4551 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4552 SharedsTy, SharedsPtrTy, Data, Privates,
4553 /*ForDup=*/false);
4554 if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
4555 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4556 Result.TaskDupFn = emitTaskDupFunction(
4557 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4558 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4559 /*WithLastIter=*/!Data.LastprivateVars.empty());
4560 }
4561 }
4562 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4563 enum { Priority = 0, Destructors = 1 };
4564 // Provide pointer to function with destructors for privates.
4565 auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4566 const RecordDecl *KmpCmplrdataUD =
4567 (*FI)->getType()->getAsUnionType()->getDecl();
4568 if (NeedsCleanup) {
4569 llvm::Value *DestructorFn = emitDestructorsFunction(
4570 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4571 KmpTaskTWithPrivatesQTy);
4572 LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4573 LValue DestructorsLV = CGF.EmitLValueForField(
4574 Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4575 CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4576 DestructorFn, KmpRoutineEntryPtrTy),
4577 DestructorsLV);
4578 }
4579 // Set priority.
4580 if (Data.Priority.getInt()) {
4581 LValue Data2LV = CGF.EmitLValueForField(
4582 TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4583 LValue PriorityLV = CGF.EmitLValueForField(
4584 Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4585 CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4586 }
4587 Result.NewTask = NewTask;
4588 Result.TaskEntry = TaskEntry;
4589 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4590 Result.TDBase = TDBase;
4591 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4592 return Result;
4593}
4594
4595namespace {
4596/// Dependence kind for RTL.
4597enum RTLDependenceKindTy {
4598 DepIn = 0x01,
4599 DepInOut = 0x3,
4600 DepMutexInOutSet = 0x4,
4601 DepInOutSet = 0x8
4602};
4603 /// Field ids in the kmp_depend_info record.
4604enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4605} // namespace
4606
4607/// Translates internal dependency kind into the runtime kind.
4608static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
4609 RTLDependenceKindTy DepKind;
4610 switch (K) {
4611 case OMPC_DEPEND_in:
4612 DepKind = DepIn;
4613 break;
4614 // Out and InOut dependencies must use the same code.
4615 case OMPC_DEPEND_out:
4616 case OMPC_DEPEND_inout:
4617 DepKind = DepInOut;
4618 break;
4619 case OMPC_DEPEND_mutexinoutset:
4620 DepKind = DepMutexInOutSet;
4621 break;
4622 case OMPC_DEPEND_inoutset:
4623 DepKind = DepInOutSet;
4624 break;
4625 case OMPC_DEPEND_source:
4626 case OMPC_DEPEND_sink:
4627 case OMPC_DEPEND_depobj:
4628 case OMPC_DEPEND_unknown:
4629 llvm_unreachable("Unknown task dependence type");
4630 }
4631 return DepKind;
4632}
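
For reference, the mapping implemented by the switch above, using the RTLDependenceKindTy values defined earlier:

// OMPC_DEPEND_in            -> DepIn            (0x1)
// OMPC_DEPEND_out           -> DepInOut         (0x3)
// OMPC_DEPEND_inout         -> DepInOut         (0x3)
// OMPC_DEPEND_mutexinoutset -> DepMutexInOutSet (0x4)
// OMPC_DEPEND_inoutset      -> DepInOutSet      (0x8)
// source, sink, depobj and unknown kinds are rejected as unreachable.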
4633
4634 /// Builds the kmp_depend_info record, if it is not built yet, and the flags type.
4635static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4636 QualType &FlagsTy) {
4637 FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4638 if (KmpDependInfoTy.isNull()) {
4639 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4640 KmpDependInfoRD->startDefinition();
4641 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4642 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4643 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4644 KmpDependInfoRD->completeDefinition();
4645 KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4646 }
4647}
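
Read as ordinary C++, the implicit record built by getDependTypes() corresponds roughly to the sketch below. The struct and field names are illustrative assumptions (the code above only adds three unnamed fields in order); on typical targets the bool-width flags type is an unsigned 8-bit integer.

#include <cstddef>
#include <cstdint>

// Illustrative model of the implicit "kmp_depend_info" record:
struct DepInfoModel {
  std::intptr_t base_addr; // C.getIntPtrType(), indexed by BaseAddr
  std::size_t len;         // C.getSizeType(),   indexed by Len
  unsigned char flags;     // FlagsTy,           indexed by Flags
};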
4648
4649std::pair<llvm::Value *, LValue>
4650CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4651 SourceLocation Loc) {
4652 ASTContext &C = CGM.getContext();
4653 QualType FlagsTy;
4654 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4655 RecordDecl *KmpDependInfoRD =
4656 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4657 LValue Base = CGF.EmitLoadOfPointerLValue(
4658 DepobjLVal.getAddress(CGF),
4659 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4660 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4661 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4662 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
4663 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4664 Base.getTBAAInfo());
4665 Address DepObjAddr = CGF.Builder.CreateGEP(
4666 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4667 LValue NumDepsBase = CGF.MakeAddrLValue(
4668 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4669 // NumDeps = deps[i].base_addr;
4670 LValue BaseAddrLVal = CGF.EmitLValueForField(
4671 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4672 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
4673 return std::make_pair(NumDeps, Base);
4674}
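
The negative GEP above relies on the depobj memory layout: the element at index -1 is a header whose base_addr field stores the record count (written by emitDepobjDependClause() further down). A minimal C++ model, under the illustrative DepInfoModel sketch:

#include <cstddef>
#include <cstdint>
struct DepInfoModel { std::intptr_t base_addr; std::size_t len; unsigned char flags; };

// A depobj handle points at deps[0]; deps[-1] is the header record.
std::size_t numDepobjElements(const DepInfoModel *Deps) {
  return static_cast<std::size_t>(Deps[-1].base_addr);
}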
4675
4676static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4677 llvm::PointerUnion<unsigned *, LValue *> Pos,
4678 const OMPTaskDataTy::DependData &Data,
4679 Address DependenciesArray) {
4680 CodeGenModule &CGM = CGF.CGM;
4681 ASTContext &C = CGM.getContext();
4682 QualType FlagsTy;
4683 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4684 RecordDecl *KmpDependInfoRD =
4685 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4686 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4687
4688 OMPIteratorGeneratorScope IteratorScope(
4689 CGF, cast_or_null<OMPIteratorExpr>(
4690 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4691 : nullptr));
4692 for (const Expr *E : Data.DepExprs) {
4693 llvm::Value *Addr;
4694 llvm::Value *Size;
4695 std::tie(Addr, Size) = getPointerAndSize(CGF, E);
4696 LValue Base;
4697 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4698 Base = CGF.MakeAddrLValue(
4699 CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
4700 } else {
4701 LValue &PosLVal = *Pos.get<LValue *>();
4702 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4703 Base = CGF.MakeAddrLValue(
4704 CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
4705 }
4706 // deps[i].base_addr = &<Dependencies[i].second>;
4707 LValue BaseAddrLVal = CGF.EmitLValueForField(
4708 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4709 CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
4710 BaseAddrLVal);
4711 // deps[i].len = sizeof(<Dependencies[i].second>);
4712 LValue LenLVal = CGF.EmitLValueForField(
4713 Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4714 CGF.EmitStoreOfScalar(Size, LenLVal);
4715 // deps[i].flags = <Dependencies[i].first>;
4716 RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
4717 LValue FlagsLVal = CGF.EmitLValueForField(
4718 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4719 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4720 FlagsLVal);
4721 if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
4722 ++(*P);
4723 } else {
4724 LValue &PosLVal = *Pos.get<LValue *>();
4725 llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4726 Idx = CGF.Builder.CreateNUWAdd(Idx,
4727 llvm::ConstantInt::get(Idx->getType(), 1));
4728 CGF.EmitStoreOfScalar(Idx, PosLVal);
4729 }
4730 }
4731}
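
In C++ terms, the IR emitted by emitDependData() behaves like the following sketch. Names are illustrative; the PointerUnion distinguishes whether the position is a compile-time counter or a runtime memory location, which this model collapses into one reference parameter.

#include <cstddef>
#include <cstdint>
struct DepInfoModel { std::intptr_t base_addr; std::size_t len; unsigned char flags; };

// Fill one record per dependence expression, then advance the position.
void fillDependRecords(DepInfoModel *Deps, std::size_t &Pos, void *const *Addrs,
                       const std::size_t *Sizes, std::size_t N,
                       unsigned char DepKind) {
  for (std::size_t I = 0; I < N; ++I) {
    Deps[Pos].base_addr = reinterpret_cast<std::intptr_t>(Addrs[I]); // base_addr
    Deps[Pos].len = Sizes[I];                                        // len
    Deps[Pos].flags = DepKind;                                       // flags
    ++Pos;
  }
}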
4732
4733static SmallVector<llvm::Value *, 4>
4734emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4735 const OMPTaskDataTy::DependData &Data) {
4736 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4737 "Expected depobj dependency kind.");
4738 SmallVector<llvm::Value *, 4> Sizes;
4739 SmallVector<LValue, 4> SizeLVals;
4740 ASTContext &C = CGF.getContext();
4741 QualType FlagsTy;
4742 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4743 RecordDecl *KmpDependInfoRD =
4744 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4745 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4746 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4747 {
4748 OMPIteratorGeneratorScope IteratorScope(
4749 CGF, cast_or_null<OMPIteratorExpr>(
4750 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4751 : nullptr));
4752 for (const Expr *E : Data.DepExprs) {
4753 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4754 LValue Base = CGF.EmitLoadOfPointerLValue(
4755 DepobjLVal.getAddress(CGF),
4756 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4757 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4758 Base.getAddress(CGF), KmpDependInfoPtrT);
4759 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4760 Base.getTBAAInfo());
4761 Address DepObjAddr = CGF.Builder.CreateGEP(
4762 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4763 LValue NumDepsBase = CGF.MakeAddrLValue(
4764 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4765 // NumDeps = deps[i].base_addr;
4766 LValue BaseAddrLVal = CGF.EmitLValueForField(
4767 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4768 llvm::Value *NumDeps =
4769 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4770 LValue NumLVal = CGF.MakeAddrLValue(
4771 CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
4772 C.getUIntPtrType());
4773 CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
4774 NumLVal.getAddress(CGF));
4775 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
4776 llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
4777 CGF.EmitStoreOfScalar(Add, NumLVal);
4778 SizeLVals.push_back(NumLVal);
4779 }
4780 }
4781 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4782 llvm::Value *Size =
4783 CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
4784 Sizes.push_back(Size);
4785 }
4786 return Sizes;
4787}
4788
4789static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4790 LValue PosLVal,
4791 const OMPTaskDataTy::DependData &Data,
4792 Address DependenciesArray) {
4793 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4794 "Expected depobj dependency kind.");
4795 ASTContext &C = CGF.getContext();
4796 QualType FlagsTy;
4797 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4798 RecordDecl *KmpDependInfoRD =
4799 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4800 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4801 llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy);
4802 llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
4803 {
4804 OMPIteratorGeneratorScope IteratorScope(
4805 CGF, cast_or_null<OMPIteratorExpr>(
4806 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4807 : nullptr));
4808 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4809 const Expr *E = Data.DepExprs[I];
4810 LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
4811 LValue Base = CGF.EmitLoadOfPointerLValue(
4812 DepobjLVal.getAddress(CGF),
4813 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
4814 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4815 Base.getAddress(CGF), KmpDependInfoPtrT);
4816 Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(),
4817 Base.getTBAAInfo());
4818
4819 // Get number of elements in a single depobj.
4820 Address DepObjAddr = CGF.Builder.CreateGEP(
4821 Addr, llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
4822 LValue NumDepsBase = CGF.MakeAddrLValue(
4823 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4824 // NumDeps = deps[i].base_addr;
4825 LValue BaseAddrLVal = CGF.EmitLValueForField(
4826 NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4827 llvm::Value *NumDeps =
4828 CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc());
4829
4830 // memcpy the dependency data.
4831 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4832 ElSize,
4833 CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
4834 llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
4835 Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
4836 CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);
4837
4838 // Increase pos.
4839 // pos += numDeps;
4840 llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
4841 CGF.EmitStoreOfScalar(Add, PosLVal);
4842 }
4843 }
4844}
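
Conceptually, each depobj is spliced into the destination array with one memcpy; a sketch under the same illustrative record type:

#include <cstddef>
#include <cstdint>
#include <cstring>
struct DepInfoModel { std::intptr_t base_addr; std::size_t len; unsigned char flags; };

// Copy all records of one depobj (count held in the header at index -1)
// into DstArray at Pos, then advance Pos by the number of records.
void spliceDepobj(DepInfoModel *DstArray, std::size_t &Pos,
                  const DepInfoModel *Depobj) {
  std::size_t NumDeps = static_cast<std::size_t>(Depobj[-1].base_addr);
  std::memcpy(&DstArray[Pos], Depobj, NumDeps * sizeof(DepInfoModel));
  Pos += NumDeps;
}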
4845
4846std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4847 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4848 SourceLocation Loc) {
4849 if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
4850 return D.DepExprs.empty();
4851 }))
4852 return std::make_pair(nullptr, Address::invalid());
4853 // Process list of dependencies.
4854 ASTContext &C = CGM.getContext();
4855 Address DependenciesArray = Address::invalid();
4856 llvm::Value *NumOfElements = nullptr;
4857 unsigned NumDependencies = std::accumulate(
4858 Dependencies.begin(), Dependencies.end(), 0,
4859 [](unsigned V, const OMPTaskDataTy::DependData &D) {
4860 return D.DepKind == OMPC_DEPEND_depobj
4861 ? V
4862 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4863 });
4864 QualType FlagsTy;
4865 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4866 bool HasDepobjDeps = false;
4867 bool HasRegularWithIterators = false;
4868 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4869 llvm::Value *NumOfRegularWithIterators =
4870 llvm::ConstantInt::get(CGF.IntPtrTy, 0);
4871 // Calculate the number of depobj dependencies and regular deps with iterators.
4872 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4873 if (D.DepKind == OMPC_DEPEND_depobj) {
4874 SmallVector<llvm::Value *, 4> Sizes =
4875 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4876 for (llvm::Value *Size : Sizes) {
4877 NumOfDepobjElements =
4878 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
4879 }
4880 HasDepobjDeps = true;
4881 continue;
4882 }
4883 // Include number of iterations, if any.
4884
4885 if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
4886 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4887 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4888 Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
4889 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4890 Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
4891 NumOfRegularWithIterators =
4892 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
4893 }
4894 HasRegularWithIterators = true;
4895 continue;
4896 }
4897 }
4898
4899 QualType KmpDependInfoArrayTy;
4900 if (HasDepobjDeps || HasRegularWithIterators) {
4901 NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
4902 /*isSigned=*/false);
4903 if (HasDepobjDeps) {
4904 NumOfElements =
4905 CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
4906 }
4907 if (HasRegularWithIterators) {
4908 NumOfElements =
4909 CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
4910 }
4911 auto *OVE = new (C) OpaqueValueExpr(
4912 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4913 VK_PRValue);
4914 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4915 RValue::get(NumOfElements));
4916 KmpDependInfoArrayTy =
4917 C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
4918 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
4919 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
4920 // Properly emit variable-sized array.
4921 auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
4922 ImplicitParamDecl::Other);
4923 CGF.EmitVarDecl(*PD);
4924 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4925 NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
4926 /*isSigned=*/false);
4927 } else {
4928 KmpDependInfoArrayTy = C.getConstantArrayType(
4929 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4930 ArrayType::Normal, /*IndexTypeQuals=*/0);
4931 DependenciesArray =
4932 CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4933 DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
4934 NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
4935 /*isSigned=*/false);
4936 }
4937 unsigned Pos = 0;
4938 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4939 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4940 Dependencies[I].IteratorExpr)
4941 continue;
4942 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4943 DependenciesArray);
4944 }
4945 // Copy regular dependencies with iterators.
4946 LValue PosLVal = CGF.MakeAddrLValue(
4947 CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
4948 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
4949 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4950 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4951 !Dependencies[I].IteratorExpr)
4952 continue;
4953 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4954 DependenciesArray);
4955 }
4956 // Copy final depobj arrays without iterators.
4957 if (HasDepobjDeps) {
4958 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4959 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4960 continue;
4961 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4962 DependenciesArray);
4963 }
4964 }
4965 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4966 DependenciesArray, CGF.VoidPtrTy);
4967 return std::make_pair(NumOfElements, DependenciesArray);
4968}
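
To summarize the sizing logic above, the runtime element count is assembled from three contributions; only when the last two are non-zero does the function fall back to a VLA-backed array:

// NumOfElements = NumDependencies            // regular deps without iterators
//               + NumOfDepobjElements        // sum of all depobj record counts
//               + NumOfRegularWithIterators  // sum over iterators of
//                                            //   upper_bound * deps_per_clause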
4969
4970Address CGOpenMPRuntime::emitDepobjDependClause(
4971 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4972 SourceLocation Loc) {
4973 if (Dependencies.DepExprs.empty())
4974 return Address::invalid();
4975 // Process list of dependencies.
4976 ASTContext &C = CGM.getContext();
4977 Address DependenciesArray = Address::invalid();
4978 unsigned NumDependencies = Dependencies.DepExprs.size();
4979 QualType FlagsTy;
4980 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4981 RecordDecl *KmpDependInfoRD =
4982 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4983
4984 llvm::Value *Size;
4985 // Define type kmp_depend_info[<Dependencies.size()>];
4986 // For depobj reserve one extra element to store the number of elements.
4987 // It is required to handle depobj(x) update(in) construct.
4988 // kmp_depend_info[<Dependencies.size()>] deps;
4989 llvm::Value *NumDepsVal;
4990 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4991 if (const auto *IE =
4992 cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
4993 NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
4994 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4995 llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
4996 Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
4997 NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
4998 }
4999 Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
5000 NumDepsVal);
5001 CharUnits SizeInBytes =
5002 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
5003 llvm::Value *RecSize = CGM.getSize(SizeInBytes);
5004 Size = CGF.Builder.CreateNUWMul(Size, RecSize);
5005 NumDepsVal =
5006 CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
5007 } else {
5008 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5009 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
5010 nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
5011 CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
5012 Size = CGM.getSize(Sz.alignTo(Align));
5013 NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
5014 }
5015 // Need to allocate the array dynamically.
5016 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5017 // Use default allocator.
5018 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5019 llvm::Value *Args[] = {ThreadID, Size, Allocator};
5020
5021 llvm::Value *Addr =
5022 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5023 CGM.getModule(), OMPRTL___kmpc_alloc),
5024 Args, ".dep.arr.addr");
5025 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5026 Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo());
5027 DependenciesArray = Address(Addr, Align);
5028 // Write the number of elements into the first element of the array for depobj.
5029 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
5030 // deps[i].base_addr = NumDependencies;
5031 LValue BaseAddrLVal = CGF.EmitLValueForField(
5032 Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5033 CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
5034 llvm::PointerUnion<unsigned *, LValue *> Pos;
5035 unsigned Idx = 1;
5036 LValue PosLVal;
5037 if (Dependencies.IteratorExpr) {
5038 PosLVal = CGF.MakeAddrLValue(
5039 CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
5040 C.getSizeType());
5041 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
5042 /*IsInit=*/true);
5043 Pos = &PosLVal;
5044 } else {
5045 Pos = &Idx;
5046 }
5047 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
5048 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5049 CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy);
5050 return DependenciesArray;
5051}
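
An illustrative view of the allocation emitted above via __kmpc_alloc, showing why one extra element is reserved:

//   [ header: base_addr = NumDepsVal ][ dep 0 ][ dep 1 ] ... [ dep n-1 ]
//     ^ start of the allocation        ^ address returned to the caller
//
// The returned address is CreateConstGEP(DependenciesArray, 1), i.e. the first
// real record; getDepobjElements() later reads the count back through a -1 GEP.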
5052
5053void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
5054 SourceLocation Loc) {
5055 ASTContext &C = CGM.getContext();
5056 QualType FlagsTy;
5057 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5058 LValue Base = CGF.EmitLoadOfPointerLValue(
5059 DepobjLVal.getAddress(CGF),
5060 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5061 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
5062 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5063 Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy));
5064 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
5065 Addr.getElementType(), Addr.getPointer(),
5066 llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
5067 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
5068 CGF.VoidPtrTy);
5069 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5070 // Use default allocator.
5071 llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5072 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
5073
5074 // __kmpc_free(gtid, addr, nullptr);
5075 (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5076 CGM.getModule(), OMPRTL___kmpc_free),
5077 Args);
5078}
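
A model of the pointer arithmetic in emitDestroyClause(): the depobj handle points at the first real record, while the allocation starts one element earlier at the header, so the emitted code steps back before freeing:

//   kmp_depend_info *Handle = ...;           // loaded from the depobj lvalue
//   void *Allocation = (void *)(Handle - 1); // step back over the header
//   __kmpc_free(gtid, Allocation, /*allocator=*/nullptr);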
5079
5080void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
5081 OpenMPDependClauseKind NewDepKind,
5082 SourceLocation Loc) {
5083 ASTContext &C = CGM.getContext();
5084 QualType FlagsTy;
5085 getDependTypes(C, KmpDependInfoTy, FlagsTy);
5086 RecordDecl *KmpDependInfoRD =
5087 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5088 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
5089 llvm::Value *NumDeps;
5090 LValue Base;
5091 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
5092
5093 Address Begin = Base.getAddress(CGF);
5094 // Compute the end of the dependency array.
5095 llvm::Value *End = CGF.Builder.CreateGEP(
5096 Begin.getElementType(), Begin.getPointer(), NumDeps);
5097 // The basic structure here is a while-do loop.
5098 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
5099 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
5100 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5101 CGF.EmitBlock(BodyBB);
5102 llvm::PHINode *ElementPHI =
5103 CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
5104 ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
5105 Begin = Address(ElementPHI, Begin.getAlignment());
5106 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
5107 Base.getTBAAInfo());
5108 // deps[i].flags = NewDepKind;
5109 RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
5110 LValue FlagsLVal = CGF.EmitLValueForField(
5111 Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5112 CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5113 FlagsLVal);
5114
5115 // Shift the address forward by one element.
5116 Address ElementNext =
5117 CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
5118 ElementPHI->addIncoming(ElementNext.getPointer(),
5119 CGF.Builder.GetInsertBlock());
5120 llvm::Value *IsEmpty =
5121 CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
5122 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5123 // Done.
5124 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5125}
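
A C++ model of the loop emitted by emitUpdateClause(), again under the illustrative record sketch: every record in the depobj gets its flags rewritten with the new dependence kind. (The emitted IR is do-while shaped; the for-loop form below is equivalent when the count is non-zero.)

#include <cstddef>
#include <cstdint>
struct DepInfoModel { std::intptr_t base_addr; std::size_t len; unsigned char flags; };

void updateDepobjFlags(DepInfoModel *Begin, std::size_t NumDeps,
                       unsigned char NewKind) {
  for (DepInfoModel *It = Begin, *End = Begin + NumDeps; It != End; ++It)
    It->flags = NewKind; // deps[i].flags = NewDepKind;
}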
5126
5127void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
5128 const OMPExecutableDirective &D,
5129 llvm::Function *TaskFunction,
5130 QualType SharedsTy, Address Shareds,
5131 const Expr *IfCond,
5132 const OMPTaskDataTy &Data) {
5133 if (!CGF.HaveInsertPoint())
5134 return;
5135
5136 TaskResultTy Result =
5137 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5138 llvm::Value *NewTask = Result.NewTask;
5139 llvm::Function *TaskEntry = Result.TaskEntry;
5140 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5141 LValue TDBase = Result.TDBase;
5142 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5143 // Process list of dependences.
5144 Address DependenciesArray = Address::invalid();
5145 llvm::Value *NumOfElements;
5146 std::tie(NumOfElements, DependenciesArray) =
5147 emitDependClause(CGF, Data.Dependences, Loc);
5148
5149 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5150 // libcall.
5151 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5152 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5153 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5154 // list is not empty
5155 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5156 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5157 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
5158 llvm::Value *DepTaskArgs[7];
5159 if (!Data.Dependences.empty()) {
5160 DepTaskArgs[0] = UpLoc;
5161 DepTaskArgs[1] = ThreadID;
5162 DepTaskArgs[2] = NewTask;
5163 DepTaskArgs[3] = NumOfElements;
5164 DepTaskArgs[4] = DependenciesArray.getPointer();
5165 DepTaskArgs[5] = CGF.Builder.getInt32(0);
5166 DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5167 }
5168 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
5169 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
5170 if (!Data.Tied) {
5171 auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5172 LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5173 CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5174 }
5175 if (!Data.Dependences.empty()) {
5176 CGF.EmitRuntimeCall(
5177 OMPBuilder.getOrCreateRuntimeFunction(
5178 CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
5179 DepTaskArgs);
5180 } else {
5181 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5182 CGM.getModule(), OMPRTL___kmpc_omp_task),
5183 TaskArgs);
5184 }
5185 // Check if the parent region is untied and build the return for an untied task.
5186 if (auto *Region =
5187 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5188 Region->emitUntiedSwitch(CGF);
5189 };
5190
5191 llvm::Value *DepWaitTaskArgs[6];
5192 if (!Data.Dependences.empty()) {
5193 DepWaitTaskArgs[0] = UpLoc;
5194 DepWaitTaskArgs[1] = ThreadID;
5195 DepWaitTaskArgs[2] = NumOfElements;
5196 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5197 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5198 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5199 }
5200 auto &M = CGM.getModule();
5201 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
5202 TaskEntry, &Data, &DepWaitTaskArgs,
5203 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5204 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5205 // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5206 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5207 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5208 // is specified.
5209 if (!Data.Dependences.empty())
5210 CGF.EmitRuntimeCall(
5211 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
5212 DepWaitTaskArgs);
5213 // Call proxy_task_entry(gtid, new_task);
5214 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5215 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5216 Action.Enter(CGF);
5217 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5218 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5219 OutlinedFnArgs);
5220 };
5221
5222 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5223 // kmp_task_t *new_task);
5224 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5225 // kmp_task_t *new_task);
5226 RegionCodeGenTy RCG(CodeGen);
5227 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
5228 M, OMPRTL___kmpc_omp_task_begin_if0),
5229 TaskArgs,
5230 OMPBuilder.getOrCreateRuntimeFunction(
5231 M, OMPRTL___kmpc_omp_task_complete_if0),
5232 TaskArgs);
5233 RCG.setAction(Action);
5234 RCG(CGF);
5235 };
5236
5237 if (IfCond) {
5238 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5239 } else {
5240 RegionCodeGenTy ThenRCG(ThenCodeGen);
5241 ThenRCG(CGF);
5242 }
5243}
5244
5245void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
5246 const OMPLoopDirective &D,
5247 llvm::Function *TaskFunction,
5248 QualType SharedsTy, Address Shareds,
5249 const Expr *IfCond,
5250 const OMPTaskDataTy &Data) {
5251 if (!CGF.HaveInsertPoint())
5252 return;
5253 TaskResultTy Result =
5254 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5255 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5256 // libcall.
5257 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5258 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5259 // sched, kmp_uint64 grainsize, void *task_dup);
5260 llvm::Value *ThreadID = getThreadID(CGF, Loc);
5261 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5262 llvm::Value *IfVal;
5263 if (IfCond) {
5264 IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5265 /*isSigned=*/true);
5266 } else {
5267 IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5268 }
5269
5270 LValue LBLVal = CGF.EmitLValueForField(
5271 Result.TDBase,
5272 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5273 const auto *LBVar =
5274 cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5275 CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
5276 LBLVal.getQuals(),
5277 /*IsInitializer=*/true);
5278 LValue UBLVal = CGF.EmitLValueForField(
5279 Result.TDBase,
5280 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5281 const auto *UBVar =
5282 cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5283 CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
5284 UBLVal.getQuals(),
5285 /*IsInitializer=*/true);
5286 LValue StLVal = CGF.EmitLValueForField(
5287 Result.TDBase,
5288 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5289 const auto *StVar =
5290 cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5291 CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
5292 StLVal.getQuals(),
5293 /*IsInitializer=*/true);
5294 // Store reductions address.
5295 LValue RedLVal = CGF.EmitLValueForField(
5296 Result.TDBase,
5297 *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5298 if (Data.Reductions) {
5299 CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5300 } else {
5301 CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
5302 CGF.getContext().VoidPtrTy);
5303 }
5304 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5305 llvm::Value *TaskArgs[] = {
5306 UpLoc,
5307 ThreadID,
5308 Result.NewTask,
5309 IfVal,
5310 LBLVal.getPointer(CGF),
5311 UBLVal.getPointer(CGF),
5312 CGF.EmitLoadOfScalar(StLVal, Loc),
5313 llvm::ConstantInt::getSigned(
5314 CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
5315 llvm::ConstantInt::getSigned(
5316 CGF.IntTy, Data.Schedule.getPointer()
5317 ? Data.Schedule.getInt() ? NumTasks : Grainsize
5318 : NoSchedule),
5319 Data.Schedule.getPointer()
5320 ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5321 /*isSigned=*/false)
5322 : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5323 Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5324 Result.TaskDupFn, CGF.VoidPtrTy)
5325 : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5326 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
5327 CGM.getModule(), OMPRTL___kmpc_taskloop),
5328 TaskArgs);
5329}
5330
5331/// Emit reduction operation for each element of array (required for
5332/// array sections) LHS op = RHS.
5333/// \param Type Type of array.
5334/// \param LHSVar Variable on the left side of the reduction operation
5335/// (references element of array in original variable).
5336/// \param RHSVar Variable on the right side of the reduction operation
5337/// (references element of array in original variable).
5338/// \param RedOpGen Generator of reduction operation with use of LHSVar and
5339/// RHSVar.
5340static void EmitOMPAggregateReduction(
5341 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5342 const VarDecl *RHSVar,
5343 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5344 const Expr *, const Expr *)> &RedOpGen,
5345 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5346 const Expr *UpExpr = nullptr) {
5347 // Perform the element-by-element reduction.
5348 QualType ElementTy;
5349 Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5350 Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5351
5352 // Drill down to the base element type on both arrays.
5353 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5354 llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5355
5356 llvm::Value *RHSBegin = RHSAddr.getPointer();
5357 llvm::Value *LHSBegin = LHSAddr.getPointer();
5358 // Cast from pointer to array type to pointer to single element.
5359 llvm::Value *LHSEnd =
5360 CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
5361 // The basic structure here is a while-do loop.
5362 llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5363 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5364 llvm::Value *IsEmpty =
5365 CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5366 CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5367
5368 // Enter the loop body, making that address the current address.
5369 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5370 CGF.EmitBlock(BodyBB);
5371
5372 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5373
5374 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5375 RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5376 RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5377 Address RHSElementCurrent =
5378 Address(RHSElementPHI,
5379 RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5380
5381 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5382 LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5383 LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5384 Address LHSElementCurrent =
5385 Address(LHSElementPHI,
5386 LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5387
5388 // Emit copy.
5389 CodeGenFunction::OMPPrivateScope Scope(CGF);
5390 Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5391 Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5392 Scope.Privatize();
5393 RedOpGen(CGF, XExpr, EExpr, UpExpr);
5394 Scope.ForceCleanup();
5395
5396 // Shift the address forward by one element.
5397 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5398 LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
5399 "omp.arraycpy.dest.element");
5400 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5401 RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
5402 "omp.arraycpy.src.element");
5403 // Check whether we've reached the end.
5404 llvm::Value *Done =
5405 CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5406 CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5407 LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5408 RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5409
5410 // Done.
5411 CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5412}
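
A C++ model of the array-section reduction loop generated above: apply the combiner to corresponding elements until the LHS end pointer is reached. (The emitted IR has an up-front emptiness check followed by a do-while body; this model is equivalent.)

#include <cstddef>

template <typename T, typename Combiner>
void aggregateReduce(T *Lhs, const T *Rhs, std::size_t NumElements,
                     Combiner RedOp) {
  for (const T *LhsEnd = Lhs + NumElements; Lhs != LhsEnd; ++Lhs, ++Rhs)
    RedOp(*Lhs, *Rhs); // e.g. *Lhs += *Rhs; or a UDR combiner call
}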
5413
5414 /// Emit the reduction combiner. If the combiner is a simple expression, emit it
5415 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a call
5416 /// to the UDR combiner function.
5417static void emitReductionCombiner(CodeGenFunction &CGF,
5418 const Expr *ReductionOp) {
5419 if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5420 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5421 if (const auto *DRE =
5422 dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5423 if (const auto *DRD =
5424 dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5425 std::pair<llvm::Function *, llvm::Function *> Reduction =
5426 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5427 RValue Func = RValue::get(Reduction.first);
5428 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5429 CGF.EmitIgnoredExpr(ReductionOp);
5430 return;
5431 }
5432 CGF.EmitIgnoredExpr(ReductionOp);
5433}
5434
5435llvm::Function *CGOpenMPRuntime::emitReductionFunction(
5436 SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5437 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5438 ArrayRef<const Expr *> ReductionOps) {
5439 ASTContext &C = CGM.getContext();
5440
5441 // void reduction_func(void *LHSArg, void *RHSArg);
5442 FunctionArgList Args;
5443 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5444 ImplicitParamDecl::Other);
5445 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5446 ImplicitParamDecl::Other);
5447 Args.push_back(&LHSArg);
5448 Args.push_back(&RHSArg);
5449 const auto &CGFI =
5450 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5451 std::string Name = getName({"omp", "reduction", "reduction_func"});
5452 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
5453 llvm::GlobalValue::InternalLinkage, Name,
5454 &CGM.getModule());
5455 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
5456 Fn->setDoesNotRecurse();
5457 CodeGenFunction CGF(CGM);
5458 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5459
5460 // Dst = (void*[n])(LHSArg);
5461 // Src = (void*[n])(RHSArg);
5462 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5463 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5464 ArgsType), CGF.getPointerAlign());
5465 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5466 CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5467 ArgsType), CGF.getPointerAlign());
5468
5469 // ...
5470 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5471 // ...
5472 CodeGenFunction::OMPPrivateScope Scope(CGF);
5473 auto IPriv = Privates.begin();
5474 unsigned Idx = 0;
5475 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5476 const auto *RHSVar =
5477 cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5478 Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5479 return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5480 });
5481 const auto *LHSVar =
5482 cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5483 Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5484 return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5485 });
5486 QualType PrivTy = (*IPriv)->getType();
5487 if (PrivTy->isVariablyModifiedType()) {
5488 // Get array size and emit VLA type.
5489 ++Idx;
5490 Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5491 llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5492 const VariableArrayType *VLA =
5493 CGF.getContext().getAsVariableArrayType(PrivTy);
5494 const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5495 CodeGenFunction::OpaqueValueMapping OpaqueMap(
5496 CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5497 CGF.EmitVariablyModifiedType(PrivTy);
5498 }
5499 }
5500 Scope.Privatize();
5501 IPriv = Privates.begin();
5502 auto ILHS = LHSExprs.begin();
5503 auto IRHS = RHSExprs.begin();
5504 for (const Expr *E : ReductionOps) {
5505 if ((*IPriv)->getType()->isArrayType()) {
5506 // Emit reduction for array section.
5507 const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5508 const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5509 EmitOMPAggregateReduction(
5510 CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5511 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5512 emitReductionCombiner(CGF, E);
5513 });
5514 } else {
5515 // Emit reduction for array subscript or single variable.
5516 emitReductionCombiner(CGF, E);
5517 }
5518 ++IPriv;
5519 ++ILHS;
5520 ++IRHS;
5521 }
5522 Scope.ForceCleanup();
5523 CGF.FinishFunction();
5524 return Fn;
5525}
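
The shape of the generated function, written out as pseudo-C++ (illustrative; variably modified reduction items additionally occupy a size slot in each array, as handled above):

//   void omp.reduction.reduction_func(void *LHSArg, void *RHSArg) {
//     void **lhs = (void **)LHSArg;
//     void **rhs = (void **)RHSArg;
//     *(T0 *)lhs[0] = RedOp0(*(T0 *)lhs[0], *(T0 *)rhs[0]);
//     ...
//     *(Tn-1 *)lhs[n-1] = RedOpn-1(*(Tn-1 *)lhs[n-1], *(Tn-1 *)rhs[n-1]);
//   }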
5526
5527void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
5528 const Expr *ReductionOp,
5529 const Expr *PrivateRef,
5530 const DeclRefExpr *LHS,
5531 const DeclRefExpr *RHS) {
5532 if (PrivateRef->getType()->isArrayType()) {
5533 // Emit reduction for array section.
5534 const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5535 const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5536 EmitOMPAggregateReduction(
5537 CGF, PrivateRef->getType(), LHSVar, RHSVar,
5538 [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5539 emitReductionCombiner(CGF, ReductionOp);
5540 });
5541 } else {
5542 // Emit reduction for array subscript or single variable.
5543 emitReductionCombiner(CGF, ReductionOp);
5544 }
5545}
5546
5547void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5548 ArrayRef<const Expr *> Privates,
5549 ArrayRef<const Expr *> LHSExprs,
5550 ArrayRef<const Expr *> RHSExprs,
5551 ArrayRef<const Expr *> ReductionOps,
5552 ReductionOptionsTy Options) {
5553 if (!CGF.HaveInsertPoint())
5554 return;
5555
5556 bool WithNowait = Options.WithNowait;
5557 bool SimpleReduction = Options.SimpleReduction;
5558
5559 // The following code is emitted for the reduction:
5560 //
5561 // static kmp_critical_name lock = { 0 };
5562 //
5563 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5564 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5565 // ...
5566 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5567 // *(Type<n>-1*)rhs[<n>-1]);
5568 // }
5569 //
5570 // ...
5571 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5572 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5573 // RedList, reduce_func, &<lock>)) {
5574 // case 1:
5575 // ...
5576 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5577 // ...
5578 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5579 // break;
5580 // case 2:
5581 // ...
5582 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5583 // ...
5584 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5585 // break;
5586 // default:;
5587 // }
5588 //
5589 // If SimpleReduction is true, only the following code is generated:
5590 // ...
5591 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5592 // ...
5593
5594 ASTContext &C = CGM.getContext();
5595
5596 if (SimpleReduction) {
5597 CodeGenFunction::RunCleanupsScope Scope(CGF);
5598 auto IPriv = Privates.begin();
5599 auto ILHS = LHSExprs.begin();
5600 auto IRHS = RHSExprs.begin();
5601 for (const Expr *E : ReductionOps) {
5602 emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5603 cast<DeclRefExpr>(*IRHS));
5604 ++IPriv;
5605 ++ILHS;
5606 ++IRHS;
5607 }
5608 return;
5609 }
5610
5611 // 1. Build a list of reduction variables.
5612 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5613 auto Size = RHSExprs.size();
5614 for (const Expr *E : Privates) {
5615 if (E->getType()->isVariablyModifiedType())
5616 // Reserve a slot for the array size.
5617 ++Size;
5618 }
5619 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5620 QualType ReductionArrayTy =
5621 C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
5622 /*IndexTypeQuals=*/0);
5623 Address ReductionList =
5624 CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5625 auto IPriv = Privates.begin();
5626 unsigned Idx = 0;
5627 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5628 Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5629 CGF.Builder.CreateStore(
5630 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5631 CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
5632 Elem);
5633 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5634 // Store array size.
5635 ++Idx;
5636 Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5637 llvm::Value *Size = CGF.Builder.CreateIntCast(
5638 CGF.getVLASize(
5639 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5640 .NumElts,
5641 CGF.SizeTy, /*isSigned=*/false);
5642 CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5643 Elem);
5644 }
5645 }
5646
5647 // 2. Emit reduce_func().
5648 llvm::Function *ReductionFn = emitReductionFunction(
5649 Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5650 LHSExprs, RHSExprs, ReductionOps);
5651
5652 // 3. Create static kmp_critical_name lock = { 0 };
5653 std::string Name = getName({"reduction"});
5654 llvm::Value *Lock = getCriticalRegionLock(Name);
5655
5656 // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5657 // RedList, reduce_func, &<lock>);
5658 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5659 llvm::Value *ThreadId = getThreadID(CGF, Loc);
5660 llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5661 llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5662 ReductionList.getPointer(), CGF.VoidPtrTy);
5663 llvm::Value *Args[] = {
5664 IdentTLoc, // ident_t *<loc>
5665 ThreadId, // i32 <gtid>
5666 CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5667 ReductionArrayTySize, // size_type sizeof(RedList)
5668 RL, // void *RedList
5669 ReductionFn, // void (*) (void *, void *) <reduce_func>
5670 Lock // kmp_critical_name *&<lock>
5671 };
5672 llvm::Value *Res = CGF.EmitRuntimeCall(
5673 OMPBuilder.getOrCreateRuntimeFunction(
5674 CGM.getModule(),
5675 WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
5676 Args);
5677
5678 // 5. Build switch(res)
5679 llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5680 llvm::SwitchInst *SwInst =
5681 CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5682
5683 // 6. Build case 1:
5684 // ...
5685 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5686 // ...
5687 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5688 // break;
5689 llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5690 SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5691 CGF.EmitBlock(Case1BB);
5692
5693 // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5694 llvm::Value *EndArgs[] = {
5695 IdentTLoc, // ident_t *<loc>
5696 ThreadId, // i32 <gtid>
5697 Lock // kmp_critical_name *&<lock>
5698 };
5699 auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5700 CodeGenFunction &CGF, PrePostActionTy &Action) {
5701 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5702 auto IPriv = Privates.begin();
5703 auto ILHS = LHSExprs.begin();
5704 auto IRHS = RHSExprs.begin();
5705 for (const Expr *E : ReductionOps) {
5706 RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5707 cast<DeclRefExpr>(*IRHS));
5708 ++IPriv;
5709 ++ILHS;
5710 ++IRHS;
5711 }
5712 };
5713 RegionCodeGenTy RCG(CodeGen);
5714 CommonActionTy Action(
5715 nullptr, llvm::None,
5716 OMPBuilder.getOrCreateRuntimeFunction(
5717 CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
5718 : OMPRTL___kmpc_end_reduce),
5719 EndArgs);
5720 RCG.setAction(Action);
5721 RCG(CGF);
5722
5723 CGF.EmitBranch(DefaultBB);
5724
5725 // 7. Build case 2:
5726 // ...
5727 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5728 // ...
5729 // break;
5730 llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5731 SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5732 CGF.EmitBlock(Case2BB);
5733
5734 auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5735 CodeGenFunction &CGF, PrePostActionTy &Action) {
5736 auto ILHS = LHSExprs.begin();
5737 auto IRHS = RHSExprs.begin();
5738 auto IPriv = Privates.begin();
5739 for (const Expr *E : ReductionOps) {
5740 const Expr *XExpr = nullptr;
5741 const Expr *EExpr = nullptr;
5742 const Expr *UpExpr = nullptr;
5743 BinaryOperatorKind BO = BO_Comma;
5744 if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5745 if (BO->getOpcode() == BO_Assign) {
5746 XExpr = BO->getLHS();
5747 UpExpr = BO->getRHS();
5748 }
5749 }
5750 // Try to emit update expression as a simple atomic.
5751 const Expr *RHSExpr = UpExpr;
5752 if (RHSExpr) {
5753 // Analyze RHS part of the whole expression.
5754 if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5755 RHSExpr->IgnoreParenImpCasts())) {
5756 // If this is a conditional operator, analyze its condition for
5757 // min/max reduction operator.
5758 RHSExpr = ACO->getCond();
5759 }
5760 if (const auto *BORHS =
5761 dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5762 EExpr = BORHS->getRHS();
5763 BO = BORHS->getOpcode();
5764 }
5765 }
5766 if (XExpr) {
5767 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5768 auto &&AtomicRedGen = [BO, VD,
5769 Loc](CodeGenFunction &CGF, const Expr *XExpr,
5770 const Expr *EExpr, const Expr *UpExpr) {
5771 LValue X = CGF.EmitLValue(XExpr);
5772 RValue E;
5773 if (EExpr)
5774 E = CGF.EmitAnyExpr(EExpr);
5775 CGF.EmitOMPAtomicSimpleUpdateExpr(
5776 X, E, BO, /*IsXLHSInRHSPart=*/true,
5777 llvm::AtomicOrdering::Monotonic, Loc,
5778 [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5779 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5780 PrivateScope.addPrivate(
5781 VD, [&CGF, VD, XRValue, Loc]() {
5782 Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5783 CGF.emitOMPSimpleStore(
5784 CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5785 VD->getType().getNonReferenceType(), Loc);
5786 return LHSTemp;
5787 });
5788 (void)PrivateScope.Privatize();
5789 return CGF.EmitAnyExpr(UpExpr);
5790 });
5791 };
5792 if ((*IPriv)->getType()->isArrayType()) {
5793 // Emit atomic reduction for array section.
5794 const auto *RHSVar =
5795 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5796 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5797 AtomicRedGen, XExpr, EExpr, UpExpr);
5798 } else {
5799 // Emit atomic reduction for array subscript or single variable.
5800 AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5801 }
5802 } else {
5803 // Emit as a critical region.
5804 auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5805 const Expr *, const Expr *) {
5806 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5807 std::string Name = RT.getName({"atomic_reduction"});
5808 RT.emitCriticalRegion(
5809 CGF, Name,
5810 [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5811 Action.Enter(CGF);
5812 emitReductionCombiner(CGF, E);
5813 },
5814 Loc);
5815 };
5816 if ((*IPriv)->getType()->isArrayType()) {
5817 const auto *LHSVar =
5818 cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5819 const auto *RHSVar =
5820 cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5821 EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5822 CritRedGen);
5823 } else {
5824 CritRedGen(CGF, nullptr, nullptr, nullptr);
5825 }
5826 }
5827 ++ILHS;
5828 ++IRHS;
5829 ++IPriv;
5830 }
5831 };
5832 RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5833 if (!WithNowait) {
5834 // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5835 llvm::Value *EndArgs[] = {
5836 IdentTLoc, // ident_t *<loc>
5837 ThreadId, // i32 <gtid>
5838 Lock // kmp_critical_name *&<lock>
5839 };
5840 CommonActionTy Action(nullptr, llvm::None,
5841 OMPBuilder.getOrCreateRuntimeFunction(
5842 CGM.getModule(), OMPRTL___kmpc_end_reduce),
5843 EndArgs);
5844 AtomicRCG.setAction(Action);
5845 AtomicRCG(CGF);
5846 } else {
5847 AtomicRCG(CGF);
5848 }
5849
5850 CGF.EmitBranch(DefaultBB);
5851 CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5852}
5853
5854 /// Generates a unique name for artificial threadprivate variables.
5855/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5856static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5857 const Expr *Ref) {
5858 SmallString<256> Buffer;
5859 llvm::raw_svector_ostream Out(Buffer);
5860 const clang::DeclRefExpr *DE;
5861 const VarDecl *D = ::getBaseDecl(Ref, DE);
5862 if (!D)
5863 D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
5864 D = D->getCanonicalDecl();
5865 std::string Name = CGM.getOpenMPRuntime().getName(
5866 {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
5867 Out << Prefix << Name << "_"
5868 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5869 return std::string(Out.str());
5870}
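
A hypothetical illustration of the output, assuming a variable x whose canonical declaration begins at raw source-location encoding 12345 (the exact separators come from getName() and may differ):

// e.g. generateUniqueName(CGM, "reduction_size", RefToX) might produce
//      "reduction_size.x_12345"  (hypothetical; separator details vary)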
5871
5872/// Emits reduction initializer function:
5873/// \code
5874/// void @.red_init(void* %arg, void* %orig) {
5875/// %0 = bitcast void* %arg to <type>*
5876/// store <type> <init>, <type>* %0
5877/// ret void
5878/// }
5879/// \endcode
5880static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5881 SourceLocation Loc,
5882 ReductionCodeGen &RCG, unsigned N) {
5883 ASTContext &C = CGM.getContext();
5884 QualType VoidPtrTy = C.VoidPtrTy;
5885 VoidPtrTy.addRestrict();
5886 FunctionArgList Args;
5887 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5888 ImplicitParamDecl::Other);
5889 ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
5890 ImplicitParamDecl::Other);
5891 Args.emplace_back(&Param);
5892 Args.emplace_back(&ParamOrig);
5893 const auto &FnInfo =
5894 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5895 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5896 std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
5897 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5898 Name, &CGM.getModule());
5899 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5900 Fn->setDoesNotRecurse();
5901 CodeGenFunction CGF(CGM);
5902 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5903 Address PrivateAddr = CGF.EmitLoadOfPointer(
5904 CGF.GetAddrOfLocalVar(&Param),
5905 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5906 llvm::Value *Size = nullptr;
5907 // If the size of the reduction item is non-constant, load it from the
5908 // global threadprivate variable.
5909 if (RCG.getSizes(N).second) {
5910 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5911 CGF, CGM.getContext().getSizeType(),
5912 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5913 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5914 CGM.getContext().getSizeType(), Loc);
5915 }
5916 RCG.emitAggregateType(CGF, N, Size);
5917 Address OrigAddr = Address::invalid();
5918 // If the initializer uses the initializer from a declare reduction construct,
5919 // emit a pointer to the address of the original reduction item (required by
5920 // the reduction initializer).
5921 if (RCG.usesReductionInitializer(N)) {
5922 Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
5923 OrigAddr = CGF.EmitLoadOfPointer(
5924 SharedAddr,
5925 CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
5926 }
5927 // Emit the initializer:
5928 // %0 = bitcast void* %arg to <type>*
5929 // store <type> <init>, <type>* %0
5930 RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
5931 [](CodeGenFunction &) { return false; });
5932 CGF.FinishFunction();
5933 return Fn;
5934}
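// Instantiating the skeleton above for a scalar 'int' '+' reduction gives,
// roughly (a sketch only; the exact IR depends on target and options):
//
//   define internal void @.red_init(i8* %arg, i8* %orig) {
//     %0 = bitcast i8* %arg to i32*
//     store i32 0, i32* %0
//     ret void
//   }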
5935
5936/// Emits reduction combiner function:
5937/// \code
5938/// void @.red_comb(void* %arg0, void* %arg1) {
5939/// %lhs = bitcast void* %arg0 to <type>*
5940/// %rhs = bitcast void* %arg1 to <type>*
5941/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5942/// store <type> %2, <type>* %lhs
5943/// ret void
5944/// }
5945/// \endcode
5946static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5947 SourceLocation Loc,
5948 ReductionCodeGen &RCG, unsigned N,
5949 const Expr *ReductionOp,
5950 const Expr *LHS, const Expr *RHS,
5951 const Expr *PrivateRef) {
5952 ASTContext &C = CGM.getContext();
5953 const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5954 const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5955 FunctionArgList Args;
5956 ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
5957 C.VoidPtrTy, ImplicitParamDecl::Other);
5958 ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5959 ImplicitParamDecl::Other);
5960 Args.emplace_back(&ParamInOut);
5961 Args.emplace_back(&ParamIn);
5962 const auto &FnInfo =
5963 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5964 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5965 std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
5966 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5967 Name, &CGM.getModule());
5968 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
5969 Fn->setDoesNotRecurse();
5970 CodeGenFunction CGF(CGM);
5971 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
5972 llvm::Value *Size = nullptr;
5973 // If the size of the reduction item is non-constant, load it from the
5974 // global threadprivate variable.
5975 if (RCG.getSizes(N).second) {
5976 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5977 CGF, CGM.getContext().getSizeType(),
5978 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
5979 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5980 CGM.getContext().getSizeType(), Loc);
5981 }
5982 RCG.emitAggregateType(CGF, N, Size);
5983 // Remap lhs and rhs variables to the addresses of the function arguments.
5984 // %lhs = bitcast void* %arg0 to <type>*
5985 // %rhs = bitcast void* %arg1 to <type>*
5986 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5987 PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
5988 // Pull out the pointer to the variable.
5989 Address PtrAddr = CGF.EmitLoadOfPointer(
5990 CGF.GetAddrOfLocalVar(&ParamInOut),
5991 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5992 return CGF.Builder.CreateElementBitCast(
5993 PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5994 });
5995 PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
5996 // Pull out the pointer to the variable.
5997 Address PtrAddr = CGF.EmitLoadOfPointer(
5998 CGF.GetAddrOfLocalVar(&ParamIn),
5999 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6000 return CGF.Builder.CreateElementBitCast(
6001 PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6002 });
6003 PrivateScope.Privatize();
6004 // Emit the combiner body:
6005 // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6006 // store <type> %2, <type>* %lhs
6007 CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6008 CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6009 cast<DeclRefExpr>(RHS));
6010 CGF.FinishFunction();
6011 return Fn;
6012}
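// The corresponding combiner for the same scalar 'int' '+' reduction looks
// roughly like (again a sketch, not the exact emitted IR):
//
//   define internal void @.red_comb(i8* %arg0, i8* %arg1) {
//     %lhs = bitcast i8* %arg0 to i32*
//     %rhs = bitcast i8* %arg1 to i32*
//     %0 = load i32, i32* %lhs
//     %1 = load i32, i32* %rhs
//     %add = add nsw i32 %0, %1
//     store i32 %add, i32* %lhs
//     ret void
//   }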
6013
6014/// Emits reduction finalizer function:
6015/// \code
6016/// void @.red_fini(void* %arg) {
6017/// %0 = bitcast void* %arg to <type>*
6018/// <destroy>(<type>* %0)
6019/// ret void
6020/// }
6021/// \endcode
6022static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
6023 SourceLocation Loc,
6024 ReductionCodeGen &RCG, unsigned N) {
6025 if (!RCG.needCleanups(N))
6026 return nullptr;
6027 ASTContext &C = CGM.getContext();
6028 FunctionArgList Args;
6029 ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6030 ImplicitParamDecl::Other);
6031 Args.emplace_back(&Param);
6032 const auto &FnInfo =
6033 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
6034 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6035 std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6036 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
6037 Name, &CGM.getModule());
6038 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6039 Fn->setDoesNotRecurse();
6040 CodeGenFunction CGF(CGM);
6041 CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6042 Address PrivateAddr = CGF.EmitLoadOfPointer(
6043 CGF.GetAddrOfLocalVar(&Param),
6044 C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6045 llvm::Value *Size = nullptr;
6046 // If the size of the reduction item is non-constant, load it from the
6047 // global threadprivate variable.
6048 if (RCG.getSizes(N).second) {
6049 Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6050 CGF, CGM.getContext().getSizeType(),
6051 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6052 Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6053 CGM.getContext().getSizeType(), Loc);
6054 }
6055 RCG.emitAggregateType(CGF, N, Size);
6056 // Emit the finalizer body:
6057 // <destroy>(<type>* %0)
6058 RCG.emitCleanups(CGF, N, PrivateAddr);
6059 CGF.FinishFunction(Loc);
6060 return Fn;
6061}
6062
6063llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
6064 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6065 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6066 if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6067 return nullptr;
6068
6069 // Build typedef struct:
6070 // kmp_taskred_input {
6071 // void *reduce_shar; // shared reduction item
6072 // void *reduce_orig; // original reduction item used for initialization
6073 // size_t reduce_size; // size of data item
6074 // void *reduce_init; // data initialization routine
6075 // void *reduce_fini; // data finalization routine
6076 // void *reduce_comb; // data combiner routine
6077 // kmp_task_red_flags_t flags; // flags for additional info from compiler
6078 // } kmp_taskred_input_t;
6079 ASTContext &C = CGM.getContext();
6080 RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
6081 RD->startDefinition();
6082 const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6083 const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6084 const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6085 const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6086 const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6087 const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6088 const FieldDecl *FlagsFD = addFieldToRecordDecl(
6089 C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6090 RD->completeDefinition();
6091 QualType RDType = C.getRecordType(RD);
6092 unsigned Size = Data.ReductionVars.size();
6093 llvm::APInt ArraySize(/*numBits=*/64, Size);
6094 QualType ArrayRDType = C.getConstantArrayType(
6095 RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
6096 // kmp_task_red_input_t .rd_input.[Size];
6097 Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6098 ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
6099 Data.ReductionCopies, Data.ReductionOps);
6100 for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6101 // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6102 llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6103 llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6104 llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
6105 TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
6106 /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6107 ".rd_input.gep.");
6108 LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6109 // ElemLVal.reduce_shar = &Shareds[Cnt];
6110 LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6111 RCG.emitSharedOrigLValue(CGF, Cnt);
6112 llvm::Value *CastedShared =
6113 CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
6114 CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6115 // ElemLVal.reduce_orig = &Origs[Cnt];
6116 LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
6117 llvm::Value *CastedOrig =
6118 CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
6119 CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
6120 RCG.emitAggregateType(CGF, Cnt);
6121 llvm::Value *SizeValInChars;
6122 llvm::Value *SizeVal;
6123 std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6124 // We use delayed creation/initialization for VLAs and array sections. It is
6125 // required because the runtime does not provide a way to pass the sizes of
6126 // VLAs/array sections to the initializer/combiner/finalizer functions.
6127 // Instead, threadprivate global variables are used to store these values
6128 // and make them available to those functions.
6129 bool DelayedCreation = !!SizeVal;
6130 SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6131 /*isSigned=*/false);
6132 LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6133 CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6134 // ElemLVal.reduce_init = init;
6135 LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6136 llvm::Value *InitAddr =
6137 CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6138 CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6139 // ElemLVal.reduce_fini = fini;
6140 LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
6141 llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6142 llvm::Value *FiniAddr = Fini
6143 ? CGF.EmitCastToVoidPtr(Fini)
6144 : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6145 CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6146 // ElemLVal.reduce_comb = comb;
6147 LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6148 llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
6149 CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6150 RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6151 CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6152 // ElemLVal.flags = 0;
6153 LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6154 if (DelayedCreation) {
6155 CGF.EmitStoreOfScalar(
6156 llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6157 FlagsLVal);
6158 } else
6159 CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
6160 FlagsLVal.getType());
6161 }
6162 if (Data.IsReductionWithTaskMod) {
6163 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6164 // is_ws, int num, void *data);
6165 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6166 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6167 CGM.IntTy, /*isSigned=*/true);
6168 llvm::Value *Args[] = {
6169 IdentTLoc, GTid,
6170 llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
6171 /*isSigned=*/true),
6172 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6173 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6174 TaskRedInput.getPointer(), CGM.VoidPtrTy)};
6175 return CGF.EmitRuntimeCall(
6176 OMPBuilder.getOrCreateRuntimeFunction(
6177 CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
6178 Args);
6179 }
6180 // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
6181 llvm::Value *Args[] = {
6182 CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6183 /*isSigned=*/true),
6184 llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6185 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
6186 CGM.VoidPtrTy)};
6187 return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6188 CGM.getModule(), OMPRTL___kmpc_taskred_init),
6189 Args);
6190}
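// A user-level construct that reaches emitTaskReductionInit looks like this
// (illustrative; 'compute' is a hypothetical function):
//
//   int sum = 0;
//   #pragma omp taskgroup task_reduction(+ : sum)
//   {
//   #pragma omp task in_reduction(+ : sum)
//     sum += compute();
//   }
//
// Each element of the .rd_input. array built above describes one reduction
// item ('sum' here) and is handed to __kmpc_taskred_init or, for reductions
// with a task modifier, to __kmpc_taskred_modifier_init.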
6191
6192void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
6193 SourceLocation Loc,
6194 bool IsWorksharingReduction) {
6195 // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
6196 // is_ws, int num, void *data);
6197 llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
6198 llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6199 CGM.IntTy, /*isSigned=*/true);
6200 llvm::Value *Args[] = {IdentTLoc, GTid,
6201 llvm::ConstantInt::get(CGM.IntTy,
6202 IsWorksharingReduction ? 1 : 0,
6203 /*isSigned=*/true)};
6204 (void)CGF.EmitRuntimeCall(
6205 OMPBuilder.getOrCreateRuntimeFunction(
6206 CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
6207 Args);
6208}
6209
6210void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
6211 SourceLocation Loc,
6212 ReductionCodeGen &RCG,
6213 unsigned N) {
6214 auto Sizes = RCG.getSizes(N);
6215 // Emit the threadprivate global variable if the size is non-constant
6216 // (Sizes.second != nullptr).
6217 if (Sizes.second) {
6218 llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6219 /*isSigned=*/false);
6220 Address SizeAddr = getAddrOfArtificialThreadPrivate(
6221 CGF, CGM.getContext().getSizeType(),
6222 generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6223 CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6224 }
6225}
6226
6227Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
6228 SourceLocation Loc,
6229 llvm::Value *ReductionsPtr,
6230 LValue SharedLVal) {
6231 // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6232 // *d);
6233 llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
6234 CGM.IntTy,
6235 /*isSigned=*/true),
6236 ReductionsPtr,
6237 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6238 SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
6239 return Address(
6240 CGF.EmitRuntimeCall(
6241 OMPBuilder.getOrCreateRuntimeFunction(
6242 CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
6243 Args),
6244 SharedLVal.getAlignment());
6245}
6246
6247void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
6248 const OMPTaskDataTy &Data) {
6249 if (!CGF.HaveInsertPoint())
6250 return;
6251
6252 if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
6253 // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
6254 OMPBuilder.createTaskwait(CGF.Builder);
6255 } else {
6256 llvm::Value *ThreadID = getThreadID(CGF, Loc);
6257 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
6258 auto &M = CGM.getModule();
6259 Address DependenciesArray = Address::invalid();
6260 llvm::Value *NumOfElements;
6261 std::tie(NumOfElements, DependenciesArray) =
6262 emitDependClause(CGF, Data.Dependences, Loc);
6263 llvm::Value *DepWaitTaskArgs[6];
6264 if (!Data.Dependences.empty()) {
6265 DepWaitTaskArgs[0] = UpLoc;
6266 DepWaitTaskArgs[1] = ThreadID;
6267 DepWaitTaskArgs[2] = NumOfElements;
6268 DepWaitTaskArgs[3] = DependenciesArray.getPointer();
6269 DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
6270 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6271
6272 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
6273
6274 // Build call void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
6275 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
6276 // ndeps_noalias, kmp_depend_info_t *noalias_dep_list) when dependence
6277 // info is specified.
6278 CGF.EmitRuntimeCall(
6279 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
6280 DepWaitTaskArgs);
6281
6282 } else {
6283
6284 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6285 // global_tid);
6286 llvm::Value *Args[] = {UpLoc, ThreadID};
6287 // Ignore return result until untied tasks are supported.
6288 CGF.EmitRuntimeCall(
6289 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
6290 Args);
6291 }
6292 }
6293
6294 if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6295 Region->emitUntiedSwitch(CGF);
6296}
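// Both paths above in one example (illustrative): a plain
// '#pragma omp taskwait' emits the __kmpc_omp_taskwait call, while
// '#pragma omp taskwait depend(in : x)' (OpenMP 5.0) populates
// DepWaitTaskArgs and emits __kmpc_omp_wait_deps instead.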
6297
6298void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
6299 OpenMPDirectiveKind InnerKind,
6300 const RegionCodeGenTy &CodeGen,
6301 bool HasCancel) {
6302 if (!CGF.HaveInsertPoint())
6303 return;
6304 InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
6305 InnerKind != OMPD_critical &&
6306 InnerKind != OMPD_master &&
6307 InnerKind != OMPD_masked);
6308 CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6309}
6310
6311namespace {
6312enum RTCancelKind {
6313 CancelNoreq = 0,
6314 CancelParallel = 1,
6315 CancelLoop = 2,
6316 CancelSections = 3,
6317 CancelTaskgroup = 4
6318};
6319} // anonymous namespace
6320
6321static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
6322 RTCancelKind CancelKind = CancelNoreq;
6323 if (CancelRegion == OMPD_parallel)
6324 CancelKind = CancelParallel;
6325 else if (CancelRegion == OMPD_for)
6326 CancelKind = CancelLoop;
6327 else if (CancelRegion == OMPD_sections)
6328 CancelKind = CancelSections;
6329 else {
6330 assert(CancelRegion == OMPD_taskgroup);
6331 CancelKind = CancelTaskgroup;
6332 }
6333 return CancelKind;
6334}
6335
6336void CGOpenMPRuntime::emitCancellationPointCall(
6337 CodeGenFunction &CGF, SourceLocation Loc,
6338 OpenMPDirectiveKind CancelRegion) {
6339 if (!CGF.HaveInsertPoint())
6340 return;
6341 // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6342 // global_tid, kmp_int32 cncl_kind);
6343 if (auto *OMPRegionInfo =
6344 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6345 // For 'cancellation point taskgroup', the task region info may not have a
6346 // cancel. This may instead happen in another adjacent task.
6347 if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6348 llvm::Value *Args[] = {
6349 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6350 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6351 // Ignore return result until untied tasks are supported.
6352 llvm::Value *Result = CGF.EmitRuntimeCall(
6353 OMPBuilder.getOrCreateRuntimeFunction(
6354 CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
6355 Args);
6356 // if (__kmpc_cancellationpoint()) {
6357 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6358 // exit from construct;
6359 // }
6360 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6361 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6362 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6363 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6364 CGF.EmitBlock(ExitBB);
6365 if (CancelRegion == OMPD_parallel)
6366 emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6367 // exit from construct;
6368 CodeGenFunction::JumpDest CancelDest =
6369 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6370 CGF.EmitBranchThroughCleanup(CancelDest);
6371 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6372 }
6373 }
6374}
6375
6376void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
6377 const Expr *IfCond,
6378 OpenMPDirectiveKind CancelRegion) {
6379 if (!CGF.HaveInsertPoint())
6380 return;
6381 // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6382 // kmp_int32 cncl_kind);
6383 auto &M = CGM.getModule();
6384 if (auto *OMPRegionInfo =
6385 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6386 auto &&ThenGen = [this, &M, Loc, CancelRegion,
6387 OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
6388 CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6389 llvm::Value *Args[] = {
6390 RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6391 CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6392 // Ignore return result until untied tasks are supported.
6393 llvm::Value *Result = CGF.EmitRuntimeCall(
6394 OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
6395 // if (__kmpc_cancel()) {
6396 // call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
6397 // exit from construct;
6398 // }
6399 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6400 llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6401 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6402 CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6403 CGF.EmitBlock(ExitBB);
6404 if (CancelRegion == OMPD_parallel)
6405 RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
6406 // exit from construct;
6407 CodeGenFunction::JumpDest CancelDest =
6408 CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6409 CGF.EmitBranchThroughCleanup(CancelDest);
6410 CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6411 };
6412 if (IfCond) {
6413 emitIfClause(CGF, IfCond, ThenGen,
6414 [](CodeGenFunction &, PrePostActionTy &) {});
6415 } else {
6416 RegionCodeGenTy ThenRCG(ThenGen);
6417 ThenRCG(CGF);
6418 }
6419 }
6420}
6421
6422namespace {
6423/// Cleanup action for uses_allocators support.
6424class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
6425 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;
6426
6427public:
6428 OMPUsesAllocatorsActionTy(
6429 ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
6430 : Allocators(Allocators) {}
6431 void Enter(CodeGenFunction &CGF) override {
6432 if (!CGF.HaveInsertPoint())
6433 return;
6434 for (const auto &AllocatorData : Allocators) {
6435 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
6436 CGF, AllocatorData.first, AllocatorData.second);
6437 }
6438 }
6439 void Exit(CodeGenFunction &CGF) override {
6440 if (!CGF.HaveInsertPoint())
6441 return;
6442 for (const auto &AllocatorData : Allocators) {
6443 CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
6444 AllocatorData.first);
6445 }
6446 }
6447};
6448} // namespace
6449
6450void CGOpenMPRuntime::emitTargetOutlinedFunction(
6451 const OMPExecutableDirective &D, StringRef ParentName,
6452 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6453 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6454 assert(!ParentName.empty() && "Invalid target region parent name!");
6455 HasEmittedTargetRegion = true;
6456 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6457 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6458 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6459 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6460 if (!D.AllocatorTraits)
6461 continue;
6462 Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
6463 }
6464 }
6465 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6466 CodeGen.setAction(UsesAllocatorAction);
6467 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6468 IsOffloadEntry, CodeGen);
6469}
6470
6471void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
6472 const Expr *Allocator,
6473 const Expr *AllocatorTraits) {
6474 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6475 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6476 // Use default memspace handle.
6477 llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
6478 llvm::Value *NumTraits = llvm::ConstantInt::get(
6479 CGF.IntTy, cast<ConstantArrayType>(
6480 AllocatorTraits->getType()->getAsArrayTypeUnsafe())
6481 ->getSize()
6482 .getLimitedValue());
6483 LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
6484 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
6485 AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy);
6486 AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
6487 AllocatorTraitsLVal.getBaseInfo(),
6488 AllocatorTraitsLVal.getTBAAInfo());
6489 llvm::Value *Traits =
6490 CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());
6491
6492 llvm::Value *AllocatorVal =
6493 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
6494 CGM.getModule(), OMPRTL___kmpc_init_allocator),
6495 {ThreadId, MemSpaceHandle, NumTraits, Traits});
6496 // Store to allocator.
6497 CGF.EmitVarDecl(*cast<VarDecl>(
6498 cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
6499 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6500 AllocatorVal =
6501 CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
6502 Allocator->getType(), Allocator->getExprLoc());
6503 CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
6504}
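// Illustrative source handled by this init/fini pair (the trait values are
// hypothetical):
//
//   omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
//   omp_allocator_handle_t my_alloc;
//   #pragma omp target uses_allocators(my_alloc(traits))
//   { /* allocations through my_alloc */ }
//
// On region entry, __kmpc_init_allocator builds the allocator from the trait
// array and the handle is stored into 'my_alloc'; emitUsesAllocatorsFini
// below destroys it on region exit.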
6505
6506void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
6507 const Expr *Allocator) {
6508 llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
6509 ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
6510 LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
6511 llvm::Value *AllocatorVal =
6512 CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
6513 AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
6514 CGF.getContext().VoidPtrTy,
6515 Allocator->getExprLoc());
6516 (void)CGF.EmitRuntimeCall(
6517 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
6518 OMPRTL___kmpc_destroy_allocator),
6519 {ThreadId, AllocatorVal});
6520}
6521
6522void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
6523 const OMPExecutableDirective &D, StringRef ParentName,
6524 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6525 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6526 // Create a unique name for the entry function using the source location
6527 // information of the current target region. The name will be something like:
6528 //
6529 // __omp_offloading_DD_FFFF_PP_lBB
6530 //
6531 // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6532 // mangled name of the function that encloses the target region and BB is the
6533 // line number of the target region.
6534
6535 unsigned DeviceID;
6536 unsigned FileID;
6537 unsigned Line;
6538 getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6539 Line);
6540 SmallString<64> EntryFnName;
6541 {
6542 llvm::raw_svector_ostream OS(EntryFnName);
6543 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6544 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6545 }
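// For instance (hypothetical IDs), a region enclosed by 'main' at line 42
// with device ID 0x801 and file ID 0x2002ee3 gets the entry name
// "__omp_offloading_801_2002ee3_main_l42".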
6546
6547 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6548
6549 CodeGenFunction CGF(CGM, true);
6550 CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6551 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6552
6553 OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
6554
6555 // If this target outline function is not an offload entry, we don't need to
6556 // register it.
6557 if (!IsOffloadEntry)
6558 return;
6559
6560 // The target region ID is used by the runtime library to identify the current
6561 // target region, so it only has to be unique and not necessarily point to
6562 // anything. It could be the pointer to the outlined function that implements
6563 // the target region, but we avoid using that so the compiler is not required
6564 // to keep it alive and can inline the host function if that proves worthwhile
6565 // during optimization. On the other hand, if emitting code for the device,
6566 // the ID has to be the function address so that it can be retrieved from the
6567 // offloading entry and launched by the runtime library. We also give the
6568 // outlined function external linkage when emitting code for the device,
6569 // because these functions will be entry points into the device image.
6570
6571 if (CGM.getLangOpts().OpenMPIsDevice) {
6572 OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6573 OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6574 OutlinedFn->setDSOLocal(false);
6575 if (CGM.getTriple().isAMDGCN())
6576 OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
6577 } else {
6578 std::string Name = getName({EntryFnName, "region_id"});
6579 OutlinedFnID = new llvm::GlobalVariable(
6580 CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6581 llvm::GlobalValue::WeakAnyLinkage,
6582 llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6583 }
6584
6585 // Register the information for the entry associated with this target region.
6586 OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
6587 DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6588 OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion);
6589
6590 // Add NumTeams and ThreadLimit attributes to the outlined GPU function
6591 int32_t DefaultValTeams = -1;
6592 getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
6593 if (DefaultValTeams > 0) {
6594 OutlinedFn->addFnAttr("omp_target_num_teams",
6595 std::to_string(DefaultValTeams));
6596 }
6597 int32_t DefaultValThreads = -1;
6598 getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
6599 if (DefaultValThreads > 0) {
6600 OutlinedFn->addFnAttr("omp_target_thread_limit",
6601 std::to_string(DefaultValThreads));
6602 }
6603
6604 CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
6605}
6606
6607/// Checks if the expression is constant or does not have non-trivial function
6608/// calls.
6609static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6610 // We can skip constant expressions.
6611 // We can skip expressions with trivial calls or simple expressions.
6612 return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
6613 !E->hasNonTrivialCall(Ctx)) &&
6614 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6615}
6616
6617const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
6618 const Stmt *Body) {
6619 const Stmt *Child = Body->IgnoreContainers();
6620 while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6621 Child = nullptr;
6622 for (const Stmt *S : C->body()) {
6623 if (const auto *E = dyn_cast<Expr>(S)) {
6624 if (isTrivial(Ctx, E))
6625 continue;
6626 }
6627 // Some of the statements can be ignored.
6628 if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6629 isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6630 continue;
6631 // Analyze declarations.
6632 if (const auto *DS = dyn_cast<DeclStmt>(S)) {
6633 if (llvm::all_of(DS->decls(), [](const Decl *D) {
6634 if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6635 isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6636 isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6637 isa<UsingDirectiveDecl>(D) ||
6638 isa<OMPDeclareReductionDecl>(D) ||
6639 isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6640 return true;
6641 const auto *VD = dyn_cast<VarDecl>(D);
6642 if (!VD)
6643 return false;
6644 return VD->hasGlobalStorage() || !VD->isUsed();
6645 }))
6646 continue;
6647 }
6648 // Found multiple children - cannot get the one child only.
6649 if (Child)
6650 return nullptr;
6651 Child = S;
6652 }
6653 if (Child)
6654 Child = Child->IgnoreContainers();
6655 }
6656 return Child;
6657}
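// For example (illustrative), given
//
//   #pragma omp target
//   {
//     ;           // ignored: null statement
//     int unused; // ignored: unused local declaration
//   #pragma omp teams
//     foo();
//   }
//
// the teams directive is returned as the single child of the target body.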
6658
6659const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
6660 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6661 int32_t &DefaultVal) {
6662
6663 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6664 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6665 "Expected target-based executable directive.");
6666 switch (DirectiveKind) {
6667 case OMPD_target: {
6668 const auto *CS = D.getInnermostCapturedStmt();
6669 const auto *Body =
6670 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
6671 const Stmt *ChildStmt =
6672 CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
6673 if (const auto *NestedDir =
6674 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6675 if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6676 if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
6677 const Expr *NumTeams =
6678 NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6679 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6680 if (auto Constant =
6681 NumTeams->getIntegerConstantExpr(CGF.getContext()))
6682 DefaultVal = Constant->getExtValue();
6683 return NumTeams;
6684 }
6685 DefaultVal = 0;
6686 return nullptr;
6687 }
6688 if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6689 isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
6690 DefaultVal = 1;
6691 return nullptr;
6692 }
6693 DefaultVal = 1;
6694 return nullptr;
6695 }
6696 // A value of -1 indicates that no num_teams value needs to be emitted.
6697 DefaultVal = -1;
6698 return nullptr;
6699 }
6700 case OMPD_target_teams:
6701 case OMPD_target_teams_distribute:
6702 case OMPD_target_teams_distribute_simd:
6703 case OMPD_target_teams_distribute_parallel_for:
6704 case OMPD_target_teams_distribute_parallel_for_simd: {
6705 if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
6706 const Expr *NumTeams =
6707 D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6708 if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
6709 if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
6710 DefaultVal = Constant->getExtValue();
6711 return NumTeams;
6712 }
6713 DefaultVal = 0;
6714 return nullptr;
6715 }
6716 case OMPD_target_parallel:
6717 case OMPD_target_parallel_for:
6718 case OMPD_target_parallel_for_simd:
6719 case OMPD_target_simd:
6720 DefaultVal = 1;
6721 return nullptr;
6722 case OMPD_parallel:
6723 case OMPD_for:
6724 case OMPD_parallel_for:
6725 case OMPD_parallel_master:
6726 case OMPD_parallel_sections:
6727 case OMPD_for_simd:
6728 case OMPD_parallel_for_simd:
6729 case OMPD_cancel:
6730 case OMPD_cancellation_point:
6731 case OMPD_ordered:
6732 case OMPD_threadprivate:
6733 case OMPD_allocate:
6734 case OMPD_task:
6735 case OMPD_simd:
6736 case OMPD_tile:
6737 case OMPD_unroll:
6738 case OMPD_sections:
6739 case OMPD_section:
6740 case OMPD_single:
6741 case OMPD_master:
6742 case OMPD_critical:
6743 case OMPD_taskyield:
6744 case OMPD_barrier:
6745 case OMPD_taskwait:
6746 case OMPD_taskgroup:
6747 case OMPD_atomic:
6748 case OMPD_flush:
6749 case OMPD_depobj:
6750 case OMPD_scan:
6751 case OMPD_teams:
6752 case OMPD_target_data:
6753 case OMPD_target_exit_data:
6754 case OMPD_target_enter_data:
6755 case OMPD_distribute:
6756 case OMPD_distribute_simd:
6757 case OMPD_distribute_parallel_for:
6758 case OMPD_distribute_parallel_for_simd:
6759 case OMPD_teams_distribute:
6760 case OMPD_teams_distribute_simd:
6761 case OMPD_teams_distribute_parallel_for:
6762 case OMPD_teams_distribute_parallel_for_simd:
6763 case OMPD_target_update:
6764 case OMPD_declare_simd:
6765 case OMPD_declare_variant:
6766 case OMPD_begin_declare_variant:
6767 case OMPD_end_declare_variant:
6768 case OMPD_declare_target:
6769 case OMPD_end_declare_target:
6770 case OMPD_declare_reduction:
6771 case OMPD_declare_mapper:
6772 case OMPD_taskloop:
6773 case OMPD_taskloop_simd:
6774 case OMPD_master_taskloop:
6775 case OMPD_master_taskloop_simd:
6776 case OMPD_parallel_master_taskloop:
6777 case OMPD_parallel_master_taskloop_simd:
6778 case OMPD_requires:
6779 case OMPD_metadirective:
6780 case OMPD_unknown:
6781 break;
6782 default:
6783 break;
6784 }
6785 llvm_unreachable("Unexpected directive kind.")::llvm::llvm_unreachable_internal("Unexpected directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 6785)
;
6786}
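// For example (illustrative): '#pragma omp target teams num_teams(4)'
// returns the num_teams expression with DefaultVal set to 4; a 'target'
// region with a nested 'parallel' yields DefaultVal = 1; a plain 'target'
// with no nested directive yields DefaultVal = -1, meaning no num_teams
// value is emitted.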
6787
6788llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
6789 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
6790 assert(!CGF.getLangOpts().OpenMPIsDevice &&
6791 "Clauses associated with the teams directive expected to be emitted "
6792 "only for the host!");
6793 CGBuilderTy &Bld = CGF.Builder;
6794 int32_t DefaultNT = -1;
6795 const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
6796 if (NumTeams != nullptr) {
6797 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6798
6799 switch (DirectiveKind) {
6800 case OMPD_target: {
6801 const auto *CS = D.getInnermostCapturedStmt();
6802 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6803 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6804 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6805 /*IgnoreResultAssign*/ true);
6806 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6807 /*isSigned=*/true);
6808 }
6809 case OMPD_target_teams:
6810 case OMPD_target_teams_distribute:
6811 case OMPD_target_teams_distribute_simd:
6812 case OMPD_target_teams_distribute_parallel_for:
6813 case OMPD_target_teams_distribute_parallel_for_simd: {
6814 CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6815 llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
6816 /*IgnoreResultAssign*/ true);
6817 return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6818 /*isSigned=*/true);
6819 }
6820 default:
6821 break;
6822 }
6823 } else if (DefaultNT == -1) {
6824 return nullptr;
6825 }
6826
6827 return Bld.getInt32(DefaultNT);
6828}
6829
6830static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
6831 llvm::Value *DefaultThreadLimitVal) {
6832 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6833 CGF.getContext(), CS->getCapturedStmt());
6834 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6835 if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6836 llvm::Value *NumThreads = nullptr;
6837 llvm::Value *CondVal = nullptr;
6838 // Handle if clause. If if clause present, the number of threads is
6839 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6840 if (Dir->hasClausesOfKind<OMPIfClause>()) {
6841 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6842 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6843 const OMPIfClause *IfClause = nullptr;
6844 for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6845 if (C->getNameModifier() == OMPD_unknown ||
6846 C->getNameModifier() == OMPD_parallel) {
6847 IfClause = C;
6848 break;
6849 }
6850 }
6851 if (IfClause) {
6852 const Expr *Cond = IfClause->getCondition();
6853 bool Result;
6854 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6855 if (!Result)
6856 return CGF.Builder.getInt32(1);
6857 } else {
6858 CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
6859 if (const auto *PreInit =
6860 cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6861 for (const auto *I : PreInit->decls()) {
6862 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6863 CGF.EmitVarDecl(cast<VarDecl>(*I));
6864 } else {
6865 CodeGenFunction::AutoVarEmission Emission =
6866 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6867 CGF.EmitAutoVarCleanups(Emission);
6868 }
6869 }
6870 }
6871 CondVal = CGF.EvaluateExprAsBool(Cond);
6872 }
6873 }
6874 }
6875 // Check the value of the num_threads clause only if the if clause was not
6876 // specified or does not evaluate to false.
6877 if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6878 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6879 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6880 const auto *NumThreadsClause =
6881 Dir->getSingleClause<OMPNumThreadsClause>();
6882 CodeGenFunction::LexicalScope Scope(
6883 CGF, NumThreadsClause->getNumThreads()->getSourceRange());
6884 if (const auto *PreInit =
6885 cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6886 for (const auto *I : PreInit->decls()) {
6887 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6888 CGF.EmitVarDecl(cast<VarDecl>(*I));
6889 } else {
6890 CodeGenFunction::AutoVarEmission Emission =
6891 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6892 CGF.EmitAutoVarCleanups(Emission);
6893 }
6894 }
6895 }
6896 NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
6897 NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6898 /*isSigned=*/false);
6899 if (DefaultThreadLimitVal)
6900 NumThreads = CGF.Builder.CreateSelect(
6901 CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6902 DefaultThreadLimitVal, NumThreads);
6903 } else {
6904 NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6905 : CGF.Builder.getInt32(0);
6906 }
6907 // Process condition of the if clause.
6908 if (CondVal) {
6909 NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6910 CGF.Builder.getInt32(1));
6911 }
6912 return NumThreads;
6913 }
6914 if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6915 return CGF.Builder.getInt32(1);
6916 return DefaultThreadLimitVal;
6917 }
6918 return DefaultThreadLimitVal ? DefaultThreadLimitVal
6919 : CGF.Builder.getInt32(0);
6920}
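// Applying the formula above (illustrative): for a nested
// '#pragma omp parallel if(b) num_threads(n)' this helper returns the
// select 'b ? n : 1', with 'n' first clamped to DefaultThreadLimitVal when
// that limit is smaller.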
6921
6922const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6923 CodeGenFunction &CGF, const OMPExecutableDirective &D,
6924 int32_t &DefaultVal) {
6925 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6926 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6927 "Expected target-based executable directive.");
6928
6929 switch (DirectiveKind) {
6930 case OMPD_target:
6931 // A plain 'target' directive has no thread_limit clause of its own.
6932 return nullptr;
6933 case OMPD_target_teams:
6934 case OMPD_target_teams_distribute:
6935 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6936 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6937 const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
6938 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6939 if (auto Constant =
6940 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6941 DefaultVal = Constant->getExtValue();
6942 return ThreadLimit;
6943 }
6944 return nullptr;
6945 case OMPD_target_parallel:
6946 case OMPD_target_parallel_for:
6947 case OMPD_target_parallel_for_simd:
6948 case OMPD_target_teams_distribute_parallel_for:
6949 case OMPD_target_teams_distribute_parallel_for_simd: {
6950 Expr *ThreadLimit = nullptr;
6951 Expr *NumThreads = nullptr;
6952 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6953 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6954 ThreadLimit = ThreadLimitClause->getThreadLimit();
6955 if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
6956 if (auto Constant =
6957 ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
6958 DefaultVal = Constant->getExtValue();
6959 }
6960 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6961 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6962 NumThreads = NumThreadsClause->getNumThreads();
6963 if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
6964 if (auto Constant =
6965 NumThreads->getIntegerConstantExpr(CGF.getContext())) {
6966 if (Constant->getExtValue() < DefaultVal) {
6967 DefaultVal = Constant->getExtValue();
6968 ThreadLimit = NumThreads;
6969 }
6970 }
6971 }
6972 }
6973 return ThreadLimit;
6974 }
6975 case OMPD_target_teams_distribute_simd:
6976 case OMPD_target_simd:
6977 DefaultVal = 1;
6978 return nullptr;
6979 case OMPD_parallel:
6980 case OMPD_for:
6981 case OMPD_parallel_for:
6982 case OMPD_parallel_master:
6983 case OMPD_parallel_sections:
6984 case OMPD_for_simd:
6985 case OMPD_parallel_for_simd:
6986 case OMPD_cancel:
6987 case OMPD_cancellation_point:
6988 case OMPD_ordered:
6989 case OMPD_threadprivate:
6990 case OMPD_allocate:
6991 case OMPD_task:
6992 case OMPD_simd:
6993 case OMPD_tile:
6994 case OMPD_unroll:
6995 case OMPD_sections:
6996 case OMPD_section:
6997 case OMPD_single:
6998 case OMPD_master:
6999 case OMPD_critical:
7000 case OMPD_taskyield:
7001 case OMPD_barrier:
7002 case OMPD_taskwait:
7003 case OMPD_taskgroup:
7004 case OMPD_atomic:
7005 case OMPD_flush:
7006 case OMPD_depobj:
7007 case OMPD_scan:
7008 case OMPD_teams:
7009 case OMPD_target_data:
7010 case OMPD_target_exit_data:
7011 case OMPD_target_enter_data:
7012 case OMPD_distribute:
7013 case OMPD_distribute_simd:
7014 case OMPD_distribute_parallel_for:
7015 case OMPD_distribute_parallel_for_simd:
7016 case OMPD_teams_distribute:
7017 case OMPD_teams_distribute_simd:
7018 case OMPD_teams_distribute_parallel_for:
7019 case OMPD_teams_distribute_parallel_for_simd:
7020 case OMPD_target_update:
7021 case OMPD_declare_simd:
7022 case OMPD_declare_variant:
7023 case OMPD_begin_declare_variant:
7024 case OMPD_end_declare_variant:
7025 case OMPD_declare_target:
7026 case OMPD_end_declare_target:
7027 case OMPD_declare_reduction:
7028 case OMPD_declare_mapper:
7029 case OMPD_taskloop:
7030 case OMPD_taskloop_simd:
7031 case OMPD_master_taskloop:
7032 case OMPD_master_taskloop_simd:
7033 case OMPD_parallel_master_taskloop:
7034 case OMPD_parallel_master_taskloop_simd:
7035 case OMPD_requires:
7036 case OMPD_unknown:
7037 break;
7038 default:
7039 break;
7040 }
7041 llvm_unreachable("Unsupported directive kind.")::llvm::llvm_unreachable_internal("Unsupported directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7041)
;
7042}
7043
7044llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
7045 CodeGenFunction &CGF, const OMPExecutableDirective &D) {
7046 assert(!CGF.getLangOpts().OpenMPIsDevice &&
7047 "Clauses associated with the teams directive expected to be emitted "
7048 "only for the host!");
7049 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
7050 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
7051 "Expected target-based executable directive.");
7052 CGBuilderTy &Bld = CGF.Builder;
7053 llvm::Value *ThreadLimitVal = nullptr;
7054 llvm::Value *NumThreadsVal = nullptr;
7055 switch (DirectiveKind) {
7056 case OMPD_target: {
7057 const CapturedStmt *CS = D.getInnermostCapturedStmt();
7058 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7059 return NumThreads;
7060 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7061 CGF.getContext(), CS->getCapturedStmt());
7062 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7063 if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
7064 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
7065 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
7066 const auto *ThreadLimitClause =
7067 Dir->getSingleClause<OMPThreadLimitClause>();
7068 CodeGenFunction::LexicalScope Scope(
7069 CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
7070 if (const auto *PreInit =
7071 cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
7072 for (const auto *I : PreInit->decls()) {
7073 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
7074 CGF.EmitVarDecl(cast<VarDecl>(*I));
7075 } else {
7076 CodeGenFunction::AutoVarEmission Emission =
7077 CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
7078 CGF.EmitAutoVarCleanups(Emission);
7079 }
7080 }
7081 }
7082 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7083 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7084 ThreadLimitVal =
7085 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7086 }
7087 if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
7088 !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
7089 CS = Dir->getInnermostCapturedStmt();
7090 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7091 CGF.getContext(), CS->getCapturedStmt());
7092 Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
7093 }
7094 if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
7095 !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
7096 CS = Dir->getInnermostCapturedStmt();
7097 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7098 return NumThreads;
7099 }
7100 if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
7101 return Bld.getInt32(1);
7102 }
7103 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7104 }
7105 case OMPD_target_teams: {
7106 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7107 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7108 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7109 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7110 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7111 ThreadLimitVal =
7112 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7113 }
7114 const CapturedStmt *CS = D.getInnermostCapturedStmt();
7115 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7116 return NumThreads;
7117 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
7118 CGF.getContext(), CS->getCapturedStmt());
7119 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
7120 if (Dir->getDirectiveKind() == OMPD_distribute) {
7121 CS = Dir->getInnermostCapturedStmt();
7122 if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
7123 return NumThreads;
7124 }
7125 }
7126 return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
7127 }
7128 case OMPD_target_teams_distribute:
7129 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7130 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7131 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7132 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7133 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7134 ThreadLimitVal =
7135 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7136 }
7137 return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
7138 case OMPD_target_parallel:
7139 case OMPD_target_parallel_for:
7140 case OMPD_target_parallel_for_simd:
7141 case OMPD_target_teams_distribute_parallel_for:
7142 case OMPD_target_teams_distribute_parallel_for_simd: {
7143 llvm::Value *CondVal = nullptr;
7144 // Handle if clause. If if clause present, the number of threads is
7145 // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
7146 if (D.hasClausesOfKind<OMPIfClause>()) {
7147 const OMPIfClause *IfClause = nullptr;
7148 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
7149 if (C->getNameModifier() == OMPD_unknown ||
7150 C->getNameModifier() == OMPD_parallel) {
7151 IfClause = C;
7152 break;
7153 }
7154 }
7155 if (IfClause) {
7156 const Expr *Cond = IfClause->getCondition();
7157 bool Result;
7158 if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
7159 if (!Result)
7160 return Bld.getInt32(1);
7161 } else {
7162 CodeGenFunction::RunCleanupsScope Scope(CGF);
7163 CondVal = CGF.EvaluateExprAsBool(Cond);
7164 }
7165 }
7166 }
7167 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
7168 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
7169 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
7170 llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
7171 ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
7172 ThreadLimitVal =
7173 Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
7174 }
7175 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
7176 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
7177 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
7178 llvm::Value *NumThreads = CGF.EmitScalarExpr(
7179 NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
7180 NumThreadsVal =
7181 Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
7182 ThreadLimitVal = ThreadLimitVal
7183 ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
7184 ThreadLimitVal),
7185 NumThreadsVal, ThreadLimitVal)
7186 : NumThreadsVal;
7187 }
7188 if (!ThreadLimitVal)
7189 ThreadLimitVal = Bld.getInt32(0);
7190 if (CondVal)
7191 return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
7192 return ThreadLimitVal;
7193 }
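A hedged worked example of the case above (the pragma below is an assumption, not taken from this file):

// #pragma omp target parallel if(n > 1) num_threads(8) thread_limit(4)
//
// The code above folds num_threads into thread_limit via the unsigned
// compare-and-select, giving min(8, 4) = 4, and, because the if condition
// is not a compile-time constant, emits a final select so the runtime
// receives (n > 1) ? 4 : 1.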
7194 case OMPD_target_teams_distribute_simd:
7195 case OMPD_target_simd:
7196 return Bld.getInt32(1);
7197 case OMPD_parallel:
7198 case OMPD_for:
7199 case OMPD_parallel_for:
7200 case OMPD_parallel_master:
7201 case OMPD_parallel_sections:
7202 case OMPD_for_simd:
7203 case OMPD_parallel_for_simd:
7204 case OMPD_cancel:
7205 case OMPD_cancellation_point:
7206 case OMPD_ordered:
7207 case OMPD_threadprivate:
7208 case OMPD_allocate:
7209 case OMPD_task:
7210 case OMPD_simd:
7211 case OMPD_tile:
7212 case OMPD_unroll:
7213 case OMPD_sections:
7214 case OMPD_section:
7215 case OMPD_single:
7216 case OMPD_master:
7217 case OMPD_critical:
7218 case OMPD_taskyield:
7219 case OMPD_barrier:
7220 case OMPD_taskwait:
7221 case OMPD_taskgroup:
7222 case OMPD_atomic:
7223 case OMPD_flush:
7224 case OMPD_depobj:
7225 case OMPD_scan:
7226 case OMPD_teams:
7227 case OMPD_target_data:
7228 case OMPD_target_exit_data:
7229 case OMPD_target_enter_data:
7230 case OMPD_distribute:
7231 case OMPD_distribute_simd:
7232 case OMPD_distribute_parallel_for:
7233 case OMPD_distribute_parallel_for_simd:
7234 case OMPD_teams_distribute:
7235 case OMPD_teams_distribute_simd:
7236 case OMPD_teams_distribute_parallel_for:
7237 case OMPD_teams_distribute_parallel_for_simd:
7238 case OMPD_target_update:
7239 case OMPD_declare_simd:
7240 case OMPD_declare_variant:
7241 case OMPD_begin_declare_variant:
7242 case OMPD_end_declare_variant:
7243 case OMPD_declare_target:
7244 case OMPD_end_declare_target:
7245 case OMPD_declare_reduction:
7246 case OMPD_declare_mapper:
7247 case OMPD_taskloop:
7248 case OMPD_taskloop_simd:
7249 case OMPD_master_taskloop:
7250 case OMPD_master_taskloop_simd:
7251 case OMPD_parallel_master_taskloop:
7252 case OMPD_parallel_master_taskloop_simd:
7253 case OMPD_requires:
7254 case OMPD_metadirective:
7255 case OMPD_unknown:
7256 break;
7257 default:
7258 break;
7259 }
7260 llvm_unreachable("Unsupported directive kind.")::llvm::llvm_unreachable_internal("Unsupported directive kind."
, "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 7260)
;
7261}
7262
7263namespace {
7264 LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
7265
7266// Utility to handle information from clauses associated with a given
7267// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7268// It provides a convenient interface to obtain the information and generate
7269// code for that information.
7270class MappableExprsHandler {
7271public:
7272 /// Values for bit flags used to specify the mapping type for
7273 /// offloading.
7274 enum OpenMPOffloadMappingFlags : uint64_t {
7275 /// No flags
7276 OMP_MAP_NONE = 0x0,
7277 /// Allocate memory on the device and move data from host to device.
7278 OMP_MAP_TO = 0x01,
7279 /// Allocate memory on the device and move data from device to host.
7280 OMP_MAP_FROM = 0x02,
7281 /// Always perform the requested mapping action on the element, even
7282 /// if it was already mapped before.
7283 OMP_MAP_ALWAYS = 0x04,
7284 /// Delete the element from the device environment, ignoring the
7285 /// current reference count associated with the element.
7286 OMP_MAP_DELETE = 0x08,
7287 /// The element being mapped is a pointer-pointee pair; both the
7288 /// pointer and the pointee should be mapped.
7289 OMP_MAP_PTR_AND_OBJ = 0x10,
7290 /// This flag signals that the base address of an entry should be
7291 /// passed to the target kernel as an argument.
7292 OMP_MAP_TARGET_PARAM = 0x20,
7293 /// Signal that the runtime library has to return the device pointer
7294 /// in the current position for the data being mapped. Used when we have the
7295 /// use_device_ptr or use_device_addr clause.
7296 OMP_MAP_RETURN_PARAM = 0x40,
7297 /// This flag signals that the reference being passed is a pointer to
7298 /// private data.
7299 OMP_MAP_PRIVATE = 0x80,
7300 /// Pass the element to the device by value.
7301 OMP_MAP_LITERAL = 0x100,
7302 /// Implicit map
7303 OMP_MAP_IMPLICIT = 0x200,
7304 /// Close is a hint to the runtime to allocate memory close to
7305 /// the target device.
7306 OMP_MAP_CLOSE = 0x400,
7307 /// 0x800 is reserved for compatibility with XLC.
7308 /// Produce a runtime error if the data is not already allocated.
7309 OMP_MAP_PRESENT = 0x1000,
7310 // Increment and decrement a separate reference counter so that the data
7311 // cannot be unmapped within the associated region. Thus, this flag is
7312 // intended to be used on 'target' and 'target data' directives because they
7313 // are inherently structured. It is not intended to be used on 'target
7314 // enter data' and 'target exit data' directives because they are inherently
7315 // dynamic.
7316 // This is an OpenMP extension for the sake of OpenACC support.
7317 OMP_MAP_OMPX_HOLD = 0x2000,
7318 /// Signal that the runtime library should use args as an array of
7319 /// descriptor_dim pointers and use args_size as dims. Used when we have
7320 /// non-contiguous list items in the target update directive.
7321 OMP_MAP_NON_CONTIG = 0x100000000000,
7322 /// The 16 MSBs of the flags indicate whether the entry is member of some
7323 /// struct/class.
7324 OMP_MAP_MEMBER_OF = 0xffff000000000000,
7325 LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF)
7326 };
7327
7328 /// Get the offset of the OMP_MAP_MEMBER_OF field.
7329 static unsigned getFlagMemberOffset() {
7330 unsigned Offset = 0;
7331 for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
7332 Remain = Remain >> 1)
7333 Offset++;
7334 return Offset;
7335 }
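A minimal standalone sketch (an assumption, not part of CGOpenMPRuntime.cpp) showing that the bit scan above yields 48 for OMP_MAP_MEMBER_OF, i.e. the member index lives in the 16 MSBs:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t MemberOfMask = 0xffff000000000000ULL; // OMP_MAP_MEMBER_OF
  unsigned Offset = 0;
  // Count trailing zero bits, exactly as getFlagMemberOffset() does.
  for (uint64_t Remain = MemberOfMask; !(Remain & 1); Remain >>= 1)
    ++Offset;
  assert(Offset == 48);
  return 0;
}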
7336
7337 /// Class that holds debugging information for a data mapping to be passed to
7338 /// the runtime library.
7339 class MappingExprInfo {
7340 /// The variable declaration used for the data mapping.
7341 const ValueDecl *MapDecl = nullptr;
7342 /// The original expression used in the map clause, or null if there is
7343 /// none.
7344 const Expr *MapExpr = nullptr;
7345
7346 public:
7347 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
7348 : MapDecl(MapDecl), MapExpr(MapExpr) {}
7349
7350 const ValueDecl *getMapDecl() const { return MapDecl; }
7351 const Expr *getMapExpr() const { return MapExpr; }
7352 };
7353
7354 /// Class that associates information with a base pointer to be passed to the
7355 /// runtime library.
7356 class BasePointerInfo {
7357 /// The base pointer.
7358 llvm::Value *Ptr = nullptr;
7359 /// The base declaration that refers to this device pointer, or null if
7360 /// there is none.
7361 const ValueDecl *DevPtrDecl = nullptr;
7362
7363 public:
7364 BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7365 : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
7366 llvm::Value *operator*() const { return Ptr; }
7367 const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
7368 void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7369 };
7370
7371 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
7372 using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7373 using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7374 using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7375 using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
7376 using MapDimArrayTy = SmallVector<uint64_t, 4>;
7377 using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
7378
7379 /// This structure contains combined information generated for mappable
7380 /// clauses, including base pointers, pointers, sizes, map types, user-defined
7381 /// mappers, and non-contiguous information.
7382 struct MapCombinedInfoTy {
7383 struct StructNonContiguousInfo {
7384 bool IsNonContiguous = false;
7385 MapDimArrayTy Dims;
7386 MapNonContiguousArrayTy Offsets;
7387 MapNonContiguousArrayTy Counts;
7388 MapNonContiguousArrayTy Strides;
7389 };
7390 MapExprsArrayTy Exprs;
7391 MapBaseValuesArrayTy BasePointers;
7392 MapValuesArrayTy Pointers;
7393 MapValuesArrayTy Sizes;
7394 MapFlagsArrayTy Types;
7395 MapMappersArrayTy Mappers;
7396 StructNonContiguousInfo NonContigInfo;
7397
7398 /// Append arrays in \a CurInfo.
7399 void append(MapCombinedInfoTy &CurInfo) {
7400 Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
7401 BasePointers.append(CurInfo.BasePointers.begin(),
7402 CurInfo.BasePointers.end());
7403 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
7404 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
7405 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
7406 Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
7407 NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
7408 CurInfo.NonContigInfo.Dims.end());
7409 NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
7410 CurInfo.NonContigInfo.Offsets.end());
7411 NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
7412 CurInfo.NonContigInfo.Counts.end());
7413 NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
7414 CurInfo.NonContigInfo.Strides.end());
7415 }
7416 };
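A hedged note (an inference from the code above, not stated in the source): the arrays in MapCombinedInfoTy are parallel, i.e. entry i of Exprs, BasePointers, Pointers, Sizes, Types and Mappers all describe the same map entry, which is why append() extends every array by the same number of elements.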
7417
7418 /// Map between a struct and its lowest & highest elements which have been
7419 /// mapped.
7420 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7421 /// HE(FieldIndex, Pointer)}
7422 struct StructRangeInfoTy {
7423 MapCombinedInfoTy PreliminaryMapData;
7424 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7425 0, Address::invalid()};
7426 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7427 0, Address::invalid()};
7428 Address Base = Address::invalid();
7429 Address LB = Address::invalid();
7430 bool IsArraySection = false;
7431 bool HasCompleteRecord = false;
7432 };
7433
7434private:
7435 /// Kind that defines how a device pointer has to be returned.
7436 struct MapInfo {
7437 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
7438 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
7439 ArrayRef<OpenMPMapModifierKind> MapModifiers;
7440 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
7441 bool ReturnDevicePointer = false;
7442 bool IsImplicit = false;
7443 const ValueDecl *Mapper = nullptr;
7444 const Expr *VarRef = nullptr;
7445 bool ForDeviceAddr = false;
7446
7447 MapInfo() = default;
7448 MapInfo(
7449 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7450 OpenMPMapClauseKind MapType,
7451 ArrayRef<OpenMPMapModifierKind> MapModifiers,
7452 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7453 bool ReturnDevicePointer, bool IsImplicit,
7454 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
7455 bool ForDeviceAddr = false)
7456 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7457 MotionModifiers(MotionModifiers),
7458 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
7459 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
7460 };
7461
7462 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
7463 /// member and there is no map information about it, then emission of that
7464 /// entry is deferred until the whole struct has been processed.
7465 struct DeferredDevicePtrEntryTy {
7466 const Expr *IE = nullptr;
7467 const ValueDecl *VD = nullptr;
7468 bool ForDeviceAddr = false;
7469
7470 DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
7471 bool ForDeviceAddr)
7472 : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
7473 };
7474
7475 /// The target directive from where the mappable clauses were extracted. It
7476 /// is either an executable directive or a user-defined mapper directive.
7477 llvm::PointerUnion<const OMPExecutableDirective *,
7478 const OMPDeclareMapperDecl *>
7479 CurDir;
7480
7481 /// Function the directive is being generated for.
7482 CodeGenFunction &CGF;
7483
7484 /// Set of all firstprivate variables in the current directive.
7485 /// bool data is set to true if the variable is implicitly marked as
7486 /// firstprivate, false otherwise.
7487 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7488
7489 /// Map between device pointer declarations and their expression components.
7490 /// The key value for declarations in 'this' is null.
7491 llvm::DenseMap<
7492 const ValueDecl *,
7493 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
7494 DevPointersMap;
7495
7496 /// Map between lambda declarations and their map type.
7497 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
7498
7499 llvm::Value *getExprTypeSize(const Expr *E) const {
7500 QualType ExprTy = E->getType().getCanonicalType();
7501
7502 // Calculate the size for array shaping expression.
7503 if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
7504 llvm::Value *Size =
7505 CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
7506 for (const Expr *SE : OAE->getDimensions()) {
7507 llvm::Value *Sz = CGF.EmitScalarExpr(SE);
7508 Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
7509 CGF.getContext().getSizeType(),
7510 SE->getExprLoc());
7511 Size = CGF.Builder.CreateNUWMul(Size, Sz);
7512 }
7513 return Size;
7514 }
7515
7516 // Reference types are ignored for mapping purposes.
7517 if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7518 ExprTy = RefTy->getPointeeType().getCanonicalType();
7519
7520 // Given that an array section is considered a built-in type, we need to
7521 // do the calculation based on the length of the section instead of relying
7522 // on CGF.getTypeSize(E->getType()).
7523 if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
7524 QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
7525 OAE->getBase()->IgnoreParenImpCasts())
7526 .getCanonicalType();
7527
7528 // If there is no length associated with the expression and no lower bound
7529 // is specified either, that means we are using the whole length of the
7530 // base.
7531 if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7532 !OAE->getLowerBound())
7533 return CGF.getTypeSize(BaseTy);
7534
7535 llvm::Value *ElemSize;
7536 if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7537 ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7538 } else {
7539 const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7540 assert(ATy && "Expecting array type if not a pointer type.");
7541 ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7542 }
7543
7544 // If we don't have a length at this point, that is because we have an
7545 // array section with a single element.
7546 if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
7547 return ElemSize;
7548
7549 if (const Expr *LenExpr = OAE->getLength()) {
7550 llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
7551 LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
7552 CGF.getContext().getSizeType(),
7553 LenExpr->getExprLoc());
7554 return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7555 }
7556 assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
7557 OAE->getLowerBound() && "expected array_section[lb:].");
7558 // Size = sizeof(base type) - lb * sizeof(element type);
7559 llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
7560 llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
7561 LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
7562 CGF.getContext().getSizeType(),
7563 OAE->getLowerBound()->getExprLoc());
7564 LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
7565 llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
7566 llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
7567 LengthVal = CGF.Builder.CreateSelect(
7568 Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
7569 return LengthVal;
7570 }
7571 return CGF.getTypeSize(ExprTy);
7572 }
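A hedged sketch (an assumed standalone helper, not compiler code) of the size the code above computes for a section of the form base[lb:] — the whole-type size minus the lower-bound offset, clamped to zero exactly as the CreateSelect on the unsigned comparison does:

#include <cstddef>

size_t sectionSizeFromLB(size_t BaseSize, size_t LB, size_t ElemSize) {
  size_t LBBytes = LB * ElemSize;                      // lb * elemtype
  return BaseSize > LBBytes ? BaseSize - LBBytes : 0;  // clamp at zero
}

// e.g. for int a[100], the section a[10:] gives
//   sectionSizeFromLB(400, 10, 4) == 360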
7573
7574 /// Return the corresponding bits for a given map clause modifier. Add
7575 /// a flag marking the map as a pointer if requested. Add a flag marking the
7576 /// map as the first one of a series of maps that relate to the same map
7577 /// expression.
7578 OpenMPOffloadMappingFlags getMapTypeBits(
7579 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7580 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
7581 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
7582 OpenMPOffloadMappingFlags Bits =
7583 IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7584 switch (MapType) {
7585 case OMPC_MAP_alloc:
7586 case OMPC_MAP_release:
7587 // alloc and release is the default behavior in the runtime library, i.e.
7588 // if we don't pass any bits alloc/release that is what the runtime is
7589 // going to do. Therefore, we don't need to signal anything for these two
7590 // type modifiers.
7591 break;
7592 case OMPC_MAP_to:
7593 Bits |= OMP_MAP_TO;
7594 break;
7595 case OMPC_MAP_from:
7596 Bits |= OMP_MAP_FROM;
7597 break;
7598 case OMPC_MAP_tofrom:
7599 Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7600 break;
7601 case OMPC_MAP_delete:
7602 Bits |= OMP_MAP_DELETE;
7603 break;
7604 case OMPC_MAP_unknown:
7605 llvm_unreachable("Unexpected map type!")::llvm::llvm_unreachable_internal("Unexpected map type!", "clang/lib/CodeGen/CGOpenMPRuntime.cpp"
, 7605)
;
7606 }
7607 if (AddPtrFlag)
7608 Bits |= OMP_MAP_PTR_AND_OBJ;
7609 if (AddIsTargetParamFlag)
7610 Bits |= OMP_MAP_TARGET_PARAM;
7611 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
7612 Bits |= OMP_MAP_ALWAYS;
7613 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
7614 Bits |= OMP_MAP_CLOSE;
7615 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
7616 llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
7617 Bits |= OMP_MAP_PRESENT;
7618 if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
7619 Bits |= OMP_MAP_OMPX_HOLD;
7620 if (IsNonContiguous)
7621 Bits |= OMP_MAP_NON_CONTIG;
7622 return Bits;
7623 }
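A hedged worked example of the composition above (the clause is an assumption): for map(always, close, tofrom: x) on an explicit map, the function returns

// Bits = OMP_MAP_TO | OMP_MAP_FROM   // tofrom
//      | OMP_MAP_ALWAYS              // 'always' modifier
//      | OMP_MAP_CLOSE               // 'close' modifier
//      = 0x01 | 0x02 | 0x04 | 0x400 = 0x407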
7624
7625 /// Return true if the provided expression is a final array section. A
7626 /// final array section is one whose length can't be proved to be one.
7627 bool isFinalArraySectionExpression(const Expr *E) const {
7628 const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7629
7630 // It is not an array section and therefore not a unity-size one.
7631 if (!OASE)
7632 return false;
7633
7634 // An array section with no colon always refers to a single element.
7635 if (OASE->getColonLocFirst().isInvalid())
7636 return false;
7637
7638 const Expr *Length = OASE->getLength();
7639
7640 // If we don't have a length we have to check if the array has size 1
7641 // for this dimension. Also, we should always expect a length if the
7642 // base type is a pointer.
7643 if (!Length) {
7644 QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
7645 OASE->getBase()->IgnoreParenImpCasts())
7646 .getCanonicalType();
7647 if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7648 return ATy->getSize().getSExtValue() != 1;
7649 // If we don't have a constant dimension length, we have to consider
7650 // the current section as having any size, so it is not necessarily
7651 // unitary. If it happens to be unity size, that's the user's fault.
7652 return true;
7653 }
7654
7655 // Check if the length evaluates to 1.
7656 Expr::EvalResult Result;
7657 if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7658 return true; // Can have more than size 1.
7659
7660 llvm::APSInt ConstLength = Result.Val.getInt();
7661 return ConstLength.getSExtValue() != 1;
7662 }
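Hedged examples of what the predicate above returns (the declarations are assumptions):

// int a[10]; int *p; int n;  (n not a compile-time constant)
//   a[3]    -> false  (not an array section at all)
//   a[0:1]  -> false  (length provably 1)
//   a[0:n]  -> true   (length cannot be proved to be 1)
//   a[2:]   -> true   (constant dimension size 10 != 1)
//   p[4:1]  -> false  (length provably 1)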
7663
7664 /// Generate the base pointers, section pointers, sizes, map type bits, and
7665 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7666 /// map type, map or motion modifiers, and expression components.
7667 /// \a IsFirstComponent should be set to true if the provided set of
7668 /// components is the first associated with a capture.
7669 void generateInfoForComponentList(
7670 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7671 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7672 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7673 MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
7674 bool IsFirstComponentList, bool IsImplicit,
7675 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7676 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7677 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7678 OverlappedElements = llvm::None) const {
7679 // The following summarizes what has to be generated for each map and the
7680 // types below. The generated information is expressed in this order:
7681 // base pointer, section pointer, size, flags
7682 // (to add to the ones that come from the map type and modifier).
7683 //
7684 // double d;
7685 // int i[100];
7686 // float *p;
7687 //
7688 // struct S1 {
7689 // int i;
7690 // float f[50];
7691 // }
7692 // struct S2 {
7693 // int i;
7694 // float f[50];
7695 // S1 s;
7696 // double *p;
7697 // struct S2 *ps;
7698 // int &ref;
7699 // }
7700 // S2 s;
7701 // S2 *ps;
7702 //
7703 // map(d)
7704 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7705 //
7706 // map(i)
7707 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7708 //
7709 // map(i[1:23])
7710 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7711 //
7712 // map(p)
7713 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7714 //
7715 // map(p[1:24])
7716 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7717 // in unified shared memory mode or for local pointers
7718 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7719 //
7720 // map(s)
7721 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7722 //
7723 // map(s.i)
7724 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7725 //
7726 // map(s.s.f)
7727 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7728 //
7729 // map(s.p)
7730 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7731 //
7732 // map(to: s.p[:22])
7733 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7734 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7735 // &(s.p), &(s.p[0]), 22*sizeof(double),
7736 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7737 // (*) alloc space for struct members, only this is a target parameter
7738 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7739 // optimizes this entry out, same in the examples below)
7740 // (***) map the pointee (map: to)
7741 //
7742 // map(to: s.ref)
7743 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7744 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7745 // (*) alloc space for struct members, only this is a target parameter
7746 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7747 // optimizes this entry out, same in the examples below)
7748 // (***) map the pointee (map: to)
7749 //
7750 // map(s.ps)
7751 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7752 //
7753 // map(from: s.ps->s.i)
7754 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7755 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7756 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7757 //
7758 // map(to: s.ps->ps)
7759 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7760 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7761 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7762 //
7763 // map(s.ps->ps->ps)
7764 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7765 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7766 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7767 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7768 //
7769 // map(to: s.ps->ps->s.f[:22])
7770 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7771 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7772 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7773 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7774 //
7775 // map(ps)
7776 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7777 //
7778 // map(ps->i)
7779 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7780 //
7781 // map(ps->s.f)
7782 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7783 //
7784 // map(from: ps->p)
7785 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7786 //
7787 // map(to: ps->p[:22])
7788 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7789 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7790 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7791 //
7792 // map(ps->ps)
7793 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7794 //
7795 // map(from: ps->ps->s.i)
7796 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7797 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7798 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7799 //
7800 // map(from: ps->ps->ps)
7801 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7802 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7803 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7804 //
7805 // map(ps->ps->ps->ps)
7806 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7807 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7808 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7809 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7810 //
7811 // map(to: ps->ps->ps->s.f[:22])
7812 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7813 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7814 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7815 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7816 //
7817 // map(to: s.f[:22]) map(from: s.p[:33])
7818 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7819 // sizeof(double*) (*), TARGET_PARAM
7820 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7821 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7822 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7823 // (*) allocate contiguous space needed to fit all mapped members even if
7824 // we allocate space for members not mapped (in this example,
7825 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7826 // them as well because they fall between &s.f[0] and &s.p)
7827 //
7828 // map(from: s.f[:22]) map(to: ps->p[:33])
7829 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7830 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7831 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7832 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7833 // (*) the struct this entry pertains to is the 2nd element in the list of
7834 // arguments, hence MEMBER_OF(2)
7835 //
7836 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7837 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7838 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7839 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7840 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7841 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7842 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7843 // (*) the struct this entry pertains to is the 4th element in the list
7844 // of arguments, hence MEMBER_OF(4)
7845
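A hedged user-level illustration (the pragma is an assumption; S2 and s are the declarations from the comment above):

// S2 s;
// #pragma omp target map(to: s.p[:22])
// { /* device code may read s.p[0..21] */ }
//
// This produces exactly the three entries listed under "map(to: s.p[:22])":
// the TARGET_PARAM allocation for the struct, the MEMBER_OF(1) pointer
// entry, and the MEMBER_OF(1) | PTR_AND_OBJ | TO pointee entry.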
7846 // Track if the map information being generated is the first for a capture.
7847 bool IsCaptureFirstInfo = IsFirstComponentList;
7848 // When the variable is on a declare target link or in a to clause with
7849 // unified memory, a reference is needed to hold the host/device address
7850 // of the variable.
7851 bool RequiresReference = false;
7852
7853 // Scan the components from the base to the complete expression.
7854 auto CI = Components.rbegin();
7855 auto CE = Components.rend();
7856 auto I = CI;
7857
7858 // Track if the map information being generated is the first for a list of
7859 // components.
7860 bool IsExpressionFirstInfo = true;
7861 bool FirstPointerInComplexData = false;
7862 Address BP = Address::invalid();
7863 const Expr *AssocExpr = I->getAssociatedExpression();
7864 const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
1. Assuming 'AssocExpr' is not a 'ArraySubscriptExpr'
7865 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
2. Assuming 'AssocExpr' is not a 'OMPArraySectionExpr'
7866 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);
3. Assuming 'AssocExpr' is not a 'OMPArrayShapingExpr'
7867
7868 if (isa<MemberExpr>(AssocExpr)) {
4. Assuming 'AssocExpr' is a 'MemberExpr'
5. Taking true branch
7869 // The base is the 'this' pointer. The content of the pointer is going
7870 // to be the base of the field being mapped.
7871 BP = CGF.LoadCXXThisAddress();
7872 } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7873 (OASE &&
7874 isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7875 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7876 } else if (OAShE &&
7877 isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
7878 BP = Address(
7879 CGF.EmitScalarExpr(OAShE->getBase()),
7880 CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
7881 } else {
7882 // The base is the reference to the variable.
7883 // BP = &Var.
7884 BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
7885 if (const auto *VD =
7886 dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7887 if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7888 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7889 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7890 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7891 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7892 RequiresReference = true;
7893 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7894 }
7895 }
7896 }
7897
7898 // If the variable is a pointer and is being dereferenced (i.e. is not
7899 // the last component), the base has to be the pointer itself, not its
7900 // reference. References are ignored for mapping purposes.
7901 QualType Ty =
7902 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7903 if (Ty->isAnyPointerType() && std::next(I) != CE) {
7904 // No need to generate individual map information for the pointer, it
7905 // can be associated with the combined storage if shared memory mode is
7906 // active or the base declaration is not a global variable.
7907 const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
7908 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7909 !VD || VD->hasLocalStorage())
7910 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7911 else
7912 FirstPointerInComplexData = true;
7913 ++I;
7914 }
7915 }
7916
7917 // Track whether a component of the list should be marked as MEMBER_OF some
7918 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7919 // in a component list should be marked as MEMBER_OF; all subsequent entries
7920 // do not belong to the base struct. E.g.
7921 // struct S2 s;
7922 // s.ps->ps->ps->f[:]
7923 // (1) (2) (3) (4)
7924 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7925 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7926 // is the pointee of ps(2), which is not a member of struct s, so it should not
7927 // be marked as such (it is still PTR_AND_OBJ).
7928 // The variable is initialized to false so that PTR_AND_OBJ entries which
7929 // are not struct members are not considered (e.g. array of pointers to
7930 // data).
7931 bool ShouldBeMemberOf = false;
7932
7933 // Variable keeping track of whether or not we have encountered a component
7934 // in the component list which is a member expression. Useful when we have a
7935 // pointer or a final array section, in which case it is the previous
7936 // component in the list which tells us whether we have a member expression.
7937 // E.g. X.f[:]
7938 // While processing the final array section "[:]" it is "f" which tells us
7939 // whether we are dealing with a member of a declared struct.
7940 const MemberExpr *EncounteredME = nullptr;
7941
7942 // Track the total number of dimensions. Start from one for the dummy
7943 // dimension.
7944 uint64_t DimSize = 1;
7945
7946 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7947 bool IsPrevMemberReference = false;
7948
7949 for (; I != CE; ++I) {
6. Calling 'operator!=<const clang::OMPClauseMappableExprCommon::MappableComponent *>'
12. Returning from 'operator!=<const clang::OMPClauseMappableExprCommon::MappableComponent *>'
13. Loop condition is false. Execution continues on line 8258
7950 // If the current component is member of a struct (parent struct) mark it.
7951 if (!EncounteredME) {
7952 EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7953 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7954 // as MEMBER_OF the parent struct.
7955 if (EncounteredME) {
7956 ShouldBeMemberOf = true;
7957 // Do not emit as complex pointer if this is actually not array-like
7958 // expression.
7959 if (FirstPointerInComplexData) {
7960 QualType Ty = std::prev(I)
7961 ->getAssociatedDeclaration()
7962 ->getType()
7963 .getNonReferenceType();
7964 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7965 FirstPointerInComplexData = false;
7966 }
7967 }
7968 }
7969
7970 auto Next = std::next(I);
7971
7972 // We need to generate the addresses and sizes if this is the last
7973 // component, if the component is a pointer or if it is an array section
7974 // whose length can't be proved to be one. If this is a pointer, it
7975 // becomes the base address for the following components.
7976
7977 // A final array section is one whose length can't be proved to be one.
7978 // If the map item is non-contiguous then we don't treat any array section
7979 // as final array section.
7980 bool IsFinalArraySection =
7981 !IsNonContiguous &&
7982 isFinalArraySectionExpression(I->getAssociatedExpression());
7983
7984 // If we have a declaration for the mapping use that, otherwise use
7985 // the base declaration of the map clause.
7986 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7987 ? I->getAssociatedDeclaration()
7988 : BaseDecl;
7989 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7990 : MapExpr;
7991
7992 // Get information on whether the element is a pointer. We have to
7993 // treat array sections specially given that they are built-in
7994 // types.
7995 const auto *OASE =
7996 dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7997 const auto *OAShE =
7998 dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
7999 const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
8000 const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
8001 bool IsPointer =
8002 OAShE ||
8003 (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
8004 .getCanonicalType()
8005 ->isAnyPointerType()) ||
8006 I->getAssociatedExpression()->getType()->isAnyPointerType();
8007 bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
8008 MapDecl &&
8009 MapDecl->getType()->isLValueReferenceType();
8010 bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;
8011
8012 if (OASE)
8013 ++DimSize;
8014
8015 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
8016 IsFinalArraySection) {
8017 // If this is not the last component, we expect the pointer to be
8018 // associated with an array expression or member expression.
8019 assert((Next == CE ||
8020 isa<MemberExpr>(Next->getAssociatedExpression()) ||
8021 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
8022 isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
8023 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
8024 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
8025 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
8026 "Unexpected expression");
8027
8028 Address LB = Address::invalid();
8029 Address LowestElem = Address::invalid();
8030 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
8031 const MemberExpr *E) {
8032 const Expr *BaseExpr = E->getBase();
8033 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
8034 // scalar.
8035 LValue BaseLV;
8036 if (E->isArrow()) {
8037 LValueBaseInfo BaseInfo;
8038 TBAAAccessInfo TBAAInfo;
8039 Address Addr =
8040 CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
8041 QualType PtrTy = BaseExpr->getType()->getPointeeType();
8042 BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
8043 } else {
8044 BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
8045 }
8046 return BaseLV;
8047 };
8048 if (OAShE) {
8049 LowestElem = LB = Address(CGF.EmitScalarExpr(OAShE->getBase()),
8050 CGF.getContext().getTypeAlignInChars(
8051 OAShE->getBase()->getType()));
8052 } else if (IsMemberReference) {
8053 const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
8054 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8055 LowestElem = CGF.EmitLValueForFieldInitialization(
8056 BaseLVal, cast<FieldDecl>(MapDecl))
8057 .getAddress(CGF);
8058 LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
8059 .getAddress(CGF);
8060 } else {
8061 LowestElem = LB =
8062 CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
8063 .getAddress(CGF);
8064 }
8065
8066 // If this component is a pointer inside the base struct then we don't
8067 // need to create any entry for it - it will be combined with the object
8068 // it is pointing to into a single PTR_AND_OBJ entry.
8069 bool IsMemberPointerOrAddr =
8070 EncounteredME &&
8071 (((IsPointer || ForDeviceAddr) &&
8072 I->getAssociatedExpression() == EncounteredME) ||
8073 (IsPrevMemberReference && !IsPointer) ||
8074 (IsMemberReference && Next != CE &&
8075 !Next->getAssociatedExpression()->getType()->isPointerType()));
8076 if (!OverlappedElements.empty() && Next == CE) {
8077 // Handle base element with the info for overlapped elements.
8078 assert(!PartialStruct.Base.isValid() && "The base element is set.");
8079 assert(!IsPointer &&
8080 "Unexpected base element with the pointer type.");
8081 // Mark the whole struct as the struct that requires allocation on the
8082 // device.
8083 PartialStruct.LowestElem = {0, LowestElem};
8084 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
8085 I->getAssociatedExpression()->getType());
8086 Address HB = CGF.Builder.CreateConstGEP(
8087 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(LowestElem,
8088 CGF.VoidPtrTy),
8089 TypeSize.getQuantity() - 1);
8090 PartialStruct.HighestElem = {
8091 std::numeric_limits<decltype(
8092 PartialStruct.HighestElem.first)>::max(),
8093 HB};
8094 PartialStruct.Base = BP;
8095 PartialStruct.LB = LB;
8096 assert(
8097 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
8098 "Overlapped elements must be used only once for the variable.");
8099 std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
8100 // Emit data for non-overlapped data.
8101 OpenMPOffloadMappingFlags Flags =
8102 OMP_MAP_MEMBER_OF |
8103 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
8104 /*AddPtrFlag=*/false,
8105 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
8106 llvm::Value *Size = nullptr;
8107 // Do bitcopy of all non-overlapped structure elements.
8108 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
8109 Component : OverlappedElements) {
8110 Address ComponentLB = Address::invalid();
8111 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
8112 Component) {
8113 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
8114 const auto *FD = dyn_cast<FieldDecl>(VD);
8115 if (FD && FD->getType()->isLValueReferenceType()) {
8116 const auto *ME =
8117 cast<MemberExpr>(MC.getAssociatedExpression());
8118 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
8119 ComponentLB =
8120 CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
8121 .getAddress(CGF);
8122 } else {
8123 ComponentLB =
8124 CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
8125 .getAddress(CGF);
8126 }
8127 Size = CGF.Builder.CreatePtrDiff(
8128 CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
8129 CGF.EmitCastToVoidPtr(LB.getPointer()));
8130 break;
8131 }
8132 }
8133 assert(Size && "Failed to determine structure size");
8134 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8135 CombinedInfo.BasePointers.push_back(BP.getPointer());
8136 CombinedInfo.Pointers.push_back(LB.getPointer());
8137 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
8138 Size, CGF.Int64Ty, /*isSigned=*/true));
8139 CombinedInfo.Types.push_back(Flags);
8140 CombinedInfo.Mappers.push_back(nullptr);
8141 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8142 : 1);
8143 LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
8144 }
8145 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8146 CombinedInfo.BasePointers.push_back(BP.getPointer());
8147 CombinedInfo.Pointers.push_back(LB.getPointer());
8148 Size = CGF.Builder.CreatePtrDiff(
8149 CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
8150 CGF.EmitCastToVoidPtr(LB.getPointer()));
8151 CombinedInfo.Sizes.push_back(
8152 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8153 CombinedInfo.Types.push_back(Flags);
8154 CombinedInfo.Mappers.push_back(nullptr);
8155 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8156 : 1);
8157 break;
8158 }
8159 llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
8160 if (!IsMemberPointerOrAddr ||
8161 (Next == CE && MapType != OMPC_MAP_unknown)) {
8162 CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
8163 CombinedInfo.BasePointers.push_back(BP.getPointer());
8164 CombinedInfo.Pointers.push_back(LB.getPointer());
8165 CombinedInfo.Sizes.push_back(
8166 CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
8167 CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
8168 : 1);
8169
8170 // If Mapper is valid, the last component inherits the mapper.
8171 bool HasMapper = Mapper && Next == CE;
8172 CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
8173
8174 // We need to add a pointer flag for each map that comes from the
8175 // same expression except for the first one. We also need to signal
8176 // this map is the first one that relates to the current capture
8177 // (there is a set of entries for each capture).
8178 OpenMPOffloadMappingFlags Flags = getMapTypeBits(
8179 MapType, MapModifiers, MotionModifiers, IsImplicit,
8180 !IsExpressionFirstInfo || RequiresReference ||
8181 FirstPointerInComplexData || IsMemberReference,
8182 IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);
8183
8184 if (!IsExpressionFirstInfo || IsMemberReference) {
8185 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
8186 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
8187 if (IsPointer || (IsMemberReference && Next != CE))
8188 Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
8189 OMP_MAP_DELETE | OMP_MAP_CLOSE);
8190
8191 if (ShouldBeMemberOf) {
8192 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
8193 // should be later updated with the correct value of MEMBER_OF.
8194 Flags |= OMP_MAP_MEMBER_OF;
8195 // From now on, all subsequent PTR_AND_OBJ entries should not be
8196 // marked as MEMBER_OF.
8197 ShouldBeMemberOf = false;
8198 }
8199 }
8200
8201 CombinedInfo.Types.push_back(Flags);
8202 }
8203
8204 // If we have encountered a member expression so far, keep track of the
8205 // mapped member. If the parent is "*this", then the value declaration
8206 // is nullptr.
8207 if (EncounteredME) {
8208 const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
8209 unsigned FieldIndex = FD->getFieldIndex();
8210
8211 // Update info about the lowest and highest elements for this struct
8212 if (!PartialStruct.Base.isValid()) {
8213 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8214 if (IsFinalArraySection) {
8215 Address HB =
8216 CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
8217 .getAddress(CGF);
8218 PartialStruct.HighestElem = {FieldIndex, HB};
8219 } else {
8220 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8221 }
8222 PartialStruct.Base = BP;
8223 PartialStruct.LB = BP;
8224 } else if (FieldIndex < PartialStruct.LowestElem.first) {
8225 PartialStruct.LowestElem = {FieldIndex, LowestElem};
8226 } else if (FieldIndex > PartialStruct.HighestElem.first) {
8227 PartialStruct.HighestElem = {FieldIndex, LowestElem};
8228 }
8229 }
8230
8231 // Need to emit combined struct for array sections.
8232 if (IsFinalArraySection || IsNonContiguous)
8233 PartialStruct.IsArraySection = true;
8234
8235 // If we have a final array section, we are done with this expression.
8236 if (IsFinalArraySection)
8237 break;
8238
8239 // The pointer becomes the base for the next element.
8240 if (Next != CE)
8241 BP = IsMemberReference ? LowestElem : LB;
8242
8243 IsExpressionFirstInfo = false;
8244 IsCaptureFirstInfo = false;
8245 FirstPointerInComplexData = false;
8246 IsPrevMemberReference = IsMemberReference;
8247 } else if (FirstPointerInComplexData) {
8248 QualType Ty = Components.rbegin()
8249 ->getAssociatedDeclaration()
8250 ->getType()
8251 .getNonReferenceType();
8252 BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
8253 FirstPointerInComplexData = false;
8254 }
8255 }
8256 // If we ran through the whole component list, allocate space for the whole
8257 // record.
8258 if (!EncounteredME)
13.1
'EncounteredME' is null
14
Taking true branch
8259 PartialStruct.HasCompleteRecord = true;
8260
8261 if (!IsNonContiguous)
15
Assuming 'IsNonContiguous' is true
16
Taking false branch
8262 return;
8263
8264 const ASTContext &Context = CGF.getContext();
8265
8266 // For supporting stride in array section, we need to initialize the first
8267 // dimension size as 1, first offset as 0, and first count as 1
8268 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
8269 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8270 MapValuesArrayTy CurStrides;
8271 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
8272 uint64_t ElementTypeSize;
17
'ElementTypeSize' declared without an initial value
8273
8274 // Collect Size information for each dimension and get the element size as
8275 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
8276 // should be [10, 10] and the first stride is 4 bytes.
8277 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8278 Components) {
18
Assuming '__begin2' is equal to '__end2'
8279 const Expr *AssocExpr = Component.getAssociatedExpression();
8280 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8281
8282 if (!OASE)
8283 continue;
8284
8285 QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
8286 auto *CAT = Context.getAsConstantArrayType(Ty);
8287 auto *VAT = Context.getAsVariableArrayType(Ty);
8288
8289 // We need all the dimension sizes except for the last one.
8290 assert((VAT || CAT || &Component == &*Components.begin()) &&
8291 "Should be either ConstantArray or VariableArray if not the "
8292 "first Component");
8293
8294 // Get element size if CurStrides is empty.
8295 if (CurStrides.empty()) {
8296 const Type *ElementType = nullptr;
8297 if (CAT)
8298 ElementType = CAT->getElementType().getTypePtr();
8299 else if (VAT)
8300 ElementType = VAT->getElementType().getTypePtr();
8301 else
8302 assert(&Component == &*Components.begin() &&
8303 "Only expect pointer (non CAT or VAT) when this is the "
8304 "first Component");
8305 // If ElementType is null, then it means the base is a pointer
8306 // (neither CAT nor VAT) and we'll attempt to get ElementType again
8307 // on the next iteration.
8308 if (ElementType) {
8309 // In the case of a pointer as the base, we need to remove one
8310 // level of indirection.
8311 if (&Component != &*Components.begin())
8312 ElementType = ElementType->getPointeeOrArrayElementType();
8313 ElementTypeSize =
8314 Context.getTypeSizeInChars(ElementType).getQuantity();
8315 CurStrides.push_back(
8316 llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
8317 }
8318 }
8319 // Get dimension value except for the last dimension since we don't need
8320 // it.
8321 if (DimSizes.size() < Components.size() - 1) {
8322 if (CAT)
8323 DimSizes.push_back(llvm::ConstantInt::get(
8324 CGF.Int64Ty, CAT->getSize().getZExtValue()));
8325 else if (VAT)
8326 DimSizes.push_back(CGF.Builder.CreateIntCast(
8327 CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
8328 /*IsSigned=*/false));
8329 }
8330 }
8331
8332 // Skip the dummy dimension since we already have its information.
8333 auto DI = DimSizes.begin() + 1;
8334 // Product of dimension.
8335 llvm::Value *DimProd =
8336 llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
19
2nd function call argument is an uninitialized value
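Note: step 18 above assumes the loop over 'Components' is never entered, so
the assignment at lines 8313-8314 is skipped and 'ElementTypeSize' is still
indeterminate when it is read here. A minimal defensive sketch (illustrative
only, not the upstream fix) gives the variable a defined value on every path
and makes the implicit invariant explicit:

    uint64_t ElementTypeSize = 0; // sketch: defined on every path
    // ... dimension scan as above ...
    assert(ElementTypeSize != 0 &&
           "element size should be set while collecting dimension sizes");
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);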
8337
8338 // Collect info for non-contiguous maps. Notice that offset, count, and
8339 // stride are only meaningful for array sections, so we insert a null for
8340 // anything other than an array section.
8341 // Also, the number of offsets, counts, and strides does not match the
8342 // number of pointers, base_pointers, sizes, or dims. Instead, it equals
8343 // the number of non-contiguous declarations in the target update to/from
8344 // clause.
8345 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
8346 Components) {
8347 const Expr *AssocExpr = Component.getAssociatedExpression();
8348
8349 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
8350 llvm::Value *Offset = CGF.Builder.CreateIntCast(
8351 CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
8352 /*isSigned=*/false);
8353 CurOffsets.push_back(Offset);
8354 CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
8355 CurStrides.push_back(CurStrides.back());
8356 continue;
8357 }
8358
8359 const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
8360
8361 if (!OASE)
8362 continue;
8363
8364 // Offset
8365 const Expr *OffsetExpr = OASE->getLowerBound();
8366 llvm::Value *Offset = nullptr;
8367 if (!OffsetExpr) {
8368 // If offset is absent, then we just set it to zero.
8369 Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
8370 } else {
8371 Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
8372 CGF.Int64Ty,
8373 /*isSigned=*/false);
8374 }
8375 CurOffsets.push_back(Offset);
8376
8377 // Count
8378 const Expr *CountExpr = OASE->getLength();
8379 llvm::Value *Count = nullptr;
8380 if (!CountExpr) {
8381 // In Clang, once a higher dimension is an array section, all the lower
8382 // dimensions are constructed as array sections too. However, for a case
8383 // like arr[0:2][2], Clang constructs the inner dimension as an array
8384 // section even though, according to the spec, it is not actually one.
8385 if (!OASE->getColonLocFirst().isValid() &&
8386 !OASE->getColonLocSecond().isValid()) {
8387 Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
8388 } else {
8389 // OpenMP 5.0, 2.1.5 Array Sections, Description.
8390 // When the length is absent it defaults to ⌈(size −
8391 // lower-bound)/stride⌉, where size is the size of the array
8392 // dimension.
8393 const Expr *StrideExpr = OASE->getStride();
8394 llvm::Value *Stride =
8395 StrideExpr
8396 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8397 CGF.Int64Ty, /*isSigned=*/false)
8398 : nullptr;
8399 if (Stride)
8400 Count = CGF.Builder.CreateUDiv(
8401 CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
8402 else
8403 Count = CGF.Builder.CreateNUWSub(*DI, Offset);
8404 }
8405 } else {
8406 Count = CGF.EmitScalarExpr(CountExpr);
8407 }
8408 Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
8409 CurCounts.push_back(Count);
8410
8411 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
8412 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
8413 //        Offset  Count  Stride
8414 //    D0    0       1      4    (int)  <- dummy dimension
8415 //    D1    0       2      8    (2 * (1) * 4)
8416 //    D2    1       2      20   (1 * (1 * 5) * 4)
8417 //    D3    0       2      200  (2 * (1 * 5 * 5) * 4)
8418 const Expr *StrideExpr = OASE->getStride();
8419 llvm::Value *Stride =
8420 StrideExpr
8421 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
8422 CGF.Int64Ty, /*isSigned=*/false)
8423 : nullptr;
8424 DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
8425 if (Stride)
8426 CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
8427 else
8428 CurStrides.push_back(DimProd);
8429 if (DI != DimSizes.end())
8430 ++DI;
8431 }
8432
8433 CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
8434 CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
8435 CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
8436 }
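Note: as an illustration of what exercises this non-contiguous path (user
code, not part of this file), a strided array section in a target update
clause is what fills the per-dimension offsets, counts, and strides
collected above:

    int arr[5][5][5];
    // Matches the worked example in the comment at lines 8412-8417: the
    // dummy dimension is {0, 1, 4}, then {0, 2, 8}, {1, 2, 20}, {0, 2, 200}.
    #pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])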
8437
8438 /// Return the adjusted map modifiers if the declaration a capture refers to
8439 /// appears in a first-private clause. This is expected to be used only with
8440 /// directives that start with 'target'.
8441 MappableExprsHandler::OpenMPOffloadMappingFlags
8442 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
8443 assert(Cap.capturesVariable() && "Expected capture by reference only!");
8444
8445 // A first private variable captured by reference will use only the
8446 // 'private ptr' and 'map to' flag. Return the right flags if the captured
8447 // declaration is known as first-private in this handler.
8448 if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
8449 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
8450 return MappableExprsHandler::OMP_MAP_TO |
8451 MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
8452 return MappableExprsHandler::OMP_MAP_PRIVATE |
8453 MappableExprsHandler::OMP_MAP_TO;
8454 }
8455 auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
8456 if (I != LambdasMap.end())
8457 // For map(to: lambda): use the user-specified map type.
8458 return getMapTypeBits(
8459 I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
8460 /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
8461 /*AddPtrFlag=*/false,
8462 /*AddIsTargetParamFlag=*/false,
8463 /*isNonContiguous=*/false);
8464 return MappableExprsHandler::OMP_MAP_TO |
8465 MappableExprsHandler::OMP_MAP_FROM;
8466 }
8467
8468 static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
8469 // Shift left by getFlagMemberOffset() bits.
8470 return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
8471 << getFlagMemberOffset());
8472 }
8473
8474 static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
8475 OpenMPOffloadMappingFlags MemberOfFlag) {
8476 // If the entry is PTR_AND_OBJ but has not been marked with the special
8477 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
8478 // marked as MEMBER_OF.
8479 if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
8480 ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
8481 return;
8482
8483 // Reset the placeholder value to prepare the flag for the assignment of the
8484 // proper MEMBER_OF value.
8485 Flags &= ~OMP_MAP_MEMBER_OF;
8486 Flags |= MemberOfFlag;
8487 }
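Note: to illustrate the encoding (assuming, as the MEMBER_OF=FFFF placeholder
comments suggest, that the MEMBER_OF field occupies the 16 most significant
bits of the 64-bit flag word, i.e. getFlagMemberOffset() == 48):

    // Hypothetical values, for illustration only:
    // getMemberOfFlag(0)  == (0 + 1)  << 48 == 0x0001000000000000
    // getMemberOfFlag(14) == (14 + 1) << 48 == 0x000F000000000000
    // setCorrectMemberOfFlag clears the 0xFFFF... placeholder set earlier
    // and ORs in one of these values.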
8488
8489 void getPlainLayout(const CXXRecordDecl *RD,
8490 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
8491 bool AsBase) const {
8492 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
8493
8494 llvm::StructType *St =
8495 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
8496
8497 unsigned NumElements = St->getNumElements();
8498 llvm::SmallVector<
8499 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
8500 RecordLayout(NumElements);
8501
8502 // Fill bases.
8503 for (const auto &I : RD->bases()) {
8504 if (I.isVirtual())
8505 continue;
8506 const auto *Base = I.getType()->getAsCXXRecordDecl();
8507 // Ignore empty bases.
8508 if (Base->isEmpty() || CGF.getContext()
8509 .getASTRecordLayout(Base)
8510 .getNonVirtualSize()
8511 .isZero())
8512 continue;
8513
8514 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
8515 RecordLayout[FieldIndex] = Base;
8516 }
8517 // Fill in virtual bases.
8518 for (const auto &I : RD->vbases()) {
8519 const auto *Base = I.getType()->getAsCXXRecordDecl();
8520 // Ignore empty bases.
8521 if (Base->isEmpty())
8522 continue;
8523 unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
8524 if (RecordLayout[FieldIndex])
8525 continue;
8526 RecordLayout[FieldIndex] = Base;
8527 }
8528 // Fill in all the fields.
8529 assert(!RD->isUnion() && "Unexpected union.");
8530 for (const auto *Field : RD->fields()) {
8531 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8532 // will fill in later.)
8533 if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
8534 unsigned FieldIndex = RL.getLLVMFieldNo(Field);
8535 RecordLayout[FieldIndex] = Field;
8536 }
8537 }
8538 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8539 &Data : RecordLayout) {
8540 if (Data.isNull())
8541 continue;
8542 if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
8543 getPlainLayout(Base, Layout, /*AsBase=*/true);
8544 else
8545 Layout.push_back(Data.get<const FieldDecl *>());
8546 }
8547 }
8548
8549 /// Generate all the base pointers, section pointers, sizes, map types, and
8550 /// mappers for the extracted mappable expressions (all included in \a
8551 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8552 /// pair of the relevant declaration and index where it occurs is appended to
8553 /// the device pointers info array.
8554 void generateAllInfoForClauses(
8555 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8556 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8557 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8558 // We have to process the component lists that relate with the same
8559 // declaration in a single chunk so that we can generate the map flags
8560 // correctly. Therefore, we organize all lists in a map.
8561 enum MapKind { Present, Allocs, Other, Total };
8562 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8563 SmallVector<SmallVector<MapInfo, 8>, 4>>
8564 Info;
8565
8566 // Helper function to fill the information map for the different supported
8567 // clauses.
8568 auto &&InfoGen =
8569 [&Info, &SkipVarSet](
8570 const ValueDecl *D, MapKind Kind,
8571 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8572 OpenMPMapClauseKind MapType,
8573 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8574 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8575 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8576 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8577 if (SkipVarSet.contains(D))
8578 return;
8579 auto It = Info.find(D);
8580 if (It == Info.end())
8581 It = Info
8582 .insert(std::make_pair(
8583 D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
8584 .first;
8585 It->second[Kind].emplace_back(
8586 L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
8587 IsImplicit, Mapper, VarRef, ForDeviceAddr);
8588 };
8589
8590 for (const auto *Cl : Clauses) {
8591 const auto *C = dyn_cast<OMPMapClause>(Cl);
8592 if (!C)
8593 continue;
8594 MapKind Kind = Other;
8595 if (llvm::is_contained(C->getMapTypeModifiers(),
8596 OMPC_MAP_MODIFIER_present))
8597 Kind = Present;
8598 else if (C->getMapType() == OMPC_MAP_alloc)
8599 Kind = Allocs;
8600 const auto *EI = C->getVarRefs().begin();
8601 for (const auto L : C->component_lists()) {
8602 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8603 InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
8604 C->getMapTypeModifiers(), llvm::None,
8605 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
8606 E);
8607 ++EI;
8608 }
8609 }
8610 for (const auto *Cl : Clauses) {
8611 const auto *C = dyn_cast<OMPToClause>(Cl);
8612 if (!C)
8613 continue;
8614 MapKind Kind = Other;
8615 if (llvm::is_contained(C->getMotionModifiers(),
8616 OMPC_MOTION_MODIFIER_present))
8617 Kind = Present;
8618 const auto *EI = C->getVarRefs().begin();
8619 for (const auto L : C->component_lists()) {
8620 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
8621 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8622 C->isImplicit(), std::get<2>(L), *EI);
8623 ++EI;
8624 }
8625 }
8626 for (const auto *Cl : Clauses) {
8627 const auto *C = dyn_cast<OMPFromClause>(Cl);
8628 if (!C)
8629 continue;
8630 MapKind Kind = Other;
8631 if (llvm::is_contained(C->getMotionModifiers(),
8632 OMPC_MOTION_MODIFIER_present))
8633 Kind = Present;
8634 const auto *EI = C->getVarRefs().begin();
8635 for (const auto L : C->component_lists()) {
8636 InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
8637 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8638 C->isImplicit(), std::get<2>(L), *EI);
8639 ++EI;
8640 }
8641 }
8642
8643 // Look at the use_device_ptr clause information and mark the existing map
8644 // entries as such. If there is no map information for an entry in the
8645 // use_device_ptr list, we create one with map type 'alloc' and zero size
8646 section. It is the user's fault if that was not mapped before. If there is
8647 // no map information and the pointer is a struct member, then we defer the
8648 // emission of that entry until the whole struct has been processed.
8649 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8650 SmallVector<DeferredDevicePtrEntryTy, 4>>
8651 DeferredInfo;
8652 MapCombinedInfoTy UseDevicePtrCombinedInfo;
8653
8654 for (const auto *Cl : Clauses) {
8655 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
8656 if (!C)
8657 continue;
8658 for (const auto L : C->component_lists()) {
8659 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8660 std::get<1>(L);
8661 assert(!Components.empty() &&
8662 "Not expecting empty list of components!");
8663 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8664 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8665 const Expr *IE = Components.back().getAssociatedExpression();
8666 // If the first component is a member expression, we have to look into
8667 // 'this', which maps to null in the map of map information. Otherwise
8668 // look directly for the information.
8669 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8670
8671 // We potentially have map information for this declaration already.
8672 // Look for the first set of components that refer to it.
8673 if (It != Info.end()) {
8674 bool Found = false;
8675 for (auto &Data : It->second) {
8676 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8677 return MI.Components.back().getAssociatedDeclaration() == VD;
8678 });
8679 // If we found a map entry, signal that the pointer has to be
8680 // returned and move on to the next declaration. Exclude cases where
8681 // the base pointer is mapped as array subscript, array section or
8682 // array shaping. The base address is passed as a pointer to base in
8683 // this case and cannot be used as a base for use_device_ptr list
8684 // item.
8685 if (CI != Data.end()) {
8686 auto PrevCI = std::next(CI->Components.rbegin());
8687 const auto *VarD = dyn_cast<VarDecl>(VD);
8688 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8689 isa<MemberExpr>(IE) ||
8690 !VD->getType().getNonReferenceType()->isPointerType() ||
8691 PrevCI == CI->Components.rend() ||
8692 isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
8693 VarD->hasLocalStorage()) {
8694 CI->ReturnDevicePointer = true;
8695 Found = true;
8696 break;
8697 }
8698 }
8699 }
8700 if (Found)
8701 continue;
8702 }
8703
8704 // We didn't find any match in our map information, so generate a
8705 // zero-size array section. If the pointer is a struct member, we defer
8706 // this action until the whole struct has been processed.
8707 if (isa<MemberExpr>(IE)) {
8708 // Insert the pointer into Info to be processed by
8709 // generateInfoForComponentList. Because it is a member pointer
8710 // without a pointee, no entry will be generated for it, therefore
8711 // we need to generate one after the whole struct has been processed.
8712 // Nonetheless, generateInfoForComponentList must be called to take
8713 // the pointer into account for the calculation of the range of the
8714 // partial struct.
8715 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
8716 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8717 nullptr);
8718 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/false);
8719 } else {
8720 llvm::Value *Ptr =
8721 CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
8722 UseDevicePtrCombinedInfo.Exprs.push_back(VD);
8723 UseDevicePtrCombinedInfo.BasePointers.emplace_back(Ptr, VD);
8724 UseDevicePtrCombinedInfo.Pointers.push_back(Ptr);
8725 UseDevicePtrCombinedInfo.Sizes.push_back(
8726 llvm::Constant::getNullValue(CGF.Int64Ty));
8727 UseDevicePtrCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8728 UseDevicePtrCombinedInfo.Mappers.push_back(nullptr);
8729 }
8730 }
8731 }
8732
8733 // Look at the use_device_addr clause information and mark the existing map
8734 // entries as such. If there is no map information for an entry in the
8735 // use_device_addr list, we create one with map type 'alloc' and zero size
8736 section. It is the user's fault if that was not mapped before. If there is
8737 // no map information and the pointer is a struct member, then we defer the
8738 // emission of that entry until the whole struct has been processed.
8739 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8740 for (const auto *Cl : Clauses) {
8741 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
8742 if (!C)
8743 continue;
8744 for (const auto L : C->component_lists()) {
8745 assert(!std::get<1>(L).empty() &&
8746 "Not expecting empty list of components!");
8747 const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
8748 if (!Processed.insert(VD).second)
8749 continue;
8750 VD = cast<ValueDecl>(VD->getCanonicalDecl());
8751 const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
8752 // If the first component is a member expression, we have to look into
8753 // 'this', which maps to null in the map of map information. Otherwise
8754 // look directly for the information.
8755 auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
8756
8757 // We potentially have map information for this declaration already.
8758 // Look for the first set of components that refer to it.
8759 if (It != Info.end()) {
8760 bool Found = false;
8761 for (auto &Data : It->second) {
8762 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
8763 return MI.Components.back().getAssociatedDeclaration() == VD;
8764 });
8765 // If we found a map entry, signal that the pointer has to be
8766 // returned and move on to the next declaration.
8767 if (CI != Data.end()) {
8768 CI->ReturnDevicePointer = true;
8769 Found = true;
8770 break;
8771 }
8772 }
8773 if (Found)
8774 continue;
8775 }
8776
8777 // We didn't find any match in our map information, so generate a
8778 // zero-size array section. If the pointer is a struct member, we defer
8779 // this action until the whole struct has been processed.
8780 if (isa<MemberExpr>(IE)) {
8781 // Insert the pointer into Info to be processed by
8782 // generateInfoForComponentList. Because it is a member pointer
8783 // without a pointee, no entry will be generated for it, therefore
8784 // we need to generate one after the whole struct has been processed.
8785 // Nonetheless, generateInfoForComponentList must be called to take
8786 // the pointer into account for the calculation of the range of the
8787 // partial struct.
8788 InfoGen(nullptr, Other, std::get<1>(L), OMPC_MAP_unknown, llvm::None,
8789 llvm::None, /*ReturnDevicePointer=*/false, C->isImplicit(),
8790 nullptr, nullptr, /*ForDeviceAddr=*/true);
8791 DeferredInfo[nullptr].emplace_back(IE, VD, /*ForDeviceAddr=*/true);
8792 } else {
8793 llvm::Value *Ptr;
8794 if (IE->isGLValue())
8795 Ptr = CGF.EmitLValue(IE).getPointer(CGF);
8796 else
8797 Ptr = CGF.EmitScalarExpr(IE);
8798 CombinedInfo.Exprs.push_back(VD);
8799 CombinedInfo.BasePointers.emplace_back(Ptr, VD);
8800 CombinedInfo.Pointers.push_back(Ptr);
8801 CombinedInfo.Sizes.push_back(
8802 llvm::Constant::getNullValue(CGF.Int64Ty));
8803 CombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
8804 CombinedInfo.Mappers.push_back(nullptr);
8805 }
8806 }
8807 }
8808
8809 for (const auto &Data : Info) {
8810 StructRangeInfoTy PartialStruct;
8811 // Temporary generated information.
8812 MapCombinedInfoTy CurInfo;
8813 const Decl *D = Data.first;
8814 const ValueDecl *VD = cast_or_null<ValueDecl>(D);
8815 for (const auto &M : Data.second) {
8816 for (const MapInfo &L : M) {
8817 assert(!L.Components.empty() &&
8818 "Not expecting declaration with no component lists.");
8819
8820 // Remember the current base pointer index.
8821 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8822 CurInfo.NonContigInfo.IsNonContiguous =
8823 L.Components.back().isNonContiguous();
8824 generateInfoForComponentList(
8825 L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
8826 CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
8827 L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);
8828
8829 // If this entry relates with a device pointer, set the relevant
8830 // declaration and add the 'return pointer' flag.
8831 if (L.ReturnDevicePointer) {
8832 assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
8833 "Unexpected number of mapped base pointers.");
8834
8835 const ValueDecl *RelevantVD =
8836 L.Components.back().getAssociatedDeclaration();
8837 assert(RelevantVD &&
8838 "No relevant declaration related with device pointer??");
8839
8840 CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
8841 RelevantVD);
8842 CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8843 }
8844 }
8845 }
8846
8847 // Append any pending zero-length pointers which are struct members and
8848 // used with use_device_ptr or use_device_addr.
8849 auto CI = DeferredInfo.find(Data.first);
8850 if (CI != DeferredInfo.end()) {
8851 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8852 llvm::Value *BasePtr;
8853 llvm::Value *Ptr;
8854 if (L.ForDeviceAddr) {
8855 if (L.IE->isGLValue())
8856 Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8857 else
8858 Ptr = this->CGF.EmitScalarExpr(L.IE);
8859 BasePtr = Ptr;
8860 // Entry is RETURN_PARAM. Also, set the placeholder value
8861 // MEMBER_OF=FFFF so that the entry is later updated with the
8862 // correct value of MEMBER_OF.
8863 CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
8864 } else {
8865 BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
8866 Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
8867 L.IE->getExprLoc());
8868 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8869 // placeholder value MEMBER_OF=FFFF so that the entry is later
8870 // updated with the correct value of MEMBER_OF.
8871 CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8872 OMP_MAP_MEMBER_OF);
8873 }
8874 CurInfo.Exprs.push_back(L.VD);
8875 CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
8876 CurInfo.Pointers.push_back(Ptr);
8877 CurInfo.Sizes.push_back(
8878 llvm::Constant::getNullValue(this->CGF.Int64Ty));
8879 CurInfo.Mappers.push_back(nullptr);
8880 }
8881 }
8882 // If there is an entry in PartialStruct it means we have a struct with
8883 // individual members mapped. Emit an extra combined entry.
8884 if (PartialStruct.Base.isValid()) {
8885 CurInfo.NonContigInfo.Dims.push_back(0);
8886 emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
8887 }
8888
8889 // We need to append the results of this capture to what we already
8890 // have.
8891 CombinedInfo.append(CurInfo);
8892 }
8893 // Append data for use_device_ptr clauses.
8894 CombinedInfo.append(UseDevicePtrCombinedInfo);
8895 }
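Note: a short illustration of the use_device_ptr handling above (user code,
illustrative only). When the pointer was mapped beforehand, its existing
entry just gains the RETURN_PARAM flag; otherwise the zero-size entry built
above is emitted:

    int buf[100];
    int *p = buf;
    #pragma omp target data map(to: p[0:100]) use_device_ptr(p)
    {
      // Within the region, p holds the corresponding device address.
    }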
8896
8897public:
8898 MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
8899 : CurDir(&Dir), CGF(CGF) {
8900 // Extract firstprivate clause information.
8901 for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
8902 for (const auto *D : C->varlists())
8903 FirstPrivateDecls.try_emplace(
8904 cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8905 // Extract implicit firstprivates from uses_allocators clauses.
8906 for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
8907 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
8908 OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
8909 if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
8910 FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
8911 /*Implicit=*/true);
8912 else if (const auto *VD = dyn_cast<VarDecl>(
8913 cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
8914 ->getDecl()))
8915 FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
8916 }
8917 }
8918 // Extract device pointer clause information.
8919 for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
8920 for (auto L : C->component_lists())
8921 DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
8922 // Extract map information.
8923 for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
8924 if (C->getMapType() != OMPC_MAP_to)
8925 continue;
8926 for (auto L : C->component_lists()) {
8927 const ValueDecl *VD = std::get<0>(L);
8928 const auto *RD = VD ? VD->getType()
8929 .getCanonicalType()
8930 .getNonReferenceType()
8931 ->getAsCXXRecordDecl()
8932 : nullptr;
8933 if (RD && RD->isLambda())
8934 LambdasMap.try_emplace(std::get<0>(L), C);
8935 }
8936 }
8937 }
8938
8939 /// Constructor for the declare mapper directive.
8940 MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
8941 : CurDir(&Dir), CGF(CGF) {}
8942
8943 /// Generate code for the combined entry if we have a partially mapped struct
8944 /// and take care of the mapping flags of the arguments corresponding to
8945 /// individual struct members.
8946 void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
8947 MapFlagsArrayTy &CurTypes,
8948 const StructRangeInfoTy &PartialStruct,
8949 const ValueDecl *VD = nullptr,
8950 bool NotTargetParams = true) const {
8951 if (CurTypes.size() == 1 &&
8952 ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
8953 !PartialStruct.IsArraySection)
8954 return;
8955 Address LBAddr = PartialStruct.LowestElem.second;
8956 Address HBAddr = PartialStruct.HighestElem.second;
8957 if (PartialStruct.HasCompleteRecord) {
8958 LBAddr = PartialStruct.LB;
8959 HBAddr = PartialStruct.LB;
8960 }
8961 CombinedInfo.Exprs.push_back(VD);
8962 // Base is the base of the struct
8963 CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
8964 // Pointer is the address of the lowest element
8965 llvm::Value *LB = LBAddr.getPointer();
8966 CombinedInfo.Pointers.push_back(LB);
8967 // There should not be a mapper for a combined entry.
8968 CombinedInfo.Mappers.push_back(nullptr);
8969 // Size is (addr of {highest+1} element) - (addr of lowest element)
8970 llvm::Value *HB = HBAddr.getPointer();
8971 llvm::Value *HAddr =
8972 CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
8973 llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
8974 llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
8975 llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
8976 llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
8977 /*isSigned=*/false);
8978 CombinedInfo.Sizes.push_back(Size);
8979 // The map type is always TARGET_PARAM when generating info for captures.
8980 CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
8981 : OMP_MAP_TARGET_PARAM);
8982 // If any element has the present modifier, then make sure the runtime
8983 // doesn't attempt to allocate the struct.
8984 if (CurTypes.end() !=
8985 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8986 return Type & OMP_MAP_PRESENT;
8987 }))
8988 CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
8989 // Remove TARGET_PARAM flag from the first element
8990 (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
8991 // If any element has the ompx_hold modifier, then make sure the runtime
8992 // uses the hold reference count for the struct as a whole so that it won't
8993 // be unmapped by an extra dynamic reference count decrement. Add it to all
8994 // elements as well so the runtime knows which reference count to check
8995 // when determining whether it's time for device-to-host transfers of
8996 // individual elements.
8997 if (CurTypes.end() !=
8998 llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
8999 return Type & OMP_MAP_OMPX_HOLD;
9000 })) {
9001 CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
9002 for (auto &M : CurTypes)
9003 M |= OMP_MAP_OMPX_HOLD;
9004 }
9005
9006 // All other current entries will be MEMBER_OF the combined entry
9007 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9008 // 0xFFFF in the MEMBER_OF field).
9009 OpenMPOffloadMappingFlags MemberOfFlag =
9010 getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
9011 for (auto &M : CurTypes)
9012 setCorrectMemberOfFlag(M, MemberOfFlag);
9013 }
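Note: an illustration of the combined entry (user code, illustrative only).
When individual members of a struct are mapped, the entry emitted above spans
from the lowest mapped element to one past the highest, and the member
entries become MEMBER_OF it:

    struct S { int a; double pad[10]; int c; } s;
    // The combined entry covers [&s.a, &s.c + 1); the entries for s.a and
    // s.c are then marked MEMBER_OF the combined entry.
    #pragma omp target map(tofrom: s.a, s.c)
    { s.a += 1; s.c += 1; }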
9014
9015 /// Generate all the base pointers, section pointers, sizes, map types, and
9016 /// mappers for the extracted mappable expressions (all included in \a
9017 /// CombinedInfo). Also, for each item that relates with a device pointer, a
9018 /// pair of the relevant declaration and index where it occurs is appended to
9019 /// the device pointers info array.
9020 void generateAllInfo(
9021 MapCombinedInfoTy &CombinedInfo,
9022 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
9023 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
9024 assert(CurDir.is<const OMPExecutableDirective *>() &&
9025 "Expect a executable directive");
9026 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9027 generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
9028 }
9029
9030 /// Generate all the base pointers, section pointers, sizes, map types, and
9031 /// mappers for the extracted map clauses of user-defined mapper (all included
9032 /// in \a CombinedInfo).
9033 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
9034 assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
9035 "Expect a declare mapper directive");
9036 const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
9037 generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
9038 }
9039
9040 /// Emit capture info for lambdas for variables captured by reference.
9041 void generateInfoForLambdaCaptures(
9042 const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9043 llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
9044 const auto *RD = VD->getType()
9045 .getCanonicalType()
9046 .getNonReferenceType()
9047 ->getAsCXXRecordDecl();
9048 if (!RD || !RD->isLambda())
9049 return;
9050 Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
9051 LValue VDLVal = CGF.MakeAddrLValue(
9052 VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
9053 llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
9054 FieldDecl *ThisCapture = nullptr;
9055 RD->getCaptureFields(Captures, ThisCapture);
9056 if (ThisCapture) {
9057 LValue ThisLVal =
9058 CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
9059 LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
9060 LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
9061 VDLVal.getPointer(CGF));
9062 CombinedInfo.Exprs.push_back(VD);
9063 CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
9064 CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
9065 CombinedInfo.Sizes.push_back(
9066 CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
9067 CGF.Int64Ty, /*isSigned=*/true));
9068 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9069 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9070 CombinedInfo.Mappers.push_back(nullptr);
9071 }
9072 for (const LambdaCapture &LC : RD->captures()) {
9073 if (!LC.capturesVariable())
9074 continue;
9075 const VarDecl *VD = LC.getCapturedVar();
9076 if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
9077 continue;
9078 auto It = Captures.find(VD);
9079 assert(It != Captures.end() && "Found lambda capture without field.");
9080 LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
9081 if (LC.getCaptureKind() == LCK_ByRef) {
9082 LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
9083 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9084 VDLVal.getPointer(CGF));
9085 CombinedInfo.Exprs.push_back(VD);
9086 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9087 CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
9088 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9089 CGF.getTypeSize(
9090 VD->getType().getCanonicalType().getNonReferenceType()),
9091 CGF.Int64Ty, /*isSigned=*/true));
9092 } else {
9093 RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
9094 LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
9095 VDLVal.getPointer(CGF));
9096 CombinedInfo.Exprs.push_back(VD);
9097 CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
9098 CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
9099 CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
9100 }
9101 CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9102 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
9103 CombinedInfo.Mappers.push_back(nullptr);
9104 }
9105 }
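Note: for context, this routine fires for lambdas whose captures are taken by
reference (or are pointers), emitting a PTR_AND_OBJ | LITERAL | MEMBER_OF |
IMPLICIT entry per capture (user code, illustrative only):

    int x = 0;
    auto fn = [&x]() { return x + 1; };
    #pragma omp target map(to: fn)
    { (void)fn(); }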
9106
9107 /// Set correct indices for lambdas captures.
9108 void adjustMemberOfForLambdaCaptures(
9109 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
9110 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
9111 MapFlagsArrayTy &Types) const {
9112 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
9113 // Set correct member_of idx for all implicit lambda captures.
9114 if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
9115 OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
9116 continue;
9117 llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
9118 assert(BasePtr && "Unable to find base lambda address.");
9119 int TgtIdx = -1;
9120 for (unsigned J = I; J > 0; --J) {
9121 unsigned Idx = J - 1;
9122 if (Pointers[Idx] != BasePtr)
9123 continue;
9124 TgtIdx = Idx;
9125 break;
9126 }
9127 assert(TgtIdx != -1 && "Unable to find parent lambda.");
9128 // All other current entries will be MEMBER_OF the combined entry
9129 // (except for PTR_AND_OBJ entries which do not have a placeholder value
9130 // 0xFFFF in the MEMBER_OF field).
9131 OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
9132 setCorrectMemberOfFlag(Types[I], MemberOfFlag);
9133 }
9134 }
9135
9136 /// Generate the base pointers, section pointers, sizes, map types, and
9137 /// mappers associated to a given capture (all included in \a CombinedInfo).
9138 void generateInfoForCapture(const CapturedStmt::Capture *Cap,
9139 llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
9140 StructRangeInfoTy &PartialStruct) const {
9141 assert(!Cap->capturesVariableArrayType() &&
9142 "Not expecting to generate map info for a variable array type!");
9143
9144 // We need to know when we are generating information for the first component.
9145 const ValueDecl *VD = Cap->capturesThis()
9146 ? nullptr
9147 : Cap->getCapturedVar()->getCanonicalDecl();
9148
9149 // For map(to: lambda): skip it here; it is processed in
9150 // generateDefaultMapInfo.
9151 if (LambdasMap.count(VD))
9152 return;
9153
9154 // If this declaration appears in an is_device_ptr clause, we just have to
9155 // pass the pointer by value. If it is a reference to a declaration, we just
9156 // pass its value.
9157 if (DevPointersMap.count(VD)) {
9158 CombinedInfo.Exprs.push_back(VD);
9159 CombinedInfo.BasePointers.emplace_back(Arg, VD);
9160 CombinedInfo.Pointers.push_back(Arg);
9161 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9162 CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
9163 /*isSigned=*/true));
9164 CombinedInfo.Types.push_back(
9165 (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
9166 OMP_MAP_TARGET_PARAM);
9167 CombinedInfo.Mappers.push_back(nullptr);
9168 return;
9169 }
9170
9171 using MapData =
9172 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
9173 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
9174 const ValueDecl *, const Expr *>;
9175 SmallVector<MapData, 4> DeclComponentLists;
9176 assert(CurDir.is<const OMPExecutableDirective *>() &&
9177 "Expect a executable directive");
9178 const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
9179 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
9180 const auto *EI = C->getVarRefs().begin();
9181 for (const auto L : C->decl_component_lists(VD)) {
9182 const ValueDecl *VDecl, *Mapper;
9183 // The Expression is not correct if the mapping is implicit
9184 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
9185 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9186 std::tie(VDecl, Components, Mapper) = L;
9187 assert(VDecl == VD && "We got information for the wrong declaration??");
9188 assert(!Components.empty() &&
9189 "Not expecting declaration with no component lists.");
9190 DeclComponentLists.emplace_back(Components, C->getMapType(),
9191 C->getMapTypeModifiers(),
9192 C->isImplicit(), Mapper, E);
9193 ++EI;
9194 }
9195 }
9196 llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
9197 const MapData &RHS) {
9198 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
9199 OpenMPMapClauseKind MapType = std::get<1>(RHS);
9200 bool HasPresent =
9201 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9202 bool HasAllocs = MapType == OMPC_MAP_alloc;
9203 MapModifiers = std::get<2>(RHS);
9204 MapType = std::get<1>(LHS);
9205 bool HasPresentR =
9206 llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
9207 bool HasAllocsR = MapType == OMPC_MAP_alloc;
9208 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
9209 });
9210
9211 // Find overlapping elements (including the offset from the base element).
9212 llvm::SmallDenseMap<
9213 const MapData *,
9214 llvm::SmallVector<
9215 OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
9216 4>
9217 OverlappedData;
9218 size_t Count = 0;
9219 for (const MapData &L : DeclComponentLists) {
9220 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9221 OpenMPMapClauseKind MapType;
9222 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9223 bool IsImplicit;
9224 const ValueDecl *Mapper;
9225 const Expr *VarRef;
9226 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9227 L;
9228 ++Count;
9229 for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
9230 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
9231 std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
9232 VarRef) = L1;
9233 auto CI = Components.rbegin();
9234 auto CE = Components.rend();
9235 auto SI = Components1.rbegin();
9236 auto SE = Components1.rend();
9237 for (; CI != CE && SI != SE; ++CI, ++SI) {
9238 if (CI->getAssociatedExpression()->getStmtClass() !=
9239 SI->getAssociatedExpression()->getStmtClass())
9240 break;
9241 // Are we dealing with different variables/fields?
9242 if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
9243 break;
9244 }
9245 // We found an overlap if, for at least one of the lists, we reached the
9246 // head of its component list.
9247 if (CI == CE || SI == SE) {
9248 // Ignore it if it is the same component.
9249 if (CI == CE && SI == SE)
9250 continue;
9251 const auto It = (SI == SE) ? CI : SI;
9252 // If one component is a pointer and the other is some kind of
9253 // dereference of that pointer (array subscript, section, dereference,
9254 // etc.), it is not an overlap.
9255 // Likewise if one component is a base and the other is a dereferenced
9256 // pointer member expression with the same base.
9257 if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
9258 (std::prev(It)->getAssociatedDeclaration() &&
9259 std::prev(It)
9260 ->getAssociatedDeclaration()
9261 ->getType()
9262 ->isPointerType()) ||
9263 (It->getAssociatedDeclaration() &&
9264 It->getAssociatedDeclaration()->getType()->isPointerType() &&
9265 std::next(It) != CE && std::next(It) != SE))
9266 continue;
9267 const MapData &BaseData = CI == CE ? L : L1;
9268 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
9269 SI == SE ? Components : Components1;
9270 auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
9271 OverlappedElements.getSecond().push_back(SubData);
9272 }
9273 }
9274 }
9275 // Sort the overlapped elements for each item.
9276 llvm::SmallVector<const FieldDecl *, 4> Layout;
9277 if (!OverlappedData.empty()) {
9278 const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
9279 const Type *OrigType = BaseType->getPointeeOrArrayElementType();
9280 while (BaseType != OrigType) {
9281 BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
9282 OrigType = BaseType->getPointeeOrArrayElementType();
9283 }
9284
9285 if (const auto *CRD = BaseType->getAsCXXRecordDecl())
9286 getPlainLayout(CRD, Layout, /*AsBase=*/false);
9287 else {
9288 const auto *RD = BaseType->getAsRecordDecl();
9289 Layout.append(RD->field_begin(), RD->field_end());
9290 }
9291 }
9292 for (auto &Pair : OverlappedData) {
9293 llvm::stable_sort(
9294 Pair.getSecond(),
9295 [&Layout](
9296 OMPClauseMappableExprCommon::MappableExprComponentListRef First,
9297 OMPClauseMappableExprCommon::MappableExprComponentListRef
9298 Second) {
9299 auto CI = First.rbegin();
9300 auto CE = First.rend();
9301 auto SI = Second.rbegin();
9302 auto SE = Second.rend();
9303 for (; CI != CE && SI != SE; ++CI, ++SI) {
9304 if (CI->getAssociatedExpression()->getStmtClass() !=
9305 SI->getAssociatedExpression()->getStmtClass())
9306 break;
9307 // Are we dealing with different variables/fields?
9308 if (CI->getAssociatedDeclaration() !=
9309 SI->getAssociatedDeclaration())
9310 break;
9311 }
9312
9313 // Lists contain the same elements.
9314 if (CI == CE && SI == SE)
9315 return false;
9316
9317 // A list with fewer elements is less than a list with more elements.
9318 if (CI == CE || SI == SE)
9319 return CI == CE;
9320
9321 const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
9322 const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
9323 if (FD1->getParent() == FD2->getParent())
9324 return FD1->getFieldIndex() < FD2->getFieldIndex();
9325 const auto *It =
9326 llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
9327 return FD == FD1 || FD == FD2;
9328 });
9329 return *It == FD1;
9330 });
9331 }
9332
9333 // Associated with a capture, because the mapping flags depend on it.
9334 // First, go through all of the elements that have overlapped elements.
9335 bool IsFirstComponentList = true;
9336 for (const auto &Pair : OverlappedData) {
9337 const MapData &L = *Pair.getFirst();
9338 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9339 OpenMPMapClauseKind MapType;
9340 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9341 bool IsImplicit;
9342 const ValueDecl *Mapper;
9343 const Expr *VarRef;
9344 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9345 L;
9346 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
9347 OverlappedComponents = Pair.getSecond();
9348 generateInfoForComponentList(
9349 MapType, MapModifiers, llvm::None, Components, CombinedInfo,
9350 PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
9351 /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
9352 IsFirstComponentList = false;
9353 }
9354 // Go through other elements without overlapped elements.
9355 for (const MapData &L : DeclComponentLists) {
9356 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
9357 OpenMPMapClauseKind MapType;
9358 ArrayRef<OpenMPMapModifierKind> MapModifiers;
9359 bool IsImplicit;
9360 const ValueDecl *Mapper;
9361 const Expr *VarRef;
9362 std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
9363 L;
9364 auto It = OverlappedData.find(&L);
9365 if (It == OverlappedData.end())
9366 generateInfoForComponentList(MapType, MapModifiers, llvm::None,
9367 Components, CombinedInfo, PartialStruct,
9368 IsFirstComponentList, IsImplicit, Mapper,
9369 /*ForDeviceAddr=*/false, VD, VarRef);
9370 IsFirstComponentList = false;
9371 }
9372 }
9373
9374 /// Generate the default map information for a given capture \a CI,
9375 /// record field declaration \a RI and captured value \a CV.
9376 void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
9377 const FieldDecl &RI, llvm::Value *CV,
9378 MapCombinedInfoTy &CombinedInfo) const {
9379 bool IsImplicit = true;
9380 // Do the default mapping.
9381 if (CI.capturesThis()) {
9382 CombinedInfo.Exprs.push_back(nullptr);
9383 CombinedInfo.BasePointers.push_back(CV);
9384 CombinedInfo.Pointers.push_back(CV);
9385 const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
9386 CombinedInfo.Sizes.push_back(
9387 CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
9388 CGF.Int64Ty, /*isSigned=*/true));
9389 // Default map type.
9390 CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
9391 } else if (CI.capturesVariableByCopy()) {
9392 const VarDecl *VD = CI.getCapturedVar();
9393 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9394 CombinedInfo.BasePointers.push_back(CV);
9395 CombinedInfo.Pointers.push_back(CV);
9396 if (!RI.getType()->isAnyPointerType()) {
9397 // We have to signal to the runtime those captures passed by value that
9398 // are not pointers.
9399 CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
9400 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9401 CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
9402 } else {
9403 // Pointers are implicitly mapped with a zero size and no flags
9404 // (other than the first map, which is added for all implicit maps).
9405 CombinedInfo.Types.push_back(OMP_MAP_NONE);
9406 CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
9407 }
9408 auto I = FirstPrivateDecls.find(VD);
9409 if (I != FirstPrivateDecls.end())
9410 IsImplicit = I->getSecond();
9411 } else {
9412 assert(CI.capturesVariable() && "Expected captured reference.");
9413 const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
9414 QualType ElementType = PtrTy->getPointeeType();
9415 CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
9416 CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
9417 // The default map type for a scalar/complex type is 'to' because by
9418 // default the value doesn't have to be retrieved. For an aggregate
9419 // type, the default is 'tofrom'.
9420 CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
9421 const VarDecl *VD = CI.getCapturedVar();
9422 auto I = FirstPrivateDecls.find(VD);
9423 CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
9424 CombinedInfo.BasePointers.push_back(CV);
9425 if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
9426 Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
9427 CV, ElementType, CGF.getContext().getDeclAlign(VD),
9428 AlignmentSource::Decl));
9429 CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
9430 } else {
9431 CombinedInfo.Pointers.push_back(CV);
9432 }
9433 if (I != FirstPrivateDecls.end())
9434 IsImplicit = I->getSecond();
9435 }
9436 // Every default map produces a single argument which is a target parameter.
9437 CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;
9438
9439 // Add flag stating this is an implicit map.
9440 if (IsImplicit)
9441 CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;
9442
9443 // No user-defined mapper for default mapping.
9444 CombinedInfo.Mappers.push_back(nullptr);
9445 }
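// For instance (a sketch, not tied to any specific test): a non-pointer
// scalar 'int x' captured by copy produces one entry with size sizeof(int),
// a null mapper, and flags OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM, plus
// OMP_MAP_IMPLICIT when the map is implicit.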
9446};
9447} // anonymous namespace
9448
9449static void emitNonContiguousDescriptor(
9450 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9451 CGOpenMPRuntime::TargetDataInfo &Info) {
9452 CodeGenModule &CGM = CGF.CGM;
9453 MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
9454 &NonContigInfo = CombinedInfo.NonContigInfo;
9455
9456 // Build an array of struct descriptor_dim and then assign it to
9457 // offload_args.
9458 //
9459 // struct descriptor_dim {
9460 // uint64_t offset;
9461 // uint64_t count;
9462 // uint64_t stride
9463 // };
9464 ASTContext &C = CGF.getContext();
9465 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
9466 RecordDecl *RD;
9467 RD = C.buildImplicitRecord("descriptor_dim");
9468 RD->startDefinition();
9469 addFieldToRecordDecl(C, RD, Int64Ty);
9470 addFieldToRecordDecl(C, RD, Int64Ty);
9471 addFieldToRecordDecl(C, RD, Int64Ty);
9472 RD->completeDefinition();
9473 QualType DimTy = C.getRecordType(RD);
9474
9475 enum { OffsetFD = 0, CountFD, StrideFD };
9476 // We need two index variables here since the size of "Dims" is the same as
9477 // the size of Components; however, the sizes of offset, count, and stride
9478 // equal the number of non-contiguous base declarations.
9479 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
9480 // Skip emitting IR if the dimension size is 1, since it cannot be
9481 // non-contiguous.
9482 if (NonContigInfo.Dims[I] == 1)
9483 continue;
9484 llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
9485 QualType ArrayTy =
9486 C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
9487 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
9488 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
9489 unsigned RevIdx = EE - II - 1;
9490 LValue DimsLVal = CGF.MakeAddrLValue(
9491 CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
9492 // Offset
9493 LValue OffsetLVal = CGF.EmitLValueForField(
9494 DimsLVal, *std::next(RD->field_begin(), OffsetFD));
9495 CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
9496 // Count
9497 LValue CountLVal = CGF.EmitLValueForField(
9498 DimsLVal, *std::next(RD->field_begin(), CountFD));
9499 CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
9500 // Stride
9501 LValue StrideLVal = CGF.EmitLValueForField(
9502 DimsLVal, *std::next(RD->field_begin(), StrideFD));
9503 CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
9504 }
9505 // args[I] = &dims
9506 Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9507 DimsAddr, CGM.Int8PtrTy);
9508 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9509 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9510 Info.PointersArray, 0, I);
9511 Address PAddr(P, CGF.getPointerAlign());
9512 CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
9513 ++L;
9514 }
9515}
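// A sketch of what the loop above materializes for each non-contiguous base
// declaration (values come from NonContigInfo; note that RevIdx stores the
// dimensions in reverse order):
//   struct descriptor_dim dims[NumDims]; // {offset, count, stride} per dim
//   ((void **)offload_ptrs)[I] = (void *)&dims;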
9516
9517// Try to extract the base declaration from a `this->x` expression if possible.
9518static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9519 if (!E)
9520 return nullptr;
9521
9522 if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
9523 if (const MemberExpr *ME =
9524 dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
9525 return ME->getMemberDecl();
9526 return nullptr;
9527}
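// E.g., for a map clause component such as 'this->x[0:n]' (an array section
// whose base is a MemberExpr), the function above returns the declaration
// of the member 'x'; any other expression shape yields nullptr.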
9528
9529/// Emit a string constant containing the names of the values mapped to the
9530/// offloading runtime library.
9531llvm::Constant *
9532emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
9533 MappableExprsHandler::MappingExprInfo &MapExprs) {
9534
9535 uint32_t SrcLocStrSize;
9536 if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
9537 return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
9538
9539 SourceLocation Loc;
9540 if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
9541 if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
9542 Loc = VD->getLocation();
9543 else
9544 Loc = MapExprs.getMapExpr()->getExprLoc();
9545 } else {
9546 Loc = MapExprs.getMapDecl()->getLocation();
9547 }
9548
9549 std::string ExprName;
9550 if (MapExprs.getMapExpr()) {
9551 PrintingPolicy P(CGF.getContext().getLangOpts());
9552 llvm::raw_string_ostream OS(ExprName);
9553 MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
9554 OS.flush();
9555 } else {
9556 ExprName = MapExprs.getMapDecl()->getNameAsString();
9557 }
9558
9559 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
9560 return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
9561 PLoc.getLine(), PLoc.getColumn(),
9562 SrcLocStrSize);
9563}
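// The emitted constant follows the usual source-location string encoding of
// getOrCreateSrcLocStr; roughly (a sketch, assuming the default layout):
//   ";<file>;<expression or declaration name>;<line>;<column>;;"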
9564
9565/// Emit the arrays used to pass the captures and map information to the
9566/// offloading runtime library. If there is no map or capture information,
9567/// return nullptr by reference.
9568static void emitOffloadingArrays(
9569 CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9570 CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
9571 bool IsNonContiguous = false) {
9572 CodeGenModule &CGM = CGF.CGM;
9573 ASTContext &Ctx = CGF.getContext();
9574
9575 // Reset the array information.
9576 Info.clearArrayInfo();
9577 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9578
9579 if (Info.NumberOfPtrs) {
9580 // Detect whether any capture size requires runtime evaluation of the
9581 // size; if none does, a constant array can be used for the sizes.
9582 bool hasRuntimeEvaluationCaptureSize = false;
9583 for (llvm::Value *S : CombinedInfo.Sizes)
9584 if (!isa<llvm::Constant>(S)) {
9585 hasRuntimeEvaluationCaptureSize = true;
9586 break;
9587 }
9588
9589 llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
9590 QualType PointerArrayType = Ctx.getConstantArrayType(
9591 Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
9592 /*IndexTypeQuals=*/0);
9593
9594 Info.BasePointersArray =
9595 CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
9596 Info.PointersArray =
9597 CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
9598 Address MappersArray =
9599 CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
9600 Info.MappersArray = MappersArray.getPointer();
9601
9602 // If we don't have any VLA types or other types that require runtime
9603 // evaluation, we can use a constant array for the map sizes, otherwise we
9604 // need to fill up the arrays as we do for the pointers.
9605 QualType Int64Ty =
9606 Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
9607 if (hasRuntimeEvaluationCaptureSize) {
9608 QualType SizeArrayType = Ctx.getConstantArrayType(
9609 Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
9610 /*IndexTypeQuals=*/0);
9611 Info.SizesArray =
9612 CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
9613 } else {
9614 // We expect all the sizes to be constant, so we collect them to create
9615 // a constant array.
9616 SmallVector<llvm::Constant *, 16> ConstSizes;
9617 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9618 if (IsNonContiguous &&
9619 (CombinedInfo.Types[I] & MappableExprsHandler::OMP_MAP_NON_CONTIG)) {
9620 ConstSizes.push_back(llvm::ConstantInt::get(
9621 CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]));
9622 } else {
9623 ConstSizes.push_back(cast<llvm::Constant>(CombinedInfo.Sizes[I]));
9624 }
9625 }
9626
9627 auto *SizesArrayInit = llvm::ConstantArray::get(
9628 llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
9629 std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
9630 auto *SizesArrayGbl = new llvm::GlobalVariable(
9631 CGM.getModule(), SizesArrayInit->getType(),
9632 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
9633 SizesArrayInit, Name);
9634 SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
9635 Info.SizesArray = SizesArrayGbl;
9636 }
9637
9638 // The map types are always constant so we don't need to generate code to
9639 // fill arrays. Instead, we create an array constant.
9640 SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
9641 llvm::copy(CombinedInfo.Types, Mapping.begin());
9642 std::string MaptypesName =
9643 CGM.getOpenMPRuntime().getName({"offload_maptypes"});
9644 auto *MapTypesArrayGbl =
9645 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9646 Info.MapTypesArray = MapTypesArrayGbl;
9647
9648 // The map names array is only built if debug information is
9649 // requested.
9650 if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
9651 Info.MapNamesArray = llvm::Constant::getNullValue(
9652 llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
9653 } else {
9654 auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
9655 return emitMappingInformation(CGF, OMPBuilder, MapExpr);
9656 };
9657 SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
9658 llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
9659 std::string MapnamesName =
9660 CGM.getOpenMPRuntime().getName({"offload_mapnames"});
9661 auto *MapNamesArrayGbl =
9662 OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
9663 Info.MapNamesArray = MapNamesArrayGbl;
9664 }
9665
9666 // If there's a present map type modifier, it must not be applied to the end
9667 // of a region, so generate a separate map type array in that case.
9668 if (Info.separateBeginEndCalls()) {
9669 bool EndMapTypesDiffer = false;
9670 for (uint64_t &Type : Mapping) {
9671 if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
9672 Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
9673 EndMapTypesDiffer = true;
9674 }
9675 }
9676 if (EndMapTypesDiffer) {
9677 MapTypesArrayGbl =
9678 OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
9679 Info.MapTypesArrayEnd = MapTypesArrayGbl;
9680 }
9681 }
9682
9683 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9684 llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
9685 llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
9686 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9687 Info.BasePointersArray, 0, I);
9688 BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9689 BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
9690 Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9691 CGF.Builder.CreateStore(BPVal, BPAddr);
9692
9693 if (Info.requiresDevicePointerInfo())
9694 if (const ValueDecl *DevVD =
9695 CombinedInfo.BasePointers[I].getDevicePtrDecl())
9696 Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
9697
9698 llvm::Value *PVal = CombinedInfo.Pointers[I];
9699 llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
9700 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9701 Info.PointersArray, 0, I);
9702 P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
9703 P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
9704 Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
9705 CGF.Builder.CreateStore(PVal, PAddr);
9706
9707 if (hasRuntimeEvaluationCaptureSize) {
9708 llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
9709 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9710 Info.SizesArray,
9711 /*Idx0=*/0,
9712 /*Idx1=*/I);
9713 Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
9714 CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
9715 CGM.Int64Ty,
9716 /*isSigned=*/true),
9717 SAddr);
9718 }
9719
9720 // Fill up the mapper array.
9721 llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
9722 if (CombinedInfo.Mappers[I]) {
9723 MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
9724 cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
9725 MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
9726 Info.HasMapper = true;
9727 }
9728 Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
9729 CGF.Builder.CreateStore(MFunc, MAddr);
9730 }
9731 }
9732
9733 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9734 Info.NumberOfPtrs == 0)
9735 return;
9736
9737 emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
9738}
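// Taken together, the code above materializes the following (a sketch; the
// names match the temporaries and globals created in this function):
//   void *.offload_baseptrs[N], *.offload_ptrs[N], *.offload_mappers[N];
//   int64_t .offload_sizes[N];    // constant global unless some size needs
//                                 // runtime evaluation
//   int64_t .offload_maptypes[N]; // always a constant global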
9739
9740namespace {
9741/// Additional arguments for emitOffloadingArraysArgument function.
9742struct ArgumentsOptions {
9743 bool ForEndCall = false;
9744 ArgumentsOptions() = default;
9745 ArgumentsOptions(bool ForEndCall) : ForEndCall(ForEndCall) {}
9746};
9747} // namespace
9748
9749/// Emit the arguments to be passed to the runtime library based on the
9750/// arrays of base pointers, pointers, sizes, map types, and mappers. If
9751/// ForEndCall, emit map types to be passed for the end of the region instead of
9752/// the beginning.
9753static void emitOffloadingArraysArgument(
9754 CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
9755 llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
9756 llvm::Value *&MapTypesArrayArg, llvm::Value *&MapNamesArrayArg,
9757 llvm::Value *&MappersArrayArg, CGOpenMPRuntime::TargetDataInfo &Info,
9758 const ArgumentsOptions &Options = ArgumentsOptions()) {
9759 assert((!Options.ForEndCall || Info.separateBeginEndCalls()) &&
9760 "expected region end call to runtime only when end call is separate");
9761 CodeGenModule &CGM = CGF.CGM;
9762 if (Info.NumberOfPtrs) {
9763 BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9764 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9765 Info.BasePointersArray,
9766 /*Idx0=*/0, /*Idx1=*/0);
9767 PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9768 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9769 Info.PointersArray,
9770 /*Idx0=*/0,
9771 /*Idx1=*/0);
9772 SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9773 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
9774 /*Idx0=*/0, /*Idx1=*/0);
9775 MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9776 llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
9777 Options.ForEndCall && Info.MapTypesArrayEnd ? Info.MapTypesArrayEnd
9778 : Info.MapTypesArray,
9779 /*Idx0=*/0,
9780 /*Idx1=*/0);
9781
9782 // Only emit the map names array if debug information is
9783 // requested.
9784 if (CGF.CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
9785 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9786 else
9787 MapNamesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
9788 llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
9789 Info.MapNamesArray,
9790 /*Idx0=*/0,
9791 /*Idx1=*/0);
9792 // If there is no user-defined mapper, set the mapper array to nullptr to
9793 // avoid an unnecessary data privatization.
9794 if (!Info.HasMapper)
9795 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9796 else
9797 MappersArrayArg =
9798 CGF.Builder.CreatePointerCast(Info.MappersArray, CGM.VoidPtrPtrTy);
9799 } else {
9800 BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9801 PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9802 SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9803 MapTypesArrayArg =
9804 llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
9805 MapNamesArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9806 MappersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
9807 }
9808}
9809
9810/// Check for inner distribute directive.
9811static const OMPExecutableDirective *
9812getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9813 const auto *CS = D.getInnermostCapturedStmt();
9814 const auto *Body =
9815 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9816 const Stmt *ChildStmt =
9817 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9818
9819 if (const auto *NestedDir =
9820 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9821 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9822 switch (D.getDirectiveKind()) {
9823 case OMPD_target:
9824 if (isOpenMPDistributeDirective(DKind))
9825 return NestedDir;
9826 if (DKind == OMPD_teams) {
9827 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9828 /*IgnoreCaptured=*/true);
9829 if (!Body)
9830 return nullptr;
9831 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9832 if (const auto *NND =
9833 dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
9834 DKind = NND->getDirectiveKind();
9835 if (isOpenMPDistributeDirective(DKind))
9836 return NND;
9837 }
9838 }
9839 return nullptr;
9840 case OMPD_target_teams:
9841 if (isOpenMPDistributeDirective(DKind))
9842 return NestedDir;
9843 return nullptr;
9844 case OMPD_target_parallel:
9845 case OMPD_target_simd:
9846 case OMPD_target_parallel_for:
9847 case OMPD_target_parallel_for_simd:
9848 return nullptr;
9849 case OMPD_target_teams_distribute:
9850 case OMPD_target_teams_distribute_simd:
9851 case OMPD_target_teams_distribute_parallel_for:
9852 case OMPD_target_teams_distribute_parallel_for_simd:
9853 case OMPD_parallel:
9854 case OMPD_for:
9855 case OMPD_parallel_for:
9856 case OMPD_parallel_master:
9857 case OMPD_parallel_sections:
9858 case OMPD_for_simd:
9859 case OMPD_parallel_for_simd:
9860 case OMPD_cancel:
9861 case OMPD_cancellation_point:
9862 case OMPD_ordered:
9863 case OMPD_threadprivate:
9864 case OMPD_allocate:
9865 case OMPD_task:
9866 case OMPD_simd:
9867 case OMPD_tile:
9868 case OMPD_unroll:
9869 case OMPD_sections:
9870 case OMPD_section:
9871 case OMPD_single:
9872 case OMPD_master:
9873 case OMPD_critical:
9874 case OMPD_taskyield:
9875 case OMPD_barrier:
9876 case OMPD_taskwait:
9877 case OMPD_taskgroup:
9878 case OMPD_atomic:
9879 case OMPD_flush:
9880 case OMPD_depobj:
9881 case OMPD_scan:
9882 case OMPD_teams:
9883 case OMPD_target_data:
9884 case OMPD_target_exit_data:
9885 case OMPD_target_enter_data:
9886 case OMPD_distribute:
9887 case OMPD_distribute_simd:
9888 case OMPD_distribute_parallel_for:
9889 case OMPD_distribute_parallel_for_simd:
9890 case OMPD_teams_distribute:
9891 case OMPD_teams_distribute_simd:
9892 case OMPD_teams_distribute_parallel_for:
9893 case OMPD_teams_distribute_parallel_for_simd:
9894 case OMPD_target_update:
9895 case OMPD_declare_simd:
9896 case OMPD_declare_variant:
9897 case OMPD_begin_declare_variant:
9898 case OMPD_end_declare_variant:
9899 case OMPD_declare_target:
9900 case OMPD_end_declare_target:
9901 case OMPD_declare_reduction:
9902 case OMPD_declare_mapper:
9903 case OMPD_taskloop:
9904 case OMPD_taskloop_simd:
9905 case OMPD_master_taskloop:
9906 case OMPD_master_taskloop_simd:
9907 case OMPD_parallel_master_taskloop:
9908 case OMPD_parallel_master_taskloop_simd:
9909 case OMPD_requires:
9910 case OMPD_metadirective:
9911 case OMPD_unknown:
9912 default:
9913 llvm_unreachable("Unexpected directive.");
9914 }
9915 }
9916
9917 return nullptr;
9918}
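// For example, given
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
// the function above returns the 'distribute parallel for' directive, while
// for a combined 'target parallel' region it returns nullptr.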
9919
9920/// Emit the user-defined mapper function. The code generation follows the
9921/// pattern in the example below.
9922/// \code
9923/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9924/// void *base, void *begin,
9925/// int64_t size, int64_t type,
9926/// void *name = nullptr) {
9927/// // Allocate space for an array section first or add a base/begin for
9928/// // pointer dereference.
9929/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9930/// !maptype.IsDelete)
9931/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9932/// size*sizeof(Ty), clearToFromMember(type));
9933/// // Map members.
9934/// for (unsigned i = 0; i < size; i++) {
9935/// // For each component specified by this mapper:
9936/// for (auto c : begin[i]->all_components) {
9937/// if (c.hasMapper())
9938/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9939/// c.arg_type, c.arg_name);
9940/// else
9941/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9942/// c.arg_begin, c.arg_size, c.arg_type,
9943/// c.arg_name);
9944/// }
9945/// }
9946/// // Delete the array section.
9947/// if (size > 1 && maptype.IsDelete)
9948/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9949/// size*sizeof(Ty), clearToFromMember(type));
9950/// }
9951/// \endcode
9952void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
9953 CodeGenFunction *CGF) {
9954 if (UDMMap.count(D) > 0)
9955 return;
9956 ASTContext &C = CGM.getContext();
9957 QualType Ty = D->getType();
9958 QualType PtrTy = C.getPointerType(Ty).withRestrict();
9959 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
9960 auto *MapperVarDecl =
9961 cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
9962 SourceLocation Loc = D->getLocation();
9963 CharUnits ElementSize = C.getTypeSizeInChars(Ty);
9964
9965 // Prepare mapper function arguments and attributes.
9966 ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9967 C.VoidPtrTy, ImplicitParamDecl::Other);
9968 ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9969 ImplicitParamDecl::Other);
9970 ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
9971 C.VoidPtrTy, ImplicitParamDecl::Other);
9972 ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9973 ImplicitParamDecl::Other);
9974 ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
9975 ImplicitParamDecl::Other);
9976 ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
9977 ImplicitParamDecl::Other);
9978 FunctionArgList Args;
9979 Args.push_back(&HandleArg);
9980 Args.push_back(&BaseArg);
9981 Args.push_back(&BeginArg);
9982 Args.push_back(&SizeArg);
9983 Args.push_back(&TypeArg);
9984 Args.push_back(&NameArg);
9985 const CGFunctionInfo &FnInfo =
9986 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
9987 llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
9988 SmallString<64> TyStr;
9989 llvm::raw_svector_ostream Out(TyStr);
9990 CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
9991 std::string Name = getName({"omp_mapper", TyStr, D->getName()});
9992 auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
9993 Name, &CGM.getModule());
9994 CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
9995 Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
9996 // Start the mapper function code generation.
9997 CodeGenFunction MapperCGF(CGM);
9998 MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
9999 // Compute the starting and end addresses of array elements.
10000 llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
10001 MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
10002 C.getPointerType(Int64Ty), Loc);
10003 // Prepare common arguments for array initialization and deletion.
10004 llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
10005 MapperCGF.GetAddrOfLocalVar(&HandleArg),
10006 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10007 llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
10008 MapperCGF.GetAddrOfLocalVar(&BaseArg),
10009 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10010 llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
10011 MapperCGF.GetAddrOfLocalVar(&BeginArg),
10012 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10013 // Convert the size in bytes into the number of array elements.
10014 Size = MapperCGF.Builder.CreateExactUDiv(
10015 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10016 llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
10017 BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
10018 llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(
10019 PtrBegin->getType()->getPointerElementType(), PtrBegin, Size);
10020 llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
10021 MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
10022 C.getPointerType(Int64Ty), Loc);
10023 llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
10024 MapperCGF.GetAddrOfLocalVar(&NameArg),
10025 /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
10026
10027 // Emit array initialization if this is an array section and \p MapType
10028 // indicates that memory allocation is required.
10029 llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
10030 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10031 MapName, ElementSize, HeadBB, /*IsInit=*/true);
10032
10033 // Emit a for loop that iterates over Size elements and maps each of them.
10034
10035 // Emit the loop header block.
10036 MapperCGF.EmitBlock(HeadBB);
10037 llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
10038 llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
10039 // Evaluate whether the initial condition is satisfied.
10040 llvm::Value *IsEmpty =
10041 MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
10042 MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10043 llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();
10044
10045 // Emit the loop body block.
10046 MapperCGF.EmitBlock(BodyBB);
10047 llvm::BasicBlock *LastBB = BodyBB;
10048 llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
10049 PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
10050 PtrPHI->addIncoming(PtrBegin, EntryBB);
10051 Address PtrCurrent =
10052 Address(PtrPHI, MapperCGF.GetAddrOfLocalVar(&BeginArg)
10053 .getAlignment()
10054 .alignmentOfArrayElement(ElementSize));
10055 // Privatize the declared variable of mapper to be the current array element.
10056 CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
10057 Scope.addPrivate(MapperVarDecl, [PtrCurrent]() { return PtrCurrent; });
10058 (void)Scope.Privatize();
10059
10060 // Get map clause information. Fill up the arrays with all mapped variables.
10061 MappableExprsHandler::MapCombinedInfoTy Info;
10062 MappableExprsHandler MEHandler(*D, MapperCGF);
10063 MEHandler.generateAllInfoForMapper(Info);
10064
10065 // Call the runtime API __tgt_mapper_num_components to get the number of
10066 // pre-existing components.
10067 llvm::Value *OffloadingArgs[] = {Handle};
10068 llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
10069 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10070 OMPRTL___tgt_mapper_num_components),
10071 OffloadingArgs);
10072 llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
10073 PreviousSize,
10074 MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));
10075
10076 // Fill up the runtime mapper handle for all components.
10077 for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
10078 llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
10079 *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10080 llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
10081 Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
10082 llvm::Value *CurSizeArg = Info.Sizes[I];
10083 llvm::Value *CurNameArg =
10084 (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
10085 ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
10086 : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);
10087
10088 // Extract the MEMBER_OF field from the map type.
10089 llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
10090 llvm::Value *MemberMapType =
10091 MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10092
10093 // Combine the map type inherited from user-defined mapper with that
10094 // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
10095 // bits of the \a MapType, which is the input argument of the mapper
10096 // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
10097 // bits of MemberMapType.
10098 // [OpenMP 5.0], 1.2.6. map-type decay.
10099 // | alloc | to | from | tofrom | release | delete
10100 // ----------------------------------------------------------
10101 // alloc | alloc | alloc | alloc | alloc | release | delete
10102 // to | alloc | to | alloc | to | release | delete
10103 // from | alloc | alloc | from | from | release | delete
10104 // tofrom | alloc | to | from | tofrom | release | delete
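// For example, if the mapper is invoked with \a MapType 'to' while a member
// is declared 'tofrom' inside the mapper, the member decays to 'to'; the
// branches emitted below realize this table by clearing bits in
// MemberMapType.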
10105 llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
10106 MapType,
10107 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
10108 MappableExprsHandler::OMP_MAP_FROM));
10109 llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
10110 llvm::BasicBlock *AllocElseBB =
10111 MapperCGF.createBasicBlock("omp.type.alloc.else");
10112 llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
10113 llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
10114 llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
10115 llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
10116 llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
10117 MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10118 // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
10119 MapperCGF.EmitBlock(AllocBB);
10120 llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
10121 MemberMapType,
10122 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10123 MappableExprsHandler::OMP_MAP_FROM)));
10124 MapperCGF.Builder.CreateBr(EndBB);
10125 MapperCGF.EmitBlock(AllocElseBB);
10126 llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
10127 LeftToFrom,
10128 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
10129 MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10130 // In case of to, clear OMP_MAP_FROM.
10131 MapperCGF.EmitBlock(ToBB);
10132 llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
10133 MemberMapType,
10134 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
10135 MapperCGF.Builder.CreateBr(EndBB);
10136 MapperCGF.EmitBlock(ToElseBB);
10137 llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
10138 LeftToFrom,
10139 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
10140 MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10141 // In case of from, clear OMP_MAP_TO.
10142 MapperCGF.EmitBlock(FromBB);
10143 llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
10144 MemberMapType,
10145 MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
10146 // In case of tofrom, do nothing.
10147 MapperCGF.EmitBlock(EndBB);
10148 LastBB = EndBB;
10149 llvm::PHINode *CurMapType =
10150 MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
10151 CurMapType->addIncoming(AllocMapType, AllocBB);
10152 CurMapType->addIncoming(ToMapType, ToBB);
10153 CurMapType->addIncoming(FromMapType, FromBB);
10154 CurMapType->addIncoming(MemberMapType, ToElseBB);
10155
10156 llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
10157 CurSizeArg, CurMapType, CurNameArg};
10158 if (Info.Mappers[I]) {
10159 // Call the corresponding mapper function.
10160 llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
10161 cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
10162 assert(MapperFunc && "Expect a valid mapper function is available.");
10163 MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
10164 } else {
10165 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10166 // data structure.
10167 MapperCGF.EmitRuntimeCall(
10168 OMPBuilder.getOrCreateRuntimeFunction(
10169 CGM.getModule(), OMPRTL___tgt_push_mapper_component),
10170 OffloadingArgs);
10171 }
10172 }
10173
10174 // Update the pointer to point to the next element that needs to be mapped,
10175 // and check whether we have mapped all elements.
10176 llvm::Type *ElemTy = PtrPHI->getType()->getPointerElementType();
10177 llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
10178 ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
10179 PtrPHI->addIncoming(PtrNext, LastBB);
10180 llvm::Value *IsDone =
10181 MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
10182 llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
10183 MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10184
10185 MapperCGF.EmitBlock(ExitBB);
10186 // Emit array deletion if this is an array section and \p MapType indicates
10187 // that deletion is required.
10188 emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
10189 MapName, ElementSize, DoneBB, /*IsInit=*/false);
10190
10191 // Emit the function exit block.
10192 MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
10193 MapperCGF.FinishFunction();
10194 UDMMap.try_emplace(D, Fn);
10195 if (CGF) {
10196 auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
10197 Decls.second.push_back(D);
10198 }
10199}
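// A usage sketch (hypothetical type and mapper) of the declarations this
// function handles:
//   struct Vec { int len; double *data; };
//   #pragma omp declare mapper(Vec v) map(v, v.data[0:v.len])
// Mapping an array of Vec then runs the emitted loop once per element,
// pushing one component per map clause item onto the runtime handle.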
10200
10201/// Emit the array initialization or deletion portion for user-defined mapper
10202/// code generation. First, it evaluates whether an array section is mapped and
10203/// whether the \a MapType instructs to delete this section. If \a IsInit is
10204/// true, and \a MapType indicates to not delete this array, array
10205/// initialization code is generated. If \a IsInit is false, and \a MapType
10206 /// indicates to delete this array, array deletion code is generated.
10207void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
10208 CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
10209 llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
10210 llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
10211 bool IsInit) {
10212 StringRef Prefix = IsInit ? ".init" : ".del";
10213
10214 // Evaluate if this is an array section.
10215 llvm::BasicBlock *BodyBB =
10216 MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
10217 llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
10218 Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
10219 llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
10220 MapType,
10221 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
10222 llvm::Value *DeleteCond;
10223 llvm::Value *Cond;
10224 if (IsInit) {
10225 // base != begin?
10226 llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
10227 // IsPtrAndObj?
10228 llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
10229 MapType,
10230 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
10231 PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
10232 BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
10233 Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
10234 DeleteCond = MapperCGF.Builder.CreateIsNull(
10235 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10236 } else {
10237 Cond = IsArray;
10238 DeleteCond = MapperCGF.Builder.CreateIsNotNull(
10239 DeleteBit, getName({"omp.array", Prefix, ".delete"}));
10240 }
10241 Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
10242 MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);
10243
10244 MapperCGF.EmitBlock(BodyBB);
10245 // Get the array size by multiplying the element size by the number of
10246 // elements (i.e., \p Size).
10247 llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
10248 Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
10249 // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it serves
10250 // the memory allocation/deletion purpose only.
10251 llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
10252 MapType,
10253 MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
10254 MappableExprsHandler::OMP_MAP_FROM)));
10255 MapTypeArg = MapperCGF.Builder.CreateOr(
10256 MapTypeArg,
10257 MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));
10258
10259 // Call the runtime API __tgt_push_mapper_component to fill up the runtime
10260 // data structure.
10261 llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
10262 ArraySize, MapTypeArg, MapName};
10263 MapperCGF.EmitRuntimeCall(
10264 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
10265 OMPRTL___tgt_push_mapper_component),
10266 OffloadingArgs);
10267}
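// In pseudocode, the guard emitted above reduces to:
//   init: if ((size > 1 || (base != begin && IsPtrAndObj)) && !DeleteBit)
//   del:  if (size > 1 && DeleteBit)
// followed by a single __tgt_push_mapper_component call whose map type has
// OMP_MAP_TO/OMP_MAP_FROM cleared and OMP_MAP_IMPLICIT set.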
10268
10269llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
10270 const OMPDeclareMapperDecl *D) {
10271 auto I = UDMMap.find(D);
10272 if (I != UDMMap.end())
10273 return I->second;
10274 emitUserDefinedMapper(D);
10275 return UDMMap.lookup(D);
10276}
10277
10278void CGOpenMPRuntime::emitTargetNumIterationsCall(
10279 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10280 llvm::Value *DeviceID,
10281 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10282 const OMPLoopDirective &D)>
10283 SizeEmitter) {
10284 OpenMPDirectiveKind Kind = D.getDirectiveKind();
10285 const OMPExecutableDirective *TD = &D;
10286 // Get nested teams distribute kind directive, if any.
10287 if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
10288 TD = getNestedDistributeDirective(CGM.getContext(), D);
10289 if (!TD)
10290 return;
10291 const auto *LD = cast<OMPLoopDirective>(TD);
10292 auto &&CodeGen = [LD, DeviceID, SizeEmitter, &D, this](CodeGenFunction &CGF,
10293 PrePostActionTy &) {
10294 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD)) {
10295 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10296 llvm::Value *Args[] = {RTLoc, DeviceID, NumIterations};
10297 CGF.EmitRuntimeCall(
10298 OMPBuilder.getOrCreateRuntimeFunction(
10299 CGM.getModule(), OMPRTL___kmpc_push_target_tripcount_mapper),
10300 Args);
10301 }
10302 };
10303 emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
10304}
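// The net effect is one inlined runtime call emitted before the region
// launch, roughly:
//   __kmpc_push_target_tripcount_mapper(&loc, device_id, num_iterations);
// so the runtime can size the upcoming kernel launch for loop-based target
// directives.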
10305
10306void CGOpenMPRuntime::emitTargetCall(
10307 CodeGenFunction &CGF, const OMPExecutableDirective &D,
10308 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
10309 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
10310 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
10311 const OMPLoopDirective &D)>
10312 SizeEmitter) {
10313 if (!CGF.HaveInsertPoint())
10314 return;
10315
10316 assert(OutlinedFn && "Invalid outlined function!");
10317
10318 const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10319 D.hasClausesOfKind<OMPNowaitClause>();
10320 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
10321 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
10322 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
10323 PrePostActionTy &) {
10324 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10325 };
10326 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
10327
10328 CodeGenFunction::OMPTargetDataInfo InputInfo;
10329 llvm::Value *MapTypesArray = nullptr;
10330 llvm::Value *MapNamesArray = nullptr;
10331 // Fill up the pointer arrays and transfer execution to the device.
10332 auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
10333 &MapTypesArray, &MapNamesArray, &CS, RequiresOuterTask,
10334 &CapturedVars,
10335 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
10336 if (Device.getInt() == OMPC_DEVICE_ancestor) {
10337 // Reverse offloading is not supported, so just execute on the host.
10338 if (RequiresOuterTask) {
10339 CapturedVars.clear();
10340 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10341 }
10342 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10343 return;
10344 }
10345
10346 // On top of the arrays that were filled up, the target offloading call
10347 // takes as arguments the device id as well as the host pointer. The host
10348 // pointer is used by the runtime library to identify the current target
10349 // region, so it only has to be unique and not necessarily point to
10350 // anything. It could be the pointer to the outlined function that
10351 // implements the target region, but we aren't using that so that the
10352 // compiler doesn't need to keep that, and could therefore inline the host
10353 // function if proven worthwhile during optimization.
10354
10355 // From this point on, we need to have an ID of the target region defined.
10356 assert(OutlinedFnID && "Invalid outlined function ID!");
10357
10358 // Emit device ID if any.
10359 llvm::Value *DeviceID;
10360 if (Device.getPointer()) {
10361 assert((Device.getInt() == OMPC_DEVICE_unknown ||
10362 Device.getInt() == OMPC_DEVICE_device_num) &&
10363 "Expected device_num modifier.");
10364 llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
10365 DeviceID =
10366 CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
10367 } else {
10368 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10369 }
10370
10371 // Emit the number of elements in the offloading arrays.
10372 llvm::Value *PointerNum =
10373 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10374
10375 // Return value of the runtime offloading call.
10376 llvm::Value *Return;
10377
10378 llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
10379 llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
10380
10381 // Source location for the ident struct
10382 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10383
10384 // Emit tripcount for the target loop-based directive.
10385 emitTargetNumIterationsCall(CGF, D, DeviceID, SizeEmitter);
10386
10387 bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10388 // The target region is an outlined function launched by the runtime
10389 // via calls to __tgt_target() or __tgt_target_teams().
10390 //
10391 // __tgt_target() launches a target region with one team and one thread,
10392 // executing a serial region. This master thread may in turn launch
10393 // more threads within its team upon encountering a parallel region,
10394 // however, no additional teams can be launched on the device.
10395 //
10396 // __tgt_target_teams() launches a target region with one or more teams,
10397 // each with one or more threads. This call is required for target
10398 // constructs such as:
10399 // 'target teams'
10400 // 'target' / 'teams'
10401 // 'target teams distribute parallel for'
10402 // 'target parallel'
10403 // and so on.
10404 //
10405 // Note that on the host and CPU targets, the runtime implementation of
10406 // these calls simply calls the outlined function without forking threads.
10407 // The outlined functions themselves have runtime calls to
10408 // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
10409 // the compiler in emitTeamsCall() and emitParallelCall().
10410 //
10411 // In contrast, on the NVPTX target, the implementation of
10412 // __tgt_target_teams() launches a GPU kernel with the requested number
10413 // of teams and threads so no additional calls to the runtime are required.
10414 if (NumTeams) {
10415 // If we have NumTeams defined this means that we have an enclosed teams
10416 // region. Therefore we also expect to have NumThreads defined. These two
10417 // values should be defined in the presence of a teams directive,
10418 // regardless of having any clauses associated. If the user is using teams
10419 // but no clauses, these two values will be the default that should be
10420 // passed to the runtime library - a 32-bit integer with the value zero.
10421 assert(NumThreads && "Thread limit expression should be available along "
10422 "with number of teams.");
10423 SmallVector<llvm::Value *> OffloadingArgs = {
10424 RTLoc,
10425 DeviceID,
10426 OutlinedFnID,
10427 PointerNum,
10428 InputInfo.BasePointersArray.getPointer(),
10429 InputInfo.PointersArray.getPointer(),
10430 InputInfo.SizesArray.getPointer(),
10431 MapTypesArray,
10432 MapNamesArray,
10433 InputInfo.MappersArray.getPointer(),
10434 NumTeams,
10435 NumThreads};
10436 if (HasNowait) {
10437 // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10438 // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10439 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10440 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10441 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10442 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10443 }
10444 Return = CGF.EmitRuntimeCall(
10445 OMPBuilder.getOrCreateRuntimeFunction(
10446 CGM.getModule(), HasNowait
10447 ? OMPRTL___tgt_target_teams_nowait_mapper
10448 : OMPRTL___tgt_target_teams_mapper),
10449 OffloadingArgs);
10450 } else {
10451 SmallVector<llvm::Value *> OffloadingArgs = {
10452 RTLoc,
10453 DeviceID,
10454 OutlinedFnID,
10455 PointerNum,
10456 InputInfo.BasePointersArray.getPointer(),
10457 InputInfo.PointersArray.getPointer(),
10458 InputInfo.SizesArray.getPointer(),
10459 MapTypesArray,
10460 MapNamesArray,
10461 InputInfo.MappersArray.getPointer()};
10462 if (HasNowait) {
10463 // Add int32_t depNum = 0, void *depList = nullptr, int32_t
10464 // noAliasDepNum = 0, void *noAliasDepList = nullptr.
10465 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10466 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10467 OffloadingArgs.push_back(CGF.Builder.getInt32(0));
10468 OffloadingArgs.push_back(llvm::ConstantPointerNull::get(CGM.VoidPtrTy));
10469 }
10470 Return = CGF.EmitRuntimeCall(
10471 OMPBuilder.getOrCreateRuntimeFunction(
10472 CGM.getModule(), HasNowait ? OMPRTL___tgt_target_nowait_mapper
10473 : OMPRTL___tgt_target_mapper),
10474 OffloadingArgs);
10475 }
10476
10477 // Check the error code and execute the host version if required.
10478 llvm::BasicBlock *OffloadFailedBlock =
10479 CGF.createBasicBlock("omp_offload.failed");
10480 llvm::BasicBlock *OffloadContBlock =
10481 CGF.createBasicBlock("omp_offload.cont");
10482 llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
10483 CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
10484
10485 CGF.EmitBlock(OffloadFailedBlock);
10486 if (RequiresOuterTask) {
10487 CapturedVars.clear();
10488 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10489 }
10490 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10491 CGF.EmitBranch(OffloadContBlock);
10492
10493 CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
10494 };
10495
10496 // Notify that the host version must be executed.
10497 auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
10498 RequiresOuterTask](CodeGenFunction &CGF,
10499 PrePostActionTy &) {
10500 if (RequiresOuterTask) {
10501 CapturedVars.clear();
10502 CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
10503 }
10504 emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
10505 };
10506
10507 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10508 &MapNamesArray, &CapturedVars, RequiresOuterTask,
10509 &CS](CodeGenFunction &CGF, PrePostActionTy &) {
10510 // Fill up the arrays with all the captured variables.
10511 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10512
10513 // Get mappable expression information.
10514 MappableExprsHandler MEHandler(D, CGF);
10515 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
10516 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
10517
10518 auto RI = CS.getCapturedRecordDecl()->field_begin();
10519 auto *CV = CapturedVars.begin();
10520 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
10521 CE = CS.capture_end();
10522 CI != CE; ++CI, ++RI, ++CV) {
10523 MappableExprsHandler::MapCombinedInfoTy CurInfo;
10524 MappableExprsHandler::StructRangeInfoTy PartialStruct;
10525
10526 // VLA sizes are passed to the outlined region by copy and do not have
10527 // associated map information.
10528 if (CI->capturesVariableArrayType()) {
10529 CurInfo.Exprs.push_back(nullptr);
10530 CurInfo.BasePointers.push_back(*CV);
10531 CurInfo.Pointers.push_back(*CV);
10532 CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
10533 CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
10534 // Copy to the device as an argument. No need to retrieve it.
10535 CurInfo.Types.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
10536 MappableExprsHandler::OMP_MAP_TARGET_PARAM |
10537 MappableExprsHandler::OMP_MAP_IMPLICIT);
10538 CurInfo.Mappers.push_back(nullptr);
10539 } else {
10540 // If we have any information in the map clause, we use it, otherwise we
10541 // just do a default mapping.
10542 MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
10543 if (!CI->capturesThis())
10544 MappedVarSet.insert(CI->getCapturedVar());
10545 else
10546 MappedVarSet.insert(nullptr);
10547 if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
10548 MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
10549 // Generate correct mapping for variables captured by reference in
10550 // lambdas.
10551 if (CI->capturesVariable())
10552 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
10553 CurInfo, LambdaPointers);
10554 }
10555 // We expect to have at least an element of information for this capture.
10556 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
10557 "Non-existing map pointer for capture!");
10558 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
10559 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
10560 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
10561 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
10562 "Inconsistent map information sizes!");
10563
10564 // If there is an entry in PartialStruct it means we have a struct with
10565 // individual members mapped. Emit an extra combined entry.
10566 if (PartialStruct.Base.isValid()) {
10567 CombinedInfo.append(PartialStruct.PreliminaryMapData);
10568 MEHandler.emitCombinedEntry(
10569 CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
10570 !PartialStruct.PreliminaryMapData.BasePointers.empty());
10571 }
10572
10573 // We need to append the results of this capture to what we already have.
10574 CombinedInfo.append(CurInfo);
10575 }
10576 // Adjust MEMBER_OF flags for the lambdas captures.
10577 MEHandler.adjustMemberOfForLambdaCaptures(
10578 LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
10579 CombinedInfo.Types);
10580 // Map any list items in a map clause that were not captured because they
10581 // weren't referenced within the construct.
10582 MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);
10583
10584 TargetDataInfo Info;
10585 // Fill up the arrays and create the arguments.
10586 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
10587 emitOffloadingArraysArgument(
10588 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
10589 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
10590 {/*ForEndCall=*/false});
10591
10592 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10593 InputInfo.BasePointersArray =
10594 Address(Info.BasePointersArray, CGM.getPointerAlign());
10595 InputInfo.PointersArray =
10596 Address(Info.PointersArray, CGM.getPointerAlign());
10597 InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
10598 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
10599 MapTypesArray = Info.MapTypesArray;
10600 MapNamesArray = Info.MapNamesArray;
10601 if (RequiresOuterTask)
10602 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10603 else
10604 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10605 };
10606
10607 auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
10608 CodeGenFunction &CGF, PrePostActionTy &) {
10609 if (RequiresOuterTask) {
10610 CodeGenFunction::OMPTargetDataInfo InputInfo;
10611 CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
10612 } else {
10613 emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
10614 }
10615 };
10616
10617 // If we have a target function ID, it means that we need to support
10618 // offloading; otherwise, just execute on the host. We need to execute on the
10619 // host regardless of the conditional in the if clause if, e.g., the user does
10620 // not specify target triples.
10621 if (OutlinedFnID) {
10622 if (IfCond) {
10623 emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
10624 } else {
10625 RegionCodeGenTy ThenRCG(TargetThenGen);
10626 ThenRCG(CGF);
10627 }
10628 } else {
10629 RegionCodeGenTy ElseRCG(TargetElseGen);
10630 ElseRCG(CGF);
10631 }
10632}
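
For orientation, a minimal sketch (hypothetical user code, not from this file) of the kind of construct whose lowering the selection above drives:

    // Offloaded when an offload entry (OutlinedFnID) exists and the if clause
    // holds at runtime; otherwise the host-outlined version runs via ElseGen.
    void saxpy(int n, float a, const float *x, float *y) {
      #pragma omp target map(to : x[0 : n]) map(tofrom : y[0 : n]) if (n > 1024)
      for (int i = 0; i < n; ++i)
        y[i] += a * x[i];
    }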
10633
10634void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
10635 StringRef ParentName) {
10636 if (!S)
10637 return;
10638
10639 // Codegen OMP target directives that offload compute to the device.
10640 bool RequiresDeviceCodegen =
10641 isa<OMPExecutableDirective>(S) &&
10642 isOpenMPTargetExecutionDirective(
10643 cast<OMPExecutableDirective>(S)->getDirectiveKind());
10644
10645 if (RequiresDeviceCodegen) {
10646 const auto &E = *cast<OMPExecutableDirective>(S);
10647 unsigned DeviceID;
10648 unsigned FileID;
10649 unsigned Line;
10650 getTargetEntryUniqueInfo(CGM.getContext(), E.getBeginLoc(), DeviceID,
10651 FileID, Line);
10652
10653 // Is this a target region that should not be emitted as an entry point? If
10654 // so, just signal that we are done with this target region.
10655 if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(DeviceID, FileID,
10656 ParentName, Line))
10657 return;
10658
10659 switch (E.getDirectiveKind()) {
10660 case OMPD_target:
10661 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
10662 cast<OMPTargetDirective>(E));
10663 break;
10664 case OMPD_target_parallel:
10665 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
10666 CGM, ParentName, cast<OMPTargetParallelDirective>(E));
10667 break;
10668 case OMPD_target_teams:
10669 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
10670 CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
10671 break;
10672 case OMPD_target_teams_distribute:
10673 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
10674 CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
10675 break;
10676 case OMPD_target_teams_distribute_simd:
10677 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
10678 CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
10679 break;
10680 case OMPD_target_parallel_for:
10681 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
10682 CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
10683 break;
10684 case OMPD_target_parallel_for_simd:
10685 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
10686 CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
10687 break;
10688 case OMPD_target_simd:
10689 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
10690 CGM, ParentName, cast<OMPTargetSimdDirective>(E));
10691 break;
10692 case OMPD_target_teams_distribute_parallel_for:
10693 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
10694 CGM, ParentName,
10695 cast<OMPTargetTeamsDistributeParallelForDirective>(E));
10696 break;
10697 case OMPD_target_teams_distribute_parallel_for_simd:
10698 CodeGenFunction::
10699 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
10700 CGM, ParentName,
10701 cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
10702 break;
10703 case OMPD_parallel:
10704 case OMPD_for:
10705 case OMPD_parallel_for:
10706 case OMPD_parallel_master:
10707 case OMPD_parallel_sections:
10708 case OMPD_for_simd:
10709 case OMPD_parallel_for_simd:
10710 case OMPD_cancel:
10711 case OMPD_cancellation_point:
10712 case OMPD_ordered:
10713 case OMPD_threadprivate:
10714 case OMPD_allocate:
10715 case OMPD_task:
10716 case OMPD_simd:
10717 case OMPD_tile:
10718 case OMPD_unroll:
10719 case OMPD_sections:
10720 case OMPD_section:
10721 case OMPD_single:
10722 case OMPD_master:
10723 case OMPD_critical:
10724 case OMPD_taskyield:
10725 case OMPD_barrier:
10726 case OMPD_taskwait:
10727 case OMPD_taskgroup:
10728 case OMPD_atomic:
10729 case OMPD_flush:
10730 case OMPD_depobj:
10731 case OMPD_scan:
10732 case OMPD_teams:
10733 case OMPD_target_data:
10734 case OMPD_target_exit_data:
10735 case OMPD_target_enter_data:
10736 case OMPD_distribute:
10737 case OMPD_distribute_simd:
10738 case OMPD_distribute_parallel_for:
10739 case OMPD_distribute_parallel_for_simd:
10740 case OMPD_teams_distribute:
10741 case OMPD_teams_distribute_simd:
10742 case OMPD_teams_distribute_parallel_for:
10743 case OMPD_teams_distribute_parallel_for_simd:
10744 case OMPD_target_update:
10745 case OMPD_declare_simd:
10746 case OMPD_declare_variant:
10747 case OMPD_begin_declare_variant:
10748 case OMPD_end_declare_variant:
10749 case OMPD_declare_target:
10750 case OMPD_end_declare_target:
10751 case OMPD_declare_reduction:
10752 case OMPD_declare_mapper:
10753 case OMPD_taskloop:
10754 case OMPD_taskloop_simd:
10755 case OMPD_master_taskloop:
10756 case OMPD_master_taskloop_simd:
10757 case OMPD_parallel_master_taskloop:
10758 case OMPD_parallel_master_taskloop_simd:
10759 case OMPD_requires:
10760 case OMPD_metadirective:
10761 case OMPD_unknown:
10762 default:
10763 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
10764 }
10765 return;
10766 }
10767
10768 if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
10769 if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
10770 return;
10771
10772 scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
10773 return;
10774 }
10775
10776 // If this is a lambda function, look into its body.
10777 if (const auto *L = dyn_cast<LambdaExpr>(S))
10778 S = L->getBody();
10779
10780 // Keep looking for target regions recursively.
10781 for (const Stmt *II : S->children())
10782 scanForTargetRegionsFunctions(II, ParentName);
10783}
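
As a sketch of what the recursive scan visits, a hypothetical target region reachable only through a lambda body:

    void host_fn(int *p) {
      auto work = [p]() {
        #pragma omp target map(tofrom : p[0 : 1]) // found via the lambda-body walk
        p[0] += 1;
      };
      work();
    }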
10784
10785static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
10786 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10787 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10788 if (!DevTy)
10789 return false;
10790 // Do not emit device_type(nohost) functions for the host.
10791 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
10792 return true;
10793 // Do not emit device_type(host) functions for the device.
10794 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
10795 return true;
10796 return false;
10797}
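
The device_type filtering above corresponds to declarations such as this sketch:

    #pragma omp declare target device_type(nohost)
    void device_only(); // not emitted when compiling for the host
    #pragma omp end declare target

    #pragma omp declare target device_type(host)
    void host_only();   // not emitted when compiling for the device
    #pragma omp end declare target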
10798
10799bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10800 // If emitting code for the host, we do not process FD here. Instead we do
10801 // the normal code generation.
10802 if (!CGM.getLangOpts().OpenMPIsDevice) {
10803 if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
10804 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10805 CGM.getLangOpts().OpenMPIsDevice))
10806 return true;
10807 return false;
10808 }
10809
10810 const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
10811 // Try to detect target regions in the function.
10812 if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
10813 StringRef Name = CGM.getMangledName(GD);
10814 scanForTargetRegionsFunctions(FD->getBody(), Name);
10815 if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
10816 CGM.getLangOpts().OpenMPIsDevice))
10817 return true;
10818 }
10819
10820 // Do not emit the function if it is not marked as declare target.
10821 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10822 AlreadyEmittedTargetDecls.count(VD) == 0;
10823}
10824
10825bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
10826 if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
10827 CGM.getLangOpts().OpenMPIsDevice))
10828 return true;
10829
10830 if (!CGM.getLangOpts().OpenMPIsDevice)
10831 return false;
10832
10833 // Check if there are Ctors/Dtors in this declaration and look for target
10834 // regions in it. We use the complete variant to produce the kernel name
10835 // mangling.
10836 QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10837 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10838 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10839 StringRef ParentName =
10840 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10841 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10842 }
10843 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10844 StringRef ParentName =
10845 CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
10846 scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10847 }
10848 }
10849
10850 // Do not emit the variable if it is not marked as declare target.
10851 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10852 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10853 cast<VarDecl>(GD.getDecl()));
10854 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10855 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10856 HasRequiresUnifiedSharedMemory)) {
10857 DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10858 return true;
10859 }
10860 return false;
10861}
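
A sketch of the ctor/dtor case handled above (the type S is hypothetical): a declare target global of class type whose constructor contains a target region, scanned under the complete-constructor mangled name:

    struct S {
      S() {
        #pragma omp target // scanned using the complete-ctor name as ParentName
        {}
      }
    };
    #pragma omp declare target
    S GlobalS;
    #pragma omp end declare target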
10862
10863void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10864 llvm::Constant *Addr) {
10865 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10866 !CGM.getLangOpts().OpenMPIsDevice)
10867 return;
10868
10869 // If we have host/nohost variables, they do not need to be registered.
10870 Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
10871 OMPDeclareTargetDeclAttr::getDeviceType(VD);
10872 if (DevTy && DevTy.getValue() != OMPDeclareTargetDeclAttr::DT_Any)
10873 return;
10874
10875 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10876 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10877 if (!Res) {
10878 if (CGM.getLangOpts().OpenMPIsDevice) {
10879 // Register non-target variables being emitted in device code (debug info
10880 // may cause this).
10881 StringRef VarName = CGM.getMangledName(VD);
10882 EmittedNonTargetVariables.try_emplace(VarName, Addr);
10883 }
10884 return;
10885 }
10886 // Register declare target variables.
10887 OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags;
10888 StringRef VarName;
10889 CharUnits VarSize;
10890 llvm::GlobalValue::LinkageTypes Linkage;
10891
10892 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10893 !HasRequiresUnifiedSharedMemory) {
10894 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10895 VarName = CGM.getMangledName(VD);
10896 if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
10897 VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
10898 assert(!VarSize.isZero() && "Expected non-zero size of the variable");
10899 } else {
10900 VarSize = CharUnits::Zero();
10901 }
10902 Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
10903 // Temporary solution to prevent optimizations of the internal variables.
10904 if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
10905 // Do not create a "ref-variable" if the original is not also available
10906 // on the host.
10907 if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10908 return;
10909 std::string RefName = getName({VarName, "ref"});
10910 if (!CGM.GetGlobalValue(RefName)) {
10911 llvm::Constant *AddrRef =
10912 getOrCreateInternalVariable(Addr->getType(), RefName);
10913 auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
10914 GVAddrRef->setConstant(/*Val=*/true);
10915 GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
10916 GVAddrRef->setInitializer(Addr);
10917 CGM.addCompilerUsedGlobal(GVAddrRef);
10918 }
10919 }
10920 } else {
10921 assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
10922 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10923 HasRequiresUnifiedSharedMemory)) &&
10924 "Declare target attribute must link or to with unified memory.");
10925 if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
10926 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink;
10927 else
10928 Flags = OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo;
10929
10930 if (CGM.getLangOpts().OpenMPIsDevice) {
10931 VarName = Addr->getName();
10932 Addr = nullptr;
10933 } else {
10934 VarName = getAddrOfDeclareTargetVar(VD).getName();
10935 Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
10936 }
10937 VarSize = CGM.getPointerSize();
10938 Linkage = llvm::GlobalValue::WeakAnyLinkage;
10939 }
10940
10941 OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
10942 VarName, Addr, VarSize, Flags, Linkage);
10943}
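
The to/link split above traces back to source such as this sketch:

    int a;
    #pragma omp declare target to(a)   // OMPTargetGlobalVarEntryTo
    int b;
    #pragma omp declare target link(b) // OMPTargetGlobalVarEntryLink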
10944
10945bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10946 if (isa<FunctionDecl>(GD.getDecl()) ||
10947 isa<OMPDeclareReductionDecl>(GD.getDecl()))
10948 return emitTargetFunctions(GD);
10949
10950 return emitTargetGlobalVariable(GD);
10951}
10952
10953void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10954 for (const VarDecl *VD : DeferredGlobalVariables) {
10955 llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10956 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10957 if (!Res)
10958 continue;
10959 if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10960 !HasRequiresUnifiedSharedMemory) {
10961 CGM.EmitGlobal(VD);
10962 } else {
10963 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10964 (*Res == OMPDeclareTargetDeclAttr::MT_To &&
10965 HasRequiresUnifiedSharedMemory)) &&
10966 "Expected link clause or to clause with unified memory.");
10967 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10968 }
10969 }
10970}
10971
10972void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10973 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
10974 assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
10975 " Expected target-based directive.");
10976}
10977
10978void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10979 for (const OMPClause *Clause : D->clauselists()) {
10980 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10981 HasRequiresUnifiedSharedMemory = true;
10982 } else if (const auto *AC =
10983 dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10984 switch (AC->getAtomicDefaultMemOrderKind()) {
10985 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10986 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10987 break;
10988 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10989 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10990 break;
10991 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10992 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10993 break;
10994 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10995 break;
10996 }
10997 }
10998 }
10999}
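
The clauses handled above correspond to directives like the following sketch, with each atomic_default_mem_order kind mapped to the llvm::AtomicOrdering shown in the switch:

    #pragma omp requires unified_shared_memory
    #pragma omp requires atomic_default_mem_order(acq_rel) // AcquireRelease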
11000
11001llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
11002 return RequiresAtomicOrdering;
11003}
11004
11005bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
11006 LangAS &AS) {
11007 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
11008 return false;
11009 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
11010 switch(A->getAllocatorType()) {
11011 case OMPAllocateDeclAttr::OMPNullMemAlloc:
11012 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
11013 // Not supported, fallback to the default mem space.
11014 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
11015 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
11016 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
11017 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
11018 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
11019 case OMPAllocateDeclAttr::OMPConstMemAlloc:
11020 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
11021 AS = LangAS::Default;
11022 return true;
11023 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
11024 llvm_unreachable("Expected predefined allocator for the variables with the "
11025 "static storage.");
11026 }
11027 return false;
11028}
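
A sketch of a global with a predefined allocator, which the switch above maps to the default address space:

    static int buf[64];
    #pragma omp allocate(buf) allocator(omp_const_mem_alloc) // AS = LangAS::Default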
11029
11030bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
11031 return HasRequiresUnifiedSharedMemory;
11032}
11033
11034CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
11035 CodeGenModule &CGM)
11036 : CGM(CGM) {
11037 if (CGM.getLangOpts().OpenMPIsDevice) {
11038 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
11039 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
11040 }
11041}
11042
11043CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
11044 if (CGM.getLangOpts().OpenMPIsDevice)
11045 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
11046}
11047
11048bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
11049 if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
11050 return true;
11051
11052 const auto *D = cast<FunctionDecl>(GD.getDecl());
11053 // Do not emit the function if it is marked as declare target, as it was
11054 // already emitted.
11055 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
11056 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
11057 if (auto *F = dyn_cast_or_null<llvm::Function>(
11058 CGM.GetGlobalValue(CGM.getMangledName(GD))))
11059 return !F->isDeclaration();
11060 return false;
11061 }
11062 return true;
11063 }
11064
11065 return !AlreadyEmittedTargetDecls.insert(D).second;
11066}
11067
11068llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
11069 // If we don't have entries or if we are emitting code for the device, we
11070 // don't need to do anything.
11071 if (CGM.getLangOpts().OMPTargetTriples.empty() ||
11072 CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
11073 (OffloadEntriesInfoManager.empty() &&
11074 !HasEmittedDeclareTargetRegion &&
11075 !HasEmittedTargetRegion))
11076 return nullptr;
11077
11078 // Create and register the function that handles the requires directives.
11079 ASTContext &C = CGM.getContext();
11080
11081 llvm::Function *RequiresRegFn;
11082 {
11083 CodeGenFunction CGF(CGM);
11084 const auto &FI = CGM.getTypes().arrangeNullaryFunction();
11085 llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
11086 std::string ReqName = getName({"omp_offloading", "requires_reg"});
11087 RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
11088 CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
11089 OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
11090 // TODO: check for other requires clauses.
11091 // The requires directive takes effect only when a target region is
11092 // present in the compilation unit. Otherwise it is ignored and not
11093 // passed to the runtime. This prevents the runtime from throwing an error
11094 // for mismatching requires clauses across compilation units that don't
11095 // contain at least one target region.
11096 assert((HasEmittedTargetRegion ||
11097 HasEmittedDeclareTargetRegion ||
11098 !OffloadEntriesInfoManager.empty()) &&
11099 "Target or declare target region expected.");
11100 if (HasRequiresUnifiedSharedMemory)
11101 Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
11102 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11103 CGM.getModule(), OMPRTL___tgt_register_requires),
11104 llvm::ConstantInt::get(CGM.Int64Ty, Flags));
11105 CGF.FinishFunction();
11106 }
11107 return RequiresRegFn;
11108}
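
Schematically, and under the assumption that unified_shared_memory was required, the emitted registration function behaves like this sketch (otherwise the flag is OMP_REQ_NONE):

    // Named like <prefix>omp_offloading_requires_reg; run as a global initializer.
    static void omp_offloading_requires_reg(void) {
      __tgt_register_requires(OMP_REQ_UNIFIED_SHARED_MEMORY);
    }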
11109
11110void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
11111 const OMPExecutableDirective &D,
11112 SourceLocation Loc,
11113 llvm::Function *OutlinedFn,
11114 ArrayRef<llvm::Value *> CapturedVars) {
11115 if (!CGF.HaveInsertPoint())
11116 return;
11117
11118 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11119 CodeGenFunction::RunCleanupsScope Scope(CGF);
11120
11121 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
11122 llvm::Value *Args[] = {
11123 RTLoc,
11124 CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
11125 CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
11126 llvm::SmallVector<llvm::Value *, 16> RealArgs;
11127 RealArgs.append(std::begin(Args), std::end(Args));
11128 RealArgs.append(CapturedVars.begin(), CapturedVars.end());
11129
11130 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11131 CGM.getModule(), OMPRTL___kmpc_fork_teams);
11132 CGF.EmitRuntimeCall(RTLFn, RealArgs);
11133}
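
Following the comment above, for two captured variables the emitted call is shaped roughly like this sketch (var1/var2 hypothetical; kmpc_micro is the runtime's microtask pointer type):

    __kmpc_fork_teams(&loc, /*n=*/2, (kmpc_micro)outlined_fn, &var1, &var2);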
11134
11135void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
11136 const Expr *NumTeams,
11137 const Expr *ThreadLimit,
11138 SourceLocation Loc) {
11139 if (!CGF.HaveInsertPoint())
11140 return;
11141
11142 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
11143
11144 llvm::Value *NumTeamsVal =
11145 NumTeams
11146 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
11147 CGF.CGM.Int32Ty, /* isSigned = */ true)
11148 : CGF.Builder.getInt32(0);
11149
11150 llvm::Value *ThreadLimitVal =
11151 ThreadLimit
11152 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
11153 CGF.CGM.Int32Ty, /* isSigned = */ true)
11154 : CGF.Builder.getInt32(0);
11155
11156 // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit).
11157 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
11158 ThreadLimitVal};
11159 CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
11160 CGM.getModule(), OMPRTL___kmpc_push_num_teams),
11161 PushNumTeamsArgs);
11162}
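
A source-level sketch of what feeds the push above; per the defaults in the code, an absent clause is encoded as 0:

    void run() {
      #pragma omp target teams num_teams(8) thread_limit(128)
      {}
    }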
11163
11164void CGOpenMPRuntime::emitTargetDataCalls(
11165 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11166 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
11167 if (!CGF.HaveInsertPoint())
11168 return;
11169
11170 // Action used to replace the default codegen action and turn privatization
11171 // off.
11172 PrePostActionTy NoPrivAction;
11173
11174 // Generate the code for the opening of the data environment. Capture all the
11175 // arguments of the runtime call by reference because they are used in the
11176 // closing of the region.
11177 auto &&BeginThenGen = [this, &D, Device, &Info,
11178 &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
11179 // Fill up the arrays with all the mapped variables.
11180 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11181
11182 // Get map clause information.
11183 MappableExprsHandler MEHandler(D, CGF);
11184 MEHandler.generateAllInfo(CombinedInfo);
11185
11186 // Fill up the arrays and create the arguments.
11187 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11188 /*IsNonContiguous=*/true);
11189
11190 llvm::Value *BasePointersArrayArg = nullptr;
11191 llvm::Value *PointersArrayArg = nullptr;
11192 llvm::Value *SizesArrayArg = nullptr;
11193 llvm::Value *MapTypesArrayArg = nullptr;
11194 llvm::Value *MapNamesArrayArg = nullptr;
11195 llvm::Value *MappersArrayArg = nullptr;
11196 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11197 SizesArrayArg, MapTypesArrayArg,
11198 MapNamesArrayArg, MappersArrayArg, Info);
11199
11200 // Emit device ID if any.
11201 llvm::Value *DeviceID = nullptr;
11202 if (Device) {
11203 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11204 CGF.Int64Ty, /*isSigned=*/true);
11205 } else {
11206 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11207 }
11208
11209 // Emit the number of elements in the offloading arrays.
11210 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11211 //
11212 // Source location for the ident struct
11213 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11214
11215 llvm::Value *OffloadingArgs[] = {RTLoc,
11216 DeviceID,
11217 PointerNum,
11218 BasePointersArrayArg,
11219 PointersArrayArg,
11220 SizesArrayArg,
11221 MapTypesArrayArg,
11222 MapNamesArrayArg,
11223 MappersArrayArg};
11224 CGF.EmitRuntimeCall(
11225 OMPBuilder.getOrCreateRuntimeFunction(
11226 CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
11227 OffloadingArgs);
11228
11229 // If device pointer privatization is required, emit the body of the region
11230 // here. It will have to be duplicated: with and without privatization.
11231 if (!Info.CaptureDeviceAddrMap.empty())
11232 CodeGen(CGF);
11233 };
11234
11235 // Generate code for the closing of the data region.
11236 auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
11237 PrePostActionTy &) {
11238 assert(Info.isValid() && "Invalid data environment closing arguments.");
11239
11240 llvm::Value *BasePointersArrayArg = nullptr;
11241 llvm::Value *PointersArrayArg = nullptr;
11242 llvm::Value *SizesArrayArg = nullptr;
11243 llvm::Value *MapTypesArrayArg = nullptr;
11244 llvm::Value *MapNamesArrayArg = nullptr;
11245 llvm::Value *MappersArrayArg = nullptr;
11246 emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
11247 SizesArrayArg, MapTypesArrayArg,
11248 MapNamesArrayArg, MappersArrayArg, Info,
11249 {/*ForEndCall=*/true});
11250
11251 // Emit device ID if any.
11252 llvm::Value *DeviceID = nullptr;
11253 if (Device) {
11254 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11255 CGF.Int64Ty, /*isSigned=*/true);
11256 } else {
11257 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11258 }
11259
11260 // Emit the number of elements in the offloading arrays.
11261 llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
11262
11263 // Source location for the ident struct
11264 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11265
11266 llvm::Value *OffloadingArgs[] = {RTLoc,
11267 DeviceID,
11268 PointerNum,
11269 BasePointersArrayArg,
11270 PointersArrayArg,
11271 SizesArrayArg,
11272 MapTypesArrayArg,
11273 MapNamesArrayArg,
11274 MappersArrayArg};
11275 CGF.EmitRuntimeCall(
11276 OMPBuilder.getOrCreateRuntimeFunction(
11277 CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
11278 OffloadingArgs);
11279 };
11280
11281 // If we need device pointer privatization, we need to emit the body of the
11282 // region with no privatization in the 'else' branch of the conditional.
11283 // Otherwise, we don't have to do anything.
11284 auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
11285 PrePostActionTy &) {
11286 if (!Info.CaptureDeviceAddrMap.empty()) {
11287 CodeGen.setAction(NoPrivAction);
11288 CodeGen(CGF);
11289 }
11290 };
11291
11292 // We don't have to do anything to close the region if the if clause evaluates
11293 // to false.
11294 auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
11295
11296 if (IfCond) {
11297 emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
11298 } else {
11299 RegionCodeGenTy RCG(BeginThenGen);
11300 RCG(CGF);
11301 }
11302
11303 // If we don't require privatization of device pointers, we emit the body in
11304 // between the runtime calls. This avoids duplicating the body code.
11305 if (Info.CaptureDeviceAddrMap.empty()) {
11306 CodeGen.setAction(NoPrivAction);
11307 CodeGen(CGF);
11308 }
11309
11310 if (IfCond) {
11311 emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
11312 } else {
11313 RegionCodeGenTy RCG(EndThenGen);
11314 RCG(CGF);
11315 }
11316}
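
The begin/body/end structure above lowers constructs such as this sketch; the body is emitted twice only when use_device_ptr privatization is required:

    void use(int n, float *p) {
      #pragma omp target data map(tofrom : p[0 : n]) if (n > 0) use_device_ptr(p)
      {
        // Body runs between __tgt_target_data_begin_mapper and
        // __tgt_target_data_end_mapper.
      }
    }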
11317
11318void CGOpenMPRuntime::emitTargetDataStandAloneCall(
11319 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
11320 const Expr *Device) {
11321 if (!CGF.HaveInsertPoint())
11322 return;
11323
11324 assert((isa<OMPTargetEnterDataDirective>(D) ||
11325 isa<OMPTargetExitDataDirective>(D) ||
11326 isa<OMPTargetUpdateDirective>(D)) &&
11327 "Expecting either target enter, exit data, or update directives.");
11328
11329 CodeGenFunction::OMPTargetDataInfo InputInfo;
11330 llvm::Value *MapTypesArray = nullptr;
11331 llvm::Value *MapNamesArray = nullptr;
11332 // Generate the code for the opening of the data environment.
11333 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
11334 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
11335 // Emit device ID if any.
11336 llvm::Value *DeviceID = nullptr;
11337 if (Device) {
11338 DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
11339 CGF.Int64Ty, /*isSigned=*/true);
11340 } else {
11341 DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
11342 }
11343
11344 // Emit the number of elements in the offloading arrays.
11345 llvm::Constant *PointerNum =
11346 CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
11347
11348 // Source location for the ident struct
11349 llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
11350
11351 llvm::Value *OffloadingArgs[] = {RTLoc,
11352 DeviceID,
11353 PointerNum,
11354 InputInfo.BasePointersArray.getPointer(),
11355 InputInfo.PointersArray.getPointer(),
11356 InputInfo.SizesArray.getPointer(),
11357 MapTypesArray,
11358 MapNamesArray,
11359 InputInfo.MappersArray.getPointer()};
11360
11361 // Select the right runtime function call for each standalone
11362 // directive.
11363 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
11364 RuntimeFunction RTLFn;
11365 switch (D.getDirectiveKind()) {
11366 case OMPD_target_enter_data:
11367 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
11368 : OMPRTL___tgt_target_data_begin_mapper;
11369 break;
11370 case OMPD_target_exit_data:
11371 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
11372 : OMPRTL___tgt_target_data_end_mapper;
11373 break;
11374 case OMPD_target_update:
11375 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
11376 : OMPRTL___tgt_target_data_update_mapper;
11377 break;
11378 case OMPD_parallel:
11379 case OMPD_for:
11380 case OMPD_parallel_for:
11381 case OMPD_parallel_master:
11382 case OMPD_parallel_sections:
11383 case OMPD_for_simd:
11384 case OMPD_parallel_for_simd:
11385 case OMPD_cancel:
11386 case OMPD_cancellation_point:
11387 case OMPD_ordered:
11388 case OMPD_threadprivate:
11389 case OMPD_allocate:
11390 case OMPD_task:
11391 case OMPD_simd:
11392 case OMPD_tile:
11393 case OMPD_unroll:
11394 case OMPD_sections:
11395 case OMPD_section:
11396 case OMPD_single:
11397 case OMPD_master:
11398 case OMPD_critical:
11399 case OMPD_taskyield:
11400 case OMPD_barrier:
11401 case OMPD_taskwait:
11402 case OMPD_taskgroup:
11403 case OMPD_atomic:
11404 case OMPD_flush:
11405 case OMPD_depobj:
11406 case OMPD_scan:
11407 case OMPD_teams:
11408 case OMPD_target_data:
11409 case OMPD_distribute:
11410 case OMPD_distribute_simd:
11411 case OMPD_distribute_parallel_for:
11412 case OMPD_distribute_parallel_for_simd:
11413 case OMPD_teams_distribute:
11414 case OMPD_teams_distribute_simd:
11415 case OMPD_teams_distribute_parallel_for:
11416 case OMPD_teams_distribute_parallel_for_simd:
11417 case OMPD_declare_simd:
11418 case OMPD_declare_variant:
11419 case OMPD_begin_declare_variant:
11420 case OMPD_end_declare_variant:
11421 case OMPD_declare_target:
11422 case OMPD_end_declare_target:
11423 case OMPD_declare_reduction:
11424 case OMPD_declare_mapper:
11425 case OMPD_taskloop:
11426 case OMPD_taskloop_simd:
11427 case OMPD_master_taskloop:
11428 case OMPD_master_taskloop_simd:
11429 case OMPD_parallel_master_taskloop:
11430 case OMPD_parallel_master_taskloop_simd:
11431 case OMPD_target:
11432 case OMPD_target_simd:
11433 case OMPD_target_teams_distribute:
11434 case OMPD_target_teams_distribute_simd:
11435 case OMPD_target_teams_distribute_parallel_for:
11436 case OMPD_target_teams_distribute_parallel_for_simd:
11437 case OMPD_target_teams:
11438 case OMPD_target_parallel:
11439 case OMPD_target_parallel_for:
11440 case OMPD_target_parallel_for_simd:
11441 case OMPD_requires:
11442 case OMPD_metadirective:
11443 case OMPD_unknown:
11444 default:
11445 llvm_unreachable("Unexpected standalone target data directive.");
11446 break;
11447 }
11448 CGF.EmitRuntimeCall(
11449 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
11450 OffloadingArgs);
11451 };
11452
11453 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
11454 &MapNamesArray](CodeGenFunction &CGF,
11455 PrePostActionTy &) {
11456 // Fill up the arrays with all the mapped variables.
11457 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
11458
11459 // Get map clause information.
11460 MappableExprsHandler MEHandler(D, CGF);
11461 MEHandler.generateAllInfo(CombinedInfo);
11462
11463 TargetDataInfo Info;
11464 // Fill up the arrays and create the arguments.
11465 emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
11466 /*IsNonContiguous=*/true);
11467 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
11468 D.hasClausesOfKind<OMPNowaitClause>();
11469 emitOffloadingArraysArgument(
11470 CGF, Info.BasePointersArray, Info.PointersArray, Info.SizesArray,
11471 Info.MapTypesArray, Info.MapNamesArray, Info.MappersArray, Info,
11472 {/*ForEndCall=*/false});
11473 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
11474 InputInfo.BasePointersArray =
11475 Address(Info.BasePointersArray, CGM.getPointerAlign());
11476 InputInfo.PointersArray =
11477 Address(Info.PointersArray, CGM.getPointerAlign());
11478 InputInfo.SizesArray =
11479 Address(Info.SizesArray, CGM.getPointerAlign());
11480 InputInfo.MappersArray = Address(Info.MappersArray, CGM.getPointerAlign());
11481 MapTypesArray = Info.MapTypesArray;
11482 MapNamesArray = Info.MapNamesArray;
11483 if (RequiresOuterTask)
11484 CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
11485 else
11486 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
11487 };
11488
11489 if (IfCond) {
11490 emitIfClause(CGF, IfCond, TargetThenGen,
11491 [](CodeGenFunction &CGF, PrePostActionTy &) {});
11492 } else {
11493 RegionCodeGenTy ThenRCG(TargetThenGen);
11494 ThenRCG(CGF);
11495 }
11496}
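
A sketch of the standalone directives dispatched above, with nowait selecting the *_nowait_mapper entry points:

    void moves(float *a, int n) {
      #pragma omp target enter data map(to : a[0 : n])  // __tgt_target_data_begin_mapper
      #pragma omp target update from(a[0 : n]) nowait   // __tgt_target_data_update_nowait_mapper
      #pragma omp target exit data map(from : a[0 : n]) // __tgt_target_data_end_mapper
    }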
11497
11498namespace {
11499 /// Kind of parameter in a function with 'declare simd' directive.
11500 enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
11501 /// Attribute set of the parameter.
11502 struct ParamAttrTy {
11503 ParamKindTy Kind = Vector;
11504 llvm::APSInt StrideOrArg;
11505 llvm::APSInt Alignment;
11506 };
11507} // namespace
11508
11509static unsigned evaluateCDTSize(const FunctionDecl *FD,
11510 ArrayRef<ParamAttrTy> ParamAttrs) {
11511 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
11512 // If the OpenMP clause "simdlen" is used, the VLEN is the value of the
11513 // argument of that clause. The VLEN value must be a power of 2.
11514 // Otherwise, the notion of the function's "characteristic data type" (CDT)
11515 // is used to compute the vector length.
11516 // CDT is defined in the following order:
11517 // a) For non-void functions, the CDT is the return type.
11518 // b) If the function has any non-uniform, non-linear parameters, then the
11519 // CDT is the type of the first such parameter.
11520 // c) If the CDT determined by a) or b) above is a struct, union, or class
11521 // type which is passed by value (except for types that map to the
11522 // built-in complex data type), the characteristic data type is int.
11523 // d) If none of the above three cases is applicable, the CDT is int.
11524 // The VLEN is then determined based on the CDT and the size of the vector
11525 // register of the ISA for which the current vector version is generated. The
11526 // VLEN is computed using the formula below:
11527 // VLEN = sizeof(vector_register) / sizeof(CDT),
11528 // where the vector register size is specified in section 3.2.1 ("Registers
11529 // and the Stack Frame") of the original AMD64 ABI document.
11530 QualType RetType = FD->getReturnType();
11531 if (RetType.isNull())
11532 return 0;
11533 ASTContext &C = FD->getASTContext();
11534 QualType CDT;
11535 if (!RetType.isNull() && !RetType->isVoidType()) {
11536 CDT = RetType;
11537 } else {
11538 unsigned Offset = 0;
11539 if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
11540 if (ParamAttrs[Offset].Kind == Vector)
11541 CDT = C.getPointerType(C.getRecordType(MD->getParent()));
11542 ++Offset;
11543 }
11544 if (CDT.isNull()) {
11545 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11546 if (ParamAttrs[I + Offset].Kind == Vector) {
11547 CDT = FD->getParamDecl(I)->getType();
11548 break;
11549 }
11550 }
11551 }
11552 }
11553 if (CDT.isNull())
11554 CDT = C.IntTy;
11555 CDT = CDT->getCanonicalTypeUnqualified();
11556 if (CDT->isRecordType() || CDT->isUnionType())
11557 CDT = C.IntTy;
11558 return C.getTypeSize(CDT);
11559}
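
A worked instance of the formula above, assuming a 128-bit (SSE-class) vector register:

    VLEN = sizeof(vector_register) / sizeof(CDT)
         = 128 / 64 = 2 // double f(double): CDT is the return type, double
         = 128 / 32 = 4 // void g(int): CDT falls back to the int parameter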
11560
11561static void
11562emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
11563 const llvm::APSInt &VLENVal,
11564 ArrayRef<ParamAttrTy> ParamAttrs,
11565 OMPDeclareSimdDeclAttr::BranchStateTy State) {
11566 struct ISADataTy {
11567 char ISA;
11568 unsigned VecRegSize;
11569 };
11570 ISADataTy ISAData[] = {
11571 {
11572 'b', 128
11573 }, // SSE
11574 {
11575 'c', 256
11576 }, // AVX
11577 {
11578 'd', 256
11579 }, // AVX2
11580 {
11581 'e', 512
11582 }, // AVX512
11583 };
11584 llvm::SmallVector<char, 2> Masked;
11585 switch (State) {
11586 case OMPDeclareSimdDeclAttr::BS_Undefined:
11587 Masked.push_back('N');
11588 Masked.push_back('M');
11589 break;
11590 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11591 Masked.push_back('N');
11592 break;
11593 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11594 Masked.push_back('M');
11595 break;
11596 }
11597 for (char Mask : Masked) {
11598 for (const ISADataTy &Data : ISAData) {
11599 SmallString<256> Buffer;
11600 llvm::raw_svector_ostream Out(Buffer);
11601 Out << "_ZGV" << Data.ISA << Mask;
11602 if (!VLENVal) {
11603 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
11604 assert(NumElts && "Non-zero simdlen/cdtsize expected");
11605 Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
11606 } else {
11607 Out << VLENVal;
11608 }
11609 for (const ParamAttrTy &ParamAttr : ParamAttrs) {
11610 switch (ParamAttr.Kind){
11611 case LinearWithVarStride:
11612 Out << 's' << ParamAttr.StrideOrArg;
11613 break;
11614 case Linear:
11615 Out << 'l';
11616 if (ParamAttr.StrideOrArg != 1)
11617 Out << ParamAttr.StrideOrArg;
11618 break;
11619 case Uniform:
11620 Out << 'u';
11621 break;
11622 case Vector:
11623 Out << 'v';
11624 break;
11625 }
11626 if (!!ParamAttr.Alignment)
11627 Out << 'a' << ParamAttr.Alignment;
11628 }
11629 Out << '_' << Fn->getName();
11630 Fn->addFnAttr(Out.str());
11631 }
11632 }
11633}
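
Putting the mangling above together, a hedged example: for a C function double f(double) under #pragma omp declare simd with no branch state, each ISA gets both masked and unmasked variants, e.g. VLEN 2 for the 128-bit SSE entry per the CDT rule above:

    #pragma omp declare simd
    double f(double x); // gains attributes "_ZGVbN2v_f", "_ZGVbM2v_f",
                        // "_ZGVcN4v_f", "_ZGVcM4v_f", "_ZGVdN4v_f", ...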
11634
11635 // These are the functions needed to mangle the names of the
11636 // vector functions generated by the compiler, according to the rules
11637 // defined in the "Vector Function ABI specifications for AArch64",
11638 // available at
11639 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
11640
11641/// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
11642///
11643 /// TODO: Need to implement the behavior for references marked with a
11644 /// var or no linear modifiers (1.b in the section). For this, we
11645 /// need to extend ParamKindTy to support the linear modifiers.
11646static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
11647 QT = QT.getCanonicalType();
11648
11649 if (QT->isVoidType())
11650 return false;
11651
11652 if (Kind == ParamKindTy::Uniform)
11653 return false;
11654
11655 if (Kind == ParamKindTy::Linear)
11656 return false;
11657
11658 // TODO: Handle linear references with modifiers
11659
11660 if (Kind == ParamKindTy::LinearWithVarStride)
11661 return false;
11662
11663 return true;
11664}
11665
11666/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
11667static bool getAArch64PBV(QualType QT, ASTContext &C) {
11668 QT = QT.getCanonicalType();
11669 unsigned Size = C.getTypeSize(QT);
11670
11671 // Only scalars and complex types at most 16 bytes wide set PBV to true.
11672 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
11673 return false;
11674
11675 if (QT->isFloatingType())
11676 return true;
11677
11678 if (QT->isIntegerType())
11679 return true;
11680
11681 if (QT->isPointerType())
11682 return true;
11683
11684 // TODO: Add support for complex types (section 3.1.2, item 2).
11685
11686 return false;
11687}
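
For example, under the checks above these illustrative queries (not from this file) would behave as commented:

    getAArch64PBV(C.FloatTy, C)  // true: 32-bit floating type
    getAArch64PBV(C.DoubleTy, C) // true: 64-bit floating type
    // A 24-byte struct is not PBV: 192 bits fails the width test and it is
    // neither floating, integer, nor pointer.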
11688
11689/// Computes the lane size (LS) of a return type or of an input parameter,
11690/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
11691/// TODO: Add support for references, section 3.2.1, item 1.
11692static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
11693 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
11694 QualType PTy = QT.getCanonicalType()->getPointeeType();
11695 if (getAArch64PBV(PTy, C))
11696 return C.getTypeSize(PTy);
11697 }
11698 if (getAArch64PBV(QT, C))
11699 return C.getTypeSize(QT);
11700
11701 return C.getTypeSize(C.getUIntPtrType());
11702}
11703
11704// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
11705// signature of the scalar function, as defined in 3.2.2 of the
11706// AAVFABI.
11707static std::tuple<unsigned, unsigned, bool>
11708getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
11709 QualType RetType = FD->getReturnType().getCanonicalType();
11710
11711 ASTContext &C = FD->getASTContext();
11712
11713 bool OutputBecomesInput = false;
11714
11715 llvm::SmallVector<unsigned, 8> Sizes;
11716 if (!RetType->isVoidType()) {
11717 Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
11718 if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
11719 OutputBecomesInput = true;
11720 }
11721 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
11722 QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
11723 Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
11724 }
11725
11726 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
11727 // The LS of a function parameter / return value can only be a power
11728 // of 2, starting from 8 bits, up to 128.
11729 assert(llvm::all_of(Sizes,
11730 [](unsigned Size) {
11731 return Size == 8 || Size == 16 || Size == 32 ||
11732 Size == 64 || Size == 128;
11733 }) &&
11734 "Invalid size");
11735
11736 return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
11737 *std::max_element(std::begin(Sizes), std::end(Sizes)),
11738 OutputBecomesInput);
11739}
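// Illustrative (editorial note, not part of the original source): for a
// scalar function 'double foo(float x, short y)' the lane sizes are 64
// (return value), 32 and 16, so getNDSWDS returns NDS = 16 and WDS = 64;
// OutputBecomesInput stays false because the double return value is PBV.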
11740
11741/// Mangle the parameter part of the vector function name according to
11742/// their OpenMP classification. The mangling function is defined in
11743/// section 3.5 of the AAVFABI.
11744static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
11745 SmallString<256> Buffer;
11746 llvm::raw_svector_ostream Out(Buffer);
11747 for (const auto &ParamAttr : ParamAttrs) {
11748 switch (ParamAttr.Kind) {
11749 case LinearWithVarStride:
11750 Out << "ls" << ParamAttr.StrideOrArg;
11751 break;
11752 case Linear:
11753 Out << 'l';
11754 // Don't print the step value if it is not present or if it is
11755 // equal to 1.
11756 if (ParamAttr.StrideOrArg != 1)
11757 Out << ParamAttr.StrideOrArg;
11758 break;
11759 case Uniform:
11760 Out << 'u';
11761 break;
11762 case Vector:
11763 Out << 'v';
11764 break;
11765 }
11766
11767 if (!!ParamAttr.Alignment)
11768 Out << 'a' << ParamAttr.Alignment;
11769 }
11770
11771 return std::string(Out.str());
11772}
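// Illustrative (editorial note, not part of the original source): for
//
//   #pragma omp declare simd uniform(y) linear(z)
//   double foo(double x, int *y, double *z);
//
// the parameter sequence mangles to "vul8": 'v' for the vector parameter x,
// 'u' for the uniform pointer y, and 'l8' for z, whose unit stride is
// rescaled by sizeof(double) in emitDeclareSimdFunction below.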
11773
11774// Function used to add the attribute. The parameter `VLEN` is
11775// templated to allow the use of "x" when targeting scalable functions
11776// for SVE.
11777template <typename T>
11778static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
11779 char ISA, StringRef ParSeq,
11780 StringRef MangledName, bool OutputBecomesInput,
11781 llvm::Function *Fn) {
11782 SmallString<256> Buffer;
11783 llvm::raw_svector_ostream Out(Buffer);
11784 Out << Prefix << ISA << LMask << VLEN;
11785 if (OutputBecomesInput)
11786 Out << "v";
11787 Out << ParSeq << "_" << MangledName;
11788 Fn->addFnAttr(Out.str());
11789}
11790
11791// Helper function to generate the Advanced SIMD names depending on
11792// the value of the NDS when simdlen is not present.
11793static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
11794 StringRef Prefix, char ISA,
11795 StringRef ParSeq, StringRef MangledName,
11796 bool OutputBecomesInput,
11797 llvm::Function *Fn) {
11798 switch (NDS) {
11799 case 8:
11800 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11801 OutputBecomesInput, Fn);
11802 addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
11803 OutputBecomesInput, Fn);
11804 break;
11805 case 16:
11806 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11807 OutputBecomesInput, Fn);
11808 addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
11809 OutputBecomesInput, Fn);
11810 break;
11811 case 32:
11812 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11813 OutputBecomesInput, Fn);
11814 addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
11815 OutputBecomesInput, Fn);
11816 break;
11817 case 64:
11818 case 128:
11819 addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
11820 OutputBecomesInput, Fn);
11821 break;
11822 default:
11823 llvm_unreachable("Scalar type is too wide.");
11824 }
11825}
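// Illustrative (editorial note, not part of the original source): the switch
// above picks the lane counts that fill 64- and 128-bit Advanced SIMD
// registers. With NDS = 16 (e.g. a 'short' parameter) it emits 4-lane and
// 8-lane variants; with NDS = 64 only the 2-lane, 128-bit variant exists.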
11826
11827/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
11828static void emitAArch64DeclareSimdFunction(
11829 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
11830 ArrayRef<ParamAttrTy> ParamAttrs,
11831 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
11832 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
11833
11834 // Get basic data for building the vector signature.
11835 const auto Data = getNDSWDS(FD, ParamAttrs);
11836 const unsigned NDS = std::get<0>(Data);
11837 const unsigned WDS = std::get<1>(Data);
11838 const bool OutputBecomesInput = std::get<2>(Data);
11839
11840 // Check the values provided via `simdlen` by the user.
11841 // 1. A `simdlen(1)` doesn't produce vector signatures,
11842 if (UserVLEN == 1) {
11843 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11844 DiagnosticsEngine::Warning,
11845 "The clause simdlen(1) has no effect when targeting aarch64.");
11846 CGM.getDiags().Report(SLoc, DiagID);
11847 return;
11848 }
11849
11850 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
11851 // Advanced SIMD output.
11852 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
11853 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11854 DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
11855 "power of 2 when targeting Advanced SIMD.");
11856 CGM.getDiags().Report(SLoc, DiagID);
11857 return;
11858 }
11859
11860 // 3. Section 3.4.1. SVE fixed length must obey the architectural
11861 // limits.
11862 if (ISA == 's' && UserVLEN != 0) {
11863 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
11864 unsigned DiagID = CGM.getDiags().getCustomDiagID(
11865 DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
11866 "lanes in the architectural constraints "
11867 "for SVE (min is 128-bit, max is "
11868 "2048-bit, by steps of 128-bit)");
11869 CGM.getDiags().Report(SLoc, DiagID) << WDS;
11870 return;
11871 }
11872 }
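// Illustrative (editorial note, not part of the original source): with
// WDS = 64 the check above accepts simdlen values 2, 4, ..., 32 (so that
// simdlen * WDS is a multiple of 128 bits, capped at the 2048-bit SVE
// maximum) and rejects, for example, simdlen(3).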
11873
11874 // Sort out parameter sequence.
11875 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
11876 StringRef Prefix = "_ZGV";
11877 // Generate simdlen from user input (if any).
11878 if (UserVLEN) {
11879 if (ISA == 's') {
11880 // SVE generates only a masked function.
11881 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11882 OutputBecomesInput, Fn);
11883 } else {
11884 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11885 // Advanced SIMD generates one or two functions, depending on
11886 // the `[not]inbranch` clause.
11887 switch (State) {
11888 case OMPDeclareSimdDeclAttr::BS_Undefined:
11889 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11890 OutputBecomesInput, Fn);
11891 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11892 OutputBecomesInput, Fn);
11893 break;
11894 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11895 addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
11896 OutputBecomesInput, Fn);
11897 break;
11898 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11899 addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
11900 OutputBecomesInput, Fn);
11901 break;
11902 }
11903 }
11904 } else {
11905 // If no user simdlen is provided, follow the AAVFABI rules for
11906 // generating the vector length.
11907 if (ISA == 's') {
11908 // SVE, section 3.4.1, item 1.
11909 addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
11910 OutputBecomesInput, Fn);
11911 } else {
11912 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
11913 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
11914 // two vector names depending on the use of the clause
11915 // `[not]inbranch`.
11916 switch (State) {
11917 case OMPDeclareSimdDeclAttr::BS_Undefined:
11918 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11919 OutputBecomesInput, Fn);
11920 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11921 OutputBecomesInput, Fn);
11922 break;
11923 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
11924 addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
11925 OutputBecomesInput, Fn);
11926 break;
11927 case OMPDeclareSimdDeclAttr::BS_Inbranch:
11928 addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
11929 OutputBecomesInput, Fn);
11930 break;
11931 }
11932 }
11933 }
11934}
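// Illustrative (editorial note, not part of the original source): compiling
//
//   #pragma omp declare simd
//   double foo(double x) { return x * 2.0; }
//
// for AArch64 attaches "_ZGVnN2v_foo" and "_ZGVnM2v_foo" when NEON is
// available (NDS = 64, no branch-state clause) and "_ZGVsMxv_foo" when SVE
// is available (scalable length "x", masked variant only).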
11935
11936void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
11937 llvm::Function *Fn) {
11938 ASTContext &C = CGM.getContext();
11939 FD = FD->getMostRecentDecl();
11940 // Map params to their positions in function decl.
11941 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
11942 if (isa<CXXMethodDecl>(FD))
11943 ParamPositions.try_emplace(FD, 0);
11944 unsigned ParamPos = ParamPositions.size();
11945 for (const ParmVarDecl *P : FD->parameters()) {
11946 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11947 ++ParamPos;
11948 }
11949 while (FD) {
11950 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11951 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11952 // Mark uniform parameters.
11953 for (const Expr *E : Attr->uniforms()) {
11954 E = E->IgnoreParenImpCasts();
11955 unsigned Pos;
11956 if (isa<CXXThisExpr>(E)) {
11957 Pos = ParamPositions[FD];
11958 } else {
11959 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11960 ->getCanonicalDecl();
11961 Pos = ParamPositions[PVD];
11962 }
11963 ParamAttrs[Pos].Kind = Uniform;
11964 }
11965 // Get alignment info.
11966 auto NI = Attr->alignments_begin();
11967 for (const Expr *E : Attr->aligneds()) {
11968 E = E->IgnoreParenImpCasts();
11969 unsigned Pos;
11970 QualType ParmTy;
11971 if (isa<CXXThisExpr>(E)) {
11972 Pos = ParamPositions[FD];
11973 ParmTy = E->getType();
11974 } else {
11975 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11976 ->getCanonicalDecl();
11977 Pos = ParamPositions[PVD];
11978 ParmTy = PVD->getType();
11979 }
11980 ParamAttrs[Pos].Alignment =
11981 (*NI)
11982 ? (*NI)->EvaluateKnownConstInt(C)
11983 : llvm::APSInt::getUnsigned(
11984 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11985 .getQuantity());
11986 ++NI;
11987 }
11988 // Mark linear parameters.
11989 auto SI = Attr->steps_begin();
11990 auto MI = Attr->modifiers_begin();
11991 for (const Expr *E : Attr->linears()) {
11992 E = E->IgnoreParenImpCasts();
11993 unsigned Pos;
11994 // Rescaling factor needed to compute the linear parameter
11995 // value in the mangled name.
11996 unsigned PtrRescalingFactor = 1;
11997 if (isa<CXXThisExpr>(E)) {
11998 Pos = ParamPositions[FD];
11999 } else {
12000 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
12001 ->getCanonicalDecl();
12002 Pos = ParamPositions[PVD];
12003 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
12004 PtrRescalingFactor = CGM.getContext()
12005 .getTypeSizeInChars(P->getPointeeType())
12006 .getQuantity();
12007 }
12008 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
12009 ParamAttr.Kind = Linear;
12010 // Assuming a stride of 1 for `linear` without modifiers.
12011 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
12012 if (*SI) {
12013 Expr::EvalResult Result;
12014 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
12015 if (const auto *DRE =
12016 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
12017 if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
12018 ParamAttr.Kind = LinearWithVarStride;
12019 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
12020 ParamPositions[StridePVD->getCanonicalDecl()]);
12021 }
12022 }
12023 } else {
12024 ParamAttr.StrideOrArg = Result.Val.getInt();
12025 }
12026 }
12027 // If we are using a linear clause on a pointer, we need to
12028 // rescale the value of linear_step with the byte size of the
12029 // pointee type.
12030 if (Linear == ParamAttr.Kind)
12031 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
12032 ++SI;
12033 ++MI;
12034 }
12035 llvm::APSInt VLENVal;
12036 SourceLocation ExprLoc;
12037 const Expr *VLENExpr = Attr->getSimdlen();
12038 if (VLENExpr) {
12039 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
12040 ExprLoc = VLENExpr->getExprLoc();
12041 }
12042 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
12043 if (CGM.getTriple().isX86()) {
12044 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
12045 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
12046 unsigned VLEN = VLENVal.getExtValue();
12047 StringRef MangledName = Fn->getName();
12048 if (CGM.getTarget().hasFeature("sve"))
12049 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12050 MangledName, 's', 128, Fn, ExprLoc);
12051 if (CGM.getTarget().hasFeature("neon"))
12052 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
12053 MangledName, 'n', 128, Fn, ExprLoc);
12054 }
12055 }
12056 FD = FD->getPreviousDecl();
12057 }
12058}
12059
12060namespace {
12061/// Cleanup action for doacross support.
12062class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
12063public:
12064 static const int DoacrossFinArgs = 2;
12065
12066private:
12067 llvm::FunctionCallee RTLFn;
12068 llvm::Value *Args[DoacrossFinArgs];
12069
12070public:
12071 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
12072 ArrayRef<llvm::Value *> CallArgs)
12073 : RTLFn(RTLFn) {
12074 assert(CallArgs.size() == DoacrossFinArgs);
12075 std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
12076 }
12077 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12078 if (!CGF.HaveInsertPoint())
12079 return;
12080 CGF.EmitRuntimeCall(RTLFn, Args);
12081 }
12082};
12083} // namespace
12084
12085void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
12086 const OMPLoopDirective &D,
12087 ArrayRef<Expr *> NumIterations) {
12088 if (!CGF.HaveInsertPoint())
12089 return;
12090
12091 ASTContext &C = CGM.getContext();
12092 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
12093 RecordDecl *RD;
12094 if (KmpDimTy.isNull()) {
12095 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
12096 // kmp_int64 lo; // lower
12097 // kmp_int64 up; // upper
12098 // kmp_int64 st; // stride
12099 // };
12100 RD = C.buildImplicitRecord("kmp_dim");
12101 RD->startDefinition();
12102 addFieldToRecordDecl(C, RD, Int64Ty);
12103 addFieldToRecordDecl(C, RD, Int64Ty);
12104 addFieldToRecordDecl(C, RD, Int64Ty);
12105 RD->completeDefinition();
12106 KmpDimTy = C.getRecordType(RD);
12107 } else {
12108 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
12109 }
12110 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
12111 QualType ArrayTy =
12112 C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);
12113
12114 Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
12115 CGF.EmitNullInitialization(DimsAddr, ArrayTy);
12116 enum { LowerFD = 0, UpperFD, StrideFD };
12117 // Fill dims with data.
12118 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
12119 LValue DimsLVal = CGF.MakeAddrLValue(
12120 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
12121 // dims.upper = num_iterations;
12122 LValue UpperLVal = CGF.EmitLValueForField(
12123 DimsLVal, *std::next(RD->field_begin(), UpperFD));
12124 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
12125 CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
12126 Int64Ty, NumIterations[I]->getExprLoc());
12127 CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
12128 // dims.stride = 1;
12129 LValue StrideLVal = CGF.EmitLValueForField(
12130 DimsLVal, *std::next(RD->field_begin(), StrideFD));
12131 CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
12132 StrideLVal);
12133 }
12134
12135 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
12136 // kmp_int32 num_dims, struct kmp_dim * dims);
12137 llvm::Value *Args[] = {
12138 emitUpdateLocation(CGF, D.getBeginLoc()),
12139 getThreadID(CGF, D.getBeginLoc()),
12140 llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
12141 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12142 CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
12143 CGM.VoidPtrTy)};
12144
12145 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12146 CGM.getModule(), OMPRTL___kmpc_doacross_init);
12147 CGF.EmitRuntimeCall(RTLFn, Args);
12148 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
12149 emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
12150 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12151 CGM.getModule(), OMPRTL___kmpc_doacross_fini);
12152 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
12153 llvm::makeArrayRef(FiniArgs));
12154}
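// Illustrative (editorial note, not part of the original source): for a
// doubly nested '#pragma omp for ordered(2)' loop this emits a single
// __kmpc_doacross_init call with num_dims = 2 and a two-element kmp_dim
// array whose 'up' fields hold the iteration counts, and schedules
// __kmpc_doacross_fini as a cleanup for the region exit.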
12155
12156void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
12157 const OMPDependClause *C) {
12158 QualType Int64Ty =
12159 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
12160 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
12161 QualType ArrayTy = CGM.getContext().getConstantArrayType(
12162 Int64Ty, Size, nullptr, ArrayType::Normal, 0);
12163 Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
12164 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
12165 const Expr *CounterVal = C->getLoopData(I);
12166 assert(CounterVal);
12167 llvm::Value *CntVal = CGF.EmitScalarConversion(
12168 CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
12169 CounterVal->getExprLoc());
12170 CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
12171 /*Volatile=*/false, Int64Ty);
12172 }
12173 llvm::Value *Args[] = {
12174 emitUpdateLocation(CGF, C->getBeginLoc()),
12175 getThreadID(CGF, C->getBeginLoc()),
12176 CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
12177 llvm::FunctionCallee RTLFn;
12178 if (C->getDependencyKind() == OMPC_DEPEND_source) {
12179 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12180 OMPRTL___kmpc_doacross_post);
12181 } else {
12182 assert(C->getDependencyKind() == OMPC_DEPEND_sink);
12183 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
12184 OMPRTL___kmpc_doacross_wait);
12185 }
12186 CGF.EmitRuntimeCall(RTLFn, Args);
12187}
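// Illustrative (editorial note, not part of the original source): inside
// the ordered loop, '#pragma omp ordered depend(source)' lowers to
// __kmpc_doacross_post with the current iteration vector, while
// 'depend(sink: i-1, j)' lowers to __kmpc_doacross_wait on the referenced
// iteration.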
12188
12189void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
12190 llvm::FunctionCallee Callee,
12191 ArrayRef<llvm::Value *> Args) const {
12192 assert(Loc.isValid() && "Outlined function call location must be valid.");
12193 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
12194
12195 if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
12196 if (Fn->doesNotThrow()) {
12197 CGF.EmitNounwindRuntimeCall(Fn, Args);
12198 return;
12199 }
12200 }
12201 CGF.EmitRuntimeCall(Callee, Args);
12202}
12203
12204void CGOpenMPRuntime::emitOutlinedFunctionCall(
12205 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
12206 ArrayRef<llvm::Value *> Args) const {
12207 emitCall(CGF, Loc, OutlinedFn, Args);
12208}
12209
12210void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
12211 if (const auto *FD = dyn_cast<FunctionDecl>(D))
12212 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
12213 HasEmittedDeclareTargetRegion = true;
12214}
12215
12216Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
12217 const VarDecl *NativeParam,
12218 const VarDecl *TargetParam) const {
12219 return CGF.GetAddrOfLocalVar(NativeParam);
12220}
12221
12222/// Return allocator value from expression, or return a null allocator (default
12223/// when no allocator specified).
12224static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
12225 const Expr *Allocator) {
12226 llvm::Value *AllocVal;
12227 if (Allocator) {
12228 AllocVal = CGF.EmitScalarExpr(Allocator);
12229 // According to the standard, the original allocator type is an enum
12230 // (integer). Convert to pointer type, if required.
12231 AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
12232 CGF.getContext().VoidPtrTy,
12233 Allocator->getExprLoc());
12234 } else {
12235 // If no allocator specified, it defaults to the null allocator.
12236 AllocVal = llvm::Constant::getNullValue(
12237 CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
12238 }
12239 return AllocVal;
12240}
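// Illustrative (editorial note, not part of the original source): for
// 'allocate(omp_high_bw_mem_alloc: x)' the allocator expression is the
// integer enumerator omp_high_bw_mem_alloc, which the conversion above
// turns into a void* handle; with no allocator clause the null handle
// selects the implementation-defined default allocator.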
12241
12242Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
12243 const VarDecl *VD) {
12244 if (!VD)
12245 return Address::invalid();
12246 Address UntiedAddr = Address::invalid();
12247 Address UntiedRealAddr = Address::invalid();
12248 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12249 if (It != FunctionToUntiedTaskStackMap.end()) {
12250 const UntiedLocalVarsAddressesMap &UntiedData =
12251 UntiedLocalVarsStack[It->second];
12252 auto I = UntiedData.find(VD);
12253 if (I != UntiedData.end()) {
12254 UntiedAddr = I->second.first;
12255 UntiedRealAddr = I->second.second;
12256 }
12257 }
12258 const VarDecl *CVD = VD->getCanonicalDecl();
12259 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
12260 // Use the default allocation.
12261 if (!isAllocatableDecl(VD))
12262 return UntiedAddr;
12263 llvm::Value *Size;
12264 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
12265 if (CVD->getType()->isVariablyModifiedType()) {
12266 Size = CGF.getTypeSize(CVD->getType());
12267 // Align the size: ((size + align - 1) / align) * align
12268 Size = CGF.Builder.CreateNUWAdd(
12269 Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
12270 Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
12271 Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
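// Editorial example (not part of the original source): a 10-byte VLA with
// 8-byte alignment rounds up to ((10 + 7) / 8) * 8 = 16 bytes here.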
12272 } else {
12273 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
12274 Size = CGM.getSize(Sz.alignTo(Align));
12275 }
12276 llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
12277 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
12278 const Expr *Allocator = AA->getAllocator();
12279 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
12280 llvm::Value *Alignment =
12281 AA->getAlignment()
12282 ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(AA->getAlignment()),
12283 CGM.SizeTy, /*isSigned=*/false)
12284 : nullptr;
12285 SmallVector<llvm::Value *, 4> Args;
12286 Args.push_back(ThreadID);
12287 if (Alignment)
12288 Args.push_back(Alignment);
12289 Args.push_back(Size);
12290 Args.push_back(AllocVal);
12291 llvm::omp::RuntimeFunction FnID =
12292 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
12293 llvm::Value *Addr = CGF.EmitRuntimeCall(
12294 OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
12295 getName({CVD->getName(), ".void.addr"}));
12296 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
12297 CGM.getModule(), OMPRTL___kmpc_free);
12298 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
12299 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12300 Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
12301 if (UntiedAddr.isValid())
12302 CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);
12303
12304 // Cleanup action for allocate support.
12305 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
12306 llvm::FunctionCallee RTLFn;
12307 SourceLocation::UIntTy LocEncoding;
12308 Address Addr;
12309 const Expr *AllocExpr;
12310
12311 public:
12312 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
12313 SourceLocation::UIntTy LocEncoding, Address Addr,
12314 const Expr *AllocExpr)
12315 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
12316 AllocExpr(AllocExpr) {}
12317 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
12318 if (!CGF.HaveInsertPoint())
12319 return;
12320 llvm::Value *Args[3];
12321 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
12322 CGF, SourceLocation::getFromRawEncoding(LocEncoding));
12323 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12324 Addr.getPointer(), CGF.VoidPtrTy);
12325 llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
12326 Args[2] = AllocVal;
12327 CGF.EmitRuntimeCall(RTLFn, Args);
12328 }
12329 };
12330 Address VDAddr =
12331 UntiedRealAddr.isValid() ? UntiedRealAddr : Address(Addr, Align);
12332 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
12333 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
12334 VDAddr, Allocator);
12335 if (UntiedRealAddr.isValid())
12336 if (auto *Region =
12337 dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
12338 Region->emitUntiedSwitch(CGF);
12339 return VDAddr;
12340 }
12341 return UntiedAddr;
12342}
12343
12344bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
12345 const VarDecl *VD) const {
12346 auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
12347 if (It == FunctionToUntiedTaskStackMap.end())
12348 return false;
12349 return UntiedLocalVarsStack[It->second].count(VD) > 0;
12350}
12351
12352CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
12353 CodeGenModule &CGM, const OMPLoopDirective &S)
12354 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
12355 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12356 if (!NeedToPush)
12357 return;
12358 NontemporalDeclsSet &DS =
12359 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
12360 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
12361 for (const Stmt *Ref : C->private_refs()) {
12362 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
12363 const ValueDecl *VD;
12364 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
12365 VD = DRE->getDecl();
12366 } else {
12367 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
12368 assert((ME->isImplicitCXXThis() ||
12369 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
12370 "Expected member of current class.");
12371 VD = ME->getMemberDecl();
12372 }
12373 DS.insert(VD);
12374 }
12375 }
12376}
12377
12378CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
12379 if (!NeedToPush)
12380 return;
12381 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
12382}
12383
12384CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
12385 CodeGenFunction &CGF,
12386 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
12387 std::pair<Address, Address>> &LocalVars)
12388 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
12389 if (!NeedToPush)
12390 return;
12391 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
12392 CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
12393 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
12394}
12395
12396CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
12397 if (!NeedToPush)
12398 return;
12399 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
12400}
12401
12402bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
12403 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12404
12405 return llvm::any_of(
12406 CGM.getOpenMPRuntime().NontemporalDeclsStack,
12407 [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
12408}
12409
12410void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
12411 const OMPExecutableDirective &S,
12412 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
12413 const {
12414 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
12415 // Vars in target/task regions must be excluded completely.
12416 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
12417 isOpenMPTaskingDirective(S.getDirectiveKind())) {
12418 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12419 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
12420 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
12421 for (const CapturedStmt::Capture &Cap : CS->captures()) {
12422 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
12423 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
12424 }
12425 }
12426 // Exclude vars in private clauses.
12427 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
12428 for (const Expr *Ref : C->varlists()) {
12429 if (!Ref->getType()->isScalarType())
12430 continue;
12431 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12432 if (!DRE)
12433 continue;
12434 NeedToCheckForLPCs.insert(DRE->getDecl());
12435 }
12436 }
12437 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
12438 for (const Expr *Ref : C->varlists()) {
12439 if (!Ref->getType()->isScalarType())
12440 continue;
12441 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12442 if (!DRE)
12443 continue;
12444 NeedToCheckForLPCs.insert(DRE->getDecl());
12445 }
12446 }
12447 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12448 for (const Expr *Ref : C->varlists()) {
12449 if (!Ref->getType()->isScalarType())
12450 continue;
12451 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12452 if (!DRE)
12453 continue;
12454 NeedToCheckForLPCs.insert(DRE->getDecl());
12455 }
12456 }
12457 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
12458 for (const Expr *Ref : C->varlists()) {
12459 if (!Ref->getType()->isScalarType())
12460 continue;
12461 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12462 if (!DRE)
12463 continue;
12464 NeedToCheckForLPCs.insert(DRE->getDecl());
12465 }
12466 }
12467 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
12468 for (const Expr *Ref : C->varlists()) {
12469 if (!Ref->getType()->isScalarType())
12470 continue;
12471 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
12472 if (!DRE)
12473 continue;
12474 NeedToCheckForLPCs.insert(DRE->getDecl());
12475 }
12476 }
12477 for (const Decl *VD : NeedToCheckForLPCs) {
12478 for (const LastprivateConditionalData &Data :
12479 llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
12480 if (Data.DeclToUniqueName.count(VD) > 0) {
12481 if (!Data.Disabled)
12482 NeedToAddForLPCsAsDisabled.insert(VD);
12483 break;
12484 }
12485 }
12486 }
12487}
12488
12489CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12490 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
12491 : CGM(CGF.CGM),
12492 Action((CGM.getLangOpts().OpenMP >= 50 &&
12493 llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
12494 [](const OMPLastprivateClause *C) {
12495 return C->getKind() ==
12496 OMPC_LASTPRIVATE_conditional;
12497 }))
12498 ? ActionToDo::PushAsLastprivateConditional
12499 : ActionToDo::DoNotPush) {
12500 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12501 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
12502 return;
12503 assert(Action == ActionToDo::PushAsLastprivateConditional &&
12504 "Expected a push action.");
12505 LastprivateConditionalData &Data =
12506 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12507 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
12508 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
12509 continue;
12510
12511 for (const Expr *Ref : C->varlists()) {
12512 Data.DeclToUniqueName.insert(std::make_pair(
12513 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
12514 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
12515 }
12516 }
12517 Data.IVLVal = IVLVal;
12518 Data.Fn = CGF.CurFn;
12519}
12520
12521CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
12522 CodeGenFunction &CGF, const OMPExecutableDirective &S)
12523 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
12524 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
12525 if (CGM.getLangOpts().OpenMP < 50)
12526 return;
12527 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
12528 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
12529 if (!NeedToAddForLPCsAsDisabled.empty()) {
12530 Action = ActionToDo::DisableLastprivateConditional;
12531 LastprivateConditionalData &Data =
12532 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
12533 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
12534 Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
12535 Data.Fn = CGF.CurFn;
12536 Data.Disabled = true;
12537 }
12538}
12539
12540CGOpenMPRuntime::LastprivateConditionalRAII
12541CGOpenMPRuntime::LastprivateConditionalRAII::disable(
12542 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
12543 return LastprivateConditionalRAII(CGF, S);
12544}
12545
12546CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
12547 if (CGM.getLangOpts().OpenMP < 50)
12548 return;
12549 if (Action == ActionToDo::DisableLastprivateConditional) {
12550 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12551 "Expected list of disabled private vars.");
12552 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12553 }
12554 if (Action == ActionToDo::PushAsLastprivateConditional) {
12555 assert(
12556 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
12557 "Expected list of lastprivate conditional vars.");
12558 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
12559 }
12560}
12561
12562Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
12563 const VarDecl *VD) {
12564 ASTContext &C = CGM.getContext();
12565 auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
12566 if (I == LastprivateConditionalToTypes.end())
12567 I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
12568 QualType NewType;
12569 const FieldDecl *VDField;
12570 const FieldDecl *FiredField;
12571 LValue BaseLVal;
12572 auto VI = I->getSecond().find(VD);
12573 if (VI == I->getSecond().end()) {
12574 RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
12575 RD->startDefinition();
12576 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
12577 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
12578 RD->completeDefinition();
12579 NewType = C.getRecordType(RD);
12580 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
12581 BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
12582 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
12583 } else {
12584 NewType = std::get<0>(VI->getSecond());
12585 VDField = std::get<1>(VI->getSecond());
12586 FiredField = std::get<2>(VI->getSecond());
12587 BaseLVal = std::get<3>(VI->getSecond());
12588 }
12589 LValue FiredLVal =
12590 CGF.EmitLValueForField(BaseLVal, FiredField);
12591 CGF.EmitStoreOfScalar(
12592 llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
12593 FiredLVal);
12594 return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
12595}
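// Illustrative (editorial note, not part of the original source): for a
// conditional lastprivate 'int a', the private copy is materialized as
//
//   struct lastprivate.conditional { int a; char Fired; };
//
// with Fired zero-initialized here and set to 1 from inner regions in
// checkAndEmitLastprivateConditional below.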
12596
12597namespace {
12598/// Checks if the lastprivate conditional variable is referenced in LHS.
12599class LastprivateConditionalRefChecker final
12600 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
12601 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
12602 const Expr *FoundE = nullptr;
12603 const Decl *FoundD = nullptr;
12604 StringRef UniqueDeclName;
12605 LValue IVLVal;
12606 llvm::Function *FoundFn = nullptr;
12607 SourceLocation Loc;
12608
12609public:
12610 bool VisitDeclRefExpr(const DeclRefExpr *E) {
12611 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12612 llvm::reverse(LPM)) {
12613 auto It = D.DeclToUniqueName.find(E->getDecl());
12614 if (It == D.DeclToUniqueName.end())
12615 continue;
12616 if (D.Disabled)
12617 return false;
12618 FoundE = E;
12619 FoundD = E->getDecl()->getCanonicalDecl();
12620 UniqueDeclName = It->second;
12621 IVLVal = D.IVLVal;
12622 FoundFn = D.Fn;
12623 break;
12624 }
12625 return FoundE == E;
12626 }
12627 bool VisitMemberExpr(const MemberExpr *E) {
12628 if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
12629 return false;
12630 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
12631 llvm::reverse(LPM)) {
12632 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
12633 if (It == D.DeclToUniqueName.end())
12634 continue;
12635 if (D.Disabled)
12636 return false;
12637 FoundE = E;
12638 FoundD = E->getMemberDecl()->getCanonicalDecl();
12639 UniqueDeclName = It->second;
12640 IVLVal = D.IVLVal;
12641 FoundFn = D.Fn;
12642 break;
12643 }
12644 return FoundE == E;
12645 }
12646 bool VisitStmt(const Stmt *S) {
12647 for (const Stmt *Child : S->children()) {
12648 if (!Child)
12649 continue;
12650 if (const auto *E = dyn_cast<Expr>(Child))
12651 if (!E->isGLValue())
12652 continue;
12653 if (Visit(Child))
12654 return true;
12655 }
12656 return false;
12657 }
12658 explicit LastprivateConditionalRefChecker(
12659 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
12660 : LPM(LPM) {}
12661 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
12662 getFoundData() const {
12663 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
12664 }
12665};
12666} // namespace
12667
12668void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
12669 LValue IVLVal,
12670 StringRef UniqueDeclName,
12671 LValue LVal,
12672 SourceLocation Loc) {
12673 // Last updated loop counter for the lastprivate conditional var.
12674 // int<xx> last_iv = 0;
12675 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
12676 llvm::Constant *LastIV =
12677 getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
12678 cast<llvm::GlobalVariable>(LastIV)->setAlignment(
12679 IVLVal.getAlignment().getAsAlign());
12680 LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
12681
12682 // Last value of the lastprivate conditional.
12683 // decltype(priv_a) last_a;
12684 llvm::GlobalVariable *Last = getOrCreateInternalVariable(
12685 CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
12686 Last->setAlignment(LVal.getAlignment().getAsAlign());
12687 LValue LastLVal = CGF.MakeAddrLValue(
12688 Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());
12689
12690 // Global loop counter. Required to handle inner parallel-for regions.
12691 // iv
12692 llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);
12693
12694 // #pragma omp critical(a)
12695 // if (last_iv <= iv) {
12696 // last_iv = iv;
12697 // last_a = priv_a;
12698 // }
12699 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
12700 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
12701 Action.Enter(CGF);
12702 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
12703 // (last_iv <= iv) ? Check if the variable is updated and store new
12704 // value in global var.
12705 llvm::Value *CmpRes;
12706 if (IVLVal.getType()->isSignedIntegerType()) {
12707 CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
12708 } else {
12709 assert(IVLVal.getType()->isUnsignedIntegerType() &&
12710 "Loop iteration variable must be integer.");
12711 CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
12712 }
12713 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
12714 llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
12715 CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
12716 // {
12717 CGF.EmitBlock(ThenBB);
12718
12719 // last_iv = iv;
12720 CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
12721
12722 // last_a = priv_a;
12723 switch (CGF.getEvaluationKind(LVal.getType())) {
12724 case TEK_Scalar: {
12725 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
12726 CGF.EmitStoreOfScalar(PrivVal, LastLVal);
12727 break;
12728 }
12729 case TEK_Complex: {
12730 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
12731 CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
12732 break;
12733 }
12734 case TEK_Aggregate:
12735 llvm_unreachable(
12736 "Aggregates are not supported in lastprivate conditional.");
12737 }
12738 // }
12739 CGF.EmitBranch(ExitBB);
12740 // There is no need to emit line number for unconditional branch.
12741 (void)ApplyDebugLocation::CreateEmpty(CGF);
12742 CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
12743 };
12744
12745 if (CGM.getLangOpts().OpenMPSimd) {
12746 // Do not emit as a critical region as no parallel region could be emitted.
12747 RegionCodeGenTy ThenRCG(CodeGen);
12748 ThenRCG(CGF);
12749 } else {
12750 emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
12751 }
12752}
12753
12754void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
12755 const Expr *LHS) {
12756 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12757 return;
12758 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
12759 if (!Checker.Visit(LHS))
12760 return;
12761 const Expr *FoundE;
12762 const Decl *FoundD;
12763 StringRef UniqueDeclName;
12764 LValue IVLVal;
12765 llvm::Function *FoundFn;
12766 std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
12767 Checker.getFoundData();
12768 if (FoundFn != CGF.CurFn) {
12769 // Special codegen for inner parallel regions.
12770 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
12771 auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
12772 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
12773 "Lastprivate conditional is not found in outer region.");
12774 QualType StructTy = std::get<0>(It->getSecond());
12775 const FieldDecl* FiredDecl = std::get<2>(It->getSecond());
12776 LValue PrivLVal = CGF.EmitLValue(FoundE);
12777 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
12778 PrivLVal.getAddress(CGF),
12779 CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)));
12780 LValue BaseLVal =
12781 CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
12782 LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
12783 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
12784 CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
12785 FiredLVal, llvm::AtomicOrdering::Unordered,
12786 /*IsVolatile=*/true, /*isInit=*/false);
12787 return;
12788 }
12789
12790 // Private address of the lastprivate conditional in the current context.
12791 // priv_a
12792 LValue LVal = CGF.EmitLValue(FoundE);
12793 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
12794 FoundE->getExprLoc());
12795}
12796
12797void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
12798 CodeGenFunction &CGF, const OMPExecutableDirective &D,
12799 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
12800 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
12801 return;
12802 auto Range = llvm::reverse(LastprivateConditionalStack);
12803 auto It = llvm::find_if(
12804 Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
12805 if (It == Range.end() || It->Fn != CGF.CurFn)
12806 return;
12807 auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
12808 assert(LPCI != LastprivateConditionalToTypes.end() &&
12809 "Lastprivates must be registered already.");
12810 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
12811 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
12812 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
12813 for (const auto &Pair : It->DeclToUniqueName) {
12814 const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
12815 if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
12816 continue;
12817 auto I = LPCI->getSecond().find(Pair.first);
12818 assert(I != LPCI->getSecond().end() &&
12819 "Lastprivate must be registered already.");
12820 // bool Cmp = priv_a.Fired != 0;
12821 LValue BaseLVal = std::get<3>(I->getSecond());
12822 LValue FiredLVal =
12823 CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
12824 llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
12825 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
12826 llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
12827 llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
12828 // if (Cmp) {
12829 CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
12830 CGF.EmitBlock(ThenBB);
12831 Address Addr = CGF.GetAddrOfLocalVar(VD);
12832 LValue LVal;
12833 if (VD->getType()->isReferenceType())
12834 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
12835 AlignmentSource::Decl);
12836 else
12837 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
12838 AlignmentSource::Decl);
12839 emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
12840 D.getBeginLoc());
12841 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
12842 CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
12843 // }
12844 }
12845}
12846
12847void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
12848 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
12849 SourceLocation Loc) {
12850 if (CGF.getLangOpts().OpenMP < 50)
12851 return;
12852 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
12853 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
12854 "Unknown lastprivate conditional variable.");
12855 StringRef UniqueName = It->second;
12856 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
12857 // The variable was not updated in the region - exit.
12858 if (!GV)
12859 return;
12860 LValue LPLVal = CGF.MakeAddrLValue(
12861 Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
12862 PrivLVal.getType().getNonReferenceType());
12863 llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
12864 CGF.EmitStoreOfScalar(Res, PrivLVal);
12865}
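
The function above is the mirror step: if the region ever fired an update, a module-level global named by UniqueName exists and holds the last value, which is copied back into the private copy. A hedged sketch follows; note that the real `if (!GV)` test is a compile-time check for whether the global was emitted at all, modeled here as a runtime null check purely for illustration, with LastValue standing in for that global.

extern int *LastValue;            // stands in for the named GlobalVariable

void finalUpdateSketch(int &priv_a) {
  if (LastValue == nullptr)
    return;                       // the variable was not updated in the region
  priv_a = *LastValue;            // EmitLoadOfScalar + EmitStoreOfScalar
}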
12866
12867llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
12868 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12869 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12870 llvm_unreachable("Not supported in SIMD-only mode");
12871}
12872
12873llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
12874 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12875 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
12876 llvm_unreachable("Not supported in SIMD-only mode");
12877}
12878
12879llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
12880 const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
12881 const VarDecl *PartIDVar, const VarDecl *TaskTVar,
12882 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
12883 bool Tied, unsigned &NumberOfParts) {
12884 llvm_unreachable("Not supported in SIMD-only mode");
12885}
12886
12887void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
12888 SourceLocation Loc,
12889 llvm::Function *OutlinedFn,
12890 ArrayRef<llvm::Value *> CapturedVars,
12891 const Expr *IfCond,
12892 llvm::Value *NumThreads) {
12893 llvm_unreachable("Not supported in SIMD-only mode");
12894}
12895
12896void CGOpenMPSIMDRuntime::emitCriticalRegion(
12897 CodeGenFunction &CGF, StringRef CriticalName,
12898 const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
12899 const Expr *Hint) {
12900 llvm_unreachable("Not supported in SIMD-only mode");
12901}
12902
12903void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
12904 const RegionCodeGenTy &MasterOpGen,
12905 SourceLocation Loc) {
12906 llvm_unreachable("Not supported in SIMD-only mode");
12907}
12908
12909void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
12910 const RegionCodeGenTy &MasterOpGen,
12911 SourceLocation Loc,
12912 const Expr *Filter) {
12913 llvm_unreachable("Not supported in SIMD-only mode");
12914}
12915
12916void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
12917 SourceLocation Loc) {
12918 llvm_unreachable("Not supported in SIMD-only mode");
12919}
12920
12921void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
12922 CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
12923 SourceLocation Loc) {
12924 llvm_unreachable("Not supported in SIMD-only mode");
12925}
12926
12927void CGOpenMPSIMDRuntime::emitSingleRegion(
12928 CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
12929 SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
12930 ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
12931 ArrayRef<const Expr *> AssignmentOps) {
12932 llvm_unreachable("Not supported in SIMD-only mode");
12933}
12934
12935void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
12936 const RegionCodeGenTy &OrderedOpGen,
12937 SourceLocation Loc,
12938 bool IsThreads) {
12939 llvm_unreachable("Not supported in SIMD-only mode");
12940}
12941
12942void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
12943 SourceLocation Loc,
12944 OpenMPDirectiveKind Kind,
12945 bool EmitChecks,
12946 bool ForceSimpleCall) {
12947 llvm_unreachable("Not supported in SIMD-only mode");
12948}
12949
12950void CGOpenMPSIMDRuntime::emitForDispatchInit(
12951 CodeGenFunction &CGF, SourceLocation Loc,
12952 const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
12953 bool Ordered, const DispatchRTInput &DispatchValues) {
12954 llvm_unreachable("Not supported in SIMD-only mode");
12955}
12956
12957void CGOpenMPSIMDRuntime::emitForStaticInit(
12958 CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
12959 const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
12960 llvm_unreachable("Not supported in SIMD-only mode");
12961}
12962
12963void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
12964 CodeGenFunction &CGF, SourceLocation Loc,
12965 OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
12966 llvm_unreachable("Not supported in SIMD-only mode");
12967}
12968
12969void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
12970 SourceLocation Loc,
12971 unsigned IVSize,
12972 bool IVSigned) {
12973 llvm_unreachable("Not supported in SIMD-only mode");
12974}
12975
12976void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
12977 SourceLocation Loc,
12978 OpenMPDirectiveKind DKind) {
12979 llvm_unreachable("Not supported in SIMD-only mode");
12980}
12981
12982llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
12983 SourceLocation Loc,
12984 unsigned IVSize, bool IVSigned,
12985 Address IL, Address LB,
12986 Address UB, Address ST) {
12987 llvm_unreachable("Not supported in SIMD-only mode");
12988}
12989
12990void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
12991 llvm::Value *NumThreads,
12992 SourceLocation Loc) {
12993 llvm_unreachable("Not supported in SIMD-only mode");
12994}
12995
12996void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
12997 ProcBindKind ProcBind,
12998 SourceLocation Loc) {
12999 llvm_unreachable("Not supported in SIMD-only mode");
13000}
13001
13002Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
13003 const VarDecl *VD,
13004 Address VDAddr,
13005 SourceLocation Loc) {
13006 llvm_unreachable("Not supported in SIMD-only mode");
13007}
13008
13009llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
13010 const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
13011 CodeGenFunction *CGF) {
13012 llvm_unreachable("Not supported in SIMD-only mode");
13013}
13014
13015Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
13016 CodeGenFunction &CGF, QualType VarType, StringRef Name) {
13017 llvm_unreachable("Not supported in SIMD-only mode");
13018}
13019
13020void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
13021 ArrayRef<const Expr *> Vars,
13022 SourceLocation Loc,
13023 llvm::AtomicOrdering AO) {
13024 llvm_unreachable("Not supported in SIMD-only mode");
13025}
13026
13027void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
13028 const OMPExecutableDirective &D,
13029 llvm::Function *TaskFunction,
13030 QualType SharedsTy, Address Shareds,
13031 const Expr *IfCond,
13032 const OMPTaskDataTy &Data) {
13033 llvm_unreachable("Not supported in SIMD-only mode");
13034}
13035
13036void CGOpenMPSIMDRuntime::emitTaskLoopCall(
13037 CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
13038 llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
13039 const Expr *IfCond, const OMPTaskDataTy &Data) {
13040 llvm_unreachable("Not supported in SIMD-only mode");
13041}
13042
13043void CGOpenMPSIMDRuntime::emitReduction(
13044 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
13045 ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
13046 ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
13047 assert(Options.SimpleReduction && "Only simple reduction is expected.");
13048 CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
13049 ReductionOps, Options);
13050}
13051
13052llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
13053 CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
13054 ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
13055 llvm_unreachable("Not supported in SIMD-only mode");
13056}
13057
13058void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
13059 SourceLocation Loc,
13060 bool IsWorksharingReduction) {
13061 llvm_unreachable("Not supported in SIMD-only mode");
13062}
13063
13064void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
13065 SourceLocation Loc,
13066 ReductionCodeGen &RCG,
13067 unsigned N) {
13068 llvm_unreachable("Not supported in SIMD-only mode");
13069}
13070
13071Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
13072 SourceLocation Loc,
13073 llvm::Value *ReductionsPtr,
13074 LValue SharedLVal) {
13075 llvm_unreachable("Not supported in SIMD-only mode");
13076}
13077
13078void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
13079 SourceLocation Loc,
13080 const OMPTaskDataTy &Data) {
13081 llvm_unreachable("Not supported in SIMD-only mode");
13082}
13083
13084void CGOpenMPSIMDRuntime::emitCancellationPointCall(
13085 CodeGenFunction &CGF, SourceLocation Loc,
13086 OpenMPDirectiveKind CancelRegion) {
13087 llvm_unreachable("Not supported in SIMD-only mode");
13088}
13089
13090void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
13091 SourceLocation Loc, const Expr *IfCond,
13092 OpenMPDirectiveKind CancelRegion) {
13093 llvm_unreachable("Not supported in SIMD-only mode");
13094}
13095
13096void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
13097 const OMPExecutableDirective &D, StringRef ParentName,
13098 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
13099 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
13100 llvm_unreachable("Not supported in SIMD-only mode");
13101}
13102
13103void CGOpenMPSIMDRuntime::emitTargetCall(
13104 CodeGenFunction &CGF, const OMPExecutableDirective &D,
13105 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
13106 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
13107 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
13108 const OMPLoopDirective &D)>
13109 SizeEmitter) {
13110 llvm_unreachable("Not supported in SIMD-only mode");
13111}
13112
13113bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
13114 llvm_unreachable("Not supported in SIMD-only mode");
13115}
13116
13117bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
13118 llvm_unreachable("Not supported in SIMD-only mode");
13119}
13120
13121bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
13122 return false;
13123}
13124
13125void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
13126 const OMPExecutableDirective &D,
13127 SourceLocation Loc,
13128 llvm::Function *OutlinedFn,
13129 ArrayRef<llvm::Value *> CapturedVars) {
13130 llvm_unreachable("Not supported in SIMD-only mode");
13131}
13132
13133void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
13134 const Expr *NumTeams,
13135 const Expr *ThreadLimit,
13136 SourceLocation Loc) {
13137 llvm_unreachable("Not supported in SIMD-only mode");
13138}
13139
13140void CGOpenMPSIMDRuntime::emitTargetDataCalls(
13141 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13142 const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
13143 llvm_unreachable("Not supported in SIMD-only mode");
13144}
13145
13146void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
13147 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
13148 const Expr *Device) {
13149 llvm_unreachable("Not supported in SIMD-only mode");
13150}
13151
13152void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
13153 const OMPLoopDirective &D,
13154 ArrayRef<Expr *> NumIterations) {
13155 llvm_unreachable("Not supported in SIMD-only mode");
13156}
13157
13158void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
13159 const OMPDependClause *C) {
13160 llvm_unreachable("Not supported in SIMD-only mode");
13161}
13162
13163const VarDecl *
13164CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
13165 const VarDecl *NativeParam) const {
13166 llvm_unreachable("Not supported in SIMD-only mode");
13167}
13168
13169Address
13170CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
13171 const VarDecl *NativeParam,
13172 const VarDecl *TargetParam) const {
13173 llvm_unreachable("Not supported in SIMD-only mode");
13174}

/usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/bits/stl_iterator.h

1// Iterators -*- C++ -*-
2
3// Copyright (C) 2001-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/*
26 *
27 * Copyright (c) 1994
28 * Hewlett-Packard Company
29 *
30 * Permission to use, copy, modify, distribute and sell this software
31 * and its documentation for any purpose is hereby granted without fee,
32 * provided that the above copyright notice appear in all copies and
33 * that both that copyright notice and this permission notice appear
34 * in supporting documentation. Hewlett-Packard Company makes no
35 * representations about the suitability of this software for any
36 * purpose. It is provided "as is" without express or implied warranty.
37 *
38 *
39 * Copyright (c) 1996-1998
40 * Silicon Graphics Computer Systems, Inc.
41 *
42 * Permission to use, copy, modify, distribute and sell this software
43 * and its documentation for any purpose is hereby granted without fee,
44 * provided that the above copyright notice appear in all copies and
45 * that both that copyright notice and this permission notice appear
46 * in supporting documentation. Silicon Graphics makes no
47 * representations about the suitability of this software for any
48 * purpose. It is provided "as is" without express or implied warranty.
49 */
50
51/** @file bits/stl_iterator.h
52 * This is an internal header file, included by other library headers.
53 * Do not attempt to use it directly. @headername{iterator}
54 *
55 * This file implements reverse_iterator, back_insert_iterator,
56 * front_insert_iterator, insert_iterator, __normal_iterator, and their
57 * supporting functions and overloaded operators.
58 */
59
60#ifndef _STL_ITERATOR_H
61#define _STL_ITERATOR_H 1
62
63#include <bits/cpp_type_traits.h>
64#include <ext/type_traits.h>
65#include <bits/move.h>
66#include <bits/ptr_traits.h>
67
68#if __cplusplus >= 201103L
69# include <type_traits>
70#endif
71
72#if __cplusplus > 201703L
73# define __cpp_lib_array_constexpr 201811L
74# define __cpp_lib_constexpr_iterator 201811L
75#elif __cplusplus == 201703L
76# define __cpp_lib_array_constexpr 201803L
77#endif
78
79#if __cplusplus > 201703L
80# include <compare>
81# include <new>
82# include <bits/iterator_concepts.h>
83#endif
84
85namespace std _GLIBCXX_VISIBILITY(default)
86{
87_GLIBCXX_BEGIN_NAMESPACE_VERSION
88
89 /**
90 * @addtogroup iterators
91 * @{
92 */
93
94#if __cplusplus > 201703L && __cpp_lib_concepts
95 namespace __detail
96 {
97 // Weaken iterator_category _Cat to _Limit if it is derived from that,
98 // otherwise use _Otherwise.
99 template<typename _Cat, typename _Limit, typename _Otherwise = _Cat>
100 using __clamp_iter_cat
101 = conditional_t<derived_from<_Cat, _Limit>, _Limit, _Otherwise>;
102 }
103#endif
104
105 // 24.4.1 Reverse iterators
106 /**
107 * Bidirectional and random access iterators have corresponding reverse
108 * %iterator adaptors that iterate through the data structure in the
109 * opposite direction. They have the same signatures as the corresponding
110 * iterators. The fundamental relation between a reverse %iterator and its
111 * corresponding %iterator @c i is established by the identity:
112 * @code
113 * &*(reverse_iterator(i)) == &*(i - 1)
114 * @endcode
115 *
116 * <em>This mapping is dictated by the fact that while there is always a
117 * pointer past the end of an array, there might not be a valid pointer
118 * before the beginning of an array.</em> [24.4.1]/1,2
119 *
120 * Reverse iterators can be tricky and surprising at first. Their
121 * semantics make sense, however, and the trickiness is a side effect of
122 * the requirement that the iterators must be safe.
123 */
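
The identity quoted in the comment can be checked directly; a minimal, self-contained example (array contents are illustrative):

#include <cassert>
#include <iterator>

int main() {
  int a[] = {10, 20, 30};
  int *i = a + 3;                     // one past the end
  std::reverse_iterator<int *> r(i);
  assert(&*r == &*(i - 1));           // the identity from the comment above
  return 0;
}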
124 template<typename _Iterator>
125 class reverse_iterator
126 : public iterator<typename iterator_traits<_Iterator>::iterator_category,
127 typename iterator_traits<_Iterator>::value_type,
128 typename iterator_traits<_Iterator>::difference_type,
129 typename iterator_traits<_Iterator>::pointer,
130 typename iterator_traits<_Iterator>::reference>
131 {
132 protected:
133 _Iterator current;
134
135 typedef iterator_traits<_Iterator> __traits_type;
136
137 public:
138 typedef _Iterator iterator_type;
139 typedef typename __traits_type::difference_type difference_type;
140 typedef typename __traits_type::pointer pointer;
141 typedef typename __traits_type::reference reference;
142
143#if __cplusplus > 201703L && __cpp_lib_concepts
144 using iterator_concept
145 = conditional_t<random_access_iterator<_Iterator>,
146 random_access_iterator_tag,
147 bidirectional_iterator_tag>;
148 using iterator_category
149 = __detail::__clamp_iter_cat<typename __traits_type::iterator_category,
150 random_access_iterator_tag>;
151#endif
152
153 /**
154 * The default constructor value-initializes member @p current.
155 * If it is a pointer, that means it is zero-initialized.
156 */
157 // _GLIBCXX_RESOLVE_LIB_DEFECTS
158 // 235 No specification of default ctor for reverse_iterator
159 // 1012. reverse_iterator default ctor should value initialize
160 _GLIBCXX17_CONSTEXPR
161 reverse_iterator() : current() { }
162
163 /**
164 * This %iterator will move in the opposite direction that @p x does.
165 */
166 explicit _GLIBCXX17_CONSTEXPR
167 reverse_iterator(iterator_type __x) : current(__x) { }
168
169 /**
170 * The copy constructor is normal.
171 */
172 _GLIBCXX17_CONSTEXPR
173 reverse_iterator(const reverse_iterator& __x)
174 : current(__x.current) { }
175
176#if __cplusplus >= 201103L
177 reverse_iterator& operator=(const reverse_iterator&) = default;
178#endif
179
180 /**
181 * A %reverse_iterator across other types can be copied if the
182 * underlying %iterator can be converted to the type of @c current.
183 */
184 template<typename _Iter>
185 _GLIBCXX17_CONSTEXPR
186 reverse_iterator(const reverse_iterator<_Iter>& __x)
187 : current(__x.base()) { }
188
189 /**
190 * @return @c current, the %iterator used for underlying work.
191 */
192 _GLIBCXX17_CONSTEXPR iterator_type
193 base() const
194 { return current; }
195
196 /**
197 * @return A reference to the value at @c --current
198 *
199 * This requires that @c --current is dereferenceable.
200 *
201 * @warning This implementation requires that for an iterator of the
202 * underlying iterator type, @c x, a reference obtained by
203 * @c *x remains valid after @c x has been modified or
204 * destroyed. This is a bug: http://gcc.gnu.org/PR51823
205 */
206 _GLIBCXX17_CONSTEXPR reference
207 operator*() const
208 {
209 _Iterator __tmp = current;
210 return *--__tmp;
211 }
212
213 /**
214 * @return A pointer to the value at @c --current
215 *
216 * This requires that @c --current is dereferenceable.
217 */
218 _GLIBCXX17_CONSTEXPR pointer
219 operator->() const
220#if __cplusplus > 201703L && __cpp_concepts >= 201907L
221 requires is_pointer_v<_Iterator>
222 || requires(const _Iterator __i) { __i.operator->(); }
223#endif
224 {
225 // _GLIBCXX_RESOLVE_LIB_DEFECTS
226 // 1052. operator-> should also support smart pointers
227 _Iterator __tmp = current;
228 --__tmp;
229 return _S_to_pointer(__tmp);
230 }
231
232 /**
233 * @return @c *this
234 *
235 * Decrements the underlying iterator.
236 */
237 _GLIBCXX17_CONSTEXPR reverse_iterator&
238 operator++()
239 {
240 --current;
241 return *this;
242 }
243
244 /**
245 * @return The original value of @c *this
246 *
247 * Decrements the underlying iterator.
248 */
249 _GLIBCXX17_CONSTEXPR reverse_iterator
250 operator++(int)
251 {
252 reverse_iterator __tmp = *this;
253 --current;
254 return __tmp;
255 }
256
257 /**
258 * @return @c *this
259 *
260 * Increments the underlying iterator.
261 */
262 _GLIBCXX17_CONSTEXPR reverse_iterator&
263 operator--()
264 {
265 ++current;
266 return *this;
267 }
268
269 /**
270 * @return A reverse_iterator with the previous value of @c *this
271 *
272 * Increments the underlying iterator.
273 */
274 _GLIBCXX17_CONSTEXPR reverse_iterator
275 operator--(int)
276 {
277 reverse_iterator __tmp = *this;
278 ++current;
279 return __tmp;
280 }
281
282 /**
283 * @return A reverse_iterator that refers to @c current - @a __n
284 *
285 * The underlying iterator must be a Random Access Iterator.
286 */
287 _GLIBCXX17_CONSTEXPR reverse_iterator
288 operator+(difference_type __n) const
289 { return reverse_iterator(current - __n); }
290
291 /**
292 * @return *this
293 *
294 * Moves the underlying iterator backwards @a __n steps.
295 * The underlying iterator must be a Random Access Iterator.
296 */
297 _GLIBCXX17_CONSTEXPR reverse_iterator&
298 operator+=(difference_type __n)
299 {
300 current -= __n;
301 return *this;
302 }
303
304 /**
305 * @return A reverse_iterator that refers to @c current - @a __n
306 *
307 * The underlying iterator must be a Random Access Iterator.
308 */
309 _GLIBCXX17_CONSTEXPR reverse_iterator
310 operator-(difference_type __n) const
311 { return reverse_iterator(current + __n); }
312
313 /**
314 * @return *this
315 *
316 * Moves the underlying iterator forwards @a __n steps.
317 * The underlying iterator must be a Random Access Iterator.
318 */
319 _GLIBCXX17_CONSTEXPR reverse_iterator&
320 operator-=(difference_type __n)
321 {
322 current += __n;
323 return *this;
324 }
325
326 /**
327 * @return The value at @c current - @a __n - 1
328 *
329 * The underlying iterator must be a Random Access Iterator.
330 */
331 _GLIBCXX17_CONSTEXPR reference
332 operator[](difference_type __n) const
333 { return *(*this + __n); }
334
335#if __cplusplus > 201703L && __cpp_lib_concepts
336 friend constexpr iter_rvalue_reference_t<_Iterator>
337 iter_move(const reverse_iterator& __i)
338 noexcept(is_nothrow_copy_constructible_v<_Iterator>
339 && noexcept(ranges::iter_move(--std::declval<_Iterator&>())))
340 {
341 auto __tmp = __i.base();
342 return ranges::iter_move(--__tmp);
343 }
344
345 template<indirectly_swappable<_Iterator> _Iter2>
346 friend constexpr void
347 iter_swap(const reverse_iterator& __x,
348 const reverse_iterator<_Iter2>& __y)
349 noexcept(is_nothrow_copy_constructible_v<_Iterator>
350 && is_nothrow_copy_constructible_v<_Iter2>
351 && noexcept(ranges::iter_swap(--std::declval<_Iterator&>(),
352 --std::declval<_Iter2&>())))
353 {
354 auto __xtmp = __x.base();
355 auto __ytmp = __y.base();
356 ranges::iter_swap(--__xtmp, --__ytmp);
357 }
358#endif
359
360 private:
361 template<typename _Tp>
362 static _GLIBCXX17_CONSTEXPR _Tp*
363 _S_to_pointer(_Tp* __p)
364 { return __p; }
365
366 template<typename _Tp>
367 static _GLIBCXX17_CONSTEXPR pointer
368 _S_to_pointer(_Tp __t)
369 { return __t.operator->(); }
370 };
371
372 //@{
373 /**
374 * @param __x A %reverse_iterator.
375 * @param __y A %reverse_iterator.
376 * @return A simple bool.
377 *
378 * Reverse iterators forward comparisons to their underlying base()
379 * iterators.
380 *
381 */
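
A small example of this forwarding, including the deliberate operand swap in the ordering operators defined below (values are illustrative):

#include <cassert>
#include <iterator>

int main() {
  int a[] = {1, 2, 3};
  std::reverse_iterator<int *> rfirst(a + 3);  // *rfirst == a[2]
  std::reverse_iterator<int *> rlast(a);
  assert(rfirst < rlast);                 // forward order in the reverse view
  assert(rlast.base() < rfirst.base());   // base() order is the mirror image
  return 0;
}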
382#if __cplusplus <= 201703L || ! defined __cpp_lib_concepts
383 template<typename _Iterator>
384 inline _GLIBCXX17_CONSTEXPR bool
385 operator==(const reverse_iterator<_Iterator>& __x,
386 const reverse_iterator<_Iterator>& __y)
387 { return __x.base() == __y.base(); }
Step 8: Assuming the condition is true
Step 9: Returning the value 1, which participates in a condition later
388
389 template<typename _Iterator>
390 inline _GLIBCXX17_CONSTEXPR bool
391 operator<(const reverse_iterator<_Iterator>& __x,
392 const reverse_iterator<_Iterator>& __y)
393 { return __y.base() < __x.base(); }
394
395 template<typename _Iterator>
396 inline _GLIBCXX17_CONSTEXPR bool
397 operator!=(const reverse_iterator<_Iterator>& __x,
398 const reverse_iterator<_Iterator>& __y)
399 { return !(__x == __y); }
Step 7: Calling 'operator==<const clang::OMPClauseMappableExprCommon::MappableComponent *>'
Step 10: Returning from 'operator==<const clang::OMPClauseMappableExprCommon::MappableComponent *>'
Step 11: Returning zero, which participates in a condition later
400
401 template<typename _Iterator>
402 inline _GLIBCXX17_CONSTEXPR bool
403 operator>(const reverse_iterator<_Iterator>& __x,
404 const reverse_iterator<_Iterator>& __y)
405 { return __y < __x; }
406
407 template<typename _Iterator>
408 inline _GLIBCXX17_CONSTEXPR bool
409 operator<=(const reverse_iterator<_Iterator>& __x,
410 const reverse_iterator<_Iterator>& __y)
411 { return !(__y < __x); }
412
413 template<typename _Iterator>
414 inline _GLIBCXX17_CONSTEXPR bool
415 operator>=(const reverse_iterator<_Iterator>& __x,
416 const reverse_iterator<_Iterator>& __y)
417 { return !(__x < __y); }
418
419 // _GLIBCXX_RESOLVE_LIB_DEFECTS
420 // DR 280. Comparison of reverse_iterator to const reverse_iterator.
421 template<typename _IteratorL, typename _IteratorR>
422 inline _GLIBCXX17_CONSTEXPR bool
423 operator==(const reverse_iterator<_IteratorL>& __x,
424 const reverse_iterator<_IteratorR>& __y)
425 { return __x.base() == __y.base(); }
426
427 template<typename _IteratorL, typename _IteratorR>
428 inline _GLIBCXX17_CONSTEXPR bool
429 operator<(const reverse_iterator<_IteratorL>& __x,
430 const reverse_iterator<_IteratorR>& __y)
431 { return __y.base() < __x.base(); }
432
433 template<typename _IteratorL, typename _IteratorR>
434 inline _GLIBCXX17_CONSTEXPR bool
435 operator!=(const reverse_iterator<_IteratorL>& __x,
436 const reverse_iterator<_IteratorR>& __y)
437 { return !(__x == __y); }
438
439 template<typename _IteratorL, typename _IteratorR>
440 inline _GLIBCXX17_CONSTEXPR bool
441 operator>(const reverse_iterator<_IteratorL>& __x,
442 const reverse_iterator<_IteratorR>& __y)
443 { return __y < __x; }
444
445 template<typename _IteratorL, typename _IteratorR>
446 inline _GLIBCXX17_CONSTEXPR bool
447 operator<=(const reverse_iterator<_IteratorL>& __x,
448 const reverse_iterator<_IteratorR>& __y)
449 { return !(__y < __x); }
450
451 template<typename _IteratorL, typename _IteratorR>
452 inline _GLIBCXX17_CONSTEXPR bool
453 operator>=(const reverse_iterator<_IteratorL>& __x,
454 const reverse_iterator<_IteratorR>& __y)
455 { return !(__x < __y); }
456#else // C++20
457 template<typename _IteratorL, typename _IteratorR>
458 constexpr bool
459 operator==(const reverse_iterator<_IteratorL>& __x,
460 const reverse_iterator<_IteratorR>& __y)
461 requires requires { { __x.base() == __y.base() } -> convertible_to<bool>; }
462 { return __x.base() == __y.base(); }
463
464 template<typename _IteratorL, typename _IteratorR>
465 constexpr bool
466 operator!=(const reverse_iterator<_IteratorL>& __x,
467 const reverse_iterator<_IteratorR>& __y)
468 requires requires { { __x.base() != __y.base() } -> convertible_to<bool>; }
469 { return __x.base() != __y.base(); }
470
471 template<typename _IteratorL, typename _IteratorR>
472 constexpr bool
473 operator<(const reverse_iterator<_IteratorL>& __x,
474 const reverse_iterator<_IteratorR>& __y)
475 requires requires { { __x.base() > __y.base() } -> convertible_to<bool>; }
476 { return __x.base() > __y.base(); }
477
478 template<typename _IteratorL, typename _IteratorR>
479 constexpr bool
480 operator>(const reverse_iterator<_IteratorL>& __x,
481 const reverse_iterator<_IteratorR>& __y)
482 requires requires { { __x.base() < __y.base() } -> convertible_to<bool>; }
483 { return __x.base() < __y.base(); }
484
485 template<typename _IteratorL, typename _IteratorR>
486 constexpr bool
487 operator<=(const reverse_iterator<_IteratorL>& __x,
488 const reverse_iterator<_IteratorR>& __y)
489 requires requires { { __x.base() >= __y.base() } -> convertible_to<bool>; }
490 { return __x.base() >= __y.base(); }
491
492 template<typename _IteratorL, typename _IteratorR>
493 constexpr bool
494 operator>=(const reverse_iterator<_IteratorL>& __x,
495 const reverse_iterator<_IteratorR>& __y)
496 requires requires { { __x.base() <= __y.base() } -> convertible_to<bool>; }
497 { return __x.base() <= __y.base(); }
498
499 template<typename _IteratorL,
500 three_way_comparable_with<_IteratorL> _IteratorR>
501 constexpr compare_three_way_result_t<_IteratorL, _IteratorR>
502 operator<=>(const reverse_iterator<_IteratorL>& __x,
503 const reverse_iterator<_IteratorR>& __y)
504 { return __y.base() <=> __x.base(); }
505#endif // C++20
506 //@}
507
508#if __cplusplus < 201103L
509 template<typename _Iterator>
510 inline typename reverse_iterator<_Iterator>::difference_type
511 operator-(const reverse_iterator<_Iterator>& __x,
512 const reverse_iterator<_Iterator>& __y)
513 { return __y.base() - __x.base(); }
514
515 template<typename _IteratorL, typename _IteratorR>
516 inline typename reverse_iterator<_IteratorL>::difference_type
517 operator-(const reverse_iterator<_IteratorL>& __x,
518 const reverse_iterator<_IteratorR>& __y)
519 { return __y.base() - __x.base(); }
520#else
521 // _GLIBCXX_RESOLVE_LIB_DEFECTS
522 // DR 685. reverse_iterator/move_iterator difference has invalid signatures
523 template<typename _IteratorL, typename _IteratorR>
524 inline _GLIBCXX17_CONSTEXPR auto
525 operator-(const reverse_iterator<_IteratorL>& __x,
526 const reverse_iterator<_IteratorR>& __y)
527 -> decltype(__y.base() - __x.base())
528 { return __y.base() - __x.base(); }
529#endif
530
531 template<typename _Iterator>
532 inline _GLIBCXX17_CONSTEXPR reverse_iterator<_Iterator>
533 operator+(typename reverse_iterator<_Iterator>::difference_type __n,
534 const reverse_iterator<_Iterator>& __x)
535 { return reverse_iterator<_Iterator>(__x.base() - __n); }
536
537#if __cplusplus >= 201103L
538 // Same as C++14 make_reverse_iterator but used in C++11 mode too.
539 template<typename _Iterator>
540 inline _GLIBCXX17_CONSTEXPR reverse_iterator<_Iterator>
541 __make_reverse_iterator(_Iterator __i)
542 { return reverse_iterator<_Iterator>(__i); }
543
544# if __cplusplus >= 201402L
545# define __cpp_lib_make_reverse_iterator 201402
546
547 // _GLIBCXX_RESOLVE_LIB_DEFECTS
548 // DR 2285. make_reverse_iterator
549 /// Generator function for reverse_iterator.
550 template<typename _Iterator>
551 inline _GLIBCXX17_CONSTEXPR reverse_iterator<_Iterator>
552 make_reverse_iterator(_Iterator __i)
553 { return reverse_iterator<_Iterator>(__i); }
554
555# if __cplusplus > 201703L && defined __cpp_lib_concepts
556 template<typename _Iterator1, typename _Iterator2>
557 requires (!sized_sentinel_for<_Iterator1, _Iterator2>)
558 inline constexpr bool
559 disable_sized_sentinel_for<reverse_iterator<_Iterator1>,
560 reverse_iterator<_Iterator2>> = true;
561# endif // C++20
562# endif // C++14
563
564 template<typename _Iterator>
565 _GLIBCXX20_CONSTEXPR
566 auto
567 __niter_base(reverse_iterator<_Iterator> __it)
568 -> decltype(__make_reverse_iterator(__niter_base(__it.base())))
569 { return __make_reverse_iterator(__niter_base(__it.base())); }
570
571 template<typename _Iterator>
572 struct __is_move_iterator<reverse_iterator<_Iterator> >
573 : __is_move_iterator<_Iterator>
574 { };
575
576 template<typename _Iterator>
577 _GLIBCXX20_CONSTEXPR
578 auto
579 __miter_base(reverse_iterator<_Iterator> __it)
580 -> decltype(__make_reverse_iterator(__miter_base(__it.base())))
581 { return __make_reverse_iterator(__miter_base(__it.base())); }
582#endif // C++11
583
584 // 24.4.2.2.1 back_insert_iterator
585 /**
586 * @brief Turns assignment into insertion.
587 *
588 * These are output iterators, constructed from a container-of-T.
589 * Assigning a T to the iterator appends it to the container using
590 * push_back.
591 *
592 * Tip: Using the back_inserter function to create these iterators can
593 * save typing.
594 */
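
A minimal usage sketch of the push_back forwarding described above (container and values are illustrative):

#include <algorithm>
#include <iterator>
#include <vector>

int main() {
  std::vector<int> src{1, 2, 3};
  std::vector<int> dst;
  // Each assignment through the iterator becomes dst.push_back(value).
  std::copy(src.begin(), src.end(), std::back_inserter(dst));
  return dst.size() == 3 ? 0 : 1;
}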
595 template<typename _Container>
596 class back_insert_iterator
597 : public iterator<output_iterator_tag, void, void, void, void>
598 {
599 protected:
600 _Container* container;
601
602 public:
603 /// A nested typedef for the type of whatever container you used.
604 typedef _Container container_type;
605#if __cplusplus > 201703L
606 using difference_type = ptrdiff_t;
607
608 constexpr back_insert_iterator() noexcept : container(nullptr) { }
609#endif
610
611 /// The only way to create this %iterator is with a container.
612 explicit _GLIBCXX20_CONSTEXPR
613 back_insert_iterator(_Container& __x)
614 : container(std::__addressof(__x)) { }
615
616 /**
617 * @param __value An instance of whatever type
618 * container_type::const_reference is; presumably a
619 * reference-to-const T for container<T>.
620 * @return This %iterator, for chained operations.
621 *
622 * This kind of %iterator doesn't really have a @a position in the
623 * container (you can think of the position as being permanently at
624 * the end, if you like). Assigning a value to the %iterator will
625 * always append the value to the end of the container.
626 */
627#if __cplusplus < 201103L
628 back_insert_iterator&
629 operator=(typename _Container::const_reference __value)
630 {
631 container->push_back(__value);
632 return *this;
633 }
634#else
635 _GLIBCXX20_CONSTEXPR
636 back_insert_iterator&
637 operator=(const typename _Container::value_type& __value)
638 {
639 container->push_back(__value);
640 return *this;
641 }
642
643 _GLIBCXX20_CONSTEXPR
644 back_insert_iterator&
645 operator=(typename _Container::value_type&& __value)
646 {
647 container->push_back(std::move(__value));
648 return *this;
649 }
650#endif
651
652 /// Simply returns *this.
653 _GLIBCXX20_CONSTEXPR
654 back_insert_iterator&
655 operator*()
656 { return *this; }
657
658 /// Simply returns *this. (This %iterator does not @a move.)
659 _GLIBCXX20_CONSTEXPR
660 back_insert_iterator&
661 operator++()
662 { return *this; }
663
664 /// Simply returns *this. (This %iterator does not @a move.)
665 _GLIBCXX20_CONSTEXPR
666 back_insert_iterator
667 operator++(int)
668 { return *this; }
669 };
670
671 /**
672 * @param __x A container of arbitrary type.
673 * @return An instance of back_insert_iterator working on @p __x.
674 *
675 * This wrapper function helps in creating back_insert_iterator instances.
676 * Typing the name of the %iterator requires knowing the precise full
677 * type of the container, which can be tedious and impedes generic
678 * programming. Using this function lets you take advantage of automatic
679 * template parameter deduction, making the compiler match the correct
680 * types for you.
681 */
682 template<typename _Container>
683 _GLIBCXX20_CONSTEXPR
684 inline back_insert_iterator<_Container>
685 back_inserter(_Container& __x)
686 { return back_insert_iterator<_Container>(__x); }
687
688 /**
689 * @brief Turns assignment into insertion.
690 *
691 * These are output iterators, constructed from a container-of-T.
692 * Assigning a T to the iterator prepends it to the container using
693 * push_front.
694 *
695 * Tip: Using the front_inserter function to create these iterators can
696 * save typing.
697 */
698 template<typename _Container>
699 class front_insert_iterator
700 : public iterator<output_iterator_tag, void, void, void, void>
701 {
702 protected:
703 _Container* container;
704
705 public:
706 /// A nested typedef for the type of whatever container you used.
707 typedef _Container container_type;
708#if __cplusplus > 201703L
709 using difference_type = ptrdiff_t;
710
711 constexpr front_insert_iterator() noexcept : container(nullptr) { }
712#endif
713
714 /// The only way to create this %iterator is with a container.
715 explicit _GLIBCXX20_CONSTEXPR
716 front_insert_iterator(_Container& __x)
717 : container(std::__addressof(__x)) { }
718
719 /**
720 * @param __value An instance of whatever type
721 * container_type::const_reference is; presumably a
722 * reference-to-const T for container<T>.
723 * @return This %iterator, for chained operations.
724 *
725 * This kind of %iterator doesn't really have a @a position in the
726 * container (you can think of the position as being permanently at
727 * the front, if you like). Assigning a value to the %iterator will
728 * always prepend the value to the front of the container.
729 */
730#if __cplusplus < 201103L
731 front_insert_iterator&
732 operator=(typename _Container::const_reference __value)
733 {
734 container->push_front(__value);
735 return *this;
736 }
737#else
738 _GLIBCXX20_CONSTEXPR
739 front_insert_iterator&
740 operator=(const typename _Container::value_type& __value)
741 {
742 container->push_front(__value);
743 return *this;
744 }
745
746 _GLIBCXX20_CONSTEXPR
747 front_insert_iterator&
748 operator=(typename _Container::value_type&& __value)
749 {
750 container->push_front(std::move(__value));
751 return *this;
752 }
753#endif
754
755 /// Simply returns *this.
756 _GLIBCXX20_CONSTEXPR
757 front_insert_iterator&
758 operator*()
759 { return *this; }
760
761 /// Simply returns *this. (This %iterator does not @a move.)
762 _GLIBCXX20_CONSTEXPR
763 front_insert_iterator&
764 operator++()
765 { return *this; }
766
767 /// Simply returns *this. (This %iterator does not @a move.)
768 _GLIBCXX20_CONSTEXPR
769 front_insert_iterator
770 operator++(int)
771 { return *this; }
772 };
773
774 /**
775 * @param __x A container of arbitrary type.
776 * @return An instance of front_insert_iterator working on @p __x.
777 *
778 * This wrapper function helps in creating front_insert_iterator instances.
779 * Typing the name of the %iterator requires knowing the precise full
780 * type of the container, which can be tedious and impedes generic
781 * programming. Using this function lets you take advantage of automatic
782 * template parameter deduction, making the compiler match the correct
783 * types for you.
784 */
785 template<typename _Container>
786 _GLIBCXX20_CONSTEXPR
787 inline front_insert_iterator<_Container>
788 front_inserter(_Container& __x)
789 { return front_insert_iterator<_Container>(__x); }
790
791 /**
792 * @brief Turns assignment into insertion.
793 *
794 * These are output iterators, constructed from a container-of-T.
795 * Assigning a T to the iterator inserts it in the container at the
796 * %iterator's position, rather than overwriting the value at that
797 * position.
798 *
799 * (Sequences will actually insert a @e copy of the value before the
800 * %iterator's position.)
801 *
802 * Tip: Using the inserter function to create these iterators can
803 * save typing.
804 */
805 template<typename _Container>
806 class insert_iterator
807 : public iterator<output_iterator_tag, void, void, void, void>
808 {
809#if __cplusplus > 201703L && defined __cpp_lib_concepts
810 using _Iter = std::__detail::__range_iter_t<_Container>;
811
812 protected:
813 _Container* container = nullptr;
814 _Iter iter = _Iter();
815#else
816 typedef typename _Container::iterator _Iter;
817
818 protected:
819 _Container* container;
820 _Iter iter;
821#endif
822
823 public:
824 /// A nested typedef for the type of whatever container you used.
825 typedef _Container container_type;
826
827#if __cplusplus > 201703L && defined __cpp_lib_concepts
828 using difference_type = ptrdiff_t;
829
830 insert_iterator() = default;
831#endif
832
833 /**
834 * The only way to create this %iterator is with a container and an
835 * initial position (a normal %iterator into the container).
836 */
837 _GLIBCXX20_CONSTEXPR
838 insert_iterator(_Container& __x, _Iter __i)
839 : container(std::__addressof(__x)), iter(__i) {}
840
841 /**
842 * @param __value An instance of whatever type
843 * container_type::const_reference is; presumably a
844 * reference-to-const T for container<T>.
845 * @return This %iterator, for chained operations.
846 *
847 * This kind of %iterator maintains its own position in the
848 * container. Assigning a value to the %iterator will insert the
849 * value into the container at the place before the %iterator.
850 *
851 * The position is maintained such that subsequent assignments will
852 * insert values immediately after one another. For example,
853 * @code
854 * // vector v contains A and Z
855 *
856 * insert_iterator i (v, ++v.begin());
857 * i = 1;
858 * i = 2;
859 * i = 3;
860 *
861 * // vector v contains A, 1, 2, 3, and Z
862 * @endcode
863 */
864#if __cplusplus < 201103L
865 insert_iterator&
866 operator=(typename _Container::const_reference __value)
867 {
868 iter = container->insert(iter, __value);
869 ++iter;
870 return *this;
871 }
872#else
873 _GLIBCXX20_CONSTEXPR
874 insert_iterator&
875 operator=(const typename _Container::value_type& __value)
876 {
877 iter = container->insert(iter, __value);
878 ++iter;
879 return *this;
880 }
881
882 _GLIBCXX20_CONSTEXPR
883 insert_iterator&
884 operator=(typename _Container::value_type&& __value)
885 {
886 iter = container->insert(iter, std::move(__value));
887 ++iter;
888 return *this;
889 }
890#endif
891
892 /// Simply returns *this.
893 _GLIBCXX20_CONSTEXPR
894 insert_iterator&
895 operator*()
896 { return *this; }
897
898 /// Simply returns *this. (This %iterator does not @a move.)
899 _GLIBCXX20_CONSTEXPR
900 insert_iterator&
901 operator++()
902 { return *this; }
903
904 /// Simply returns *this. (This %iterator does not @a move.)
905 _GLIBCXX20_CONSTEXPR
906 insert_iterator&
907 operator++(int)
908 { return *this; }
909 };
910
911 /**
912 * @param __x A container of arbitrary type.
913 * @param __i An iterator into the container.
914 * @return An instance of insert_iterator working on @p __x.
915 *
916 * This wrapper function helps in creating insert_iterator instances.
917 * Typing the name of the %iterator requires knowing the precise full
918 * type of the container, which can be tedious and impedes generic
919 * programming. Using this function lets you take advantage of automatic
920 * template parameter deduction, making the compiler match the correct
921 * types for you.
922 */
923#if __cplusplus > 201703L && defined __cpp_lib_concepts
924 template<typename _Container>
925 constexpr insert_iterator<_Container>
926 inserter(_Container& __x, std::__detail::__range_iter_t<_Container> __i)
927 { return insert_iterator<_Container>(__x, __i); }
928#else
929 template<typename _Container, typename _Iterator>
930 inline insert_iterator<_Container>
931 inserter(_Container& __x, _Iterator __i)
932 {
933 return insert_iterator<_Container>(__x,
934 typename _Container::iterator(__i));
935 }
936#endif
937
938 // @} group iterators
939
940_GLIBCXX_END_NAMESPACE_VERSION
941} // namespace
942
943namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
944{
945_GLIBCXX_BEGIN_NAMESPACE_VERSION
946
947 // This iterator adapter is @a normal in the sense that it does not
948 // change the semantics of any of the operators of its iterator
949 // parameter. Its primary purpose is to convert an iterator that is
950 // not a class, e.g. a pointer, into an iterator that is a class.
951 // The _Container parameter exists solely so that different containers
952 // using this template can instantiate different types, even if the
953 // _Iterator parameter is the same.
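
A hedged illustration of the _Container tag's effect, assuming the libstdc++ containers instantiate __normal_iterator as this header describes (the distinctness is not guaranteed by the standard):

#include <string>
#include <type_traits>
#include <vector>

int main() {
  // Both iterate over char, and in libstdc++ both wrap a char pointer,
  // yet the _Container parameter keeps the iterator types distinct.
  static_assert(!std::is_same<std::vector<char>::iterator,
                              std::string::iterator>::value,
                "distinct iterator types despite the same element type");
  return 0;
}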
954 template<typename _Iterator, typename _Container>
955 class __normal_iterator
956 {
957 protected:
958 _Iterator _M_current;
959
960 typedef std::iterator_traits<_Iterator> __traits_type;
961
962 public:
963 typedef _Iterator iterator_type;
964 typedef typename __traits_type::iterator_category iterator_category;
965 typedef typename __traits_type::value_type value_type;
966 typedef typename __traits_type::difference_type difference_type;
967 typedef typename __traits_type::reference reference;
968 typedef typename __traits_type::pointer pointer;
969
970#if __cplusplus > 201703L && __cpp_lib_concepts
971 using iterator_concept = std::__detail::__iter_concept<_Iterator>;
972#endif
973
974 _GLIBCXX_CONSTEXPR __normal_iterator() _GLIBCXX_NOEXCEPT
975 : _M_current(_Iterator()) { }
976
977 explicit _GLIBCXX20_CONSTEXPR
978 __normal_iterator(const _Iterator& __i) _GLIBCXX_NOEXCEPT
979 : _M_current(__i) { }
980
981 // Allow iterator to const_iterator conversion
982 template<typename _Iter>
983 _GLIBCXX20_CONSTEXPR
984 __normal_iterator(const __normal_iterator<_Iter,
985 typename __enable_if<
986 (std::__are_same<_Iter, typename _Container::pointer>::__value),
987 _Container>::__type>& __i) _GLIBCXX_NOEXCEPT
988 : _M_current(__i.base()) { }
989
990 // Forward iterator requirements
991 _GLIBCXX20_CONSTEXPR
992 reference
993 operator*() const _GLIBCXX_NOEXCEPT
994 { return *_M_current; }
995
996 _GLIBCXX20_CONSTEXPR
997 pointer
998 operator->() const _GLIBCXX_NOEXCEPT
999 { return _M_current; }
1000
1001 _GLIBCXX20_CONSTEXPR
1002 __normal_iterator&
1003 operator++() _GLIBCXX_NOEXCEPT
1004 {
1005 ++_M_current;
1006 return *this;
1007 }
1008
1009 _GLIBCXX20_CONSTEXPR
1010 __normal_iterator
1011 operator++(int) _GLIBCXX_NOEXCEPT
1012 { return __normal_iterator(_M_current++); }
1013
1014 // Bidirectional iterator requirements
1015 _GLIBCXX20_CONSTEXPR
1016 __normal_iterator&
1017 operator--() _GLIBCXX_NOEXCEPT
1018 {
1019 --_M_current;
1020 return *this;
1021 }
1022
1023 _GLIBCXX20_CONSTEXPR
1024 __normal_iterator
1025 operator--(int) _GLIBCXX_NOEXCEPT
1026 { return __normal_iterator(_M_current--); }
1027
1028 // Random access iterator requirements
1029 _GLIBCXX20_CONSTEXPR
1030 reference
1031 operator[](difference_type __n) const _GLIBCXX_NOEXCEPT
1032 { return _M_current[__n]; }
1033
1034 _GLIBCXX20_CONSTEXPR
1035 __normal_iterator&
1036 operator+=(difference_type __n) _GLIBCXX_NOEXCEPT
1037 { _M_current += __n; return *this; }
1038
1039 _GLIBCXX20_CONSTEXPR
1040 __normal_iterator
1041 operator+(difference_type __n) const _GLIBCXX_NOEXCEPT
1042 { return __normal_iterator(_M_current + __n); }
1043
1044 _GLIBCXX20_CONSTEXPR
1045 __normal_iterator&
1046 operator-=(difference_type __n) _GLIBCXX_NOEXCEPT
1047 { _M_current -= __n; return *this; }
1048
1049 _GLIBCXX20_CONSTEXPR
1050 __normal_iterator
1051 operator-(difference_type __n) const _GLIBCXX_NOEXCEPT
1052 { return __normal_iterator(_M_current - __n); }
1053
1054 _GLIBCXX20_CONSTEXPR
1055 const _Iterator&
1056 base() const _GLIBCXX_NOEXCEPT
1057 { return _M_current; }
1058 };
1059
1060 // Note: In what follows, the left- and right-hand-side iterators are
1061 // allowed to vary in types (conceptually in cv-qualification) so that
1062 // comparison between cv-qualified and non-cv-qualified iterators be
1063 // valid. However, the greedy and unfriendly operators in std::rel_ops
1064 // will make overload resolution ambiguous (when in scope) if we don't
1065 // provide overloads whose operands are of the same type. Can someone
1066 // remind me what generic programming is about? -- Gaby
1067
1068#if __cpp_lib_three_way_comparison
1069 template<typename _IteratorL, typename _IteratorR, typename _Container>
1070 requires requires (_IteratorL __lhs, _IteratorR __rhs)
1071 { { __lhs == __rhs } -> std::convertible_to<bool>; }
1072 constexpr bool
1073 operator==(const __normal_iterator<_IteratorL, _Container>& __lhs,
1074 const __normal_iterator<_IteratorR, _Container>& __rhs)
1075 noexcept(noexcept(__lhs.base() == __rhs.base()))
1076 { return __lhs.base() == __rhs.base(); }
1077
1078 template<typename _IteratorL, typename _IteratorR, typename _Container>
1079 constexpr std::__detail::__synth3way_t<_IteratorR, _IteratorL>
1080 operator<=>(const __normal_iterator<_IteratorL, _Container>& __lhs,
1081 const __normal_iterator<_IteratorR, _Container>& __rhs)
1082 noexcept(noexcept(std::__detail::__synth3way(__lhs.base(), __rhs.base())))
1083 { return std::__detail::__synth3way(__lhs.base(), __rhs.base()); }
1084#else
1085 // Forward iterator requirements
1086 template<typename _IteratorL, typename _IteratorR, typename _Container>
1087 _GLIBCXX20_CONSTEXPR
1088 inline bool
1089 operator==(const __normal_iterator<_IteratorL, _Container>& __lhs,
1090 const __normal_iterator<_IteratorR, _Container>& __rhs)
1091 _GLIBCXX_NOEXCEPT
1092 { return __lhs.base() == __rhs.base(); }
1093
1094 template<typename _Iterator, typename _Container>
1095 _GLIBCXX20_CONSTEXPR
1096 inline bool
1097 operator==(const __normal_iterator<_Iterator, _Container>& __lhs,
1098 const __normal_iterator<_Iterator, _Container>& __rhs)
1099 _GLIBCXX_NOEXCEPT
1100 { return __lhs.base() == __rhs.base(); }
1101
1102 template<typename _IteratorL, typename _IteratorR, typename _Container>
1103 _GLIBCXX20_CONSTEXPR
1104 inline bool
1105 operator!=(const __normal_iterator<_IteratorL, _Container>& __lhs,
1106 const __normal_iterator<_IteratorR, _Container>& __rhs)
1107 _GLIBCXX_NOEXCEPT
1108 { return __lhs.base() != __rhs.base(); }
1109
1110 template<typename _Iterator, typename _Container>
1111 _GLIBCXX20_CONSTEXPR
1112 inline bool
1113 operator!=(const __normal_iterator<_Iterator, _Container>& __lhs,
1114 const __normal_iterator<_Iterator, _Container>& __rhs)
1115 _GLIBCXX_NOEXCEPT
1116 { return __lhs.base() != __rhs.base(); }
1117
1118 // Random access iterator requirements
1119 template<typename _IteratorL, typename _IteratorR, typename _Container>
1120 inline bool
1121 operator<(const __normal_iterator<_IteratorL, _Container>& __lhs,
1122 const __normal_iterator<_IteratorR, _Container>& __rhs)
1123 _GLIBCXX_NOEXCEPT
1124 { return __lhs.base() < __rhs.base(); }
1125
1126 template<typename _Iterator, typename _Container>
1127 _GLIBCXX20_CONSTEXPR
1128 inline bool
1129 operator<(const __normal_iterator<_Iterator, _Container>& __lhs,
1130 const __normal_iterator<_Iterator, _Container>& __rhs)
1131 _GLIBCXX_NOEXCEPT
1132 { return __lhs.base() < __rhs.base(); }
1133
1134 template<typename _IteratorL, typename _IteratorR, typename _Container>
1135 inline bool
1136 operator>(const __normal_iterator<_IteratorL, _Container>& __lhs,
1137 const __normal_iterator<_IteratorR, _Container>& __rhs)
1138 _GLIBCXX_NOEXCEPT
1139 { return __lhs.base() > __rhs.base(); }
1140
1141 template<typename _Iterator, typename _Container>
1142 _GLIBCXX20_CONSTEXPR
1143 inline bool
1144 operator>(const __normal_iterator<_Iterator, _Container>& __lhs,
1145 const __normal_iterator<_Iterator, _Container>& __rhs)
1146 _GLIBCXX_NOEXCEPT
1147 { return __lhs.base() > __rhs.base(); }
1148
1149 template<typename _IteratorL, typename _IteratorR, typename _Container>
1150 inline bool
1151 operator<=(const __normal_iterator<_IteratorL, _Container>& __lhs,
1152 const __normal_iterator<_IteratorR, _Container>& __rhs)
1153 _GLIBCXX_NOEXCEPT
1154 { return __lhs.base() <= __rhs.base(); }
1155
1156 template<typename _Iterator, typename _Container>
1157 _GLIBCXX20_CONSTEXPR
1158 inline bool
1159 operator<=(const __normal_iterator<_Iterator, _Container>& __lhs,
1160 const __normal_iterator<_Iterator, _Container>& __rhs)
1161 _GLIBCXX_NOEXCEPT
1162 { return __lhs.base() <= __rhs.base(); }
1163
1164 template<typename _IteratorL, typename _IteratorR, typename _Container>
1165 inline bool
1166 operator>=(const __normal_iterator<_IteratorL, _Container>& __lhs,
1167 const __normal_iterator<_IteratorR, _Container>& __rhs)
1168 _GLIBCXX_NOEXCEPT
1169 { return __lhs.base() >= __rhs.base(); }
1170
1171 template<typename _Iterator, typename _Container>
1172 _GLIBCXX20_CONSTEXPR
1173 inline bool
1174 operator>=(const __normal_iterator<_Iterator, _Container>& __lhs,
1175 const __normal_iterator<_Iterator, _Container>& __rhs)
1176 _GLIBCXX_NOEXCEPT
1177 { return __lhs.base() >= __rhs.base(); }
1178#endif // three-way comparison
1179
1180 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1181 // According to the resolution of DR179 not only the various comparison
1182 // operators but also operator- must accept mixed iterator/const_iterator
1183 // parameters.
1184 template<typename _IteratorL, typename _IteratorR, typename _Container>
1185#if __cplusplus >= 201103L
1186 // DR 685.
1187 _GLIBCXX20_CONSTEXPR
1188 inline auto
1189 operator-(const __normal_iterator<_IteratorL, _Container>& __lhs,
1190 const __normal_iterator<_IteratorR, _Container>& __rhs) noexcept
1191 -> decltype(__lhs.base() - __rhs.base())
1192#else
1193 inline typename __normal_iterator<_IteratorL, _Container>::difference_type
1194 operator-(const __normal_iterator<_IteratorL, _Container>& __lhs,
1195 const __normal_iterator<_IteratorR, _Container>& __rhs)
1196#endif
1197 { return __lhs.base() - __rhs.base(); }
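// [Editorial sketch, not part of the header] Per DR179, the mixed
// operator- above makes iterator differences across constness valid:
//
//   #include <vector>
//   std::vector<int> v{1, 2, 3};
//   auto n = v.cend() - v.begin();  // const_iterator - iterator, n == 3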
1198
1199 template<typename _Iterator, typename _Container>
1200 _GLIBCXX20_CONSTEXPR
1201 inline typename __normal_iterator<_Iterator, _Container>::difference_type
1202 operator-(const __normal_iterator<_Iterator, _Container>& __lhs,
1203 const __normal_iterator<_Iterator, _Container>& __rhs)
1204 _GLIBCXX_NOEXCEPT
1205 { return __lhs.base() - __rhs.base(); }
1206
1207 template<typename _Iterator, typename _Container>
1208 _GLIBCXX20_CONSTEXPR
1209 inline __normal_iterator<_Iterator, _Container>
1210 operator+(typename __normal_iterator<_Iterator, _Container>::difference_type
1211 __n, const __normal_iterator<_Iterator, _Container>& __i)
1212 _GLIBCXX_NOEXCEPT
1213 { return __normal_iterator<_Iterator, _Container>(__i.base() + __n); }
1214
1215_GLIBCXX_END_NAMESPACE_VERSION
1216} // namespace
1217
1218namespace std _GLIBCXX_VISIBILITY(default)
1219{
1220_GLIBCXX_BEGIN_NAMESPACE_VERSION
1221
1222 template<typename _Iterator, typename _Container>
1223 _GLIBCXX20_CONSTEXPR
1224 _Iterator
1225 __niter_base(__gnu_cxx::__normal_iterator<_Iterator, _Container> __it)
1226 _GLIBCXX_NOEXCEPT_IF(std::is_nothrow_copy_constructible<_Iterator>::value)
1227 { return __it.base(); }
1228
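// [Editorial note, not part of the header] __niter_base unwraps a
// __normal_iterator to its underlying iterator so that algorithms can
// dispatch on raw pointers, e.g.:
//
//   #include <vector>
//   std::vector<int> v{1, 2, 3};
//   int* p = std::__niter_base(v.begin());  // plain pointer to v[0]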
1229#if __cplusplus >= 201103L
1230 /**
1231 * @addtogroup iterators
1232 * @{
1233 */
1234
1235#if __cplusplus > 201703L && __cpp_lib_concepts
1236 template<semiregular _Sent>
1237 class move_sentinel
1238 {
1239 public:
1240 constexpr
1241 move_sentinel()
1242 noexcept(is_nothrow_default_constructible_v<_Sent>)
1243 : _M_last() { }
1244
1245 constexpr explicit
1246 move_sentinel(_Sent __s)
1247 noexcept(is_nothrow_move_constructible_v<_Sent>)
1248 : _M_last(std::move(__s)) { }
1249
1250 template<typename _S2> requires convertible_to<const _S2&, _Sent>
1251 constexpr
1252 move_sentinel(const move_sentinel<_S2>& __s)
1253 noexcept(is_nothrow_constructible_v<_Sent, const _S2&>)
1254 : _M_last(__s.base())
1255 { }
1256
1257 template<typename _S2> requires assignable_from<_Sent&, const _S2&>
1258 constexpr move_sentinel&
1259 operator=(const move_sentinel<_S2>& __s)
1260 noexcept(is_nothrow_assignable_v<_Sent, const _S2&>)
1261 {
1262 _M_last = __s.base();
1263 return *this;
1264 }
1265
1266 constexpr _Sent
1267 base() const
1268 noexcept(is_nothrow_copy_constructible_v<_Sent>)
1269 { return _M_last; }
1270
1271 private:
1272 _Sent _M_last;
1273 };
1274#endif // C++20
1275
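// [Editorial sketch, not part of the header; C++20 only] move_sentinel
// bounds a range of move_iterators, using the operator== defined inside
// move_iterator further below:
//
//   #include <iterator>
//   #include <string>
//   #include <vector>
//   std::vector<std::string> v{"a", "b"};
//   auto first = std::move_iterator(v.begin());
//   auto last  = std::move_sentinel(v.end());
//   for (; first != last; ++first)
//     std::string s = *first;  // moves each element out of v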
1276 // 24.4.3 Move iterators
1277 /**
1278 * Class template move_iterator is an iterator adapter with the same
1279 * behavior as the underlying iterator except that its dereference
1280 * operator implicitly converts the value returned by the underlying
1281 * iterator's dereference operator to an rvalue reference. Some
1282 * generic algorithms can be called with move iterators to replace
1283 * copying with moving.
1284 */
1285 template<typename _Iterator>
1286 class move_iterator
1287 {
1288 _Iterator _M_current;
1289
1290 using __traits_type = iterator_traits<_Iterator>;
1291#if __cplusplus > 201703L && __cpp_lib_concepts
1292 using __base_cat = typename __traits_type::iterator_category;
1293#else
1294 using __base_ref = typename __traits_type::reference;
1295#endif
1296
1297 public:
1298 using iterator_type = _Iterator;
1299
1300#if __cplusplus > 201703L && __cpp_lib_concepts
1301 using iterator_concept = input_iterator_tag;
1302 using iterator_category
1303 = __detail::__clamp_iter_cat<__base_cat, random_access_iterator_tag>;
1304 using value_type = iter_value_t<_Iterator>;
1305 using difference_type = iter_difference_t<_Iterator>;
1306 using pointer = _Iterator;
1307 using reference = iter_rvalue_reference_t<_Iterator>;
1308#else
1309 typedef typename __traits_type::iterator_category iterator_category;
1310 typedef typename __traits_type::value_type value_type;
1311 typedef typename __traits_type::difference_type difference_type;
1312 // NB: DR 680.
1313 typedef _Iterator pointer;
1314 // _GLIBCXX_RESOLVE_LIB_DEFECTS
1315 // 2106. move_iterator wrapping iterators returning prvalues
1316 typedef typename conditional<is_reference<__base_ref>::value,
1317 typename remove_reference<__base_ref>::type&&,
1318 __base_ref>::type reference;
1319#endif
1320
1321 _GLIBCXX17_CONSTEXPR
1322 move_iterator()
1323 : _M_current() { }
1324
1325 explicit _GLIBCXX17_CONSTEXPR
1326 move_iterator(iterator_type __i)
1327 : _M_current(std::move(__i)) { }
1328
1329 template<typename _Iter>
1330 _GLIBCXX17_CONSTEXPR
1331 move_iterator(const move_iterator<_Iter>& __i)
1332 : _M_current(__i.base()) { }
1333
1334#if __cplusplus <= 201703L
1335 _GLIBCXX17_CONSTEXPR iterator_type
1336 base() const
1337 { return _M_current; }
1338#else
1339 constexpr iterator_type
1340 base() const &
1341#if __cpp_lib_concepts
1342 requires copy_constructible<iterator_type>
1343#endif
1344 { return _M_current; }
1345
1346 constexpr iterator_type
1347 base() &&
1348 { return std::move(_M_current); }
1349#endif
1350
1351 _GLIBCXX17_CONSTEXPR reference
1352 operator*() const
1353#if __cplusplus > 201703L && __cpp_lib_concepts
1354 { return ranges::iter_move(_M_current); }
1355#else
1356 { return static_cast<reference>(*_M_current); }
1357#endif
1358
1359 _GLIBCXX17_CONSTEXPR pointer
1360 operator->() const
1361 { return _M_current; }
1362
1363 _GLIBCXX17_CONSTEXPR move_iterator&
1364 operator++()
1365 {
1366 ++_M_current;
1367 return *this;
1368 }
1369
1370 _GLIBCXX17_CONSTEXPR move_iterator
1371 operator++(int)
1372 {
1373 move_iterator __tmp = *this;
1374 ++_M_current;
1375 return __tmp;
1376 }
1377
1378#if __cpp_lib_concepts
1379 constexpr void
1380 operator++(int) requires (!forward_iterator<_Iterator>)
1381 { ++_M_current; }
1382#endif
1383
1384 _GLIBCXX17_CONSTEXPR move_iterator&
1385 operator--()
1386 {
1387 --_M_current;
1388 return *this;
1389 }
1390
1391 _GLIBCXX17_CONSTEXPR move_iterator
1392 operator--(int)
1393 {
1394 move_iterator __tmp = *this;
1395 --_M_current;
1396 return __tmp;
1397 }
1398
1399 _GLIBCXX17_CONSTEXPR move_iterator
1400 operator+(difference_type __n) const
1401 { return move_iterator(_M_current + __n); }
1402
1403 _GLIBCXX17_CONSTEXPR move_iterator&
1404 operator+=(difference_type __n)
1405 {
1406 _M_current += __n;
1407 return *this;
1408 }
1409
1410 _GLIBCXX17_CONSTEXPR move_iterator
1411 operator-(difference_type __n) const
1412 { return move_iterator(_M_current - __n); }
1413
1414 _GLIBCXX17_CONSTEXPR move_iterator&
1415 operator-=(difference_type __n)
1416 {
1417 _M_current -= __n;
1418 return *this;
1419 }
1420
1421 _GLIBCXX17_CONSTEXPR reference
1422 operator[](difference_type __n) const
1423#if __cplusplus > 201703L && __cpp_lib_concepts
1424 { return ranges::iter_move(_M_current + __n); }
1425#else
1426 { return std::move(_M_current[__n]); }
1427#endif
1428
1429#if __cplusplus > 201703L && __cpp_lib_concepts
1430 template<sentinel_for<_Iterator> _Sent>
1431 friend constexpr bool
1432 operator==(const move_iterator& __x, const move_sentinel<_Sent>& __y)
1433 { return __x.base() == __y.base(); }
1434
1435 template<sized_sentinel_for<_Iterator> _Sent>
1436 friend constexpr iter_difference_t<_Iterator>
1437 operator-(const move_sentinel<_Sent>& __x, const move_iterator& __y)
1438 { return __x.base() - __y.base(); }
1439
1440 template<sized_sentinel_for<_Iterator> _Sent>
1441 friend constexpr iter_difference_t<_Iterator>
1442 operator-(const move_iterator& __x, const move_sentinel<_Sent>& __y)
1443 { return __x.base() - __y.base(); }
1444
1445 friend constexpr iter_rvalue_reference_t<_Iterator>
1446 iter_move(const move_iterator& __i)
1447 noexcept(noexcept(ranges::iter_move(__i._M_current)))
1448 { return ranges::iter_move(__i._M_current); }
1449
1450 template<indirectly_swappable<_Iterator> _Iter2>
1451 friend constexpr void
1452 iter_swap(const move_iterator& __x, const move_iterator<_Iter2>& __y)
1453 noexcept(noexcept(ranges::iter_swap(__x._M_current, __y._M_current)))
1454 { return ranges::iter_swap(__x._M_current, __y._M_current); }
1455#endif // C++20
1456 };
1457
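// [Editorial sketch, not part of the header] Typical move_iterator use,
// replacing copies with moves in a generic algorithm:
//
//   #include <algorithm>
//   #include <string>
//   #include <vector>
//   std::vector<std::string> src{"a", "b"}, dst(2);
//   std::copy(std::make_move_iterator(src.begin()),
//             std::make_move_iterator(src.end()),
//             dst.begin());  // elements are moved, not copied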
1458 template<typename _IteratorL, typename _IteratorR>
1459 inline _GLIBCXX17_CONSTEXPR bool
1460 operator==(const move_iterator<_IteratorL>& __x,
1461 const move_iterator<_IteratorR>& __y)
1462#if __cplusplus > 201703L && __cpp_lib_concepts
1463 requires requires { { __x.base() == __y.base() } -> convertible_to<bool>; }
1464#endif
1465 { return __x.base() == __y.base(); }
1466
1467#if __cpp_lib_three_way_comparison
1468 template<typename _IteratorL,
1469 three_way_comparable_with<_IteratorL> _IteratorR>
1470 constexpr compare_three_way_result_t<_IteratorL, _IteratorR>
1471 operator<=>(const move_iterator<_IteratorL>& __x,
1472 const move_iterator<_IteratorR>& __y)
1473 { return __x.base() <=> __y.base(); }
1474#else
1475 template<typename _IteratorL, typename _IteratorR>
1476 inline _GLIBCXX17_CONSTEXPR bool
1477 operator!=(const move_iterator<_IteratorL>& __x,
1478 const move_iterator<_IteratorR>& __y)
1479 { return !(__x == __y); }
1480#endif
1481
1482 template<typename _IteratorL, typename _IteratorR>
1483 inline _GLIBCXX17_CONSTEXPR bool
1484 operator<(const move_iterator<_IteratorL>& __x,
1485 const move_iterator<_IteratorR>& __y)
1486#if __cplusplus > 201703L && __cpp_lib_concepts
1487 requires requires { { __x.base() < __y.base() } -> convertible_to<bool>; }
1488#endif
1489 { return __x.base() < __y.base(); }
1490
1491 template<typename _IteratorL, typename _IteratorR>
1492 inline _GLIBCXX17_CONSTEXPR bool
1493 operator<=(const move_iterator<_IteratorL>& __x,
1494 const move_iterator<_IteratorR>& __y)
1495#if __cplusplus > 201703L && __cpp_lib_concepts
1496 requires requires { { __y.base() < __x.base() } -> convertible_to<bool>; }
1497#endif
1498 { return !(__y < __x); }
1499
1500 template<typename _IteratorL, typename _IteratorR>
1501 inline _GLIBCXX17_CONSTEXPR bool
1502 operator>(const move_iterator<_IteratorL>& __x,
1503 const move_iterator<_IteratorR>& __y)
1504#if __cplusplus > 201703L && __cpp_lib_concepts
1505 requires requires { { __y.base() < __x.base() } -> convertible_to<bool>; }
1506#endif
1507 { return __y < __x; }
1508
1509 template<typename _IteratorL, typename _IteratorR>
1510 inline _GLIBCXX17_CONSTEXPR bool
1511 operator>=(const move_iterator<_IteratorL>& __x,
1512 const move_iterator<_IteratorR>& __y)
1513#if __cplusplus > 201703L && __cpp_lib_concepts
1514 requires requires { { __x.base() < __y.base() } -> convertible_to<bool>; }
1515#endif
1516 { return !(__x < __y); }
1517
1518#if ! (__cplusplus > 201703L && __cpp_lib_concepts)
1519 // Note: See __normal_iterator operators note from Gaby to understand
1520 // why we have these extra overloads for some move_iterator operators.
1521
1522 // These extra overloads are not needed in C++20, because the ones above
1523 // are constrained with a requires-clause and so overload resolution will
1524 // prefer them to greedy unconstrained function templates.
1525
1526 template<typename _Iterator>
1527 inline _GLIBCXX17_CONSTEXPR bool
1528 operator==(const move_iterator<_Iterator>& __x,
1529 const move_iterator<_Iterator>& __y)
1530 { return __x.base() == __y.base(); }
1531
1532 template<typename _Iterator>
1533 inline _GLIBCXX17_CONSTEXPR bool
1534 operator!=(const move_iterator<_Iterator>& __x,
1535 const move_iterator<_Iterator>& __y)
1536 { return !(__x == __y); }
1537
1538 template<typename _Iterator>
1539 inline _GLIBCXX17_CONSTEXPR bool
1540 operator<(const move_iterator<_Iterator>& __x,
1541 const move_iterator<_Iterator>& __y)
1542 { return __x.base() < __y.base(); }
1543
1544 template<typename _Iterator>
1545 inline _GLIBCXX17_CONSTEXPR bool
1546 operator<=(const move_iterator<_Iterator>& __x,
1547 const move_iterator<_Iterator>& __y)
1548 { return !(__y < __x); }
1549
1550 template<typename _Iterator>
1551 inline _GLIBCXX17_CONSTEXPR bool
1552 operator>(const move_iterator<_Iterator>& __x,
1553 const move_iterator<_Iterator>& __y)
1554 { return __y < __x; }
1555
1556 template<typename _Iterator>
1557 inline _GLIBCXX17_CONSTEXPR bool
1558 operator>=(const move_iterator<_Iterator>& __x,
1559 const move_iterator<_Iterator>& __y)
1560 { return !(__x < __y); }
1561#endif // ! C++20
1562
1563 // DR 685.
1564 template<typename _IteratorL, typename _IteratorR>
1565 inline _GLIBCXX17_CONSTEXPR auto
1566 operator-(const move_iterator<_IteratorL>& __x,
1567 const move_iterator<_IteratorR>& __y)
1568 -> decltype(__x.base() - __y.base())
1569 { return __x.base() - __y.base(); }
1570
1571 template<typename _Iterator>
1572 inline _GLIBCXX17_CONSTEXPR move_iterator<_Iterator>
1573 operator+(typename move_iterator<_Iterator>::difference_type __n,
1574 const move_iterator<_Iterator>& __x)
1575 { return __x + __n; }
1576
1577 template<typename _Iterator>
1578 inline _GLIBCXX17_CONSTEXPR move_iterator<_Iterator>
1579 make_move_iterator(_Iterator __i)
1580 { return move_iterator<_Iterator>(std::move(__i)); }
1581
1582 template<typename _Iterator, typename _ReturnType
1583 = typename conditional<__move_if_noexcept_cond
1584 <typename iterator_traits<_Iterator>::value_type>::value,
1585 _Iterator, move_iterator<_Iterator>>::type>
1586 inline _GLIBCXX17_CONSTEXPR _ReturnType
1587 __make_move_if_noexcept_iterator(_Iterator __i)
1588 { return _ReturnType(__i); }
1589
1590 // Overload for pointers that matches std::move_if_noexcept more closely,
1591 // returning a constant iterator when we don't want to move.
1592 template<typename _Tp, typename _ReturnType
1593 = typename conditional<__move_if_noexcept_cond<_Tp>::value,
1594 const _Tp*, move_iterator<_Tp*>>::type>
1595 inline _GLIBCXX17_CONSTEXPR _ReturnType
1596 __make_move_if_noexcept_iterator(_Tp* __i)
1597 { return _ReturnType(__i); }
1598
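// [Editorial sketch, not part of the header] The pointer overload above
// degrades to a copying (const) iterator when moving could throw; Legacy
// is a hypothetical type with a potentially-throwing move constructor:
//
//   struct Legacy
//   {
//     Legacy() = default;
//     Legacy(const Legacy&) { }
//     Legacy(Legacy&&) { }  // not noexcept
//   };
//   Legacy buf[4];
//   auto it = std::__make_move_if_noexcept_iterator(buf);
//   // decltype(it) is const Legacy*: algorithms will copy, not move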
1599#if __cplusplus > 201703L && __cpp_lib_concepts
1600 // [iterators.common] Common iterators
1601
1602 namespace __detail
1603 {
1604 template<typename _It>
1605 concept __common_iter_has_arrow = indirectly_readable<const _It>
1606 && (requires(const _It& __it) { __it.operator->(); }
1607 || is_reference_v<iter_reference_t<_It>>
1608 || constructible_from<iter_value_t<_It>, iter_reference_t<_It>>);
1609
1610 } // namespace __detail
1611
1612 /// An iterator/sentinel adaptor for representing a non-common range.
1613 template<input_or_output_iterator _It, sentinel_for<_It> _Sent>
1614 requires (!same_as<_It, _Sent>) && copyable<_It>
1615 class common_iterator
1616 {
1617 template<typename _Tp, typename _Up>
1618 static constexpr bool
1619 _S_noexcept1()
1620 {
1621 if constexpr (is_trivially_default_constructible_v<_Tp>)
1622 return is_nothrow_assignable_v<_Tp, _Up>;
1623 else
1624 return is_nothrow_constructible_v<_Tp, _Up>;
1625 }
1626
1627 template<typename _It2, typename _Sent2>
1628 static constexpr bool
1629 _S_noexcept()
1630 { return _S_noexcept1<_It, _It2>() && _S_noexcept1<_Sent, _Sent2>(); }
1631
1632 class _Proxy
1633 {
1634 iter_value_t<_It> _M_keep;
1635
1636 _Proxy(iter_reference_t<_It>&& __x)
1637 : _M_keep(std::move(__x)) { }
1638
1639 friend class common_iterator;
1640
1641 public:
1642 const iter_value_t<_It>*
1643 operator->() const
1644 { return std::__addressof(_M_keep); }
1645 };
1646
1647 public:
1648 constexpr
1649 common_iterator()
1650 noexcept(is_nothrow_default_constructible_v<_It>)
1651 : _M_it(), _M_index(0)
1652 { }
1653
1654 constexpr
1655 common_iterator(_It __i)
1656 noexcept(is_nothrow_move_constructible_v<_It>)
1657 : _M_it(std::move(__i)), _M_index(0)
1658 { }
1659
1660 constexpr
1661 common_iterator(_Sent __s)
1662 noexcept(is_nothrow_move_constructible_v<_Sent>)
1663 : _M_sent(std::move(__s)), _M_index(1)
1664 { }
1665
1666 template<typename _It2, typename _Sent2>
1667 requires convertible_to<const _It2&, _It>
1668 && convertible_to<const _Sent2&, _Sent>
1669 constexpr
1670 common_iterator(const common_iterator<_It2, _Sent2>& __x)
1671 noexcept(_S_noexcept<const _It2&, const _Sent2&>())
1672 : _M_valueless(), _M_index(__x._M_index)
1673 {
1674 if (_M_index == 0)
1675 {
1676 if constexpr (is_trivially_default_constructible_v<_It>)
1677 _M_it = std::move(__x._M_it);
1678 else
1679 ::new((void*)std::__addressof(_M_it)) _It(__x._M_it);
1680 }
1681 else if (_M_index == 1)
1682 {
1683 if constexpr (is_trivially_default_constructible_v<_Sent>)
1684 _M_sent = std::move(__x._M_sent);
1685 else
1686 ::new((void*)std::__addressof(_M_sent)) _Sent(__x._M_sent);
1687 }
1688 }
1689
1690 constexpr
1691 common_iterator(const common_iterator& __x)
1692 noexcept(_S_noexcept<const _It&, const _Sent&>())
1693 : _M_valueless(), _M_index(__x._M_index)
1694 {
1695 if (_M_index == 0)
1696 {
1697 if constexpr (is_trivially_default_constructible_v<_It>)
1698 _M_it = std::move(__x._M_it);
1699 else
1700 ::new((void*)std::__addressof(_M_it)) _It(__x._M_it);
1701 }
1702 else if (_M_index == 1)
1703 {
1704 if constexpr (is_trivially_default_constructible_v<_Sent>)
1705 _M_sent = std::move(__x._M_sent);
1706 else
1707 ::new((void*)std::__addressof(_M_sent)) _Sent(__x._M_sent);
1708 }
1709 }
1710
1711 common_iterator&
1712 operator=(const common_iterator& __x)
1713 noexcept(is_nothrow_copy_assignable_v<_It>
1714 && is_nothrow_copy_assignable_v<_Sent>
1715 && is_nothrow_copy_constructible_v<_It>
1716 && is_nothrow_copy_constructible_v<_Sent>)
1717 {
1718 return this->operator=<_It, _Sent>(__x);
1719 }
1720
1721 template<typename _It2, typename _Sent2>
1722 requires convertible_to<const _It2&, _It>
1723 && convertible_to<const _Sent2&, _Sent>
1724 && assignable_from<_It&, const _It2&>
1725 && assignable_from<_Sent&, const _Sent2&>
1726 common_iterator&
1727 operator=(const common_iterator<_It2, _Sent2>& __x)
1728 noexcept(is_nothrow_constructible_v<_It, const _It2&>
1729 && is_nothrow_constructible_v<_Sent, const _Sent2&>
1730 && is_nothrow_assignable_v<_It, const _It2&>
1731 && is_nothrow_assignable_v<_Sent, const _Sent2&>)
1732 {
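// [Editorial note, not part of the header] The switch key packs
// this->_M_index into bits 2-3 and __x._M_index into bits 0-1, so e.g.
// 0b0100 means "this holds a sentinel, __x holds an iterator"; the
// 0b10xx labels (this object valueless) are also reached by fallthrough
// once the old member has been destroyed.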
1733 switch(_M_index << 2 | __x._M_index)
1734 {
1735 case 0b0000:
1736 _M_it = __x._M_it;
1737 break;
1738 case 0b0101:
1739 _M_sent = __x._M_sent;
1740 break;
1741 case 0b0001:
1742 _M_it.~_It();
1743 _M_index = -1;
1744 [[fallthrough]];
1745 case 0b1001:
1746 ::new((void*)std::__addressof(_M_sent)) _Sent(__x._M_sent);
1747 _M_index = 1;
1748 break;
1749 case 0b0100:
1750 _M_sent.~_Sent();
1751 _M_index = -1;
1752 [[fallthrough]];
1753 case 0b1000:
1754 ::new((void*)std::__addressof(_M_it)) _It(__x._M_it);
1755 _M_index = 0;
1756 break;
1757 default:
1758 __glibcxx_assert(__x._M_has_value());
1759 __builtin_unreachable();
1760 }
1761 return *this;
1762 }
1763
1764 ~common_iterator()
1765 {
1766 switch (_M_index)
1767 {
1768 case 0:
1769 _M_it.~_It();
1770 break;
1771 case 1:
1772 _M_sent.~_Sent();
1773 break;
1774 }
1775 }
1776
1777 decltype(auto)
1778 operator*()
1779 {
1780 __glibcxx_assert(_M_index == 0);
1781 return *_M_it;
1782 }
1783
1784 decltype(auto)
1785 operator*() const requires __detail::__dereferenceable<const _It>
1786 {
1787 __glibcxx_assert(_M_index == 0);
1788 return *_M_it;
1789 }
1790
1791 decltype(auto)
1792 operator->() const requires __detail::__common_iter_has_arrow<_It>
1793 {
1794 __glibcxx_assert(_M_index == 0);
1795 if constexpr (is_pointer_v<_It> || requires { _M_it.operator->(); })
1796 return _M_it;
1797 else if constexpr (is_reference_v<iter_reference_t<_It>>)
1798 {
1799 auto&& __tmp = *_M_it;
1800 return std::__addressof(__tmp);
1801 }
1802 else
1803 return _Proxy{*_M_it};
1804 }
1805
1806 common_iterator&
1807 operator++()
1808 {
1809 __glibcxx_assert(_M_index == 0);
1810 ++_M_it;
1811 return *this;
1812 }
1813
1814 decltype(auto)
1815 operator++(int)
1816 {
1817 __glibcxx_assert(_M_index == 0);
1818 if constexpr (forward_iterator<_It>)
1819 {
1820 common_iterator __tmp = *this;
1821 ++*this;
1822 return __tmp;
1823 }
1824 else
1825 return _M_it++;
1826 }
1827
1828 template<typename _It2, sentinel_for<_It> _Sent2>
1829 requires sentinel_for<_Sent, _It2>
1830 friend bool
1831 operator==(const common_iterator& __x,
1832 const common_iterator<_It2, _Sent2>& __y)
1833 {
1834 switch(__x._M_index << 2 | __y._M_index)
1835 {
1836 case 0b0000:
1837 case 0b0101:
1838 return true;
1839 case 0b0001:
1840 return __x._M_it == __y._M_sent;
1841 case 0b0100:
1842 return __x._M_sent == __y._M_it;
1843 default:
1844 __glibcxx_assert(__x._M_has_value());
1845 __glibcxx_assert(__y._M_has_value());
1846 __builtin_unreachable();
1847 }
1848 }
1849
1850 template<typename _It2, sentinel_for<_It> _Sent2>
1851 requires sentinel_for<_Sent, _It2> && equality_comparable_with<_It, _It2>
1852 friend bool
1853 operator==(const common_iterator& __x,
1854 const common_iterator<_It2, _Sent2>& __y)
1855 {
1856 switch(__x._M_index << 2 | __y._M_index)
1857 {
1858 case 0b0101:
1859 return true;
1860 case 0b0000:
1861 return __x._M_it == __y._M_it;
1862 case 0b0001:
1863 return __x._M_it == __y._M_sent;
1864 case 0b0100:
1865 return __x._M_sent == __y._M_it;
1866 default:
1867 __glibcxx_assert(__x._M_has_value());
1868 __glibcxx_assert(__y._M_has_value());
1869 __builtin_unreachable();
1870 }
1871 }
1872
1873 template<sized_sentinel_for<_It> _It2, sized_sentinel_for<_It> _Sent2>
1874 requires sized_sentinel_for<_Sent, _It2>
1875 friend iter_difference_t<_It2>
1876 operator-(const common_iterator& __x,
1877 const common_iterator<_It2, _Sent2>& __y)
1878 {
1879 switch(__x._M_index << 2 | __y._M_index)
1880 {
1881 case 0b0101:
1882 return 0;
1883 case 0b0000:
1884 return __x._M_it - __y._M_it;
1885 case 0b0001:
1886 return __x._M_it - __y._M_sent;
1887 case 0b0100:
1888 return __x._M_sent - __y._M_it;
1889 default:
1890 __glibcxx_assert(__x._M_has_value());
1891 __glibcxx_assert(__y._M_has_value());
1892 __builtin_unreachable();
1893 }
1894 }
1895
1896 friend iter_rvalue_reference_t<_It>
1897 iter_move(const common_iterator& __i)
1898 noexcept(noexcept(ranges::iter_move(std::declval<const _It&>())))
1899 requires input_iterator<_It>
1900 {
1901 __glibcxx_assert(__i._M_index == 0);
1902 return ranges::iter_move(__i._M_it);
1903 }
1904
1905 template<indirectly_swappable<_It> _It2, typename _Sent2>
1906 friend void
1907 iter_swap(const common_iterator& __x,
1908 const common_iterator<_It2, _Sent2>& __y)
1909 noexcept(noexcept(ranges::iter_swap(std::declval<const _It&>(),
1910 std::declval<const _It2&>())))
1911 {
1912 __glibcxx_assert(__x._M_index == 0);
1913 __glibcxx_assert(__y._M_index == 0);
1914 return ranges::iter_swap(__x._M_it, __y._M_it);
1915 }
1916
1917 private:
1918 template<input_or_output_iterator _It2, sentinel_for<_It2> _Sent2>
1919 friend class common_iterator;
1920
1921 bool _M_has_value() const noexcept { return _M_index < 2; }
1922
1923 union
1924 {
1925 _It _M_it;
1926 _Sent _M_sent;
1927 unsigned char _M_valueless;
1928 };
1929 unsigned char _M_index; // 0==_M_it, 1==_M_sent, 2==valueless
1930 };
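// [Editorial sketch, not part of the header; C++20 only] common_iterator
// erases the iterator/sentinel distinction so a non-common range fits a
// classic two-iterator interface:
//
//   #include <ranges>
//   auto r = std::views::iota(0) | std::views::take(5);
//   using CIter = std::common_iterator<decltype(r.begin()),
//                                      decltype(r.end())>;
//   CIter first{r.begin()}, last{r.end()};
//   for (; first != last; ++first)
//     { /* *first visits 0, 1, 2, 3, 4 */ }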
1931
1932 template<typename _It, typename _Sent>
1933 struct incrementable_traits<common_iterator<_It, _Sent>>
1934 {
1935 using difference_type = iter_difference_t<_It>;
1936 };
1937
1938 template<input_iterator _It, typename _Sent>
1939 struct iterator_traits<common_iterator<_It, _Sent>>
1940 {
1941 private:
1942 template<typename _Iter>
1943 struct __ptr
1944 {
1945 using type = void;
1946 };
1947
1948 template<typename _Iter>
1949 requires __detail::__common_iter_has_arrow<_Iter>
1950 struct __ptr<_Iter>
1951 {
1952 using _CIter = common_iterator<_Iter, _Sent>;
1953 using type = decltype(std::declval<const _CIter&>().operator->());
1954 };
1955
1956 public:
1957 using iterator_concept = conditional_t<forward_iterator<_It>,
1958 forward_iterator_tag, input_iterator_tag>;
1959 using iterator_category = __detail::__clamp_iter_cat<
1960 typename iterator_traits<_It>::iterator_category,
1961 forward_iterator_tag, input_iterator_tag>;
1962 using value_type = iter_value_t<_It>;
1963 using difference_type = iter_difference_t<_It>;
1964 using pointer = typename __ptr<_It>::type;
1965 using reference = iter_reference_t<_It>;
1966 };
1967
1968 // [iterators.counted] Counted iterators
1969
1970 /// An iterator adaptor that keeps track of the distance to the end.
1971 template<input_or_output_iterator _It>
1972 class counted_iterator
1973 {
1974 public:
1975 using iterator_type = _It;
1976
1977 constexpr counted_iterator() = default;
1978
1979 constexpr
1980 counted_iterator(_It __i, iter_difference_t<_It> __n)
1981 : _M_current(std::move(__i)), _M_length(__n)
1982 { __glibcxx_assert(__n >= 0); }
1983
1984 template<typename _It2>
1985 requires convertible_to<const _It2&, _It>
1986 constexpr
1987 counted_iterator(const counted_iterator<_It2>& __x)
1988 : _M_current(__x._M_current), _M_length(__x._M_length)
1989 { }
1990
1991 template<typename _It2>
1992 requires assignable_from<_It&, const _It2&>
1993 constexpr counted_iterator&
1994 operator=(const counted_iterator<_It2>& __x)
1995 {
1996 _M_current = __x._M_current;
1997 _M_length = __x._M_length;
1998 return *this;
1999 }
2000
2001 constexpr _It
2002 base() const &
2003 noexcept(is_nothrow_copy_constructible_v<_It>)
2004 requires copy_constructible<_It>
2005 { return _M_current; }
2006
2007 constexpr _It
2008 base() &&
2009 noexcept(is_nothrow_move_constructible_v<_It>)
2010 { return std::move(_M_current); }
2011
2012 constexpr iter_difference_t<_It>
2013 count() const noexcept { return _M_length; }
2014
2015 constexpr decltype(auto)
2016 operator*()
2017 noexcept(noexcept(*_M_current))
2018 { return *_M_current; }
2019
2020 constexpr decltype(auto)
2021 operator*() const
2022 noexcept(noexcept(*_M_current))
2023 requires __detail::__dereferenceable<const _It>
2024 { return *_M_current; }
2025
2026 constexpr counted_iterator&
2027 operator++()
2028 {
2029 __glibcxx_assert(_M_length > 0);
2030 ++_M_current;
2031 --_M_length;
2032 return *this;
2033 }
2034
2035 decltype(auto)
2036 operator++(int)
2037 {
2038 __glibcxx_assert(_M_length > 0);
2039 --_M_length;
2040 __try
2041 {
2042 return _M_current++;
2043 } __catch(...) {
2044 ++_M_length;
2045 __throw_exception_again;
2046 }
2047
2048 }
2049
2050 constexpr counted_iterator
2051 operator++(int) requires forward_iterator<_It>
2052 {
2053 auto __tmp = *this;
2054 ++*this;
2055 return __tmp;
2056 }
2057
2058 constexpr counted_iterator&
2059 operator--() requires bidirectional_iterator<_It>
2060 {
2061 --_M_current;
2062 ++_M_length;
2063 return *this;
2064 }
2065
2066 constexpr counted_iterator
2067 operator--(int) requires bidirectional_iterator<_It>
2068 {
2069 auto __tmp = *this;
2070 --*this;
2071 return __tmp;
2072 }
2073
2074 constexpr counted_iterator
2075 operator+(iter_difference_t<_It> __n) const
2076 requires random_access_iterator<_It>
2077 { return counted_iterator(_M_current + __n, _M_length - __n); }
2078
2079 friend constexpr counted_iterator
2080 operator+(iter_difference_t<_It> __n, const counted_iterator& __x)
2081 requires random_access_iterator<_It>
2082 { return __x + __n; }
2083
2084 constexpr counted_iterator&
2085 operator+=(iter_difference_t<_It> __n)
2086 requires random_access_iterator<_It>
2087 {
2088 __glibcxx_assert(__n <= _M_length);
2089 _M_current += __n;
2090 _M_length -= __n;
2091 return *this;
2092 }
2093
2094 constexpr counted_iterator
2095 operator-(iter_difference_t<_It> __n) const
2096 requires random_access_iterator<_It>
2097 { return counted_iterator(_M_current - __n, _M_length + __n); }
2098
2099 template<common_with<_It> _It2>
2100 friend constexpr iter_difference_t<_It2>
2101 operator-(const counted_iterator& __x,
2102 const counted_iterator<_It2>& __y)
2103 { return __y._M_length - __x._M_length; }
2104
2105 friend constexpr iter_difference_t<_It>
2106 operator-(const counted_iterator& __x, default_sentinel_t)
2107 { return -__x._M_length; }
2108
2109 friend constexpr iter_difference_t<_It>
2110 operator-(default_sentinel_t, const counted_iterator& __y)
2111 { return __y._M_length; }
2112
2113 constexpr counted_iterator&
2114 operator-=(iter_difference_t<_It> __n)
2115 requires random_access_iterator<_It>
2116 {
2117 __glibcxx_assert(-__n <= _M_length);
2118 _M_current -= __n;
2119 _M_length += __n;
2120 return *this;
2121 }
2122
2123 constexpr decltype(auto)
2124 operator[](iter_difference_t<_It> __n) const
2125 noexcept(noexcept(_M_current[__n]))
2126 requires random_access_iterator<_It>
2127 {
2128 __glibcxx_assert(__n < _M_length);
2129 return _M_current[__n];
2130 }
2131
2132 template<common_with<_It> _It2>
2133 friend constexpr bool
2134 operator==(const counted_iterator& __x,
2135 const counted_iterator<_It2>& __y)
2136 { return __x._M_length == __y._M_length; }
2137
2138 friend constexpr bool
2139 operator==(const counted_iterator& __x, default_sentinel_t)
2140 { return __x._M_length == 0; }
2141
2142 template<common_with<_It> _It2>
2143 friend constexpr strong_ordering
2144 operator<=>(const counted_iterator& __x,
2145 const counted_iterator<_It2>& __y)
2146 { return __y._M_length <=> __x._M_length; }
2147
2148 friend constexpr iter_rvalue_reference_t<_It>
2149 iter_move(const counted_iterator& __i)
2150 noexcept(noexcept(ranges::iter_move(__i._M_current)))
2151 requires input_iterator<_It>
2152 { return ranges::iter_move(__i._M_current); }
2153
2154 template<indirectly_swappable<_It> _It2>
2155 friend constexpr void
2156 iter_swap(const counted_iterator& __x,
2157 const counted_iterator<_It2>& __y)
2158 noexcept(noexcept(ranges::iter_swap(__x._M_current, __y._M_current)))
2159 { ranges::iter_swap(__x._M_current, __y._M_current); }
2160
2161 private:
2162 template<input_or_output_iterator _It2> friend class counted_iterator;
2163
2164 _It _M_current = _It();
2165 iter_difference_t<_It> _M_length = 0;
2166 };
2167
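// [Editorial sketch, not part of the header; C++20 only] counted_iterator
// pairs with std::default_sentinel to visit exactly N elements:
//
//   #include <iterator>
//   #include <vector>
//   std::vector<int> v{1, 2, 3, 4, 5};
//   std::counted_iterator it(v.begin(), 3);
//   for (; it != std::default_sentinel; ++it)
//     { /* visits 1, 2, 3 */ }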
2168 template<typename _It>
2169 struct incrementable_traits<counted_iterator<_It>>
2170 {
2171 using difference_type = iter_difference_t<_It>;
2172 };
2173
2174 template<input_iterator _It>
2175 struct iterator_traits<counted_iterator<_It>> : iterator_traits<_It>
2176 {
2177 using pointer = void;
2178 };
2179#endif // C++20
2180
2181 // @} group iterators
2182
2183 template<typename _Iterator>
2184 auto
2185 __niter_base(move_iterator<_Iterator> __it)
2186 -> decltype(make_move_iterator(__niter_base(__it.base())))
2187 { return make_move_iterator(__niter_base(__it.base())); }
2188
2189 template<typename _Iterator>
2190 struct __is_move_iterator<move_iterator<_Iterator> >
2191 {
2192 enum { __value = 1 };
2193 typedef __true_type __type;
2194 };
2195
2196 template<typename _Iterator>
2197 auto
2198 __miter_base(move_iterator<_Iterator> __it)
2199 -> decltype(__miter_base(__it.base()))
2200 { return __miter_base(__it.base()); }
2201
2202#define _GLIBCXX_MAKE_MOVE_ITERATOR(_Iter) std::make_move_iterator(_Iter)
2203#define _GLIBCXX_MAKE_MOVE_IF_NOEXCEPT_ITERATOR(_Iter) \
2204 std::__make_move_if_noexcept_iterator(_Iter)
2205#else
2206#define _GLIBCXX_MAKE_MOVE_ITERATOR(_Iter) (_Iter)
2207#define _GLIBCXX_MAKE_MOVE_IF_NOEXCEPT_ITERATOR(_Iter) (_Iter)
2208#endif // C++11
2209
2210#if __cpp_deduction_guides >= 201606
2211 // These helper traits are used for deduction guides
2212 // of associative containers.
2213 template<typename _InputIterator>
2214 using __iter_key_t = remove_const_t<
2215 typename iterator_traits<_InputIterator>::value_type::first_type>;
2216
2217 template<typename _InputIterator>
2218 using __iter_val_t =
2219 typename iterator_traits<_InputIterator>::value_type::second_type;
2220
2221 template<typename _T1, typename _T2>
2222 struct pair;
2223
2224 template<typename _InputIterator>
2225 using __iter_to_alloc_t =
2226 pair<add_const_t<__iter_key_t<_InputIterator>>,
2227 __iter_val_t<_InputIterator>>;
2228#endif // __cpp_deduction_guides
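// [Editorial sketch, not part of the header] These aliases power the
// associative-container deduction guides, e.g.:
//
//   #include <map>
//   #include <utility>
//   #include <vector>
//   std::vector<std::pair<int, double>> src{{1, 2.0}, {2, 4.0}};
//   std::map m(src.begin(), src.end());  // deduces std::map<int, double>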
2229
2230_GLIBCXX_END_NAMESPACE_VERSION
2231} // namespace
2232
2233#ifdef _GLIBCXX_DEBUG
2234# include <debug/stl_iterator.h>
2235#endif
2236
2237#endif