File: build/source/clang/lib/CodeGen/CGOpenMPRuntime.cpp
Warning: line 7829, column 9: 2nd function call argument is an uninitialized value
1 | //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This provides a class for OpenMP runtime code generation. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "CGOpenMPRuntime.h" |
14 | #include "CGCXXABI.h" |
15 | #include "CGCleanup.h" |
16 | #include "CGRecordLayout.h" |
17 | #include "CodeGenFunction.h" |
18 | #include "TargetInfo.h" |
19 | #include "clang/AST/APValue.h" |
20 | #include "clang/AST/Attr.h" |
21 | #include "clang/AST/Decl.h" |
22 | #include "clang/AST/OpenMPClause.h" |
23 | #include "clang/AST/StmtOpenMP.h" |
24 | #include "clang/AST/StmtVisitor.h" |
25 | #include "clang/Basic/BitmaskEnum.h" |
26 | #include "clang/Basic/FileManager.h" |
27 | #include "clang/Basic/OpenMPKinds.h" |
28 | #include "clang/Basic/SourceManager.h" |
29 | #include "clang/CodeGen/ConstantInitBuilder.h" |
30 | #include "llvm/ADT/ArrayRef.h" |
31 | #include "llvm/ADT/SetOperations.h" |
32 | #include "llvm/ADT/SmallBitVector.h" |
33 | #include "llvm/ADT/StringExtras.h" |
34 | #include "llvm/Bitcode/BitcodeReader.h" |
35 | #include "llvm/IR/Constants.h" |
36 | #include "llvm/IR/DerivedTypes.h" |
37 | #include "llvm/IR/GlobalValue.h" |
38 | #include "llvm/IR/InstrTypes.h" |
39 | #include "llvm/IR/Value.h" |
40 | #include "llvm/Support/AtomicOrdering.h" |
41 | #include "llvm/Support/Format.h" |
42 | #include "llvm/Support/raw_ostream.h" |
43 | #include <cassert> |
44 | #include <numeric> |
45 | #include <optional> |
46 | |
47 | using namespace clang; |
48 | using namespace CodeGen; |
49 | using namespace llvm::omp; |
50 | |
51 | namespace { |
52 | /// Base class for handling code generation inside OpenMP regions. |
53 | class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { |
54 | public: |
55 | /// Kinds of OpenMP regions used in codegen. |
56 | enum CGOpenMPRegionKind { |
57 | /// Region with outlined function for standalone 'parallel' |
58 | /// directive. |
59 | ParallelOutlinedRegion, |
60 | /// Region with outlined function for standalone 'task' directive. |
61 | TaskOutlinedRegion, |
62 | /// Region for constructs that do not require function outlining, |
63 | /// like 'for', 'sections', 'atomic' etc. directives. |
64 | InlinedRegion, |
65 | /// Region with outlined function for standalone 'target' directive. |
66 | TargetRegion, |
67 | }; |
68 | |
69 | CGOpenMPRegionInfo(const CapturedStmt &CS, |
70 | const CGOpenMPRegionKind RegionKind, |
71 | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
72 | bool HasCancel) |
73 | : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), |
74 | CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} |
75 | |
76 | CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, |
77 | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
78 | bool HasCancel) |
79 | : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), |
80 | Kind(Kind), HasCancel(HasCancel) {} |
81 | |
82 | /// Get a variable or parameter for storing global thread id |
83 | /// inside OpenMP construct. |
84 | virtual const VarDecl *getThreadIDVariable() const = 0; |
85 | |
86 | /// Emit the captured statement body. |
87 | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; |
88 | |
89 | /// Get an LValue for the current ThreadID variable. |
90 | /// \return LValue for thread id variable. This LValue always has type int32*. |
91 | virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); |
92 | |
93 | virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} |
94 | |
95 | CGOpenMPRegionKind getRegionKind() const { return RegionKind; } |
96 | |
97 | OpenMPDirectiveKind getDirectiveKind() const { return Kind; } |
98 | |
99 | bool hasCancel() const { return HasCancel; } |
100 | |
101 | static bool classof(const CGCapturedStmtInfo *Info) { |
102 | return Info->getKind() == CR_OpenMP; |
103 | } |
104 | |
105 | ~CGOpenMPRegionInfo() override = default; |
106 | |
107 | protected: |
108 | CGOpenMPRegionKind RegionKind; |
109 | RegionCodeGenTy CodeGen; |
110 | OpenMPDirectiveKind Kind; |
111 | bool HasCancel; |
112 | }; |
113 | |
114 | /// API for captured statement code generation in OpenMP constructs. |
115 | class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
116 | public: |
117 | CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, |
118 | const RegionCodeGenTy &CodeGen, |
119 | OpenMPDirectiveKind Kind, bool HasCancel, |
120 | StringRef HelperName) |
121 | : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, |
122 | HasCancel), |
123 | ThreadIDVar(ThreadIDVar), HelperName(HelperName) { |
124 | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); |
125 | } |
126 | |
127 | /// Get a variable or parameter for storing global thread id |
128 | /// inside OpenMP construct. |
129 | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
130 | |
131 | /// Get the name of the capture helper. |
132 | StringRef getHelperName() const override { return HelperName; } |
133 | |
134 | static bool classof(const CGCapturedStmtInfo *Info) { |
135 | return CGOpenMPRegionInfo::classof(Info) && |
136 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
137 | ParallelOutlinedRegion; |
138 | } |
139 | |
140 | private: |
141 | /// A variable or parameter storing global thread id for OpenMP |
142 | /// constructs. |
143 | const VarDecl *ThreadIDVar; |
144 | StringRef HelperName; |
145 | }; |
146 | |
147 | /// API for captured statement code generation in OpenMP constructs. |
148 | class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
149 | public: |
150 | class UntiedTaskActionTy final : public PrePostActionTy { |
151 | bool Untied; |
152 | const VarDecl *PartIDVar; |
153 | const RegionCodeGenTy UntiedCodeGen; |
154 | llvm::SwitchInst *UntiedSwitch = nullptr; |
155 | |
156 | public: |
157 | UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, |
158 | const RegionCodeGenTy &UntiedCodeGen) |
159 | : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} |
160 | void Enter(CodeGenFunction &CGF) override { |
161 | if (Untied) { |
162 | // Emit task switching point. |
163 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
164 | CGF.GetAddrOfLocalVar(PartIDVar), |
165 | PartIDVar->getType()->castAs<PointerType>()); |
166 | llvm::Value *Res = |
167 | CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); |
168 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); |
169 | UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); |
170 | CGF.EmitBlock(DoneBB); |
171 | CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); |
172 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
173 | UntiedSwitch->addCase(CGF.Builder.getInt32(0), |
174 | CGF.Builder.GetInsertBlock()); |
175 | emitUntiedSwitch(CGF); |
176 | } |
177 | } |
178 | void emitUntiedSwitch(CodeGenFunction &CGF) const { |
179 | if (Untied) { |
180 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
181 | CGF.GetAddrOfLocalVar(PartIDVar), |
182 | PartIDVar->getType()->castAs<PointerType>()); |
183 | CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
184 | PartIdLVal); |
185 | UntiedCodeGen(CGF); |
186 | CodeGenFunction::JumpDest CurPoint = |
187 | CGF.getJumpDestInCurrentScope(".untied.next."); |
188 | CGF.EmitBranch(CGF.ReturnBlock.getBlock()); |
189 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
190 | UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
191 | CGF.Builder.GetInsertBlock()); |
192 | CGF.EmitBranchThroughCleanup(CurPoint); |
193 | CGF.EmitBlock(CurPoint.getBlock()); |
194 | } |
195 | } |
196 | unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } |
197 | }; |
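// Roughly, the two methods above build dispatch code of the following shape
// for an untied task (an illustrative sketch, not verbatim IR): the part id
// selects the resume point, and each scheduling point stores the next case
// number into the part id before branching to the task exit.
//
//   switch (*part_id) {
//   default: goto .untied.done.;   // exit through the cleanup/return path
//   case 0:  goto .untied.jmp.0;   // initial entry into the task body
//   case N:  goto .untied.jmp.N;   // one case per emitUntiedSwitch() call
//   }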
198 | CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, |
199 | const VarDecl *ThreadIDVar, |
200 | const RegionCodeGenTy &CodeGen, |
201 | OpenMPDirectiveKind Kind, bool HasCancel, |
202 | const UntiedTaskActionTy &Action) |
203 | : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), |
204 | ThreadIDVar(ThreadIDVar), Action(Action) { |
205 | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region."); |
206 | } |
207 | |
208 | /// Get a variable or parameter for storing global thread id |
209 | /// inside OpenMP construct. |
210 | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
211 | |
212 | /// Get an LValue for the current ThreadID variable. |
213 | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; |
214 | |
215 | /// Get the name of the capture helper. |
216 | StringRef getHelperName() const override { return ".omp_outlined."; } |
217 | |
218 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
219 | Action.emitUntiedSwitch(CGF); |
220 | } |
221 | |
222 | static bool classof(const CGCapturedStmtInfo *Info) { |
223 | return CGOpenMPRegionInfo::classof(Info) && |
224 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
225 | TaskOutlinedRegion; |
226 | } |
227 | |
228 | private: |
229 | /// A variable or parameter storing global thread id for OpenMP |
230 | /// constructs. |
231 | const VarDecl *ThreadIDVar; |
232 | /// Action for emitting code for untied tasks. |
233 | const UntiedTaskActionTy &Action; |
234 | }; |
235 | |
236 | /// API for inlined captured statement code generation in OpenMP |
237 | /// constructs. |
238 | class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { |
239 | public: |
240 | CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, |
241 | const RegionCodeGenTy &CodeGen, |
242 | OpenMPDirectiveKind Kind, bool HasCancel) |
243 | : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), |
244 | OldCSI(OldCSI), |
245 | OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} |
246 | |
247 | // Retrieve the value of the context parameter. |
248 | llvm::Value *getContextValue() const override { |
249 | if (OuterRegionInfo) |
250 | return OuterRegionInfo->getContextValue(); |
251 | llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 251); |
252 | } |
253 | |
254 | void setContextValue(llvm::Value *V) override { |
255 | if (OuterRegionInfo) { |
256 | OuterRegionInfo->setContextValue(V); |
257 | return; |
258 | } |
259 | llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 259); |
260 | } |
261 | |
262 | /// Lookup the captured field decl for a variable. |
263 | const FieldDecl *lookup(const VarDecl *VD) const override { |
264 | if (OuterRegionInfo) |
265 | return OuterRegionInfo->lookup(VD); |
266 | // If there is no outer outlined region, there is no need to look it up in |
267 | // the list of captured variables; we can use the original one. |
268 | return nullptr; |
269 | } |
270 | |
271 | FieldDecl *getThisFieldDecl() const override { |
272 | if (OuterRegionInfo) |
273 | return OuterRegionInfo->getThisFieldDecl(); |
274 | return nullptr; |
275 | } |
276 | |
277 | /// Get a variable or parameter for storing global thread id |
278 | /// inside OpenMP construct. |
279 | const VarDecl *getThreadIDVariable() const override { |
280 | if (OuterRegionInfo) |
281 | return OuterRegionInfo->getThreadIDVariable(); |
282 | return nullptr; |
283 | } |
284 | |
285 | /// Get an LValue for the current ThreadID variable. |
286 | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { |
287 | if (OuterRegionInfo) |
288 | return OuterRegionInfo->getThreadIDVariableLValue(CGF); |
289 | llvm_unreachable("No LValue for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No LValue for inlined OpenMP construct" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 289); |
290 | } |
291 | |
292 | /// Get the name of the capture helper. |
293 | StringRef getHelperName() const override { |
294 | if (auto *OuterRegionInfo = getOldCSI()) |
295 | return OuterRegionInfo->getHelperName(); |
296 | llvm_unreachable("No helper name for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No helper name for inlined OpenMP construct" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 296); |
297 | } |
298 | |
299 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
300 | if (OuterRegionInfo) |
301 | OuterRegionInfo->emitUntiedSwitch(CGF); |
302 | } |
303 | |
304 | CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } |
305 | |
306 | static bool classof(const CGCapturedStmtInfo *Info) { |
307 | return CGOpenMPRegionInfo::classof(Info) && |
308 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; |
309 | } |
310 | |
311 | ~CGOpenMPInlinedRegionInfo() override = default; |
312 | |
313 | private: |
314 | /// CodeGen info about outer OpenMP region. |
315 | CodeGenFunction::CGCapturedStmtInfo *OldCSI; |
316 | CGOpenMPRegionInfo *OuterRegionInfo; |
317 | }; |
318 | |
319 | /// API for captured statement code generation in OpenMP target |
320 | /// constructs. For these captures, implicit parameters are used instead of the |
321 | /// captured fields. The name of the target region has to be unique in a given |
322 | /// application so it is provided by the client, because only the client has |
323 | /// the information to generate it. |
324 | class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { |
325 | public: |
326 | CGOpenMPTargetRegionInfo(const CapturedStmt &CS, |
327 | const RegionCodeGenTy &CodeGen, StringRef HelperName) |
328 | : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, |
329 | /*HasCancel=*/false), |
330 | HelperName(HelperName) {} |
331 | |
332 | /// This is unused for target regions because each starts executing |
333 | /// with a single thread. |
334 | const VarDecl *getThreadIDVariable() const override { return nullptr; } |
335 | |
336 | /// Get the name of the capture helper. |
337 | StringRef getHelperName() const override { return HelperName; } |
338 | |
339 | static bool classof(const CGCapturedStmtInfo *Info) { |
340 | return CGOpenMPRegionInfo::classof(Info) && |
341 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; |
342 | } |
343 | |
344 | private: |
345 | StringRef HelperName; |
346 | }; |
347 | |
348 | static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { |
349 | llvm_unreachable("No codegen for expressions")::llvm::llvm_unreachable_internal("No codegen for expressions" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 349); |
350 | } |
351 | /// API for generation of expressions captured in an innermost OpenMP |
352 | /// region. |
353 | class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { |
354 | public: |
355 | CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) |
356 | : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, |
357 | OMPD_unknown, |
358 | /*HasCancel=*/false), |
359 | PrivScope(CGF) { |
360 | // Make sure the globals captured in the provided statement are local by |
361 | // using the privatization logic. We assume the same variable is not |
362 | // captured more than once. |
363 | for (const auto &C : CS.captures()) { |
364 | if (!C.capturesVariable() && !C.capturesVariableByCopy()) |
365 | continue; |
366 | |
367 | const VarDecl *VD = C.getCapturedVar(); |
368 | if (VD->isLocalVarDeclOrParm()) |
369 | continue; |
370 | |
371 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
372 | /*RefersToEnclosingVariableOrCapture=*/false, |
373 | VD->getType().getNonReferenceType(), VK_LValue, |
374 | C.getLocation()); |
375 | PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); |
376 | } |
377 | (void)PrivScope.Privatize(); |
378 | } |
379 | |
380 | /// Lookup the captured field decl for a variable. |
381 | const FieldDecl *lookup(const VarDecl *VD) const override { |
382 | if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) |
383 | return FD; |
384 | return nullptr; |
385 | } |
386 | |
387 | /// Emit the captured statement body. |
388 | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { |
389 | llvm_unreachable("No body for expressions")::llvm::llvm_unreachable_internal("No body for expressions", "clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 389); |
390 | } |
391 | |
392 | /// Get a variable or parameter for storing global thread id |
393 | /// inside OpenMP construct. |
394 | const VarDecl *getThreadIDVariable() const override { |
395 | llvm_unreachable("No thread id for expressions")::llvm::llvm_unreachable_internal("No thread id for expressions" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 395); |
396 | } |
397 | |
398 | /// Get the name of the capture helper. |
399 | StringRef getHelperName() const override { |
400 | llvm_unreachable("No helper name for expressions")::llvm::llvm_unreachable_internal("No helper name for expressions" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 400); |
401 | } |
402 | |
403 | static bool classof(const CGCapturedStmtInfo *Info) { return false; } |
404 | |
405 | private: |
406 | /// Private scope to capture global variables. |
407 | CodeGenFunction::OMPPrivateScope PrivScope; |
408 | }; |
409 | |
410 | /// RAII for emitting code of OpenMP constructs. |
411 | class InlinedOpenMPRegionRAII { |
412 | CodeGenFunction &CGF; |
413 | llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields; |
414 | FieldDecl *LambdaThisCaptureField = nullptr; |
415 | const CodeGen::CGBlockInfo *BlockInfo = nullptr; |
416 | bool NoInheritance = false; |
417 | |
418 | public: |
419 | /// Constructs region for combined constructs. |
420 | /// \param CodeGen Code generation sequence for combined directives. Includes |
421 | /// a list of functions used for code generation of implicitly inlined |
422 | /// regions. |
423 | InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, |
424 | OpenMPDirectiveKind Kind, bool HasCancel, |
425 | bool NoInheritance = true) |
426 | : CGF(CGF), NoInheritance(NoInheritance) { |
427 | // Start emission for the construct. |
428 | CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( |
429 | CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); |
430 | if (NoInheritance) { |
431 | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
432 | LambdaThisCaptureField = CGF.LambdaThisCaptureField; |
433 | CGF.LambdaThisCaptureField = nullptr; |
434 | BlockInfo = CGF.BlockInfo; |
435 | CGF.BlockInfo = nullptr; |
436 | } |
437 | } |
438 | |
439 | ~InlinedOpenMPRegionRAII() { |
440 | // Restore original CapturedStmtInfo only if we're done with code emission. |
441 | auto *OldCSI = |
442 | cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); |
443 | delete CGF.CapturedStmtInfo; |
444 | CGF.CapturedStmtInfo = OldCSI; |
445 | if (NoInheritance) { |
446 | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
447 | CGF.LambdaThisCaptureField = LambdaThisCaptureField; |
448 | CGF.BlockInfo = BlockInfo; |
449 | } |
450 | } |
451 | }; |
452 | |
453 | /// Values for bit flags used in the ident_t to describe the fields. |
454 | /// All enumerators are named and described in accordance with the code |
455 | /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h |
456 | enum OpenMPLocationFlags : unsigned { |
457 | /// Use trampoline for internal microtask. |
458 | OMP_IDENT_IMD = 0x01, |
459 | /// Use c-style ident structure. |
460 | OMP_IDENT_KMPC = 0x02, |
461 | /// Atomic reduction option for kmpc_reduce. |
462 | OMP_ATOMIC_REDUCE = 0x10, |
463 | /// Explicit 'barrier' directive. |
464 | OMP_IDENT_BARRIER_EXPL = 0x20, |
465 | /// Implicit barrier in code. |
466 | OMP_IDENT_BARRIER_IMPL = 0x40, |
467 | /// Implicit barrier in 'for' directive. |
468 | OMP_IDENT_BARRIER_IMPL_FOR = 0x40, |
469 | /// Implicit barrier in 'sections' directive. |
470 | OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, |
471 | /// Implicit barrier in 'single' directive. |
472 | OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, |
473 | /// Call of __kmp_for_static_init for static loop. |
474 | OMP_IDENT_WORK_LOOP = 0x200, |
475 | /// Call of __kmp_for_static_init for sections. |
476 | OMP_IDENT_WORK_SECTIONS = 0x400, |
477 | /// Call of __kmp_for_static_init for distribute. |
478 | OMP_IDENT_WORK_DISTRIBUTE = 0x800, |
479 | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE) |
480 | }; |
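// Since this is a bitmask enum, the flags above compose with '|'. For example,
// an ident_t describing the implicit barrier at the end of a worksharing loop
// could carry OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR (0x02 | 0x40 == 0x42).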
481 | |
482 | namespace { |
483 | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE(); |
484 | /// Values for bit flags for marking which requires clauses have been used. |
485 | enum OpenMPOffloadingRequiresDirFlags : int64_t { |
486 | /// flag undefined. |
487 | OMP_REQ_UNDEFINED = 0x000, |
488 | /// no requires clause present. |
489 | OMP_REQ_NONE = 0x001, |
490 | /// reverse_offload clause. |
491 | OMP_REQ_REVERSE_OFFLOAD = 0x002, |
492 | /// unified_address clause. |
493 | OMP_REQ_UNIFIED_ADDRESS = 0x004, |
494 | /// unified_shared_memory clause. |
495 | OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, |
496 | /// dynamic_allocators clause. |
497 | OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, |
498 | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS) |
499 | }; |
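// These flags also compose as a bitmask. For example, a translation unit with
//   #pragma omp requires unified_shared_memory, reverse_offload
// would be summarized as OMP_REQ_UNIFIED_SHARED_MEMORY | OMP_REQ_REVERSE_OFFLOAD
// (0x008 | 0x002 == 0x00A), while a TU with no requires clauses is marked
// OMP_REQ_NONE.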
500 | |
501 | } // anonymous namespace |
502 | |
503 | /// Describes ident structure that describes a source location. |
504 | /// All descriptions are taken from |
505 | /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h |
506 | /// Original structure: |
507 | /// typedef struct ident { |
508 | /// kmp_int32 reserved_1; /**< might be used in Fortran; |
509 | /// see above */ |
510 | /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; |
511 | /// KMP_IDENT_KMPC identifies this union |
512 | /// member */ |
513 | /// kmp_int32 reserved_2; /**< not really used in Fortran any more; |
514 | /// see above */ |
515 | ///#if USE_ITT_BUILD |
516 | /// /* but currently used for storing |
517 | /// region-specific ITT */ |
518 | /// /* contextual information. */ |
519 | ///#endif /* USE_ITT_BUILD */ |
520 | /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for |
521 | /// C++ */ |
522 | /// char const *psource; /**< String describing the source location. |
523 | /// The string is composed of semi-colon separated |
524 | /// fields which describe the source file, |
525 | /// the function and a pair of line numbers that |
526 | /// delimit the construct. |
527 | /// */ |
528 | /// } ident_t; |
529 | enum IdentFieldIndex { |
530 | /// might be used in Fortran |
531 | IdentField_Reserved_1, |
532 | /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. |
533 | IdentField_Flags, |
534 | /// Not really used in Fortran any more |
535 | IdentField_Reserved_2, |
536 | /// Source[4] in Fortran, do not use for C++ |
537 | IdentField_Reserved_3, |
538 | /// String describing the source location. The string is composed of |
539 | /// semi-colon separated fields which describe the source file, the function |
540 | /// and a pair of line numbers that delimit the construct. |
541 | IdentField_PSource |
542 | }; |
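// Illustratively, a psource string consists of a leading ';', then the source
// file, the function, and two location fields, terminated with ";;" -- e.g.
// ";foo.c;bar;12;3;;" (the file and function names here are placeholders).
// When no location information is available, ";unknown;unknown;0;0;;" is
// commonly emitted.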
543 | |
544 | /// Schedule types for 'omp for' loops (these enumerators are taken from |
545 | /// the enum sched_type in kmp.h). |
546 | enum OpenMPSchedType { |
547 | /// Lower bound for default (unordered) versions. |
548 | OMP_sch_lower = 32, |
549 | OMP_sch_static_chunked = 33, |
550 | OMP_sch_static = 34, |
551 | OMP_sch_dynamic_chunked = 35, |
552 | OMP_sch_guided_chunked = 36, |
553 | OMP_sch_runtime = 37, |
554 | OMP_sch_auto = 38, |
555 | /// static with chunk adjustment (e.g., simd) |
556 | OMP_sch_static_balanced_chunked = 45, |
557 | /// Lower bound for 'ordered' versions. |
558 | OMP_ord_lower = 64, |
559 | OMP_ord_static_chunked = 65, |
560 | OMP_ord_static = 66, |
561 | OMP_ord_dynamic_chunked = 67, |
562 | OMP_ord_guided_chunked = 68, |
563 | OMP_ord_runtime = 69, |
564 | OMP_ord_auto = 70, |
565 | OMP_sch_default = OMP_sch_static, |
566 | /// dist_schedule types |
567 | OMP_dist_sch_static_chunked = 91, |
568 | OMP_dist_sch_static = 92, |
569 | /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. |
570 | /// Set if the monotonic schedule modifier was present. |
571 | OMP_sch_modifier_monotonic = (1 << 29), |
572 | /// Set if the nonmonotonic schedule modifier was present. |
573 | OMP_sch_modifier_nonmonotonic = (1 << 30), |
574 | }; |
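// Base schedule kinds and the modifier bits above compose with '|'. For
// example, 'schedule(nonmonotonic: dynamic, 4)' maps to
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic (35 | (1 << 30));
// the chunk size itself is passed to the runtime as a separate argument.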
575 | |
576 | /// A basic class for pre- and post-actions in advanced codegen sequences for |
577 | /// OpenMP regions. |
578 | class CleanupTy final : public EHScopeStack::Cleanup { |
579 | PrePostActionTy *Action; |
580 | |
581 | public: |
582 | explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} |
583 | void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { |
584 | if (!CGF.HaveInsertPoint()) |
585 | return; |
586 | Action->Exit(CGF); |
587 | } |
588 | }; |
589 | |
590 | } // anonymous namespace |
591 | |
592 | void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { |
593 | CodeGenFunction::RunCleanupsScope Scope(CGF); |
594 | if (PrePostAction) { |
595 | CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); |
596 | Callback(CodeGen, CGF, *PrePostAction); |
597 | } else { |
598 | PrePostActionTy Action; |
599 | Callback(CodeGen, CGF, Action); |
600 | } |
601 | } |
602 | |
603 | /// Check if the combiner is a call to a UDR combiner and, if so, return the |
604 | /// UDR decl used for the reduction. |
605 | static const OMPDeclareReductionDecl * |
606 | getReductionInit(const Expr *ReductionOp) { |
607 | if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) |
608 | if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) |
609 | if (const auto *DRE = |
610 | dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) |
611 | if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) |
612 | return DRD; |
613 | return nullptr; |
614 | } |
615 | |
616 | static void emitInitWithReductionInitializer(CodeGenFunction &CGF, |
617 | const OMPDeclareReductionDecl *DRD, |
618 | const Expr *InitOp, |
619 | Address Private, Address Original, |
620 | QualType Ty) { |
621 | if (DRD->getInitializer()) { |
622 | std::pair<llvm::Function *, llvm::Function *> Reduction = |
623 | CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); |
624 | const auto *CE = cast<CallExpr>(InitOp); |
625 | const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee()); |
626 | const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); |
627 | const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); |
628 | const auto *LHSDRE = |
629 | cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); |
630 | const auto *RHSDRE = |
631 | cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); |
632 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
633 | PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private); |
634 | PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original); |
635 | (void)PrivateScope.Privatize(); |
636 | RValue Func = RValue::get(Reduction.second); |
637 | CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); |
638 | CGF.EmitIgnoredExpr(InitOp); |
639 | } else { |
640 | llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty); |
641 | std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"}); |
642 | auto *GV = new llvm::GlobalVariable( |
643 | CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true, |
644 | llvm::GlobalValue::PrivateLinkage, Init, Name); |
645 | LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty); |
646 | RValue InitRVal; |
647 | switch (CGF.getEvaluationKind(Ty)) { |
648 | case TEK_Scalar: |
649 | InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation()); |
650 | break; |
651 | case TEK_Complex: |
652 | InitRVal = |
653 | RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation())); |
654 | break; |
655 | case TEK_Aggregate: { |
656 | OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue); |
657 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV); |
658 | CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), |
659 | /*IsInitializer=*/false); |
660 | return; |
661 | } |
662 | } |
663 | OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue); |
664 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal); |
665 | CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(), |
666 | /*IsInitializer=*/false); |
667 | } |
668 | } |
669 | |
670 | /// Emit initialization of arrays of complex types. |
671 | /// \param DestAddr Address of the array. |
672 | /// \param Type Type of array. |
673 | /// \param Init Initial expression of array. |
674 | /// \param SrcAddr Address of the original array. |
675 | static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, |
676 | QualType Type, bool EmitDeclareReductionInit, |
677 | const Expr *Init, |
678 | const OMPDeclareReductionDecl *DRD, |
679 | Address SrcAddr = Address::invalid()) { |
680 | // Perform element-by-element initialization. |
681 | QualType ElementTy; |
682 | |
683 | // Drill down to the base element type on both arrays. |
684 | const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); |
685 | llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); |
686 | if (DRD) |
687 | SrcAddr = |
688 | CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); |
689 | |
690 | llvm::Value *SrcBegin = nullptr; |
691 | if (DRD) |
692 | SrcBegin = SrcAddr.getPointer(); |
693 | llvm::Value *DestBegin = DestAddr.getPointer(); |
694 | // Cast from pointer to array type to pointer to single element. |
695 | llvm::Value *DestEnd = |
696 | CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements); |
697 | // The basic structure here is a while-do loop. |
698 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); |
699 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); |
700 | llvm::Value *IsEmpty = |
701 | CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); |
702 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
703 | |
704 | // Enter the loop body, making that address the current address. |
705 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
706 | CGF.EmitBlock(BodyBB); |
707 | |
708 | CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); |
709 | |
710 | llvm::PHINode *SrcElementPHI = nullptr; |
711 | Address SrcElementCurrent = Address::invalid(); |
712 | if (DRD) { |
713 | SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, |
714 | "omp.arraycpy.srcElementPast"); |
715 | SrcElementPHI->addIncoming(SrcBegin, EntryBB); |
716 | SrcElementCurrent = |
717 | Address(SrcElementPHI, SrcAddr.getElementType(), |
718 | SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
719 | } |
720 | llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( |
721 | DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
722 | DestElementPHI->addIncoming(DestBegin, EntryBB); |
723 | Address DestElementCurrent = |
724 | Address(DestElementPHI, DestAddr.getElementType(), |
725 | DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
726 | |
727 | // Emit copy. |
728 | { |
729 | CodeGenFunction::RunCleanupsScope InitScope(CGF); |
730 | if (EmitDeclareReductionInit) { |
731 | emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, |
732 | SrcElementCurrent, ElementTy); |
733 | } else |
734 | CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), |
735 | /*IsInitializer=*/false); |
736 | } |
737 | |
738 | if (DRD) { |
739 | // Shift the address forward by one element. |
740 | llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( |
741 | SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1, |
742 | "omp.arraycpy.dest.element"); |
743 | SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); |
744 | } |
745 | |
746 | // Shift the address forward by one element. |
747 | llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( |
748 | DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1, |
749 | "omp.arraycpy.dest.element"); |
750 | // Check whether we've reached the end. |
751 | llvm::Value *Done = |
752 | CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); |
753 | CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); |
754 | DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); |
755 | |
756 | // Done. |
757 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
758 | } |
759 | |
760 | LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { |
761 | return CGF.EmitOMPSharedLValue(E); |
762 | } |
763 | |
764 | LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, |
765 | const Expr *E) { |
766 | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) |
767 | return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); |
768 | return LValue(); |
769 | } |
770 | |
771 | void ReductionCodeGen::emitAggregateInitialization( |
772 | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, |
773 | const OMPDeclareReductionDecl *DRD) { |
774 | // Emit VarDecl with copy init for arrays. |
775 | // Get the address of the original variable captured in current |
776 | // captured region. |
777 | const auto *PrivateVD = |
778 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
779 | bool EmitDeclareReductionInit = |
780 | DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); |
781 | EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), |
782 | EmitDeclareReductionInit, |
783 | EmitDeclareReductionInit ? ClausesData[N].ReductionOp |
784 | : PrivateVD->getInit(), |
785 | DRD, SharedAddr); |
786 | } |
787 | |
788 | ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, |
789 | ArrayRef<const Expr *> Origs, |
790 | ArrayRef<const Expr *> Privates, |
791 | ArrayRef<const Expr *> ReductionOps) { |
792 | ClausesData.reserve(Shareds.size()); |
793 | SharedAddresses.reserve(Shareds.size()); |
794 | Sizes.reserve(Shareds.size()); |
795 | BaseDecls.reserve(Shareds.size()); |
796 | const auto *IOrig = Origs.begin(); |
797 | const auto *IPriv = Privates.begin(); |
798 | const auto *IRed = ReductionOps.begin(); |
799 | for (const Expr *Ref : Shareds) { |
800 | ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); |
801 | std::advance(IOrig, 1); |
802 | std::advance(IPriv, 1); |
803 | std::advance(IRed, 1); |
804 | } |
805 | } |
806 | |
807 | void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { |
808 | assert(SharedAddresses.size() == N && OrigAddresses.size() == N && |
809 | "Number of generated lvalues must be exactly N."); |
810 | LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); |
811 | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); |
812 | SharedAddresses.emplace_back(First, Second); |
813 | if (ClausesData[N].Shared == ClausesData[N].Ref) { |
814 | OrigAddresses.emplace_back(First, Second); |
815 | } else { |
816 | LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); |
817 | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); |
818 | OrigAddresses.emplace_back(First, Second); |
819 | } |
820 | } |
821 | |
822 | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { |
823 | QualType PrivateType = getPrivateType(N); |
824 | bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); |
825 | if (!PrivateType->isVariablyModifiedType()) { |
826 | Sizes.emplace_back( |
827 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), |
828 | nullptr); |
829 | return; |
830 | } |
831 | llvm::Value *Size; |
832 | llvm::Value *SizeInChars; |
833 | auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType(); |
834 | auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); |
835 | if (AsArraySection) { |
836 | Size = CGF.Builder.CreatePtrDiff(ElemType, |
837 | OrigAddresses[N].second.getPointer(CGF), |
838 | OrigAddresses[N].first.getPointer(CGF)); |
839 | Size = CGF.Builder.CreateNUWAdd( |
840 | Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); |
841 | SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); |
842 | } else { |
843 | SizeInChars = |
844 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); |
845 | Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); |
846 | } |
847 | Sizes.emplace_back(SizeInChars, Size); |
848 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
849 | CGF, |
850 | cast<OpaqueValueExpr>( |
851 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
852 | RValue::get(Size)); |
853 | CGF.EmitVariablyModifiedType(PrivateType); |
854 | } |
855 | |
856 | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, |
857 | llvm::Value *Size) { |
858 | QualType PrivateType = getPrivateType(N); |
859 | if (!PrivateType->isVariablyModifiedType()) { |
860 | assert(!Size && !Sizes[N].second && |
861 | "Size should be nullptr for non-variably modified reduction " |
862 | "items."); |
863 | return; |
864 | } |
865 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
866 | CGF, |
867 | cast<OpaqueValueExpr>( |
868 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
869 | RValue::get(Size)); |
870 | CGF.EmitVariablyModifiedType(PrivateType); |
871 | } |
872 | |
873 | void ReductionCodeGen::emitInitialization( |
874 | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr, |
875 | llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { |
876 | assert(SharedAddresses.size() > N && "No variable was generated"); |
877 | const auto *PrivateVD = |
878 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
879 | const OMPDeclareReductionDecl *DRD = |
880 | getReductionInit(ClausesData[N].ReductionOp); |
881 | if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { |
882 | if (DRD && DRD->getInitializer()) |
883 | (void)DefaultInit(CGF); |
884 | emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD); |
885 | } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { |
886 | (void)DefaultInit(CGF); |
887 | QualType SharedType = SharedAddresses[N].first.getType(); |
888 | emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, |
889 | PrivateAddr, SharedAddr, SharedType); |
890 | } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && |
891 | !CGF.isTrivialInitializer(PrivateVD->getInit())) { |
892 | CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, |
893 | PrivateVD->getType().getQualifiers(), |
894 | /*IsInitializer=*/false); |
895 | } |
896 | } |
897 | |
898 | bool ReductionCodeGen::needCleanups(unsigned N) { |
899 | QualType PrivateType = getPrivateType(N); |
900 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
901 | return DTorKind != QualType::DK_none; |
902 | } |
903 | |
904 | void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, |
905 | Address PrivateAddr) { |
906 | QualType PrivateType = getPrivateType(N); |
907 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
908 | if (needCleanups(N)) { |
909 | PrivateAddr = CGF.Builder.CreateElementBitCast( |
910 | PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); |
911 | CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); |
912 | } |
913 | } |
914 | |
915 | static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
916 | LValue BaseLV) { |
917 | BaseTy = BaseTy.getNonReferenceType(); |
918 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && |
919 | !CGF.getContext().hasSameType(BaseTy, ElTy)) { |
920 | if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { |
921 | BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); |
922 | } else { |
923 | LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); |
924 | BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); |
925 | } |
926 | BaseTy = BaseTy->getPointeeType(); |
927 | } |
928 | return CGF.MakeAddrLValue( |
929 | CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), |
930 | CGF.ConvertTypeForMem(ElTy)), |
931 | BaseLV.getType(), BaseLV.getBaseInfo(), |
932 | CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); |
933 | } |
934 | |
935 | static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
936 | Address OriginalBaseAddress, llvm::Value *Addr) { |
937 | Address Tmp = Address::invalid(); |
938 | Address TopTmp = Address::invalid(); |
939 | Address MostTopTmp = Address::invalid(); |
940 | BaseTy = BaseTy.getNonReferenceType(); |
941 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && |
942 | !CGF.getContext().hasSameType(BaseTy, ElTy)) { |
943 | Tmp = CGF.CreateMemTemp(BaseTy); |
944 | if (TopTmp.isValid()) |
945 | CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); |
946 | else |
947 | MostTopTmp = Tmp; |
948 | TopTmp = Tmp; |
949 | BaseTy = BaseTy->getPointeeType(); |
950 | } |
951 | |
952 | if (Tmp.isValid()) { |
953 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
954 | Addr, Tmp.getElementType()); |
955 | CGF.Builder.CreateStore(Addr, Tmp); |
956 | return MostTopTmp; |
957 | } |
958 | |
959 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
960 | Addr, OriginalBaseAddress.getType()); |
961 | return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull); |
962 | } |
963 | |
964 | static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { |
965 | const VarDecl *OrigVD = nullptr; |
966 | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { |
967 | const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); |
968 | while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) |
969 | Base = TempOASE->getBase()->IgnoreParenImpCasts(); |
970 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
971 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
972 | DE = cast<DeclRefExpr>(Base); |
973 | OrigVD = cast<VarDecl>(DE->getDecl()); |
974 | } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { |
975 | const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); |
976 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
977 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
978 | DE = cast<DeclRefExpr>(Base); |
979 | OrigVD = cast<VarDecl>(DE->getDecl()); |
980 | } |
981 | return OrigVD; |
982 | } |
983 | |
984 | Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, |
985 | Address PrivateAddr) { |
986 | const DeclRefExpr *DE; |
987 | if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { |
988 | BaseDecls.emplace_back(OrigVD); |
989 | LValue OriginalBaseLValue = CGF.EmitLValue(DE); |
990 | LValue BaseLValue = |
991 | loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), |
992 | OriginalBaseLValue); |
993 | Address SharedAddr = SharedAddresses[N].first.getAddress(CGF); |
994 | llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( |
995 | SharedAddr.getElementType(), BaseLValue.getPointer(CGF), |
996 | SharedAddr.getPointer()); |
997 | llvm::Value *PrivatePointer = |
998 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
999 | PrivateAddr.getPointer(), SharedAddr.getType()); |
1000 | llvm::Value *Ptr = CGF.Builder.CreateGEP( |
1001 | SharedAddr.getElementType(), PrivatePointer, Adjustment); |
1002 | return castToBase(CGF, OrigVD->getType(), |
1003 | SharedAddresses[N].first.getType(), |
1004 | OriginalBaseLValue.getAddress(CGF), Ptr); |
1005 | } |
1006 | BaseDecls.emplace_back( |
1007 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); |
1008 | return PrivateAddr; |
1009 | } |
1010 | |
1011 | bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { |
1012 | const OMPDeclareReductionDecl *DRD = |
1013 | getReductionInit(ClausesData[N].ReductionOp); |
1014 | return DRD && DRD->getInitializer(); |
1015 | } |
1016 | |
1017 | LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { |
1018 | return CGF.EmitLoadOfPointerLValue( |
1019 | CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1020 | getThreadIDVariable()->getType()->castAs<PointerType>()); |
1021 | } |
1022 | |
1023 | void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) { |
1024 | if (!CGF.HaveInsertPoint()) |
1025 | return; |
1026 | // 1.2.2 OpenMP Language Terminology |
1027 | // Structured block - An executable statement with a single entry at the |
1028 | // top and a single exit at the bottom. |
1029 | // The point of exit cannot be a branch out of the structured block. |
1030 | // longjmp() and throw() must not violate the entry/exit criteria. |
1031 | CGF.EHStack.pushTerminate(); |
1032 | if (S) |
1033 | CGF.incrementProfileCounter(S); |
1034 | CodeGen(CGF); |
1035 | CGF.EHStack.popTerminate(); |
1036 | } |
1037 | |
1038 | LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( |
1039 | CodeGenFunction &CGF) { |
1040 | return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1041 | getThreadIDVariable()->getType(), |
1042 | AlignmentSource::Decl); |
1043 | } |
1044 | |
1045 | static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, |
1046 | QualType FieldTy) { |
1047 | auto *Field = FieldDecl::Create( |
1048 | C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, |
1049 | C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), |
1050 | /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); |
1051 | Field->setAccess(AS_public); |
1052 | DC->addDecl(Field); |
1053 | return Field; |
1054 | } |
1055 | |
1056 | CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) |
1057 | : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() { |
1058 | KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); |
1059 | llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, |
1060 | hasRequiresUnifiedSharedMemory(), |
1061 | CGM.getLangOpts().OpenMPOffloadMandatory); |
1062 | // Initialize Types used in OpenMPIRBuilder from OMPKinds.def |
1063 | OMPBuilder.initialize(); |
1064 | OMPBuilder.setConfig(Config); |
1065 | OffloadEntriesInfoManager.setConfig(Config); |
1066 | loadOffloadInfoMetadata(); |
1067 | } |
1068 | |
1069 | void CGOpenMPRuntime::clear() { |
1070 | InternalVars.clear(); |
1071 | // Clean non-target variable declarations possibly used only in debug info. |
1072 | for (const auto &Data : EmittedNonTargetVariables) { |
1073 | if (!Data.getValue().pointsToAliveValue()) |
1074 | continue; |
1075 | auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); |
1076 | if (!GV) |
1077 | continue; |
1078 | if (!GV->isDeclaration() || GV->getNumUses() > 0) |
1079 | continue; |
1080 | GV->eraseFromParent(); |
1081 | } |
1082 | } |
1083 | |
1084 | std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { |
1085 | return OMPBuilder.createPlatformSpecificName(Parts); |
1086 | } |
1087 | |
1088 | static llvm::Function * |
1089 | emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, |
1090 | const Expr *CombinerInitializer, const VarDecl *In, |
1091 | const VarDecl *Out, bool IsCombiner) { |
1092 | // void .omp_combiner.(Ty *in, Ty *out); |
1093 | ASTContext &C = CGM.getContext(); |
1094 | QualType PtrTy = C.getPointerType(Ty).withRestrict(); |
1095 | FunctionArgList Args; |
1096 | ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), |
1097 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1098 | ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), |
1099 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1100 | Args.push_back(&OmpOutParm); |
1101 | Args.push_back(&OmpInParm); |
1102 | const CGFunctionInfo &FnInfo = |
1103 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
1104 | llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); |
1105 | std::string Name = CGM.getOpenMPRuntime().getName( |
1106 | {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); |
1107 | auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, |
1108 | Name, &CGM.getModule()); |
1109 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); |
1110 | if (CGM.getLangOpts().Optimize) { |
1111 | Fn->removeFnAttr(llvm::Attribute::NoInline); |
1112 | Fn->removeFnAttr(llvm::Attribute::OptimizeNone); |
1113 | Fn->addFnAttr(llvm::Attribute::AlwaysInline); |
1114 | } |
1115 | CodeGenFunction CGF(CGM); |
1116 | // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. |
1117 | // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. |
1118 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), |
1119 | Out->getLocation()); |
1120 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
1121 | Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); |
1122 | Scope.addPrivate( |
1123 | In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) |
1124 | .getAddress(CGF)); |
1125 | Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); |
1126 | Scope.addPrivate( |
1127 | Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) |
1128 | .getAddress(CGF)); |
1129 | (void)Scope.Privatize(); |
1130 | if (!IsCombiner && Out->hasInit() && |
1131 | !CGF.isTrivialInitializer(Out->getInit())) { |
1132 | CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), |
1133 | Out->getType().getQualifiers(), |
1134 | /*IsInitializer=*/true); |
1135 | } |
1136 | if (CombinerInitializer) |
1137 | CGF.EmitIgnoredExpr(CombinerInitializer); |
1138 | Scope.ForceCleanup(); |
1139 | CGF.FinishFunction(); |
1140 | return Fn; |
1141 | } |
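// For illustration: for a declaration like
//   #pragma omp declare reduction(myadd : int : omp_out += omp_in)
// the emitted .omp_combiner. takes the two reduction variables by pointer and,
// in effect, performs '*omp_out += *omp_in;' on them ('myadd' is just an
// example name).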
1142 | |
1143 | void CGOpenMPRuntime::emitUserDefinedReduction( |
1144 | CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { |
1145 | if (UDRMap.count(D) > 0) |
1146 | return; |
1147 | llvm::Function *Combiner = emitCombinerOrInitializer( |
1148 | CGM, D->getType(), D->getCombiner(), |
1149 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), |
1150 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), |
1151 | /*IsCombiner=*/true); |
1152 | llvm::Function *Initializer = nullptr; |
1153 | if (const Expr *Init = D->getInitializer()) { |
1154 | Initializer = emitCombinerOrInitializer( |
1155 | CGM, D->getType(), |
1156 | D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init |
1157 | : nullptr, |
1158 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), |
1159 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), |
1160 | /*IsCombiner=*/false); |
1161 | } |
1162 | UDRMap.try_emplace(D, Combiner, Initializer); |
1163 | if (CGF) { |
1164 | auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); |
1165 | Decls.second.push_back(D); |
1166 | } |
1167 | } |
1168 | |
1169 | std::pair<llvm::Function *, llvm::Function *> |
1170 | CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { |
1171 | auto I = UDRMap.find(D); |
1172 | if (I != UDRMap.end()) |
1173 | return I->second; |
1174 | emitUserDefinedReduction(/*CGF=*/nullptr, D); |
1175 | return UDRMap.lookup(D); |
1176 | } |
1177 | |
1178 | namespace { |
1179 | // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR |
1180 | // Builder if one is present. |
1181 | struct PushAndPopStackRAII { |
1182 | PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, |
1183 | bool HasCancel, llvm::omp::Directive Kind) |
1184 | : OMPBuilder(OMPBuilder) { |
1185 | if (!OMPBuilder) |
1186 | return; |
1187 | |
1188 | // The following callback is the crucial part of Clang's cleanup process. |
1189 | // |
1190 | // NOTE: |
1191 | // Once the OpenMPIRBuilder is used to create parallel regions (and |
1192 | // similar), the cancellation destination (Dest below) is determined via |
1193 | // IP. That means if we have variables to finalize we split the block at IP, |
1194 | // use the new block (=BB) as destination to build a JumpDest (via |
1195 | // getJumpDestInCurrentScope(BB)) which then is fed to |
1196 | // EmitBranchThroughCleanup. Furthermore, there will no longer be a need |
1197 | // to push & pop a FinalizationInfo object. |
1198 | // The FiniCB will still be needed but at the point where the |
1199 | // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. |
1200 | auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { |
1201 | assert(IP.getBlock()->end() == IP.getPoint() && |
1202 | "Clang CG should cause non-terminated block!"); |
1203 | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1204 | CGF.Builder.restoreIP(IP); |
1205 | CodeGenFunction::JumpDest Dest = |
1206 | CGF.getOMPCancelDestination(OMPD_parallel); |
1207 | CGF.EmitBranchThroughCleanup(Dest); |
1208 | }; |
1209 | |
1210 | // TODO: Remove this once we emit parallel regions through the |
1211 | // OpenMPIRBuilder as it can do this setup internally. |
1212 | llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel}); |
1213 | OMPBuilder->pushFinalizationCB(std::move(FI)); |
1214 | } |
1215 | ~PushAndPopStackRAII() { |
1216 | if (OMPBuilder) |
1217 | OMPBuilder->popFinalizationCB(); |
1218 | } |
1219 | llvm::OpenMPIRBuilder *OMPBuilder; |
1220 | }; |
1221 | } // namespace |
1222 | |
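     | /// Shared helper for 'parallel' and 'teams': determine whether the enclosing
     | /// construct may be cancelled, inform the OpenMPIRBuilder (if one is used),
     | /// and emit the outlined function for the captured statement.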
1223 | static llvm::Function *emitParallelOrTeamsOutlinedFunction( |
1224 | CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, |
1225 | const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, |
1226 | const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { |
1227 | assert(ThreadIDVar->getType()->isPointerType() &&
1228 | "thread id variable must be of type kmp_int32 *");
1229 | CodeGenFunction CGF(CGM, true); |
1230 | bool HasCancel = false; |
1231 | if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) |
1232 | HasCancel = OPD->hasCancel(); |
1233 | else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) |
1234 | HasCancel = OPD->hasCancel(); |
1235 | else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) |
1236 | HasCancel = OPSD->hasCancel(); |
1237 | else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) |
1238 | HasCancel = OPFD->hasCancel(); |
1239 | else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) |
1240 | HasCancel = OPFD->hasCancel(); |
1241 | else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) |
1242 | HasCancel = OPFD->hasCancel(); |
1243 | else if (const auto *OPFD = |
1244 | dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) |
1245 | HasCancel = OPFD->hasCancel(); |
1246 | else if (const auto *OPFD = |
1247 | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) |
1248 | HasCancel = OPFD->hasCancel(); |
1249 | |
1250 | // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new |
1251 | // parallel region to make cancellation barriers work properly. |
1252 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1253 | PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind); |
1254 | CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, |
1255 | HasCancel, OutlinedHelperName); |
1256 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1257 | return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); |
1258 | } |
1259 | |
1260 | llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( |
1261 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1262 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1263 | const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); |
1264 | return emitParallelOrTeamsOutlinedFunction( |
1265 | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1266 | } |
1267 | |
1268 | llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( |
1269 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1270 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1271 | const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); |
1272 | return emitParallelOrTeamsOutlinedFunction( |
1273 | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1274 | } |
1275 | |
1276 | llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( |
1277 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1278 | const VarDecl *PartIDVar, const VarDecl *TaskTVar, |
1279 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1280 | bool Tied, unsigned &NumberOfParts) { |
1281 | auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, |
1282 | PrePostActionTy &) { |
1283 | llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); |
1284 | llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); |
1285 | llvm::Value *TaskArgs[] = { |
1286 | UpLoc, ThreadID, |
1287 | CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), |
1288 | TaskTVar->getType()->castAs<PointerType>()) |
1289 | .getPointer(CGF)}; |
1290 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1291 | CGM.getModule(), OMPRTL___kmpc_omp_task), |
1292 | TaskArgs); |
1293 | }; |
1294 | CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, |
1295 | UntiedCodeGen); |
1296 | CodeGen.setAction(Action); |
1297 | assert(!ThreadIDVar->getType()->isPointerType() &&
1298 | "thread id variable must be of type kmp_int32 for tasks");
1299 | const OpenMPDirectiveKind Region = |
1300 | isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop |
1301 | : OMPD_task; |
1302 | const CapturedStmt *CS = D.getCapturedStmt(Region); |
1303 | bool HasCancel = false; |
1304 | if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) |
1305 | HasCancel = TD->hasCancel(); |
1306 | else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) |
1307 | HasCancel = TD->hasCancel(); |
1308 | else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) |
1309 | HasCancel = TD->hasCancel(); |
1310 | else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) |
1311 | HasCancel = TD->hasCancel(); |
1312 | |
1313 | CodeGenFunction CGF(CGM, true); |
1314 | CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, |
1315 | InnermostKind, HasCancel, Action); |
1316 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1317 | llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); |
1318 | if (!Tied) |
1319 | NumberOfParts = Action.getNumberOfParts(); |
1320 | return Res; |
1321 | } |
1322 | |
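     | // Create a placeholder instruction (a dead bitcast named "svcpt") that marks
     | // the point where runtime service calls such as __kmpc_global_thread_num are
     | // inserted for the current function.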
1323 | void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, |
1324 | bool AtCurrentPoint) { |
1325 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1326 | assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1327 | |
1328 | llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); |
1329 | if (AtCurrentPoint) { |
1330 | Elem.second.ServiceInsertPt = new llvm::BitCastInst( |
1331 | Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); |
1332 | } else { |
1333 | Elem.second.ServiceInsertPt = |
1334 | new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); |
1335 | Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); |
1336 | } |
1337 | } |
1338 | |
1339 | void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { |
1340 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1341 | if (Elem.second.ServiceInsertPt) { |
1342 | llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; |
1343 | Elem.second.ServiceInsertPt = nullptr; |
1344 | Ptr->eraseFromParent(); |
1345 | } |
1346 | } |
1347 | |
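     | /// Build the ";file;function;line;column;;" ident string used to identify a
     | /// source location to the OpenMP runtime, e.g. ";t.c;foo;12;3;;" (the file
     | /// and function names here are illustrative).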
1348 | static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, |
1349 | SourceLocation Loc, |
1350 | SmallString<128> &Buffer) { |
1351 | llvm::raw_svector_ostream OS(Buffer); |
1352 | // Build debug location |
1353 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1354 | OS << ";" << PLoc.getFilename() << ";"; |
1355 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1356 | OS << FD->getQualifiedNameAsString(); |
1357 | OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; |
1358 | return OS.str(); |
1359 | } |
1360 | |
1361 | llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, |
1362 | SourceLocation Loc, |
1363 | unsigned Flags, bool EmitLoc) { |
1364 | uint32_t SrcLocStrSize; |
1365 | llvm::Constant *SrcLocStr; |
1366 | if ((!EmitLoc && |
1367 | CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) || |
1368 | Loc.isInvalid()) { |
1369 | SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize); |
1370 | } else { |
1371 | std::string FunctionName; |
1372 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1373 | FunctionName = FD->getQualifiedNameAsString(); |
1374 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1375 | const char *FileName = PLoc.getFilename(); |
1376 | unsigned Line = PLoc.getLine(); |
1377 | unsigned Column = PLoc.getColumn(); |
1378 | SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line, |
1379 | Column, SrcLocStrSize); |
1380 | } |
1381 | unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); |
1382 | return OMPBuilder.getOrCreateIdent( |
1383 | SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags); |
1384 | } |
1385 | |
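     | // Return the kmp_int32 global thread id for the current function: reuse the
     | // gtid argument of an enclosing outlined region when it is safe to do so,
     | // otherwise emit (and cache) a call to __kmpc_global_thread_num at the
     | // service insertion point.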
1386 | llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, |
1387 | SourceLocation Loc) { |
1388 | assert(CGF.CurFn && "No function in current CodeGenFunction.");
1389 | // If the OpenMPIRBuilder is used we need to use it for all thread id calls as |
1390 | // the clang invariants used below might be broken. |
1391 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1392 | SmallString<128> Buffer; |
1393 | OMPBuilder.updateToLocation(CGF.Builder.saveIP()); |
1394 | uint32_t SrcLocStrSize; |
1395 | auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( |
1396 | getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize); |
1397 | return OMPBuilder.getOrCreateThreadID( |
1398 | OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize)); |
1399 | } |
1400 | |
1401 | llvm::Value *ThreadID = nullptr; |
1402 | // Check whether we've already cached a load of the thread id in this |
1403 | // function. |
1404 | auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); |
1405 | if (I != OpenMPLocThreadIDMap.end()) { |
1406 | ThreadID = I->second.ThreadID; |
1407 | if (ThreadID != nullptr) |
1408 | return ThreadID; |
1409 | } |
1410 | // If exceptions are enabled, do not use the parameter to avoid a possible crash.
1411 | if (auto *OMPRegionInfo = |
1412 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { |
1413 | if (OMPRegionInfo->getThreadIDVariable()) { |
1414 | // Check if this is an outlined function with the thread id passed as an argument.
1415 | LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); |
1416 | llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); |
1417 | if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || |
1418 | !CGF.getLangOpts().CXXExceptions || |
1419 | CGF.Builder.GetInsertBlock() == TopBlock || |
1420 | !isa<llvm::Instruction>(LVal.getPointer(CGF)) || |
1421 | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1422 | TopBlock || |
1423 | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1424 | CGF.Builder.GetInsertBlock()) { |
1425 | ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); |
1426 | // If the value was loaded in the entry block, cache it and use it
1427 | // everywhere in the function.
1428 | if (CGF.Builder.GetInsertBlock() == TopBlock) { |
1429 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1430 | Elem.second.ThreadID = ThreadID; |
1431 | } |
1432 | return ThreadID; |
1433 | } |
1434 | } |
1435 | } |
1436 | |
1437 | // This is not an outlined function region - need to call kmp_int32
1438 | // __kmpc_global_thread_num(ident_t *loc).
1439 | // Generate thread id value and cache this value for use across the |
1440 | // function. |
1441 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1442 | if (!Elem.second.ServiceInsertPt) |
1443 | setLocThreadIdInsertPt(CGF); |
1444 | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1445 | CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); |
1446 | llvm::CallInst *Call = CGF.Builder.CreateCall( |
1447 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
1448 | OMPRTL___kmpc_global_thread_num), |
1449 | emitUpdateLocation(CGF, Loc)); |
1450 | Call->setCallingConv(CGF.getRuntimeCC()); |
1451 | Elem.second.ThreadID = Call; |
1452 | return Call; |
1453 | } |
1454 | |
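     | // Per-function cleanup: drop the cached thread id and service insertion
     | // point, and erase any user-defined reduction/mapper entries registered
     | // while emitting CGF.CurFn.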
1455 | void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { |
1456 | assert(CGF.CurFn && "No function in current CodeGenFunction.");
1457 | if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { |
1458 | clearLocThreadIdInsertPt(CGF); |
1459 | OpenMPLocThreadIDMap.erase(CGF.CurFn); |
1460 | } |
1461 | if (FunctionUDRMap.count(CGF.CurFn) > 0) { |
1462 | for (const auto *D : FunctionUDRMap[CGF.CurFn])
1463 | UDRMap.erase(D); |
1464 | FunctionUDRMap.erase(CGF.CurFn); |
1465 | } |
1466 | auto I = FunctionUDMMap.find(CGF.CurFn); |
1467 | if (I != FunctionUDMMap.end()) { |
1468 | for (const auto *D : I->second)
1469 | UDMMap.erase(D); |
1470 | FunctionUDMMap.erase(I); |
1471 | } |
1472 | LastprivateConditionalToTypes.erase(CGF.CurFn); |
1473 | FunctionToUntiedTaskStackMap.erase(CGF.CurFn); |
1474 | } |
1475 | |
1476 | llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { |
1477 | return OMPBuilder.IdentPtr; |
1478 | } |
1479 | |
1480 | llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { |
1481 | if (!Kmpc_MicroTy) { |
1482 | // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) |
1483 | llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), |
1484 | llvm::PointerType::getUnqual(CGM.Int32Ty)}; |
1485 | Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); |
1486 | } |
1487 | return llvm::PointerType::getUnqual(Kmpc_MicroTy); |
1488 | } |
1489 | |
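     | // Return the __kmpc_for_static_init_{4,4u,8,8u} entry (or the
     | // __kmpc_distribute_static_init_* variant for GPU distribute) matching the
     | // induction variable size and signedness.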
1490 | llvm::FunctionCallee |
1491 | CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned, |
1492 | bool IsGPUDistribute) { |
1493 | assert((IVSize == 32 || IVSize == 64) &&
1494 | "IV size is not compatible with the omp runtime");
1495 | StringRef Name; |
1496 | if (IsGPUDistribute) |
1497 | Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4" |
1498 | : "__kmpc_distribute_static_init_4u") |
1499 | : (IVSigned ? "__kmpc_distribute_static_init_8" |
1500 | : "__kmpc_distribute_static_init_8u"); |
1501 | else |
1502 | Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" |
1503 | : "__kmpc_for_static_init_4u") |
1504 | : (IVSigned ? "__kmpc_for_static_init_8" |
1505 | : "__kmpc_for_static_init_8u"); |
1506 | |
1507 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; |
1508 | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1509 | llvm::Type *TypeParams[] = { |
1510 | getIdentTyPointerTy(), // loc |
1511 | CGM.Int32Ty, // tid |
1512 | CGM.Int32Ty, // schedtype |
1513 | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1514 | PtrTy, // p_lower |
1515 | PtrTy, // p_upper |
1516 | PtrTy, // p_stride |
1517 | ITy, // incr |
1518 | ITy // chunk |
1519 | }; |
1520 | auto *FnTy = |
1521 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1522 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1523 | } |
1524 | |
1525 | llvm::FunctionCallee |
1526 | CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { |
1527 | assert((IVSize == 32 || IVSize == 64) &&
1528 | "IV size is not compatible with the omp runtime");
1529 | StringRef Name = |
1530 | IVSize == 32 |
1531 | ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") |
1532 | : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); |
1533 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; |
1534 | llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc |
1535 | CGM.Int32Ty, // tid |
1536 | CGM.Int32Ty, // schedtype |
1537 | ITy, // lower |
1538 | ITy, // upper |
1539 | ITy, // stride |
1540 | ITy // chunk |
1541 | }; |
1542 | auto *FnTy = |
1543 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1544 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1545 | } |
1546 | |
1547 | llvm::FunctionCallee |
1548 | CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { |
1549 | assert((IVSize == 32 || IVSize == 64) &&
1550 | "IV size is not compatible with the omp runtime");
1551 | StringRef Name = |
1552 | IVSize == 32 |
1553 | ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") |
1554 | : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); |
1555 | llvm::Type *TypeParams[] = { |
1556 | getIdentTyPointerTy(), // loc |
1557 | CGM.Int32Ty, // tid |
1558 | }; |
1559 | auto *FnTy = |
1560 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); |
1561 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1562 | } |
1563 | |
1564 | llvm::FunctionCallee |
1565 | CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { |
1566 | assert((IVSize == 32 || IVSize == 64) &&
1567 | "IV size is not compatible with the omp runtime");
1568 | StringRef Name = |
1569 | IVSize == 32 |
1570 | ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") |
1571 | : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); |
1572 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; |
1573 | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1574 | llvm::Type *TypeParams[] = { |
1575 | getIdentTyPointerTy(), // loc |
1576 | CGM.Int32Ty, // tid |
1577 | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1578 | PtrTy, // p_lower |
1579 | PtrTy, // p_upper |
1580 | PtrTy // p_stride |
1581 | }; |
1582 | auto *FnTy = |
1583 | llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); |
1584 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1585 | } |
1586 | |
1587 | /// Obtain information that uniquely identifies a target entry. This |
1588 | /// consists of the file and device IDs as well as the line number associated with
1589 | /// the relevant entry source location. |
1590 | static llvm::TargetRegionEntryInfo |
1591 | getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, |
1592 | StringRef ParentName = "") { |
1593 | SourceManager &SM = C.getSourceManager(); |
1594 | |
1595 | // The loc should always be valid and have a file ID (the user cannot use
1596 | // #pragma directives in macros).
1597 | |
1598 | assert(Loc.isValid() && "Source location is expected to be always valid.");
1599 | |
1600 | PresumedLoc PLoc = SM.getPresumedLoc(Loc); |
1601 | assert(PLoc.isValid() && "Source location is expected to be always valid.");
1602 | |
1603 | llvm::sys::fs::UniqueID ID; |
1604 | if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) { |
1605 | PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false); |
1606 | assert(PLoc.isValid() && "Source location is expected to be always valid.");
1607 | if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) |
1608 | SM.getDiagnostics().Report(diag::err_cannot_open_file) |
1609 | << PLoc.getFilename() << EC.message(); |
1610 | } |
1611 | |
1612 | return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(), |
1613 | PLoc.getLine()); |
1614 | } |
1615 | |
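     | // For 'declare target link' variables (or to/enter entries when unified
     | // shared memory is required), return the address of the generated
     | // "..._decl_tgt_ref_ptr" reference pointer rather than the variable itself.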
1616 | Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { |
1617 | if (CGM.getLangOpts().OpenMPSimd) |
1618 | return Address::invalid(); |
1619 | std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1620 | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1621 | if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || |
1622 | ((*Res == OMPDeclareTargetDeclAttr::MT_To || |
1623 | *Res == OMPDeclareTargetDeclAttr::MT_Enter) && |
1624 | HasRequiresUnifiedSharedMemory))) { |
1625 | SmallString<64> PtrName; |
1626 | { |
1627 | llvm::raw_svector_ostream OS(PtrName); |
1628 | OS << CGM.getMangledName(GlobalDecl(VD)); |
1629 | if (!VD->isExternallyVisible()) { |
1630 | auto EntryInfo = getTargetEntryUniqueInfo( |
1631 | CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc()); |
1632 | OS << llvm::format("_%x", EntryInfo.FileID); |
1633 | } |
1634 | OS << "_decl_tgt_ref_ptr"; |
1635 | } |
1636 | llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); |
1637 | QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); |
1638 | llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy); |
1639 | if (!Ptr) { |
1640 | Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName); |
1641 | |
1642 | auto *GV = cast<llvm::GlobalVariable>(Ptr); |
1643 | GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); |
1644 | |
1645 | if (!CGM.getLangOpts().OpenMPIsDevice) |
1646 | GV->setInitializer(CGM.GetAddrOfGlobal(VD)); |
1647 | registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); |
1648 | } |
1649 | return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD)); |
1650 | } |
1651 | return Address::invalid(); |
1652 | } |
1653 | |
1654 | llvm::Constant * |
1655 | CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { |
1656 | assert(!CGM.getLangOpts().OpenMPUseTLS ||
1657 | !CGM.getContext().getTargetInfo().isTLSSupported());
1658 | // Lookup the entry, lazily creating it if necessary. |
1659 | std::string Suffix = getName({"cache", ""}); |
1660 | return OMPBuilder.getOrCreateInternalVariable( |
1661 | CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str()); |
1662 | } |
1663 | |
1664 | Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, |
1665 | const VarDecl *VD, |
1666 | Address VDAddr, |
1667 | SourceLocation Loc) { |
1668 | if (CGM.getLangOpts().OpenMPUseTLS && |
1669 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1670 | return VDAddr; |
1671 | |
1672 | llvm::Type *VarTy = VDAddr.getElementType(); |
1673 | llvm::Value *Args[] = { |
1674 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
1675 | CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy), |
1676 | CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), |
1677 | getOrCreateThreadPrivateCache(VD)}; |
1678 | return Address( |
1679 | CGF.EmitRuntimeCall( |
1680 | OMPBuilder.getOrCreateRuntimeFunction( |
1681 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
1682 | Args), |
1683 | CGF.Int8Ty, VDAddr.getAlignment()); |
1684 | } |
1685 | |
1686 | void CGOpenMPRuntime::emitThreadPrivateVarInit( |
1687 | CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, |
1688 | llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { |
1689 | // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime |
1690 | // library. |
1691 | llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); |
1692 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1693 | CGM.getModule(), OMPRTL___kmpc_global_thread_num), |
1694 | OMPLoc); |
1695 | // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) |
1696 | // to register constructor/destructor for variable. |
1697 | llvm::Value *Args[] = { |
1698 | OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), |
1699 | Ctor, CopyCtor, Dtor}; |
1700 | CGF.EmitRuntimeCall( |
1701 | OMPBuilder.getOrCreateRuntimeFunction( |
1702 | CGM.getModule(), OMPRTL___kmpc_threadprivate_register), |
1703 | Args); |
1704 | } |
1705 | |
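     | // For a C++ threadprivate variable this emits, roughly (exact symbol names
     | // depend on getName, and the registration is emitted inline when a CGF is
     | // supplied):
     | //   void *__kmpc_global_ctor_(void *p);  // re-runs the initializer in p
     | //   void __kmpc_global_dtor_(void *p);   // destroys the object in p
     | //   void __omp_threadprivate_init_() {
     | //     __kmpc_global_thread_num(&loc);
     | //     __kmpc_threadprivate_register(&loc, &var, ctor, /*cctor=*/NULL, dtor);
     | //   }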
1706 | llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( |
1707 | const VarDecl *VD, Address VDAddr, SourceLocation Loc, |
1708 | bool PerformInit, CodeGenFunction *CGF) { |
1709 | if (CGM.getLangOpts().OpenMPUseTLS && |
1710 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1711 | return nullptr; |
1712 | |
1713 | VD = VD->getDefinition(CGM.getContext()); |
1714 | if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { |
1715 | QualType ASTTy = VD->getType(); |
1716 | |
1717 | llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; |
1718 | const Expr *Init = VD->getAnyInitializer(); |
1719 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1720 | // Generate function that re-emits the declaration's initializer into the |
1721 | // threadprivate copy of the variable VD |
1722 | CodeGenFunction CtorCGF(CGM); |
1723 | FunctionArgList Args; |
1724 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1725 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1726 | ImplicitParamDecl::Other); |
1727 | Args.push_back(&Dst); |
1728 | |
1729 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1730 | CGM.getContext().VoidPtrTy, Args); |
1731 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1732 | std::string Name = getName({"__kmpc_global_ctor_", ""}); |
1733 | llvm::Function *Fn = |
1734 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1735 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, |
1736 | Args, Loc, Loc); |
1737 | llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( |
1738 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1739 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1740 | Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment()); |
1741 | Arg = CtorCGF.Builder.CreateElementBitCast( |
1742 | Arg, CtorCGF.ConvertTypeForMem(ASTTy)); |
1743 | CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), |
1744 | /*IsInitializer=*/true); |
1745 | ArgVal = CtorCGF.EmitLoadOfScalar( |
1746 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1747 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1748 | CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); |
1749 | CtorCGF.FinishFunction(); |
1750 | Ctor = Fn; |
1751 | } |
1752 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1753 | // Generate function that emits destructor call for the threadprivate copy |
1754 | // of the variable VD |
1755 | CodeGenFunction DtorCGF(CGM); |
1756 | FunctionArgList Args; |
1757 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1758 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1759 | ImplicitParamDecl::Other); |
1760 | Args.push_back(&Dst); |
1761 | |
1762 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1763 | CGM.getContext().VoidTy, Args); |
1764 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1765 | std::string Name = getName({"__kmpc_global_dtor_", ""}); |
1766 | llvm::Function *Fn = |
1767 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1768 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1769 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, |
1770 | Loc, Loc); |
1771 | // Create a scope with an artificial location for the body of this function. |
1772 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1773 | llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( |
1774 | DtorCGF.GetAddrOfLocalVar(&Dst), |
1775 | /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1776 | DtorCGF.emitDestroy( |
1777 | Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy, |
1778 | DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1779 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1780 | DtorCGF.FinishFunction(); |
1781 | Dtor = Fn; |
1782 | } |
1783 | // Do not emit init function if it is not required. |
1784 | if (!Ctor && !Dtor) |
1785 | return nullptr; |
1786 | |
1787 | llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; |
1788 | auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, |
1789 | /*isVarArg=*/false) |
1790 | ->getPointerTo(); |
1791 | // Copying constructor for the threadprivate variable. |
1792 | // Must be NULL - reserved by the runtime, but currently it requires that
1793 | // this parameter is always NULL. Otherwise it fires an assertion.
1794 | CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); |
1795 | if (Ctor == nullptr) { |
1796 | auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, |
1797 | /*isVarArg=*/false) |
1798 | ->getPointerTo(); |
1799 | Ctor = llvm::Constant::getNullValue(CtorTy); |
1800 | } |
1801 | if (Dtor == nullptr) { |
1802 | auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, |
1803 | /*isVarArg=*/false) |
1804 | ->getPointerTo(); |
1805 | Dtor = llvm::Constant::getNullValue(DtorTy); |
1806 | } |
1807 | if (!CGF) { |
1808 | auto *InitFunctionTy = |
1809 | llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); |
1810 | std::string Name = getName({"__omp_threadprivate_init_", ""}); |
1811 | llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( |
1812 | InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); |
1813 | CodeGenFunction InitCGF(CGM); |
1814 | FunctionArgList ArgList; |
1815 | InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, |
1816 | CGM.getTypes().arrangeNullaryFunction(), ArgList, |
1817 | Loc, Loc); |
1818 | emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1819 | InitCGF.FinishFunction(); |
1820 | return InitFunction; |
1821 | } |
1822 | emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1823 | } |
1824 | return nullptr; |
1825 | } |
1826 | |
1827 | bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, |
1828 | llvm::GlobalVariable *Addr, |
1829 | bool PerformInit) { |
1830 | if (CGM.getLangOpts().OMPTargetTriples.empty() && |
1831 | !CGM.getLangOpts().OpenMPIsDevice) |
1832 | return false; |
1833 | std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1834 | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1835 | if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || |
1836 | ((*Res == OMPDeclareTargetDeclAttr::MT_To || |
1837 | *Res == OMPDeclareTargetDeclAttr::MT_Enter) && |
1838 | HasRequiresUnifiedSharedMemory)) |
1839 | return CGM.getLangOpts().OpenMPIsDevice; |
1840 | VD = VD->getDefinition(CGM.getContext()); |
1841 | assert(VD && "Unknown VarDecl");
1842 | |
1843 | if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) |
1844 | return CGM.getLangOpts().OpenMPIsDevice; |
1845 | |
1846 | QualType ASTTy = VD->getType(); |
1847 | SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); |
1848 | |
1849 | // Produce the unique prefix to identify the new target regions. We use |
1850 | // the source location of the variable declaration which we know to not |
1851 | // conflict with any target region. |
1852 | auto EntryInfo = |
1853 | getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName()); |
1854 | SmallString<128> Buffer, Out; |
1855 | OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); |
1856 | |
1857 | const Expr *Init = VD->getAnyInitializer(); |
1858 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1859 | llvm::Constant *Ctor; |
1860 | llvm::Constant *ID; |
1861 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1862 | // Generate function that re-emits the declaration's initializer into |
1863 | // the threadprivate copy of the variable VD |
1864 | CodeGenFunction CtorCGF(CGM); |
1865 | |
1866 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1867 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1868 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1869 | FTy, Twine(Buffer, "_ctor"), FI, Loc, false, |
1870 | llvm::GlobalValue::WeakODRLinkage); |
1871 | Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); |
1872 | if (CGM.getTriple().isAMDGCN()) |
1873 | Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); |
1874 | auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); |
1875 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1876 | FunctionArgList(), Loc, Loc); |
1877 | auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); |
1878 | llvm::Constant *AddrInAS0 = Addr; |
1879 | if (Addr->getAddressSpace() != 0) |
1880 | AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( |
1881 | Addr, llvm::PointerType::getWithSamePointeeType( |
1882 | cast<llvm::PointerType>(Addr->getType()), 0)); |
1883 | CtorCGF.EmitAnyExprToMem(Init, |
1884 | Address(AddrInAS0, Addr->getValueType(), |
1885 | CGM.getContext().getDeclAlign(VD)), |
1886 | Init->getType().getQualifiers(), |
1887 | /*IsInitializer=*/true); |
1888 | CtorCGF.FinishFunction(); |
1889 | Ctor = Fn; |
1890 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1891 | } else { |
1892 | Ctor = new llvm::GlobalVariable( |
1893 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1894 | llvm::GlobalValue::PrivateLinkage, |
1895 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); |
1896 | ID = Ctor; |
1897 | } |
1898 | |
1899 | // Register the information for the entry associated with the constructor. |
1900 | Out.clear(); |
1901 | auto CtorEntryInfo = EntryInfo; |
1902 | CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out); |
1903 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1904 | CtorEntryInfo, Ctor, ID, |
1905 | llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); |
1906 | } |
1907 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1908 | llvm::Constant *Dtor; |
1909 | llvm::Constant *ID; |
1910 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1911 | // Generate function that emits destructor call for the threadprivate |
1912 | // copy of the variable VD |
1913 | CodeGenFunction DtorCGF(CGM); |
1914 | |
1915 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1916 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1917 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1918 | FTy, Twine(Buffer, "_dtor"), FI, Loc, false, |
1919 | llvm::GlobalValue::WeakODRLinkage); |
1920 | Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility); |
1921 | if (CGM.getTriple().isAMDGCN()) |
1922 | Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL); |
1923 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1924 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1925 | FunctionArgList(), Loc, Loc); |
1926 | // Create a scope with an artificial location for the body of this |
1927 | // function. |
1928 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1929 | llvm::Constant *AddrInAS0 = Addr; |
1930 | if (Addr->getAddressSpace() != 0) |
1931 | AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast( |
1932 | Addr, llvm::PointerType::getWithSamePointeeType( |
1933 | cast<llvm::PointerType>(Addr->getType()), 0)); |
1934 | DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(), |
1935 | CGM.getContext().getDeclAlign(VD)), |
1936 | ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1937 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1938 | DtorCGF.FinishFunction(); |
1939 | Dtor = Fn; |
1940 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1941 | } else { |
1942 | Dtor = new llvm::GlobalVariable( |
1943 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1944 | llvm::GlobalValue::PrivateLinkage, |
1945 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); |
1946 | ID = Dtor; |
1947 | } |
1948 | // Register the information for the entry associated with the destructor. |
1949 | Out.clear(); |
1950 | auto DtorEntryInfo = EntryInfo; |
1951 | DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); |
1952 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1953 | DtorEntryInfo, Dtor, ID, |
1954 | llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); |
1955 | } |
1956 | return CGM.getLangOpts().OpenMPIsDevice; |
1957 | } |
1958 | |
1959 | Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, |
1960 | QualType VarType, |
1961 | StringRef Name) { |
1962 | std::string Suffix = getName({"artificial", ""}); |
1963 | llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); |
1964 | llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable( |
1965 | VarLVType, Twine(Name).concat(Suffix).str()); |
1966 | if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && |
1967 | CGM.getTarget().isTLSSupported()) { |
1968 | GAddr->setThreadLocal(/*Val=*/true); |
1969 | return Address(GAddr, GAddr->getValueType(), |
1970 | CGM.getContext().getTypeAlignInChars(VarType)); |
1971 | } |
1972 | std::string CacheSuffix = getName({"cache", ""}); |
1973 | llvm::Value *Args[] = { |
1974 | emitUpdateLocation(CGF, SourceLocation()), |
1975 | getThreadID(CGF, SourceLocation()), |
1976 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), |
1977 | CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, |
1978 | /*isSigned=*/false), |
1979 | OMPBuilder.getOrCreateInternalVariable( |
1980 | CGM.VoidPtrPtrTy, |
1981 | Twine(Name).concat(Suffix).concat(CacheSuffix).str())}; |
1982 | return Address( |
1983 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1984 | CGF.EmitRuntimeCall( |
1985 | OMPBuilder.getOrCreateRuntimeFunction( |
1986 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
1987 | Args), |
1988 | VarLVType->getPointerTo(/*AddrSpace=*/0)), |
1989 | VarLVType, CGM.getContext().getTypeAlignInChars(VarType)); |
1990 | } |
1991 | |
1992 | void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, |
1993 | const RegionCodeGenTy &ThenGen, |
1994 | const RegionCodeGenTy &ElseGen) { |
1995 | CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); |
1996 | |
1997 | // If the condition constant folds and can be elided, try to avoid emitting |
1998 | // the condition and the dead arm of the if/else. |
1999 | bool CondConstant; |
2000 | if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { |
2001 | if (CondConstant) |
2002 | ThenGen(CGF); |
2003 | else |
2004 | ElseGen(CGF); |
2005 | return; |
2006 | } |
2007 | |
2008 | // Otherwise, the condition did not fold, or we couldn't elide it. Just |
2009 | // emit the conditional branch. |
2010 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2011 | llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); |
2012 | llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); |
2013 | CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); |
2014 | |
2015 | // Emit the 'then' code. |
2016 | CGF.EmitBlock(ThenBlock); |
2017 | ThenGen(CGF); |
2018 | CGF.EmitBranch(ContBlock); |
2019 | // Emit the 'else' code if present. |
2020 | // There is no need to emit line number for unconditional branch. |
2021 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2022 | CGF.EmitBlock(ElseBlock); |
2023 | ElseGen(CGF); |
2024 | // There is no need to emit line number for unconditional branch. |
2025 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2026 | CGF.EmitBranch(ContBlock); |
2027 | // Emit the continuation block for code after the if. |
2028 | CGF.EmitBlock(ContBlock, /*IsFinished=*/true); |
2029 | } |
2030 | |
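     | // Emit a parallel region: the 'then' arm builds
     | //   __kmpc_fork_call(loc, n, microtask, var1, ..., varn);
     | // while the 'else' arm (taken when the if clause is false) serializes the
     | // region via __kmpc_serialized_parallel, a direct call to the outlined
     | // function, and __kmpc_end_serialized_parallel.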
2031 | void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, |
2032 | llvm::Function *OutlinedFn, |
2033 | ArrayRef<llvm::Value *> CapturedVars, |
2034 | const Expr *IfCond, |
2035 | llvm::Value *NumThreads) { |
2036 | if (!CGF.HaveInsertPoint()) |
2037 | return; |
2038 | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2039 | auto &M = CGM.getModule(); |
2040 | auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, |
2041 | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2042 | // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); |
2043 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2044 | llvm::Value *Args[] = { |
2045 | RTLoc, |
2046 | CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars |
2047 | CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; |
2048 | llvm::SmallVector<llvm::Value *, 16> RealArgs; |
2049 | RealArgs.append(std::begin(Args), std::end(Args)); |
2050 | RealArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2051 | |
2052 | llvm::FunctionCallee RTLFn = |
2053 | OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); |
2054 | CGF.EmitRuntimeCall(RTLFn, RealArgs); |
2055 | }; |
2056 | auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, |
2057 | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2058 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2059 | llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); |
2060 | // Build calls: |
2061 | // __kmpc_serialized_parallel(&Loc, GTid); |
2062 | llvm::Value *Args[] = {RTLoc, ThreadID}; |
2063 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2064 | M, OMPRTL___kmpc_serialized_parallel), |
2065 | Args); |
2066 | |
2067 | // OutlinedFn(&gtid, &zero_bound, CapturedStruct);
2068 | Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); |
2069 | Address ZeroAddrBound = |
2070 | CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, |
2071 | /*Name=*/".bound.zero.addr"); |
2072 | CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound); |
2073 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2074 | // ThreadId for serialized parallels is 0. |
2075 | OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); |
2076 | OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); |
2077 | OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2078 | |
2079 | // Ensure we do not inline the function. This is trivially true for the ones |
2080 | // passed to __kmpc_fork_call but the ones called in serialized regions |
2081 | // could be inlined. This is not perfect, but it is closer to the invariant
2082 | // we want, namely, every data environment starts with a new function. |
2083 | // TODO: We should pass the if condition to the runtime function and do the |
2084 | // handling there. Much cleaner code. |
2085 | OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline); |
2086 | OutlinedFn->addFnAttr(llvm::Attribute::NoInline); |
2087 | RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); |
2088 | |
2089 | // __kmpc_end_serialized_parallel(&Loc, GTid); |
2090 | llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; |
2091 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2092 | M, OMPRTL___kmpc_end_serialized_parallel), |
2093 | EndArgs); |
2094 | }; |
2095 | if (IfCond) { |
2096 | emitIfClause(CGF, IfCond, ThenGen, ElseGen); |
2097 | } else { |
2098 | RegionCodeGenTy ThenRCG(ThenGen); |
2099 | ThenRCG(CGF); |
2100 | } |
2101 | } |
2102 | |
2103 | // If we're inside an (outlined) parallel region, use the region info's
2104 | // thread-ID variable (it is passed to the outlined function as its first
2105 | // argument, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
2106 | // region but in a regular serial code region, get the thread ID by calling
2107 | // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
2108 | // temporary, and return the address of that temp.
2109 | Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, |
2110 | SourceLocation Loc) { |
2111 | if (auto *OMPRegionInfo = |
2112 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2113 | if (OMPRegionInfo->getThreadIDVariable()) |
2114 | return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); |
2115 | |
2116 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2117 | QualType Int32Ty = |
2118 | CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); |
2119 | Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); |
2120 | CGF.EmitStoreOfScalar(ThreadID, |
2121 | CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); |
2122 | |
2123 | return ThreadIDTemp; |
2124 | } |
2125 | |
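     | // Return the per-name internal lock variable (of KmpCriticalNameTy) that is
     | // passed to __kmpc_critical / __kmpc_end_critical for this critical region.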
2126 | llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { |
2127 | std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); |
2128 | std::string Name = getName({Prefix, "var"}); |
2129 | return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name); |
2130 | } |
2131 | |
2132 | namespace { |
2133 | /// Common pre(post)-action for different OpenMP constructs. |
2134 | class CommonActionTy final : public PrePostActionTy { |
2135 | llvm::FunctionCallee EnterCallee; |
2136 | ArrayRef<llvm::Value *> EnterArgs; |
2137 | llvm::FunctionCallee ExitCallee; |
2138 | ArrayRef<llvm::Value *> ExitArgs; |
2139 | bool Conditional; |
2140 | llvm::BasicBlock *ContBlock = nullptr; |
2141 | |
2142 | public: |
2143 | CommonActionTy(llvm::FunctionCallee EnterCallee, |
2144 | ArrayRef<llvm::Value *> EnterArgs, |
2145 | llvm::FunctionCallee ExitCallee, |
2146 | ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) |
2147 | : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), |
2148 | ExitArgs(ExitArgs), Conditional(Conditional) {} |
2149 | void Enter(CodeGenFunction &CGF) override { |
2150 | llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); |
2151 | if (Conditional) { |
2152 | llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); |
2153 | auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2154 | ContBlock = CGF.createBasicBlock("omp_if.end"); |
2155 | // Generate the branch (If-stmt) |
2156 | CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); |
2157 | CGF.EmitBlock(ThenBlock); |
2158 | } |
2159 | } |
2160 | void Done(CodeGenFunction &CGF) { |
2161 | // Emit the rest of blocks/branches |
2162 | CGF.EmitBranch(ContBlock); |
2163 | CGF.EmitBlock(ContBlock, true); |
2164 | } |
2165 | void Exit(CodeGenFunction &CGF) override { |
2166 | CGF.EmitRuntimeCall(ExitCallee, ExitArgs); |
2167 | } |
2168 | }; |
2169 | } // anonymous namespace |
2170 | |
2171 | void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, |
2172 | StringRef CriticalName, |
2173 | const RegionCodeGenTy &CriticalOpGen, |
2174 | SourceLocation Loc, const Expr *Hint) { |
2175 | // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); |
2176 | // CriticalOpGen(); |
2177 | // __kmpc_end_critical(ident_t *, gtid, Lock); |
2178 | // Prepare arguments and build a call to __kmpc_critical |
2179 | if (!CGF.HaveInsertPoint()) |
2180 | return; |
2181 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2182 | getCriticalRegionLock(CriticalName)}; |
2183 | llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), |
2184 | std::end(Args)); |
2185 | if (Hint) { |
2186 | EnterArgs.push_back(CGF.Builder.CreateIntCast( |
2187 | CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); |
2188 | } |
2189 | CommonActionTy Action( |
2190 | OMPBuilder.getOrCreateRuntimeFunction( |
2191 | CGM.getModule(), |
2192 | Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), |
2193 | EnterArgs, |
2194 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2195 | OMPRTL___kmpc_end_critical), |
2196 | Args); |
2197 | CriticalOpGen.setAction(Action); |
2198 | emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); |
2199 | } |
2200 | |
2201 | void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, |
2202 | const RegionCodeGenTy &MasterOpGen, |
2203 | SourceLocation Loc) { |
2204 | if (!CGF.HaveInsertPoint()) |
2205 | return; |
2206 | // if(__kmpc_master(ident_t *, gtid)) { |
2207 | // MasterOpGen(); |
2208 | // __kmpc_end_master(ident_t *, gtid); |
2209 | // } |
2210 | // Prepare arguments and build a call to __kmpc_master |
2211 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2212 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2213 | CGM.getModule(), OMPRTL___kmpc_master), |
2214 | Args, |
2215 | OMPBuilder.getOrCreateRuntimeFunction( |
2216 | CGM.getModule(), OMPRTL___kmpc_end_master), |
2217 | Args, |
2218 | /*Conditional=*/true); |
2219 | MasterOpGen.setAction(Action); |
2220 | emitInlinedDirective(CGF, OMPD_master, MasterOpGen); |
2221 | Action.Done(CGF); |
2222 | } |
2223 | |
2224 | void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF, |
2225 | const RegionCodeGenTy &MaskedOpGen, |
2226 | SourceLocation Loc, const Expr *Filter) { |
2227 | if (!CGF.HaveInsertPoint()) |
2228 | return; |
2229 | // if(__kmpc_masked(ident_t *, gtid, filter)) { |
2230 | // MaskedOpGen(); |
2231 | // __kmpc_end_masked(ident_t *, gtid);
2232 | // } |
2233 | // Prepare arguments and build a call to __kmpc_masked |
2234 | llvm::Value *FilterVal = Filter |
2235 | ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty) |
2236 | : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0); |
2237 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2238 | FilterVal}; |
2239 | llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc), |
2240 | getThreadID(CGF, Loc)}; |
2241 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2242 | CGM.getModule(), OMPRTL___kmpc_masked), |
2243 | Args, |
2244 | OMPBuilder.getOrCreateRuntimeFunction( |
2245 | CGM.getModule(), OMPRTL___kmpc_end_masked), |
2246 | ArgsEnd, |
2247 | /*Conditional=*/true); |
2248 | MaskedOpGen.setAction(Action); |
2249 | emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen); |
2250 | Action.Done(CGF); |
2251 | } |
2252 | |
2253 | void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, |
2254 | SourceLocation Loc) { |
2255 | if (!CGF.HaveInsertPoint()) |
2256 | return; |
2257 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2258 | OMPBuilder.createTaskyield(CGF.Builder); |
2259 | } else { |
2260 | // Build call __kmpc_omp_taskyield(loc, thread_id, 0); |
2261 | llvm::Value *Args[] = { |
2262 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2263 | llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; |
2264 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2265 | CGM.getModule(), OMPRTL___kmpc_omp_taskyield), |
2266 | Args); |
2267 | } |
2268 | |
2269 | if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2270 | Region->emitUntiedSwitch(CGF); |
2271 | } |
2272 | |
2273 | void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, |
2274 | const RegionCodeGenTy &TaskgroupOpGen, |
2275 | SourceLocation Loc) { |
2276 | if (!CGF.HaveInsertPoint()) |
2277 | return; |
2278 | // __kmpc_taskgroup(ident_t *, gtid); |
2279 | // TaskgroupOpGen(); |
2280 | // __kmpc_end_taskgroup(ident_t *, gtid); |
2281 | // Prepare arguments and build a call to __kmpc_taskgroup |
2282 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2283 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2284 | CGM.getModule(), OMPRTL___kmpc_taskgroup), |
2285 | Args, |
2286 | OMPBuilder.getOrCreateRuntimeFunction( |
2287 | CGM.getModule(), OMPRTL___kmpc_end_taskgroup), |
2288 | Args); |
2289 | TaskgroupOpGen.setAction(Action); |
2290 | emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); |
2291 | } |
2292 | |
2293 | /// Given an array of pointers to variables, project the address of a |
2294 | /// given variable. |
2295 | static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, |
2296 | unsigned Index, const VarDecl *Var) { |
2297 | // Pull out the pointer to the variable. |
2298 | Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); |
2299 | llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); |
2300 | |
2301 | llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType()); |
2302 | return Address( |
2303 | CGF.Builder.CreateBitCast( |
2304 | Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())), |
2305 | ElemTy, CGF.getContext().getDeclAlign(Var)); |
2306 | } |
2307 | |
2308 | static llvm::Value *emitCopyprivateCopyFunction( |
2309 | CodeGenModule &CGM, llvm::Type *ArgsElemType, |
2310 | ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, |
2311 | ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, |
2312 | SourceLocation Loc) { |
2313 | ASTContext &C = CGM.getContext(); |
2314 | // void copy_func(void *LHSArg, void *RHSArg); |
2315 | FunctionArgList Args; |
2316 | ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2317 | ImplicitParamDecl::Other); |
2318 | ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2319 | ImplicitParamDecl::Other); |
2320 | Args.push_back(&LHSArg); |
2321 | Args.push_back(&RHSArg); |
2322 | const auto &CGFI = |
2323 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
2324 | std::string Name = |
2325 | CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); |
2326 | auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), |
2327 | llvm::GlobalValue::InternalLinkage, Name, |
2328 | &CGM.getModule()); |
2329 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
2330 | Fn->setDoesNotRecurse(); |
2331 | CodeGenFunction CGF(CGM); |
2332 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
2333 | // Dest = (void*[n])(LHSArg); |
2334 | // Src = (void*[n])(RHSArg); |
2335 | Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2336 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), |
2337 | ArgsElemType->getPointerTo()), |
2338 | ArgsElemType, CGF.getPointerAlign()); |
2339 | Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2340 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), |
2341 | ArgsElemType->getPointerTo()), |
2342 | ArgsElemType, CGF.getPointerAlign()); |
2343 | // *(Type0*)Dst[0] = *(Type0*)Src[0]; |
2344 | // *(Type1*)Dst[1] = *(Type1*)Src[1]; |
2345 | // ... |
2346 | // *(Typen*)Dst[n] = *(Typen*)Src[n]; |
2347 | for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { |
2348 | const auto *DestVar = |
2349 | cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); |
2350 | Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); |
2351 | |
2352 | const auto *SrcVar = |
2353 | cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); |
2354 | Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); |
2355 | |
2356 | const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); |
2357 | QualType Type = VD->getType(); |
2358 | CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); |
2359 | } |
2360 | CGF.FinishFunction(); |
2361 | return Fn; |
2362 | } |
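
// Sketch of the helper generated above for two copyprivate variables, an int
// 'a' and a double 'b' (names and types are illustrative only):
//   void .omp.copyprivate.copy_func(void *LHSArg, void *RHSArg) {
//     void **Dst = (void **)LHSArg;
//     void **Src = (void **)RHSArg;
//     *(int *)Dst[0]    = *(int *)Src[0];
//     *(double *)Dst[1] = *(double *)Src[1];
//   }
// Each element copy goes through EmitOMPCopy, so class types use the copy
// assignment recorded in AssignmentOps rather than a raw memcpy.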
2363 | |
2364 | void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, |
2365 | const RegionCodeGenTy &SingleOpGen, |
2366 | SourceLocation Loc, |
2367 | ArrayRef<const Expr *> CopyprivateVars, |
2368 | ArrayRef<const Expr *> SrcExprs, |
2369 | ArrayRef<const Expr *> DstExprs, |
2370 | ArrayRef<const Expr *> AssignmentOps) { |
2371 | if (!CGF.HaveInsertPoint()) |
2372 | return; |
2373 | assert(CopyprivateVars.size() == SrcExprs.size() &&
2374 |        CopyprivateVars.size() == DstExprs.size() &&
2375 |        CopyprivateVars.size() == AssignmentOps.size());
2376 | ASTContext &C = CGM.getContext(); |
2377 | // int32 did_it = 0; |
2378 | // if(__kmpc_single(ident_t *, gtid)) { |
2379 | // SingleOpGen(); |
2380 | // __kmpc_end_single(ident_t *, gtid); |
2381 | // did_it = 1; |
2382 | // } |
2383 | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2384 | // <copy_func>, did_it); |
2385 | |
2386 | Address DidIt = Address::invalid(); |
2387 | if (!CopyprivateVars.empty()) { |
2388 | // int32 did_it = 0; |
2389 | QualType KmpInt32Ty = |
2390 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
2391 | DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); |
2392 | CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); |
2393 | } |
2394 | // Prepare arguments and build a call to __kmpc_single |
2395 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2396 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2397 | CGM.getModule(), OMPRTL___kmpc_single), |
2398 | Args, |
2399 | OMPBuilder.getOrCreateRuntimeFunction( |
2400 | CGM.getModule(), OMPRTL___kmpc_end_single), |
2401 | Args, |
2402 | /*Conditional=*/true); |
2403 | SingleOpGen.setAction(Action); |
2404 | emitInlinedDirective(CGF, OMPD_single, SingleOpGen); |
2405 | if (DidIt.isValid()) { |
2406 | // did_it = 1; |
2407 | CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); |
2408 | } |
2409 | Action.Done(CGF); |
2410 | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2411 | // <copy_func>, did_it); |
2412 | if (DidIt.isValid()) { |
2413 | llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); |
2414 | QualType CopyprivateArrayTy = C.getConstantArrayType( |
2415 | C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, |
2416 | /*IndexTypeQuals=*/0); |
2417 | // Create a list of all private variables for copyprivate. |
2418 | Address CopyprivateList = |
2419 | CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); |
2420 | for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { |
2421 | Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); |
2422 | CGF.Builder.CreateStore( |
2423 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2424 | CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), |
2425 | CGF.VoidPtrTy), |
2426 | Elem); |
2427 | } |
2428 | // Build function that copies private values from single region to all other |
2429 | // threads in the corresponding parallel region. |
2430 | llvm::Value *CpyFn = emitCopyprivateCopyFunction( |
2431 | CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars, |
2432 | SrcExprs, DstExprs, AssignmentOps, Loc); |
2433 | llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); |
2434 | Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2435 | CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty); |
2436 | llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); |
2437 | llvm::Value *Args[] = { |
2438 | emitUpdateLocation(CGF, Loc), // ident_t *<loc> |
2439 | getThreadID(CGF, Loc), // i32 <gtid> |
2440 | BufSize, // size_t <buf_size> |
2441 | CL.getPointer(), // void *<copyprivate list> |
2442 | CpyFn, // void (*) (void *, void *) <copy_func> |
2443 | DidItVal // i32 did_it |
2444 | }; |
2445 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2446 | CGM.getModule(), OMPRTL___kmpc_copyprivate), |
2447 | Args); |
2448 | } |
2449 | } |
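
// End-to-end sketch for 'single' with a copyprivate clause (illustrative,
// following the pseudo code in the comments above):
//   int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     ... single region body ...
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, sizeof(void *[n]), cpr_list,
//                      .omp.copyprivate.copy_func, did_it);
// The runtime broadcasts the values written by the thread with did_it == 1 to
// every other thread in the team.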
2450 | |
2451 | void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, |
2452 | const RegionCodeGenTy &OrderedOpGen, |
2453 | SourceLocation Loc, bool IsThreads) { |
2454 | if (!CGF.HaveInsertPoint()) |
2455 | return; |
2456 | // __kmpc_ordered(ident_t *, gtid); |
2457 | // OrderedOpGen(); |
2458 | // __kmpc_end_ordered(ident_t *, gtid); |
2459 | // Prepare arguments and build a call to __kmpc_ordered |
2460 | if (IsThreads) { |
2461 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2462 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2463 | CGM.getModule(), OMPRTL___kmpc_ordered), |
2464 | Args, |
2465 | OMPBuilder.getOrCreateRuntimeFunction( |
2466 | CGM.getModule(), OMPRTL___kmpc_end_ordered), |
2467 | Args); |
2468 | OrderedOpGen.setAction(Action); |
2469 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2470 | return; |
2471 | } |
2472 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2473 | } |
2474 | |
2475 | unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { |
2476 | unsigned Flags; |
2477 | if (Kind == OMPD_for) |
2478 | Flags = OMP_IDENT_BARRIER_IMPL_FOR; |
2479 | else if (Kind == OMPD_sections) |
2480 | Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; |
2481 | else if (Kind == OMPD_single) |
2482 | Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; |
2483 | else if (Kind == OMPD_barrier) |
2484 | Flags = OMP_IDENT_BARRIER_EXPL; |
2485 | else |
2486 | Flags = OMP_IDENT_BARRIER_IMPL; |
2487 | return Flags; |
2488 | } |
2489 | |
2490 | void CGOpenMPRuntime::getDefaultScheduleAndChunk( |
2491 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
2492 | OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { |
2493 | // Check if the loop directive is actually a doacross loop directive. In this |
2494 | // case, choose the 'static, 1' schedule.
2495 | if (llvm::any_of( |
2496 | S.getClausesOfKind<OMPOrderedClause>(), |
2497 | [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { |
2498 | ScheduleKind = OMPC_SCHEDULE_static; |
2499 | // Chunk size is 1 in this case. |
2500 | llvm::APInt ChunkSize(32, 1); |
2501 | ChunkExpr = IntegerLiteral::Create( |
2502 | CGF.getContext(), ChunkSize, |
2503 | CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), |
2504 | SourceLocation()); |
2505 | } |
2506 | } |
2507 | |
2508 | void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, |
2509 | OpenMPDirectiveKind Kind, bool EmitChecks, |
2510 | bool ForceSimpleCall) { |
2511 | // Check if we should use the OMPBuilder |
2512 | auto *OMPRegionInfo = |
2513 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); |
2514 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2515 | CGF.Builder.restoreIP(OMPBuilder.createBarrier( |
2516 | CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); |
2517 | return; |
2518 | } |
2519 | |
2520 | if (!CGF.HaveInsertPoint()) |
2521 | return; |
2522 | // Build call __kmpc_cancel_barrier(loc, thread_id); |
2523 | // Build call __kmpc_barrier(loc, thread_id); |
2524 | unsigned Flags = getDefaultFlagsForBarriers(Kind); |
2525 | // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, |
2526 | // thread_id); |
2527 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), |
2528 | getThreadID(CGF, Loc)}; |
2529 | if (OMPRegionInfo) { |
2530 | if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { |
2531 | llvm::Value *Result = CGF.EmitRuntimeCall( |
2532 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2533 | OMPRTL___kmpc_cancel_barrier), |
2534 | Args); |
2535 | if (EmitChecks) { |
2536 | // if (__kmpc_cancel_barrier()) { |
2537 | // exit from construct; |
2538 | // } |
2539 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); |
2540 | llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); |
2541 | llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); |
2542 | CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); |
2543 | CGF.EmitBlock(ExitBB); |
2544 | // exit from construct; |
2545 | CodeGenFunction::JumpDest CancelDestination = |
2546 | CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); |
2547 | CGF.EmitBranchThroughCleanup(CancelDestination); |
2548 | CGF.EmitBlock(ContBB, /*IsFinished=*/true); |
2549 | } |
2550 | return; |
2551 | } |
2552 | } |
2553 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2554 | CGM.getModule(), OMPRTL___kmpc_barrier), |
2555 | Args); |
2556 | } |
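
// Sketch of the cancellation-aware barrier emitted above when the enclosing
// region has a 'cancel' construct (block names are illustrative):
//   %res = call i32 @__kmpc_cancel_barrier(&loc, gtid)
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue
// where .cancel.exit branches through cleanups to the region's cancellation
// destination and .cancel.continue falls through. Without cancellation, or
// with ForceSimpleCall, a plain @__kmpc_barrier call is emitted instead.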
2557 | |
2558 | void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc, |
2559 | Expr *ME, bool IsFatal) { |
2560 | llvm::Value *MVL = |
2561 | ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF) |
2562 | : llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
2563 | // Build call void __kmpc_error(ident_t *loc, int severity, const char |
2564 | // *message) |
2565 | llvm::Value *Args[] = { |
2566 | emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true), |
2567 | llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1), |
2568 | CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)}; |
2569 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2570 | CGM.getModule(), OMPRTL___kmpc_error), |
2571 | Args); |
2572 | } |
2573 | |
2574 | /// Map the OpenMP loop schedule to the runtime enumeration. |
2575 | static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, |
2576 | bool Chunked, bool Ordered) { |
2577 | switch (ScheduleKind) { |
2578 | case OMPC_SCHEDULE_static: |
2579 | return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) |
2580 | : (Ordered ? OMP_ord_static : OMP_sch_static); |
2581 | case OMPC_SCHEDULE_dynamic: |
2582 | return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; |
2583 | case OMPC_SCHEDULE_guided: |
2584 | return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; |
2585 | case OMPC_SCHEDULE_runtime: |
2586 | return Ordered ? OMP_ord_runtime : OMP_sch_runtime; |
2587 | case OMPC_SCHEDULE_auto: |
2588 | return Ordered ? OMP_ord_auto : OMP_sch_auto; |
2589 | case OMPC_SCHEDULE_unknown: |
2590 | assert(!Chunked && "chunk was specified but schedule kind not known");
2591 | return Ordered ? OMP_ord_static : OMP_sch_static; |
2592 | } |
2593 | llvm_unreachable("Unexpected runtime schedule");
2594 | } |
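
// Examples of the mapping above (sketch): 'schedule(static)' maps to
// OMP_sch_static, 'schedule(static, 4)' to OMP_sch_static_chunked, and
// 'schedule(dynamic)' to OMP_sch_dynamic_chunked (dynamic and guided are
// always treated as chunked; a default chunk of 1 is supplied later). The
// same kinds under an 'ordered' clause map to the OMP_ord_* counterparts.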
2595 | |
2596 | /// Map the OpenMP distribute schedule to the runtime enumeration. |
2597 | static OpenMPSchedType |
2598 | getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { |
2599 | // Only the 'static' kind is allowed for dist_schedule.
2600 | return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; |
2601 | } |
2602 | |
2603 | bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, |
2604 | bool Chunked) const { |
2605 | OpenMPSchedType Schedule = |
2606 | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2607 | return Schedule == OMP_sch_static; |
2608 | } |
2609 | |
2610 | bool CGOpenMPRuntime::isStaticNonchunked( |
2611 | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2612 | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2613 | return Schedule == OMP_dist_sch_static; |
2614 | } |
2615 | |
2616 | bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, |
2617 | bool Chunked) const { |
2618 | OpenMPSchedType Schedule = |
2619 | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2620 | return Schedule == OMP_sch_static_chunked; |
2621 | } |
2622 | |
2623 | bool CGOpenMPRuntime::isStaticChunked( |
2624 | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2625 | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2626 | return Schedule == OMP_dist_sch_static_chunked; |
2627 | } |
2628 | |
2629 | bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { |
2630 | OpenMPSchedType Schedule = |
2631 | getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); |
2632 | assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2633 | return Schedule != OMP_sch_static; |
2634 | } |
2635 | |
2636 | static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, |
2637 | OpenMPScheduleClauseModifier M1, |
2638 | OpenMPScheduleClauseModifier M2) { |
2639 | int Modifier = 0; |
2640 | switch (M1) { |
2641 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2642 | Modifier = OMP_sch_modifier_monotonic; |
2643 | break; |
2644 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2645 | Modifier = OMP_sch_modifier_nonmonotonic; |
2646 | break; |
2647 | case OMPC_SCHEDULE_MODIFIER_simd: |
2648 | if (Schedule == OMP_sch_static_chunked) |
2649 | Schedule = OMP_sch_static_balanced_chunked; |
2650 | break; |
2651 | case OMPC_SCHEDULE_MODIFIER_last: |
2652 | case OMPC_SCHEDULE_MODIFIER_unknown: |
2653 | break; |
2654 | } |
2655 | switch (M2) { |
2656 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2657 | Modifier = OMP_sch_modifier_monotonic; |
2658 | break; |
2659 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2660 | Modifier = OMP_sch_modifier_nonmonotonic; |
2661 | break; |
2662 | case OMPC_SCHEDULE_MODIFIER_simd: |
2663 | if (Schedule == OMP_sch_static_chunked) |
2664 | Schedule = OMP_sch_static_balanced_chunked; |
2665 | break; |
2666 | case OMPC_SCHEDULE_MODIFIER_last: |
2667 | case OMPC_SCHEDULE_MODIFIER_unknown: |
2668 | break; |
2669 | } |
2670 | // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
2671 | // If the static schedule kind is specified or if the ordered clause is |
2672 | // specified, and if the nonmonotonic modifier is not specified, the effect is |
2673 | // as if the monotonic modifier is specified. Otherwise, unless the monotonic |
2674 | // modifier is specified, the effect is as if the nonmonotonic modifier is |
2675 | // specified. |
2676 | if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { |
2677 | if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || |
2678 | Schedule == OMP_sch_static_balanced_chunked || |
2679 | Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || |
2680 | Schedule == OMP_dist_sch_static_chunked || |
2681 | Schedule == OMP_dist_sch_static)) |
2682 | Modifier = OMP_sch_modifier_nonmonotonic; |
2683 | } |
2684 | return Schedule | Modifier; |
2685 | } |
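
// Worked example (sketch): with OpenMP >= 5.0, 'schedule(dynamic)' and no
// modifier yields OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic,
// while 'schedule(static, 4)' leaves Modifier == 0 (monotonic by default per
// the rule quoted above). The modifier values occupy high bits of the schedule
// word, so the bitwise OR in the return packs both into the single kmp_int32
// 'schedtype' argument expected by the runtime.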
2686 | |
2687 | void CGOpenMPRuntime::emitForDispatchInit( |
2688 | CodeGenFunction &CGF, SourceLocation Loc, |
2689 | const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, |
2690 | bool Ordered, const DispatchRTInput &DispatchValues) { |
2691 | if (!CGF.HaveInsertPoint()) |
2692 | return; |
2693 | OpenMPSchedType Schedule = getRuntimeSchedule( |
2694 | ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); |
2695 | assert(Ordered ||
2696 |        (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2697 |         Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2698 |         Schedule != OMP_sch_static_balanced_chunked));
2699 | // Call __kmpc_dispatch_init( |
2700 | // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, |
2701 | // kmp_int[32|64] lower, kmp_int[32|64] upper, |
2702 | // kmp_int[32|64] stride, kmp_int[32|64] chunk); |
2703 | |
2704 | // If the Chunk was not specified in the clause, use the default value 1.
2705 | llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk |
2706 | : CGF.Builder.getIntN(IVSize, 1); |
2707 | llvm::Value *Args[] = { |
2708 | emitUpdateLocation(CGF, Loc), |
2709 | getThreadID(CGF, Loc), |
2710 | CGF.Builder.getInt32(addMonoNonMonoModifier( |
2711 | CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type |
2712 | DispatchValues.LB, // Lower |
2713 | DispatchValues.UB, // Upper |
2714 | CGF.Builder.getIntN(IVSize, 1), // Stride |
2715 | Chunk // Chunk |
2716 | }; |
2717 | CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); |
2718 | } |
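
// Sketch of the resulting runtime call for a 32-bit signed IV and
// 'schedule(dynamic, 4)' (values are illustrative):
//   __kmpc_dispatch_init_4(&loc, gtid,
//                          dynamic_chunked | nonmonotonic_modifier,
//                          /*lower=*/0, /*upper=*/N - 1, /*stride=*/1,
//                          /*chunk=*/4);
// Each thread then pulls chunks through __kmpc_dispatch_next_4 (see
// emitForNext below) until that call returns 0.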
2719 | |
2720 | static void emitForStaticInitCall( |
2721 | CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, |
2722 | llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, |
2723 | OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, |
2724 | const CGOpenMPRuntime::StaticRTInput &Values) { |
2725 | if (!CGF.HaveInsertPoint()) |
2726 | return; |
2727 | |
2728 | assert(!Values.Ordered);
2729 | assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2730 |        Schedule == OMP_sch_static_balanced_chunked ||
2731 |        Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2732 |        Schedule == OMP_dist_sch_static ||
2733 |        Schedule == OMP_dist_sch_static_chunked);
2734 | |
2735 | // Call __kmpc_for_static_init( |
2736 | // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, |
2737 | // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, |
2738 | // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, |
2739 | // kmp_int[32|64] incr, kmp_int[32|64] chunk); |
2740 | llvm::Value *Chunk = Values.Chunk; |
2741 | if (Chunk == nullptr) { |
2742 | assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2743 |         Schedule == OMP_dist_sch_static) &&
2744 |        "expected static non-chunked schedule");
2745 | // If the Chunk was not specified in the clause, use the default value 1.
2746 | Chunk = CGF.Builder.getIntN(Values.IVSize, 1); |
2747 | } else { |
2748 | assert((Schedule == OMP_sch_static_chunked ||
2749 |         Schedule == OMP_sch_static_balanced_chunked ||
2750 |         Schedule == OMP_ord_static_chunked ||
2751 |         Schedule == OMP_dist_sch_static_chunked) &&
2752 |        "expected static chunked schedule");
2753 | } |
2754 | llvm::Value *Args[] = { |
2755 | UpdateLocation, |
2756 | ThreadId, |
2757 | CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, |
2758 | M2)), // Schedule type |
2759 | Values.IL.getPointer(), // &isLastIter |
2760 | Values.LB.getPointer(), // &LB |
2761 | Values.UB.getPointer(), // &UB |
2762 | Values.ST.getPointer(), // &Stride |
2763 | CGF.Builder.getIntN(Values.IVSize, 1), // Incr |
2764 | Chunk // Chunk |
2765 | }; |
2766 | CGF.EmitRuntimeCall(ForStaticInitFunction, Args); |
2767 | } |
2768 | |
2769 | void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, |
2770 | SourceLocation Loc, |
2771 | OpenMPDirectiveKind DKind, |
2772 | const OpenMPScheduleTy &ScheduleKind, |
2773 | const StaticRTInput &Values) { |
2774 | OpenMPSchedType ScheduleNum = getRuntimeSchedule( |
2775 | ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); |
2776 | assert(isOpenMPWorksharingDirective(DKind) &&
2777 |        "Expected loop-based or sections-based directive.");
2778 | llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, |
2779 | isOpenMPLoopDirective(DKind) |
2780 | ? OMP_IDENT_WORK_LOOP |
2781 | : OMP_IDENT_WORK_SECTIONS); |
2782 | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2783 | llvm::FunctionCallee StaticInitFunction = |
2784 | createForStaticInitFunction(Values.IVSize, Values.IVSigned, false); |
2785 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2786 | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2787 | ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); |
2788 | } |
2789 | |
2790 | void CGOpenMPRuntime::emitDistributeStaticInit( |
2791 | CodeGenFunction &CGF, SourceLocation Loc, |
2792 | OpenMPDistScheduleClauseKind SchedKind, |
2793 | const CGOpenMPRuntime::StaticRTInput &Values) { |
2794 | OpenMPSchedType ScheduleNum = |
2795 | getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); |
2796 | llvm::Value *UpdatedLocation = |
2797 | emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); |
2798 | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2799 | llvm::FunctionCallee StaticInitFunction; |
2800 | bool isGPUDistribute = |
2801 | CGM.getLangOpts().OpenMPIsDevice && |
2802 | (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()); |
2803 | StaticInitFunction = createForStaticInitFunction( |
2804 | Values.IVSize, Values.IVSigned, isGPUDistribute); |
2805 | |
2806 | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2807 | ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, |
2808 | OMPC_SCHEDULE_MODIFIER_unknown, Values); |
2809 | } |
2810 | |
2811 | void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, |
2812 | SourceLocation Loc, |
2813 | OpenMPDirectiveKind DKind) { |
2814 | if (!CGF.HaveInsertPoint()) |
2815 | return; |
2816 | // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); |
2817 | llvm::Value *Args[] = { |
2818 | emitUpdateLocation(CGF, Loc, |
2819 | isOpenMPDistributeDirective(DKind) |
2820 | ? OMP_IDENT_WORK_DISTRIBUTE |
2821 | : isOpenMPLoopDirective(DKind) |
2822 | ? OMP_IDENT_WORK_LOOP |
2823 | : OMP_IDENT_WORK_SECTIONS), |
2824 | getThreadID(CGF, Loc)}; |
2825 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2826 | if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice && |
2827 | (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX())) |
2828 | CGF.EmitRuntimeCall( |
2829 | OMPBuilder.getOrCreateRuntimeFunction( |
2830 | CGM.getModule(), OMPRTL___kmpc_distribute_static_fini), |
2831 | Args); |
2832 | else |
2833 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2834 | CGM.getModule(), OMPRTL___kmpc_for_static_fini), |
2835 | Args); |
2836 | } |
2837 | |
2838 | void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, |
2839 | SourceLocation Loc, |
2840 | unsigned IVSize, |
2841 | bool IVSigned) { |
2842 | if (!CGF.HaveInsertPoint()) |
2843 | return; |
2844 | // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2845 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2846 | CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); |
2847 | } |
2848 | |
2849 | llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, |
2850 | SourceLocation Loc, unsigned IVSize, |
2851 | bool IVSigned, Address IL, |
2852 | Address LB, Address UB, |
2853 | Address ST) { |
2854 | // Call __kmpc_dispatch_next( |
2855 | // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, |
2856 | // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, |
2857 | // kmp_int[32|64] *p_stride); |
2858 | llvm::Value *Args[] = { |
2859 | emitUpdateLocation(CGF, Loc), |
2860 | getThreadID(CGF, Loc), |
2861 | IL.getPointer(), // &isLastIter |
2862 | LB.getPointer(), // &Lower |
2863 | UB.getPointer(), // &Upper |
2864 | ST.getPointer() // &Stride |
2865 | }; |
2866 | llvm::Value *Call = |
2867 | CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); |
2868 | return CGF.EmitScalarConversion( |
2869 | Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), |
2870 | CGF.getContext().BoolTy, Loc); |
2871 | } |
2872 | |
2873 | void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, |
2874 | llvm::Value *NumThreads, |
2875 | SourceLocation Loc) { |
2876 | if (!CGF.HaveInsertPoint()) |
2877 | return; |
2878 | // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) |
2879 | llvm::Value *Args[] = { |
2880 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2881 | CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; |
2882 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2883 | CGM.getModule(), OMPRTL___kmpc_push_num_threads), |
2884 | Args); |
2885 | } |
2886 | |
2887 | void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, |
2888 | ProcBindKind ProcBind, |
2889 | SourceLocation Loc) { |
2890 | if (!CGF.HaveInsertPoint()) |
2891 | return; |
2892 | assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2893 | // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) |
2894 | llvm::Value *Args[] = { |
2895 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2896 | llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; |
2897 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2898 | CGM.getModule(), OMPRTL___kmpc_push_proc_bind), |
2899 | Args); |
2900 | } |
2901 | |
2902 | void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, |
2903 | SourceLocation Loc, llvm::AtomicOrdering AO) { |
2904 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2905 | OMPBuilder.createFlush(CGF.Builder); |
2906 | } else { |
2907 | if (!CGF.HaveInsertPoint()) |
2908 | return; |
2909 | // Build call void __kmpc_flush(ident_t *loc) |
2910 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2911 | CGM.getModule(), OMPRTL___kmpc_flush), |
2912 | emitUpdateLocation(CGF, Loc)); |
2913 | } |
2914 | } |
2915 | |
2916 | namespace { |
2917 | /// Indexes of fields for type kmp_task_t. |
2918 | enum KmpTaskTFields { |
2919 | /// List of shared variables. |
2920 | KmpTaskTShareds, |
2921 | /// Task routine. |
2922 | KmpTaskTRoutine, |
2923 | /// Partition id for the untied tasks. |
2924 | KmpTaskTPartId, |
2925 | /// Function with call of destructors for private variables. |
2926 | Data1, |
2927 | /// Task priority. |
2928 | Data2, |
2929 | /// (Taskloops only) Lower bound. |
2930 | KmpTaskTLowerBound, |
2931 | /// (Taskloops only) Upper bound. |
2932 | KmpTaskTUpperBound, |
2933 | /// (Taskloops only) Stride. |
2934 | KmpTaskTStride, |
2935 | /// (Taskloops only) Is last iteration flag. |
2936 | KmpTaskTLastIter, |
2937 | /// (Taskloops only) Reduction data. |
2938 | KmpTaskTReductions, |
2939 | }; |
2940 | } // anonymous namespace |
2941 | |
2942 | void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { |
2943 | // If we are in simd mode or there are no entries, we don't need to do |
2944 | // anything. |
2945 | if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) |
2946 | return; |
2947 | |
2948 | llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = |
2949 | [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind, |
2950 | const llvm::TargetRegionEntryInfo &EntryInfo) -> void { |
2951 | SourceLocation Loc; |
2952 | if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) { |
2953 | for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), |
2954 | E = CGM.getContext().getSourceManager().fileinfo_end(); |
2955 | I != E; ++I) { |
2956 | if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID && |
2957 | I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) { |
2958 | Loc = CGM.getContext().getSourceManager().translateFileLineCol( |
2959 | I->getFirst(), EntryInfo.Line, 1); |
2960 | break; |
2961 | } |
2962 | } |
2963 | } |
2964 | switch (Kind) { |
2965 | case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: { |
2966 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
2967 | DiagnosticsEngine::Error, "Offloading entry for target region in " |
2968 | "%0 is incorrect: either the " |
2969 | "address or the ID is invalid."); |
2970 | CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; |
2971 | } break; |
2972 | case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: { |
2973 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
2974 | DiagnosticsEngine::Error, "Offloading entry for declare target " |
2975 | "variable %0 is incorrect: the " |
2976 | "address is invalid."); |
2977 | CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName; |
2978 | } break; |
2979 | case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: { |
2980 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
2981 | DiagnosticsEngine::Error, |
2982 | "Offloading entry for declare target variable is incorrect: the " |
2983 | "address is invalid."); |
2984 | CGM.getDiags().Report(DiagID); |
2985 | } break; |
2986 | } |
2987 | }; |
2988 | |
2989 | OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager, |
2990 | ErrorReportFn); |
2991 | } |
2992 | |
2993 | /// Loads all the offload entries information from the host IR |
2994 | /// metadata. |
2995 | void CGOpenMPRuntime::loadOffloadInfoMetadata() { |
2996 | // If we are in target mode, load the metadata from the host IR. This code has |
2997 | // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
2998 | |
2999 | if (!CGM.getLangOpts().OpenMPIsDevice) |
3000 | return; |
3001 | |
3002 | if (CGM.getLangOpts().OMPHostIRFile.empty()) |
3003 | return; |
3004 | |
3005 | auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); |
3006 | if (auto EC = Buf.getError()) { |
3007 | CGM.getDiags().Report(diag::err_cannot_open_file) |
3008 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3009 | return; |
3010 | } |
3011 | |
3012 | llvm::LLVMContext C; |
3013 | auto ME = expectedToErrorOrAndEmitErrors( |
3014 | C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); |
3015 | |
3016 | if (auto EC = ME.getError()) { |
3017 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3018 | DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); |
3019 | CGM.getDiags().Report(DiagID) |
3020 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3021 | return; |
3022 | } |
3023 | |
3024 | OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); |
3025 | } |
3026 | |
3027 | void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { |
3028 | if (!KmpRoutineEntryPtrTy) { |
3029 | // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. |
3030 | ASTContext &C = CGM.getContext(); |
3031 | QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; |
3032 | FunctionProtoType::ExtProtoInfo EPI; |
3033 | KmpRoutineEntryPtrQTy = C.getPointerType( |
3034 | C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); |
3035 | KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); |
3036 | } |
3037 | } |
3038 | |
3039 | namespace { |
3040 | struct PrivateHelpersTy { |
3041 | PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, |
3042 | const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) |
3043 | : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), |
3044 | PrivateElemInit(PrivateElemInit) {} |
3045 | PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} |
3046 | const Expr *OriginalRef = nullptr; |
3047 | const VarDecl *Original = nullptr; |
3048 | const VarDecl *PrivateCopy = nullptr; |
3049 | const VarDecl *PrivateElemInit = nullptr; |
3050 | bool isLocalPrivate() const { |
3051 | return !OriginalRef && !PrivateCopy && !PrivateElemInit; |
3052 | } |
3053 | }; |
3054 | typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; |
3055 | } // anonymous namespace |
3056 | |
3057 | static bool isAllocatableDecl(const VarDecl *VD) { |
3058 | const VarDecl *CVD = VD->getCanonicalDecl(); |
3059 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
3060 | return false; |
3061 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
3062 | // Use the default allocation. |
3063 | return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc && |
3064 | !AA->getAllocator()); |
3065 | } |
3066 | |
3067 | static RecordDecl * |
3068 | createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { |
3069 | if (!Privates.empty()) { |
3070 | ASTContext &C = CGM.getContext(); |
3071 | // Build struct .kmp_privates_t. { |
3072 | // /* private vars */ |
3073 | // }; |
3074 | RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); |
3075 | RD->startDefinition(); |
3076 | for (const auto &Pair : Privates) { |
3077 | const VarDecl *VD = Pair.second.Original; |
3078 | QualType Type = VD->getType().getNonReferenceType(); |
3079 | // If the private variable is a local variable with lvalue ref type, |
3080 | // allocate the pointer instead of the pointee type. |
3081 | if (Pair.second.isLocalPrivate()) { |
3082 | if (VD->getType()->isLValueReferenceType()) |
3083 | Type = C.getPointerType(Type); |
3084 | if (isAllocatableDecl(VD)) |
3085 | Type = C.getPointerType(Type); |
3086 | } |
3087 | FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); |
3088 | if (VD->hasAttrs()) { |
3089 | for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), |
3090 | E(VD->getAttrs().end()); |
3091 | I != E; ++I) |
3092 | FD->addAttr(*I); |
3093 | } |
3094 | } |
3095 | RD->completeDefinition(); |
3096 | return RD; |
3097 | } |
3098 | return nullptr; |
3099 | } |
3100 | |
3101 | static RecordDecl * |
3102 | createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, |
3103 | QualType KmpInt32Ty, |
3104 | QualType KmpRoutineEntryPointerQTy) { |
3105 | ASTContext &C = CGM.getContext(); |
3106 | // Build struct kmp_task_t { |
3107 | // void * shareds; |
3108 | // kmp_routine_entry_t routine; |
3109 | // kmp_int32 part_id; |
3110 | // kmp_cmplrdata_t data1; |
3111 | // kmp_cmplrdata_t data2; |
3112 | // For taskloops additional fields: |
3113 | // kmp_uint64 lb; |
3114 | // kmp_uint64 ub; |
3115 | // kmp_int64 st; |
3116 | // kmp_int32 liter; |
3117 | // void * reductions; |
3118 | // }; |
3119 | RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); |
3120 | UD->startDefinition(); |
3121 | addFieldToRecordDecl(C, UD, KmpInt32Ty); |
3122 | addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); |
3123 | UD->completeDefinition(); |
3124 | QualType KmpCmplrdataTy = C.getRecordType(UD); |
3125 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); |
3126 | RD->startDefinition(); |
3127 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3128 | addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); |
3129 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3130 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3131 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3132 | if (isOpenMPTaskLoopDirective(Kind)) { |
3133 | QualType KmpUInt64Ty = |
3134 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); |
3135 | QualType KmpInt64Ty = |
3136 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); |
3137 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3138 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3139 | addFieldToRecordDecl(C, RD, KmpInt64Ty); |
3140 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3141 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3142 | } |
3143 | RD->completeDefinition(); |
3144 | return RD; |
3145 | } |
3146 | |
3147 | static RecordDecl * |
3148 | createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, |
3149 | ArrayRef<PrivateDataTy> Privates) { |
3150 | ASTContext &C = CGM.getContext(); |
3151 | // Build struct kmp_task_t_with_privates { |
3152 | // kmp_task_t task_data; |
3153 | // .kmp_privates_t. privates; |
3154 | // }; |
3155 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); |
3156 | RD->startDefinition(); |
3157 | addFieldToRecordDecl(C, RD, KmpTaskTQTy); |
3158 | if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) |
3159 | addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); |
3160 | RD->completeDefinition(); |
3161 | return RD; |
3162 | } |
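
// Combined layout sketch for a task with two firstprivate ints (illustrative):
//   struct kmp_task_t_with_privates {
//     kmp_task_t task_data;       // shareds, routine, part_id, data1, data2
//     struct .kmp_privates.t {    // field order follows the Privates list
//       int a;
//       int b;
//     } privates;
//   };
// The runtime only knows about kmp_task_t; the trailing privates block is used
// solely by the compiler-generated task entry and destructor thunks.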
3163 | |
3164 | /// Emit a proxy function which accepts kmp_task_t as the second |
3165 | /// argument. |
3166 | /// \code |
3167 | /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { |
3168 | /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, |
3169 | /// For taskloops: |
3170 | /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3171 | /// tt->reductions, tt->shareds); |
3172 | /// return 0; |
3173 | /// } |
3174 | /// \endcode |
3175 | static llvm::Function * |
3176 | emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, |
3177 | OpenMPDirectiveKind Kind, QualType KmpInt32Ty, |
3178 | QualType KmpTaskTWithPrivatesPtrQTy, |
3179 | QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, |
3180 | QualType SharedsPtrTy, llvm::Function *TaskFunction, |
3181 | llvm::Value *TaskPrivatesMap) { |
3182 | ASTContext &C = CGM.getContext(); |
3183 | FunctionArgList Args; |
3184 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3185 | ImplicitParamDecl::Other); |
3186 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3187 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3188 | ImplicitParamDecl::Other); |
3189 | Args.push_back(&GtidArg); |
3190 | Args.push_back(&TaskTypeArg); |
3191 | const auto &TaskEntryFnInfo = |
3192 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3193 | llvm::FunctionType *TaskEntryTy = |
3194 | CGM.getTypes().GetFunctionType(TaskEntryFnInfo); |
3195 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); |
3196 | auto *TaskEntry = llvm::Function::Create( |
3197 | TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3198 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); |
3199 | TaskEntry->setDoesNotRecurse(); |
3200 | CodeGenFunction CGF(CGM); |
3201 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, |
3202 | Loc, Loc); |
3203 | |
3204 | // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, |
3205 | // tt, |
3206 | // For taskloops: |
3207 | // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3208 | // tt->task_data.shareds); |
3209 | llvm::Value *GtidParam = CGF.EmitLoadOfScalar( |
3210 | CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); |
3211 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3212 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3213 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3214 | const auto *KmpTaskTWithPrivatesQTyRD = |
3215 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3216 | LValue Base = |
3217 | CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3218 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
3219 | auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); |
3220 | LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); |
3221 | llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); |
3222 | |
3223 | auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); |
3224 | LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); |
3225 | llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3226 | CGF.EmitLoadOfScalar(SharedsLVal, Loc), |
3227 | CGF.ConvertTypeForMem(SharedsPtrTy)); |
3228 | |
3229 | auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); |
3230 | llvm::Value *PrivatesParam; |
3231 | if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { |
3232 | LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); |
3233 | PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3234 | PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); |
3235 | } else { |
3236 | PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
3237 | } |
3238 | |
3239 | llvm::Value *CommonArgs[] = { |
3240 | GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap, |
3241 | CGF.Builder |
3242 | .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF), |
3243 | CGF.VoidPtrTy, CGF.Int8Ty) |
3244 | .getPointer()}; |
3245 | SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), |
3246 | std::end(CommonArgs)); |
3247 | if (isOpenMPTaskLoopDirective(Kind)) { |
3248 | auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); |
3249 | LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); |
3250 | llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); |
3251 | auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); |
3252 | LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); |
3253 | llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); |
3254 | auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); |
3255 | LValue StLVal = CGF.EmitLValueForField(Base, *StFI); |
3256 | llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); |
3257 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3258 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3259 | llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); |
3260 | auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); |
3261 | LValue RLVal = CGF.EmitLValueForField(Base, *RFI); |
3262 | llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); |
3263 | CallArgs.push_back(LBParam); |
3264 | CallArgs.push_back(UBParam); |
3265 | CallArgs.push_back(StParam); |
3266 | CallArgs.push_back(LIParam); |
3267 | CallArgs.push_back(RParam); |
3268 | } |
3269 | CallArgs.push_back(SharedsParam); |
3270 | |
3271 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, |
3272 | CallArgs); |
3273 | CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), |
3274 | CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); |
3275 | CGF.FinishFunction(); |
3276 | return TaskEntry; |
3277 | } |
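
// Sketch of the generated proxy for a plain 'task' (no taskloop fields),
// following the \code block above; symbol names are illustrative:
//   kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//     .omp_outlined.(gtid, &tt->task_data.part_id, &tt->privates,
//                    .omp_task_privates_map., tt, tt->task_data.shareds);
//     return 0;
//   }
// The runtime invokes this thunk through the 'routine' field stored in
// kmp_task_t when the task is scheduled.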
3278 | |
3279 | static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, |
3280 | SourceLocation Loc, |
3281 | QualType KmpInt32Ty, |
3282 | QualType KmpTaskTWithPrivatesPtrQTy, |
3283 | QualType KmpTaskTWithPrivatesQTy) { |
3284 | ASTContext &C = CGM.getContext(); |
3285 | FunctionArgList Args; |
3286 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3287 | ImplicitParamDecl::Other); |
3288 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3289 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3290 | ImplicitParamDecl::Other); |
3291 | Args.push_back(&GtidArg); |
3292 | Args.push_back(&TaskTypeArg); |
3293 | const auto &DestructorFnInfo = |
3294 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3295 | llvm::FunctionType *DestructorFnTy = |
3296 | CGM.getTypes().GetFunctionType(DestructorFnInfo); |
3297 | std::string Name = |
3298 | CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); |
3299 | auto *DestructorFn = |
3300 | llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, |
3301 | Name, &CGM.getModule()); |
3302 | CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, |
3303 | DestructorFnInfo); |
3304 | DestructorFn->setDoesNotRecurse(); |
3305 | CodeGenFunction CGF(CGM); |
3306 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, |
3307 | Args, Loc, Loc); |
3308 | |
3309 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3310 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3311 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3312 | const auto *KmpTaskTWithPrivatesQTyRD = |
3313 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3314 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3315 | Base = CGF.EmitLValueForField(Base, *FI); |
3316 | for (const auto *Field : |
3317 | cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { |
3318 | if (QualType::DestructionKind DtorKind = |
3319 | Field->getType().isDestructedType()) { |
3320 | LValue FieldLValue = CGF.EmitLValueForField(Base, Field); |
3321 | CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); |
3322 | } |
3323 | } |
3324 | CGF.FinishFunction(); |
3325 | return DestructorFn; |
3326 | } |
3327 | |
3328 | /// Emit a privates mapping function for correct handling of private and |
3329 | /// firstprivate variables. |
3330 | /// \code |
3331 | /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> |
3332 | /// **noalias priv1,..., <tyn> **noalias privn) { |
3333 | /// *priv1 = &.privates.priv1; |
3334 | /// ...; |
3335 | /// *privn = &.privates.privn; |
3336 | /// } |
3337 | /// \endcode |
3338 | static llvm::Value * |
3339 | emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, |
3340 | const OMPTaskDataTy &Data, QualType PrivatesQTy, |
3341 | ArrayRef<PrivateDataTy> Privates) { |
3342 | ASTContext &C = CGM.getContext(); |
3343 | FunctionArgList Args; |
3344 | ImplicitParamDecl TaskPrivatesArg( |
3345 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3346 | C.getPointerType(PrivatesQTy).withConst().withRestrict(), |
3347 | ImplicitParamDecl::Other); |
3348 | Args.push_back(&TaskPrivatesArg); |
3349 | llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; |
3350 | unsigned Counter = 1; |
3351 | for (const Expr *E : Data.PrivateVars) { |
3352 | Args.push_back(ImplicitParamDecl::Create( |
3353 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3354 | C.getPointerType(C.getPointerType(E->getType())) |
3355 | .withConst() |
3356 | .withRestrict(), |
3357 | ImplicitParamDecl::Other)); |
3358 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3359 | PrivateVarsPos[VD] = Counter; |
3360 | ++Counter; |
3361 | } |
3362 | for (const Expr *E : Data.FirstprivateVars) { |
3363 | Args.push_back(ImplicitParamDecl::Create( |
3364 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3365 | C.getPointerType(C.getPointerType(E->getType())) |
3366 | .withConst() |
3367 | .withRestrict(), |
3368 | ImplicitParamDecl::Other)); |
3369 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3370 | PrivateVarsPos[VD] = Counter; |
3371 | ++Counter; |
3372 | } |
3373 | for (const Expr *E : Data.LastprivateVars) { |
3374 | Args.push_back(ImplicitParamDecl::Create( |
3375 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3376 | C.getPointerType(C.getPointerType(E->getType())) |
3377 | .withConst() |
3378 | .withRestrict(), |
3379 | ImplicitParamDecl::Other)); |
3380 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3381 | PrivateVarsPos[VD] = Counter; |
3382 | ++Counter; |
3383 | } |
3384 | for (const VarDecl *VD : Data.PrivateLocals) { |
3385 | QualType Ty = VD->getType().getNonReferenceType(); |
3386 | if (VD->getType()->isLValueReferenceType()) |
3387 | Ty = C.getPointerType(Ty); |
3388 | if (isAllocatableDecl(VD)) |
3389 | Ty = C.getPointerType(Ty); |
3390 | Args.push_back(ImplicitParamDecl::Create( |
3391 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3392 | C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), |
3393 | ImplicitParamDecl::Other)); |
3394 | PrivateVarsPos[VD] = Counter; |
3395 | ++Counter; |
3396 | } |
3397 | const auto &TaskPrivatesMapFnInfo = |
3398 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3399 | llvm::FunctionType *TaskPrivatesMapTy = |
3400 | CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); |
3401 | std::string Name = |
3402 | CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); |
3403 | auto *TaskPrivatesMap = llvm::Function::Create( |
3404 | TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, |
3405 | &CGM.getModule()); |
3406 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, |
3407 | TaskPrivatesMapFnInfo); |
3408 | if (CGM.getLangOpts().Optimize) { |
3409 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); |
3410 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); |
3411 | TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); |
3412 | } |
3413 | CodeGenFunction CGF(CGM); |
3414 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, |
3415 | TaskPrivatesMapFnInfo, Args, Loc, Loc); |
3416 | |
3417 | // *privi = &.privates.privi; |
3418 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3419 | CGF.GetAddrOfLocalVar(&TaskPrivatesArg), |
3420 | TaskPrivatesArg.getType()->castAs<PointerType>()); |
3421 | const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); |
3422 | Counter = 0; |
3423 | for (const FieldDecl *Field : PrivatesQTyRD->fields()) { |
3424 | LValue FieldLVal = CGF.EmitLValueForField(Base, Field); |
3425 | const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; |
3426 | LValue RefLVal = |
3427 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); |
3428 | LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( |
3429 | RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); |
3430 | CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); |
3431 | ++Counter; |
3432 | } |
3433 | CGF.FinishFunction(); |
3434 | return TaskPrivatesMap; |
3435 | } |
3436 | |
3437 | /// Emit initialization for private variables in task-based directives. |
3438 | static void emitPrivatesInit(CodeGenFunction &CGF, |
3439 | const OMPExecutableDirective &D, |
3440 | Address KmpTaskSharedsPtr, LValue TDBase, |
3441 | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3442 | QualType SharedsTy, QualType SharedsPtrTy, |
3443 | const OMPTaskDataTy &Data, |
3444 | ArrayRef<PrivateDataTy> Privates, bool ForDup) { |
3445 | ASTContext &C = CGF.getContext(); |
3446 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3447 | LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); |
3448 | OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) |
3449 | ? OMPD_taskloop |
3450 | : OMPD_task; |
3451 | const CapturedStmt &CS = *D.getCapturedStmt(Kind); |
3452 | CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); |
3453 | LValue SrcBase; |
3454 | bool IsTargetTask = |
3455 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || |
3456 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()); |
3457 | // For target-based directives skip 4 firstprivate arrays BasePointersArray, |
3458 | // PointersArray, SizesArray, and MappersArray. The original variables for |
3459 | // these arrays are not captured and we get their addresses explicitly. |
3460 | if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || |
3461 | (IsTargetTask && KmpTaskSharedsPtr.isValid())) { |
3462 | SrcBase = CGF.MakeAddrLValue( |
3463 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3464 | KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy), |
3465 | CGF.ConvertTypeForMem(SharedsTy)), |
3466 | SharedsTy); |
3467 | } |
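| // When valid, SrcBase views the shareds record with its real type and serves
| // as the source for firstprivate copies, either while duplicating a task
| // (ForDup) or for target tasks whose shareds pointer is supplied explicitly.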
3468 | FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); |
3469 | for (const PrivateDataTy &Pair : Privates) { |
3470 | // Do not initialize private locals. |
3471 | if (Pair.second.isLocalPrivate()) { |
3472 | ++FI; |
3473 | continue; |
3474 | } |
3475 | const VarDecl *VD = Pair.second.PrivateCopy; |
3476 | const Expr *Init = VD->getAnyInitializer(); |
3477 | if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && |
3478 | !CGF.isTrivialInitializer(Init)))) { |
3479 | LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); |
3480 | if (const VarDecl *Elem = Pair.second.PrivateElemInit) { |
3481 | const VarDecl *OriginalVD = Pair.second.Original; |
3482 | // Check if the variable is the target-based BasePointersArray, |
3483 | // PointersArray, SizesArray, or MappersArray. |
3484 | LValue SharedRefLValue; |
3485 | QualType Type = PrivateLValue.getType(); |
3486 | const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); |
3487 | if (IsTargetTask && !SharedField) { |
3488 | assert(isa<ImplicitParamDecl>(OriginalVD) &&
3489 | isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
3490 | cast<CapturedDecl>(OriginalVD->getDeclContext())
3491 | ->getNumParams() == 0 &&
3492 | isa<TranslationUnitDecl>(
3493 | cast<CapturedDecl>(OriginalVD->getDeclContext())
3494 | ->getDeclContext()) &&
3495 | "Expected artificial target data variable.");
3496 | SharedRefLValue = |
3497 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); |
3498 | } else if (ForDup) { |
3499 | SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); |
3500 | SharedRefLValue = CGF.MakeAddrLValue( |
3501 | SharedRefLValue.getAddress(CGF).withAlignment( |
3502 | C.getDeclAlign(OriginalVD)), |
3503 | SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), |
3504 | SharedRefLValue.getTBAAInfo()); |
3505 | } else if (CGF.LambdaCaptureFields.count( |
3506 | Pair.second.Original->getCanonicalDecl()) > 0 || |
3507 | isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) { |
3508 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3509 | } else { |
3510 | // Processing for implicitly captured variables. |
3511 | InlinedOpenMPRegionRAII Region( |
3512 | CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, |
3513 | /*HasCancel=*/false, /*NoInheritance=*/true); |
3514 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3515 | } |
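| // SharedRefLValue now designates the original shared value. Arrays with a
| // trivial initializer are copied with an aggregate assignment, other arrays
| // element by element, and remaining types through their initializer
| // expression with the shared value privatized as the init source.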
3516 | if (Type->isArrayType()) { |
3517 | // Initialize firstprivate array. |
3518 | if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { |
3519 | // Perform simple memcpy. |
3520 | CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); |
3521 | } else { |
3522 | // Initialize firstprivate array using element-by-element |
3523 | // initialization. |
3524 | CGF.EmitOMPAggregateAssign( |
3525 | PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), |
3526 | Type, |
3527 | [&CGF, Elem, Init, &CapturesInfo](Address DestElement, |
3528 | Address SrcElement) { |
3529 | // Clean up any temporaries needed by the initialization. |
3530 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3531 | InitScope.addPrivate(Elem, SrcElement); |
3532 | (void)InitScope.Privatize(); |
3533 | // Emit initialization for single element. |
3534 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( |
3535 | CGF, &CapturesInfo); |
3536 | CGF.EmitAnyExprToMem(Init, DestElement, |
3537 | Init->getType().getQualifiers(), |
3538 | /*IsInitializer=*/false); |
3539 | }); |
3540 | } |
3541 | } else { |
3542 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3543 | InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF)); |
3544 | (void)InitScope.Privatize(); |
3545 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); |
3546 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, |
3547 | /*capturedByInit=*/false); |
3548 | } |
3549 | } else { |
3550 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); |
3551 | } |
3552 | } |
3553 | ++FI; |
3554 | } |
3555 | } |
3556 | |
3557 | /// Check whether any private copy needs non-trivial initialization, i.e.
3558 | /// whether a taskloop duplication function must perform constructor calls.
3558 | static bool checkInitIsRequired(CodeGenFunction &CGF, |
3559 | ArrayRef<PrivateDataTy> Privates) { |
3560 | bool InitRequired = false; |
3561 | for (const PrivateDataTy &Pair : Privates) { |
3562 | if (Pair.second.isLocalPrivate()) |
3563 | continue; |
3564 | const VarDecl *VD = Pair.second.PrivateCopy; |
3565 | const Expr *Init = VD->getAnyInitializer(); |
3566 | InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) && |
3567 | !CGF.isTrivialInitializer(Init)); |
3568 | if (InitRequired) |
3569 | break; |
3570 | } |
3571 | return InitRequired; |
3572 | } |
3573 | |
3574 | |
3575 | /// Emit task_dup function (for initialization of |
3576 | /// private/firstprivate/lastprivate vars and last_iter flag) |
3577 | /// \code |
3578 | /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int |
3579 | /// lastpriv) { |
3580 | /// // setup lastprivate flag |
3581 | /// task_dst->last = lastpriv; |
3582 | /// // could be constructor calls here... |
3583 | /// } |
3584 | /// \endcode |
3585 | static llvm::Value * |
3586 | emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, |
3587 | const OMPExecutableDirective &D, |
3588 | QualType KmpTaskTWithPrivatesPtrQTy, |
3589 | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3590 | const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, |
3591 | QualType SharedsPtrTy, const OMPTaskDataTy &Data, |
3592 | ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { |
3593 | ASTContext &C = CGM.getContext(); |
3594 | FunctionArgList Args; |
3595 | ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3596 | KmpTaskTWithPrivatesPtrQTy, |
3597 | ImplicitParamDecl::Other); |
3598 | ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3599 | KmpTaskTWithPrivatesPtrQTy, |
3600 | ImplicitParamDecl::Other); |
3601 | ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, |
3602 | ImplicitParamDecl::Other); |
3603 | Args.push_back(&DstArg); |
3604 | Args.push_back(&SrcArg); |
3605 | Args.push_back(&LastprivArg); |
3606 | const auto &TaskDupFnInfo = |
3607 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3608 | llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); |
3609 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); |
3610 | auto *TaskDup = llvm::Function::Create( |
3611 | TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3612 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); |
3613 | TaskDup->setDoesNotRecurse(); |
3614 | CodeGenFunction CGF(CGM); |
3615 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, |
3616 | Loc); |
3617 | |
3618 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3619 | CGF.GetAddrOfLocalVar(&DstArg), |
3620 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3621 | // task_dst->liter = lastpriv; |
3622 | if (WithLastIter) { |
3623 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3624 | LValue Base = CGF.EmitLValueForField( |
3625 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3626 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3627 | llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( |
3628 | CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); |
3629 | CGF.EmitStoreOfScalar(Lastpriv, LILVal); |
3630 | } |
3631 | |
3632 | // Emit initial values for private copies (if any). |
3633 | assert(!Privates.empty());
3634 | Address KmpTaskSharedsPtr = Address::invalid(); |
3635 | if (!Data.FirstprivateVars.empty()) { |
3636 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3637 | CGF.GetAddrOfLocalVar(&SrcArg), |
3638 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3639 | LValue Base = CGF.EmitLValueForField( |
3640 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3641 | KmpTaskSharedsPtr = Address( |
3642 | CGF.EmitLoadOfScalar(CGF.EmitLValueForField( |
3643 | Base, *std::next(KmpTaskTQTyRD->field_begin(), |
3644 | KmpTaskTShareds)), |
3645 | Loc), |
3646 | CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); |
3647 | } |
3648 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, |
3649 | SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); |
3650 | CGF.FinishFunction(); |
3651 | return TaskDup; |
3652 | } |
3653 | |
3654 | /// Checks if destructor function is required to be generated. |
3655 | /// \return true if cleanups are required, false otherwise. |
3656 | static bool |
3657 | checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3658 | ArrayRef<PrivateDataTy> Privates) { |
3659 | for (const PrivateDataTy &P : Privates) { |
3660 | if (P.second.isLocalPrivate()) |
3661 | continue; |
3662 | QualType Ty = P.second.Original->getType().getNonReferenceType(); |
3663 | if (Ty.isDestructedType()) |
3664 | return true; |
3665 | } |
3666 | return false; |
3667 | } |
3668 | |
3669 | namespace { |
3670 | /// Loop generator for OpenMP iterator expression. |
3671 | class OMPIteratorGeneratorScope final |
3672 | : public CodeGenFunction::OMPPrivateScope { |
3673 | CodeGenFunction &CGF; |
3674 | const OMPIteratorExpr *E = nullptr; |
3675 | SmallVector<CodeGenFunction::JumpDest, 4> ContDests; |
3676 | SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; |
3677 | OMPIteratorGeneratorScope() = delete; |
3678 | OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; |
3679 | |
3680 | public: |
3681 | OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) |
3682 | : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { |
3683 | if (!E) |
3684 | return; |
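| // One private copy plus an internal counter is created per iterator; the
| // counter-driven loops are opened here and closed again, in reverse order,
| // by the destructor once the enclosed code has been emitted.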
3685 | SmallVector<llvm::Value *, 4> Uppers; |
3686 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { |
3687 | Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); |
3688 | const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); |
3689 | addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName())); |
3690 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
3691 | addPrivate( |
3692 | HelperData.CounterVD, |
3693 | CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr")); |
3694 | } |
3695 | Privatize(); |
3696 | |
3697 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { |
3698 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
3699 | LValue CLVal = |
3700 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), |
3701 | HelperData.CounterVD->getType()); |
3702 | // Counter = 0; |
3703 | CGF.EmitStoreOfScalar( |
3704 | llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), |
3705 | CLVal); |
3706 | CodeGenFunction::JumpDest &ContDest = |
3707 | ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); |
3708 | CodeGenFunction::JumpDest &ExitDest = |
3709 | ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); |
3710 | // N = <number-of-iterations>;
3711 | llvm::Value *N = Uppers[I]; |
3712 | // cont: |
3713 | // if (Counter < N) goto body; else goto exit; |
3714 | CGF.EmitBlock(ContDest.getBlock()); |
3715 | auto *CVal = |
3716 | CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); |
3717 | llvm::Value *Cmp = |
3718 | HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() |
3719 | ? CGF.Builder.CreateICmpSLT(CVal, N) |
3720 | : CGF.Builder.CreateICmpULT(CVal, N); |
3721 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); |
3722 | CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); |
3723 | // body: |
3724 | CGF.EmitBlock(BodyBB); |
3725 | // Iteri = Begini + Counter * Stepi; |
3726 | CGF.EmitIgnoredExpr(HelperData.Update); |
3727 | } |
3728 | } |
3729 | ~OMPIteratorGeneratorScope() { |
3730 | if (!E) |
3731 | return; |
3732 | for (unsigned I = E->numOfIterators(); I > 0; --I) { |
3733 | // Counter = Counter + 1; |
3734 | const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); |
3735 | CGF.EmitIgnoredExpr(HelperData.CounterUpdate); |
3736 | // goto cont; |
3737 | CGF.EmitBranchThroughCleanup(ContDests[I - 1]); |
3738 | // exit: |
3739 | CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); |
3740 | } |
3741 | } |
3742 | }; |
3743 | } // namespace |
3744 | |
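| /// Compute the base address and the size in bytes of the storage denoted by
| /// \p E: for array-shaping expressions the size is the element size times the
| /// product of the dimensions, for array sections it is the byte distance from
| /// the section start to one past its upper bound, otherwise it is the size of
| /// the expression's type.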
3745 | static std::pair<llvm::Value *, llvm::Value *> |
3746 | getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { |
3747 | const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); |
3748 | llvm::Value *Addr; |
3749 | if (OASE) { |
3750 | const Expr *Base = OASE->getBase(); |
3751 | Addr = CGF.EmitScalarExpr(Base); |
3752 | } else { |
3753 | Addr = CGF.EmitLValue(E).getPointer(CGF); |
3754 | } |
3755 | llvm::Value *SizeVal; |
3756 | QualType Ty = E->getType(); |
3757 | if (OASE) { |
3758 | SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); |
3759 | for (const Expr *SE : OASE->getDimensions()) { |
3760 | llvm::Value *Sz = CGF.EmitScalarExpr(SE); |
3761 | Sz = CGF.EmitScalarConversion( |
3762 | Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); |
3763 | SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); |
3764 | } |
3765 | } else if (const auto *ASE = |
3766 | dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { |
3767 | LValue UpAddrLVal = |
3768 | CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); |
3769 | Address UpAddrAddress = UpAddrLVal.getAddress(CGF); |
3770 | llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32( |
3771 | UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1); |
3772 | llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); |
3773 | llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); |
3774 | SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); |
3775 | } else { |
3776 | SizeVal = CGF.getTypeSize(Ty); |
3777 | } |
3778 | return std::make_pair(Addr, SizeVal); |
3779 | } |
3780 | |
3781 | /// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags type.
3782 | static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { |
3783 | QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); |
3784 | if (KmpTaskAffinityInfoTy.isNull()) { |
3785 | RecordDecl *KmpAffinityInfoRD = |
3786 | C.buildImplicitRecord("kmp_task_affinity_info_t"); |
3787 | KmpAffinityInfoRD->startDefinition(); |
3788 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); |
3789 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); |
3790 | addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); |
3791 | KmpAffinityInfoRD->completeDefinition(); |
3792 | KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); |
3793 | } |
3794 | } |
3795 | |
3796 | CGOpenMPRuntime::TaskResultTy |
3797 | CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, |
3798 | const OMPExecutableDirective &D, |
3799 | llvm::Function *TaskFunction, QualType SharedsTy, |
3800 | Address Shareds, const OMPTaskDataTy &Data) { |
3801 | ASTContext &C = CGM.getContext(); |
3802 | llvm::SmallVector<PrivateDataTy, 4> Privates; |
3803 | // Aggregate privates and sort them by the alignment. |
3804 | const auto *I = Data.PrivateCopies.begin(); |
3805 | for (const Expr *E : Data.PrivateVars) { |
3806 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3807 | Privates.emplace_back( |
3808 | C.getDeclAlign(VD), |
3809 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
3810 | /*PrivateElemInit=*/nullptr)); |
3811 | ++I; |
3812 | } |
3813 | I = Data.FirstprivateCopies.begin(); |
3814 | const auto *IElemInitRef = Data.FirstprivateInits.begin(); |
3815 | for (const Expr *E : Data.FirstprivateVars) { |
3816 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3817 | Privates.emplace_back( |
3818 | C.getDeclAlign(VD), |
3819 | PrivateHelpersTy( |
3820 | E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
3821 | cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); |
3822 | ++I; |
3823 | ++IElemInitRef; |
3824 | } |
3825 | I = Data.LastprivateCopies.begin(); |
3826 | for (const Expr *E : Data.LastprivateVars) { |
3827 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3828 | Privates.emplace_back( |
3829 | C.getDeclAlign(VD), |
3830 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
3831 | /*PrivateElemInit=*/nullptr)); |
3832 | ++I; |
3833 | } |
3834 | for (const VarDecl *VD : Data.PrivateLocals) { |
3835 | if (isAllocatableDecl(VD)) |
3836 | Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); |
3837 | else |
3838 | Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); |
3839 | } |
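| // Sorting by decreasing alignment places the most strictly aligned copies
| // first, which should minimize padding in the generated .privates. record.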
3840 | llvm::stable_sort(Privates, |
3841 | [](const PrivateDataTy &L, const PrivateDataTy &R) { |
3842 | return L.first > R.first; |
3843 | }); |
3844 | QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
3845 | // Build type kmp_routine_entry_t (if not built yet). |
3846 | emitKmpRoutineEntryT(KmpInt32Ty); |
3847 | // Build type kmp_task_t (if not built yet). |
3848 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { |
3849 | if (SavedKmpTaskloopTQTy.isNull()) { |
3850 | SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
3851 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
3852 | } |
3853 | KmpTaskTQTy = SavedKmpTaskloopTQTy; |
3854 | } else { |
3855 | assert((D.getDirectiveKind() == OMPD_task ||
3856 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3857 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3858 | "Expected taskloop, task or target directive");
3859 | if (SavedKmpTaskTQTy.isNull()) { |
3860 | SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
3861 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
3862 | } |
3863 | KmpTaskTQTy = SavedKmpTaskTQTy; |
3864 | } |
3865 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
3866 | // Build particular struct kmp_task_t for the given task. |
3867 | const RecordDecl *KmpTaskTWithPrivatesQTyRD = |
3868 | createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); |
3869 | QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); |
3870 | QualType KmpTaskTWithPrivatesPtrQTy = |
3871 | C.getPointerType(KmpTaskTWithPrivatesQTy); |
3872 | llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); |
3873 | llvm::Type *KmpTaskTWithPrivatesPtrTy = |
3874 | KmpTaskTWithPrivatesTy->getPointerTo(); |
3875 | llvm::Value *KmpTaskTWithPrivatesTySize = |
3876 | CGF.getTypeSize(KmpTaskTWithPrivatesQTy); |
3877 | QualType SharedsPtrTy = C.getPointerType(SharedsTy); |
3878 | |
3879 | // Emit the privates mapping function (if there are any privates).
3880 | llvm::Value *TaskPrivatesMap = nullptr; |
3881 | llvm::Type *TaskPrivatesMapTy = |
3882 | std::next(TaskFunction->arg_begin(), 3)->getType(); |
3883 | if (!Privates.empty()) { |
3884 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3885 | TaskPrivatesMap = |
3886 | emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); |
3887 | TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3888 | TaskPrivatesMap, TaskPrivatesMapTy); |
3889 | } else { |
3890 | TaskPrivatesMap = llvm::ConstantPointerNull::get( |
3891 | cast<llvm::PointerType>(TaskPrivatesMapTy)); |
3892 | } |
3893 | // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, |
3894 | // kmp_task_t *tt); |
3895 | llvm::Function *TaskEntry = emitProxyTaskFunction( |
3896 | CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
3897 | KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, |
3898 | TaskPrivatesMap); |
3899 | |
3900 | // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, |
3901 | // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, |
3902 | // kmp_routine_entry_t *task_entry); |
3903 | // Task flags. Format is taken from |
3904 | // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, |
3905 | // description of kmp_tasking_flags struct. |
3906 | enum { |
3907 | TiedFlag = 0x1, |
3908 | FinalFlag = 0x2, |
3909 | DestructorsFlag = 0x8, |
3910 | PriorityFlag = 0x20, |
3911 | DetachableFlag = 0x40, |
3912 | }; |
3913 | unsigned Flags = Data.Tied ? TiedFlag : 0; |
3914 | bool NeedsCleanup = false; |
3915 | if (!Privates.empty()) { |
3916 | NeedsCleanup = |
3917 | checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); |
3918 | if (NeedsCleanup) |
3919 | Flags = Flags | DestructorsFlag; |
3920 | } |
3921 | if (Data.Priority.getInt()) |
3922 | Flags = Flags | PriorityFlag; |
3923 | if (D.hasClausesOfKind<OMPDetachClause>()) |
3924 | Flags = Flags | DetachableFlag; |
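| // Data.Final is a pointer/int pair: if an expression was given for the final
| // clause, the FinalFlag is selected at run time; otherwise the constant value
| // decides whether the flag is set.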
3925 | llvm::Value *TaskFlags = |
3926 | Data.Final.getPointer() |
3927 | ? CGF.Builder.CreateSelect(Data.Final.getPointer(), |
3928 | CGF.Builder.getInt32(FinalFlag), |
3929 | CGF.Builder.getInt32(/*C=*/0)) |
3930 | : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); |
3931 | TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); |
3932 | llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); |
3933 | SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), |
3934 | getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, |
3935 | SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3936 | TaskEntry, KmpRoutineEntryPtrTy)}; |
3937 | llvm::Value *NewTask; |
3938 | if (D.hasClausesOfKind<OMPNowaitClause>()) { |
3939 | // Check if we have any device clause associated with the directive. |
3940 | const Expr *Device = nullptr; |
3941 | if (auto *C = D.getSingleClause<OMPDeviceClause>()) |
3942 | Device = C->getDevice(); |
3943 | // Emit device ID if any otherwise use default value. |
3944 | llvm::Value *DeviceID; |
3945 | if (Device) |
3946 | DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), |
3947 | CGF.Int64Ty, /*isSigned=*/true); |
3948 | else |
3949 | DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); |
3950 | AllocArgs.push_back(DeviceID); |
3951 | NewTask = CGF.EmitRuntimeCall( |
3952 | OMPBuilder.getOrCreateRuntimeFunction( |
3953 | CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), |
3954 | AllocArgs); |
3955 | } else { |
3956 | NewTask = |
3957 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
3958 | CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), |
3959 | AllocArgs); |
3960 | } |
3961 | // Emit detach clause initialization. |
3962 | // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, |
3963 | // task_descriptor); |
3964 | if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { |
3965 | const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); |
3966 | LValue EvtLVal = CGF.EmitLValue(Evt); |
3967 | |
3968 | // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, |
3969 | // int gtid, kmp_task_t *task); |
3970 | llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); |
3971 | llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); |
3972 | Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); |
3973 | llvm::Value *EvtVal = CGF.EmitRuntimeCall( |
3974 | OMPBuilder.getOrCreateRuntimeFunction( |
3975 | CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), |
3976 | {Loc, Tid, NewTask}); |
3977 | EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), |
3978 | Evt->getExprLoc()); |
3979 | CGF.EmitStoreOfScalar(EvtVal, EvtLVal); |
3980 | } |
3981 | // Process affinity clauses. |
3982 | if (D.hasClausesOfKind<OMPAffinityClause>()) { |
3983 | // Process list of affinity data. |
3984 | ASTContext &C = CGM.getContext(); |
3985 | Address AffinitiesArray = Address::invalid(); |
3986 | // Calculate number of elements to form the array of affinity data. |
3987 | llvm::Value *NumOfElements = nullptr; |
3988 | unsigned NumAffinities = 0; |
3989 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
3990 | if (const Expr *Modifier = C->getModifier()) { |
3991 | const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); |
3992 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { |
3993 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
3994 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
3995 | NumOfElements = |
3996 | NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; |
3997 | } |
3998 | } else { |
3999 | NumAffinities += C->varlist_size(); |
4000 | } |
4001 | } |
4002 | getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); |
4003 | // Fields ids in kmp_task_affinity_info record. |
4004 | enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; |
4005 | |
4006 | QualType KmpTaskAffinityInfoArrayTy; |
4007 | if (NumOfElements) { |
4008 | NumOfElements = CGF.Builder.CreateNUWAdd( |
4009 | llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); |
4010 | auto *OVE = new (C) OpaqueValueExpr( |
4011 | Loc, |
4012 | C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), |
4013 | VK_PRValue); |
4014 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, |
4015 | RValue::get(NumOfElements)); |
4016 | KmpTaskAffinityInfoArrayTy = |
4017 | C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal, |
4018 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4019 | // Properly emit variable-sized array. |
4020 | auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, |
4021 | ImplicitParamDecl::Other); |
4022 | CGF.EmitVarDecl(*PD); |
4023 | AffinitiesArray = CGF.GetAddrOfLocalVar(PD); |
4024 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4025 | /*isSigned=*/false); |
4026 | } else { |
4027 | KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( |
4028 | KmpTaskAffinityInfoTy, |
4029 | llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, |
4030 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4031 | AffinitiesArray = |
4032 | CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); |
4033 | AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); |
4034 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, |
4035 | /*isSigned=*/false); |
4036 | } |
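| // AffinitiesArray now addresses the first kmp_task_affinity_info_t element of
| // either a variable-length array (iterator-dependent count) or a fixed-size
| // array, and NumOfElements holds the total element count as an i32 value.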
4037 | |
4038 | const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); |
4039 | // Fill array by elements without iterators. |
4040 | unsigned Pos = 0; |
4041 | bool HasIterator = false; |
4042 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4043 | if (C->getModifier()) { |
4044 | HasIterator = true; |
4045 | continue; |
4046 | } |
4047 | for (const Expr *E : C->varlists()) { |
4048 | llvm::Value *Addr; |
4049 | llvm::Value *Size; |
4050 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4051 | LValue Base = |
4052 | CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), |
4053 | KmpTaskAffinityInfoTy); |
4054 | // affs[i].base_addr = &<Affinities[i].second>; |
4055 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4056 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4057 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4058 | BaseAddrLVal); |
4059 | // affs[i].len = sizeof(<Affinities[i].second>); |
4060 | LValue LenLVal = CGF.EmitLValueForField( |
4061 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4062 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4063 | ++Pos; |
4064 | } |
4065 | } |
4066 | LValue PosLVal; |
4067 | if (HasIterator) { |
4068 | PosLVal = CGF.MakeAddrLValue( |
4069 | CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), |
4070 | C.getSizeType()); |
4071 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4072 | } |
4073 | // Process elements with iterators. |
4074 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4075 | const Expr *Modifier = C->getModifier(); |
4076 | if (!Modifier) |
4077 | continue; |
4078 | OMPIteratorGeneratorScope IteratorScope( |
4079 | CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); |
4080 | for (const Expr *E : C->varlists()) { |
4081 | llvm::Value *Addr; |
4082 | llvm::Value *Size; |
4083 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4084 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4085 | LValue Base = CGF.MakeAddrLValue( |
4086 | CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy); |
4087 | // affs[i].base_addr = &<Affinities[i].second>; |
4088 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4089 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4090 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4091 | BaseAddrLVal); |
4092 | // affs[i].len = sizeof(<Affinities[i].second>); |
4093 | LValue LenLVal = CGF.EmitLValueForField( |
4094 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4095 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4096 | Idx = CGF.Builder.CreateNUWAdd( |
4097 | Idx, llvm::ConstantInt::get(Idx->getType(), 1)); |
4098 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4099 | } |
4100 | } |
4101 | // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, |
4102 | // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 |
4103 | // naffins, kmp_task_affinity_info_t *affin_list); |
4104 | llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); |
4105 | llvm::Value *GTid = getThreadID(CGF, Loc); |
4106 | llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4107 | AffinitiesArray.getPointer(), CGM.VoidPtrTy); |
4108 | // FIXME: Emit the function and ignore its result for now unless the |
4109 | // runtime function is properly implemented. |
4110 | (void)CGF.EmitRuntimeCall( |
4111 | OMPBuilder.getOrCreateRuntimeFunction( |
4112 | CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), |
4113 | {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); |
4114 | } |
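| // Cast the opaque task object returned by the runtime to the concrete
| // kmp_task_t_with_privates pointer type so that the kmp_task_t header and the
| // privates block can be filled in below.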
4115 | llvm::Value *NewTaskNewTaskTTy = |
4116 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4117 | NewTask, KmpTaskTWithPrivatesPtrTy); |
4118 | LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, |
4119 | KmpTaskTWithPrivatesQTy); |
4120 | LValue TDBase = |
4121 | CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
4122 | // Fill the data in the resulting kmp_task_t record. |
4123 | // Copy shareds if there are any. |
4124 | Address KmpTaskSharedsPtr = Address::invalid(); |
4125 | if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { |
4126 | KmpTaskSharedsPtr = Address( |
4127 | CGF.EmitLoadOfScalar( |
4128 | CGF.EmitLValueForField( |
4129 | TDBase, |
4130 | *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)), |
4131 | Loc), |
4132 | CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy)); |
4133 | LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); |
4134 | LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); |
4135 | CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); |
4136 | } |
4137 | // Emit initial values for private copies (if any). |
4138 | TaskResultTy Result; |
4139 | if (!Privates.empty()) { |
4140 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, |
4141 | SharedsTy, SharedsPtrTy, Data, Privates, |
4142 | /*ForDup=*/false); |
4143 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && |
4144 | (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { |
4145 | Result.TaskDupFn = emitTaskDupFunction( |
4146 | CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, |
4147 | KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, |
4148 | /*WithLastIter=*/!Data.LastprivateVars.empty()); |
4149 | } |
4150 | } |
4151 | // Fields of union "kmp_cmplrdata_t" for destructors and priority. |
4152 | enum { Priority = 0, Destructors = 1 }; |
4153 | // Provide pointer to function with destructors for privates. |
4154 | auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); |
4155 | const RecordDecl *KmpCmplrdataUD = |
4156 | (*FI)->getType()->getAsUnionType()->getDecl(); |
4157 | if (NeedsCleanup) { |
4158 | llvm::Value *DestructorFn = emitDestructorsFunction( |
4159 | CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
4160 | KmpTaskTWithPrivatesQTy); |
4161 | LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); |
4162 | LValue DestructorsLV = CGF.EmitLValueForField( |
4163 | Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); |
4164 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4165 | DestructorFn, KmpRoutineEntryPtrTy), |
4166 | DestructorsLV); |
4167 | } |
4168 | // Set priority. |
4169 | if (Data.Priority.getInt()) { |
4170 | LValue Data2LV = CGF.EmitLValueForField( |
4171 | TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); |
4172 | LValue PriorityLV = CGF.EmitLValueForField( |
4173 | Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); |
4174 | CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); |
4175 | } |
4176 | Result.NewTask = NewTask; |
4177 | Result.TaskEntry = TaskEntry; |
4178 | Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; |
4179 | Result.TDBase = TDBase; |
4180 | Result.KmpTaskTQTyRD = KmpTaskTQTyRD; |
4181 | return Result; |
4182 | } |
4183 | |
4184 | /// Translates internal dependency kind into the runtime kind. |
4185 | static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { |
4186 | RTLDependenceKindTy DepKind; |
4187 | switch (K) { |
4188 | case OMPC_DEPEND_in: |
4189 | DepKind = RTLDependenceKindTy::DepIn; |
4190 | break; |
4191 | // Out and InOut dependencies must use the same code. |
4192 | case OMPC_DEPEND_out: |
4193 | case OMPC_DEPEND_inout: |
4194 | DepKind = RTLDependenceKindTy::DepInOut; |
4195 | break; |
4196 | case OMPC_DEPEND_mutexinoutset: |
4197 | DepKind = RTLDependenceKindTy::DepMutexInOutSet; |
4198 | break; |
4199 | case OMPC_DEPEND_inoutset: |
4200 | DepKind = RTLDependenceKindTy::DepInOutSet; |
4201 | break; |
4202 | case OMPC_DEPEND_outallmemory: |
4203 | DepKind = RTLDependenceKindTy::DepOmpAllMem; |
4204 | break; |
4205 | case OMPC_DEPEND_source: |
4206 | case OMPC_DEPEND_sink: |
4207 | case OMPC_DEPEND_depobj: |
4208 | case OMPC_DEPEND_inoutallmemory: |
4209 | case OMPC_DEPEND_unknown: |
4210 | llvm_unreachable("Unknown task dependence type")::llvm::llvm_unreachable_internal("Unknown task dependence type" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4210); |
4211 | } |
4212 | return DepKind; |
4213 | } |
4214 | |
4215 | /// Builds kmp_depend_info, if it is not built yet, and builds flags type. |
4216 | static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, |
4217 | QualType &FlagsTy) { |
4218 | FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); |
4219 | if (KmpDependInfoTy.isNull()) { |
4220 | RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); |
4221 | KmpDependInfoRD->startDefinition(); |
4222 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); |
4223 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); |
4224 | addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); |
4225 | KmpDependInfoRD->completeDefinition(); |
4226 | KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); |
4227 | } |
4228 | } |
4229 | |
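| // Return the number of dependencies recorded in a depobj together with an
| // lvalue for its first kmp_depend_info element; the element count is kept in
| // the base_addr field of the entry at index -1, immediately before the array.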
4230 | std::pair<llvm::Value *, LValue> |
4231 | CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, |
4232 | SourceLocation Loc) { |
4233 | ASTContext &C = CGM.getContext(); |
4234 | QualType FlagsTy; |
4235 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4236 | RecordDecl *KmpDependInfoRD = |
4237 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4238 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4239 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4240 | CGF.Builder.CreateElementBitCast( |
4241 | DepobjLVal.getAddress(CGF), |
4242 | CGF.ConvertTypeForMem(KmpDependInfoPtrTy)), |
4243 | KmpDependInfoPtrTy->castAs<PointerType>()); |
4244 | Address DepObjAddr = CGF.Builder.CreateGEP( |
4245 | Base.getAddress(CGF), |
4246 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4247 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4248 | DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo()); |
4249 | // NumDeps = deps[i].base_addr; |
4250 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4251 | NumDepsBase, |
4252 | *std::next(KmpDependInfoRD->field_begin(), |
4253 | static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); |
4254 | llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); |
4255 | return std::make_pair(NumDeps, Base); |
4256 | } |
4257 | |
4258 | static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4259 | llvm::PointerUnion<unsigned *, LValue *> Pos, |
4260 | const OMPTaskDataTy::DependData &Data, |
4261 | Address DependenciesArray) { |
4262 | CodeGenModule &CGM = CGF.CGM; |
4263 | ASTContext &C = CGM.getContext(); |
4264 | QualType FlagsTy; |
4265 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4266 | RecordDecl *KmpDependInfoRD = |
4267 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4268 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
4269 | |
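| // Fill one kmp_depend_info record per dependence expression. Pos is either a
| // compile-time index (no iterator modifier) or an lvalue holding a run-time
| // index that is advanced after each record is written.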
4270 | OMPIteratorGeneratorScope IteratorScope( |
4271 | CGF, cast_or_null<OMPIteratorExpr>( |
4272 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4273 | : nullptr)); |
4274 | for (const Expr *E : Data.DepExprs) { |
4275 | llvm::Value *Addr; |
4276 | llvm::Value *Size; |
4277 | |
4278 | // The expression will be a nullptr in the 'omp_all_memory' case. |
4279 | if (E) { |
4280 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4281 | Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy); |
4282 | } else { |
4283 | Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4284 | Size = llvm::ConstantInt::get(CGF.SizeTy, 0); |
4285 | } |
4286 | LValue Base; |
4287 | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4288 | Base = CGF.MakeAddrLValue( |
4289 | CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); |
4290 | } else { |
4291 | assert(E && "Expected a non-null expression")(static_cast <bool> (E && "Expected a non-null expression" ) ? void (0) : __assert_fail ("E && \"Expected a non-null expression\"" , "clang/lib/CodeGen/CGOpenMPRuntime.cpp", 4291, __extension__ __PRETTY_FUNCTION__)); |
4292 | LValue &PosLVal = *Pos.get<LValue *>(); |
4293 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4294 | Base = CGF.MakeAddrLValue( |
4295 | CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy); |
4296 | } |
4297 | // deps[i].base_addr = &<Dependencies[i].second>; |
4298 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4299 | Base, |
4300 | *std::next(KmpDependInfoRD->field_begin(), |
4301 | static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); |
4302 | CGF.EmitStoreOfScalar(Addr, BaseAddrLVal); |
4303 | // deps[i].len = sizeof(<Dependencies[i].second>); |
4304 | LValue LenLVal = CGF.EmitLValueForField( |
4305 | Base, *std::next(KmpDependInfoRD->field_begin(), |
4306 | static_cast<unsigned int>(RTLDependInfoFields::Len))); |
4307 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4308 | // deps[i].flags = <Dependencies[i].first>; |
4309 | RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); |
4310 | LValue FlagsLVal = CGF.EmitLValueForField( |
4311 | Base, |
4312 | *std::next(KmpDependInfoRD->field_begin(), |
4313 | static_cast<unsigned int>(RTLDependInfoFields::Flags))); |
4314 | CGF.EmitStoreOfScalar( |
4315 | llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), |
4316 | FlagsLVal); |
4317 | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4318 | ++(*P); |
4319 | } else { |
4320 | LValue &PosLVal = *Pos.get<LValue *>(); |
4321 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4322 | Idx = CGF.Builder.CreateNUWAdd(Idx, |
4323 | llvm::ConstantInt::get(Idx->getType(), 1)); |
4324 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4325 | } |
4326 | } |
4327 | } |
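     | // Illustrative sketch (editorial, not part of the original source): for a
     | // clause such as 'depend(in : x)' the loop above fills one kmp_depend_info
     | // record roughly as
     | //   deps[i].base_addr = (intptr_t)&x;
     | //   deps[i].len       = sizeof(x);
     | //   deps[i].flags     = <runtime flag for 'in'>;
     | // The field names follow RTLDependInfoFields; the concrete flag values come
     | // from translateDependencyKind() and are defined by the OpenMP runtime.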
4328 | |
4329 | SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes( |
4330 | CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4331 | const OMPTaskDataTy::DependData &Data) { |
4332 | assert(Data.DepKind == OMPC_DEPEND_depobj &&
4333 |        "Expected depobj dependency kind.");
4334 | SmallVector<llvm::Value *, 4> Sizes; |
4335 | SmallVector<LValue, 4> SizeLVals; |
4336 | ASTContext &C = CGF.getContext(); |
4337 | { |
4338 | OMPIteratorGeneratorScope IteratorScope( |
4339 | CGF, cast_or_null<OMPIteratorExpr>( |
4340 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4341 | : nullptr)); |
4342 | for (const Expr *E : Data.DepExprs) { |
4343 | llvm::Value *NumDeps; |
4344 | LValue Base; |
4345 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4346 | std::tie(NumDeps, Base) = |
4347 | getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); |
4348 | LValue NumLVal = CGF.MakeAddrLValue( |
4349 | CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), |
4350 | C.getUIntPtrType()); |
4351 | CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0), |
4352 | NumLVal.getAddress(CGF)); |
4353 | llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); |
4354 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); |
4355 | CGF.EmitStoreOfScalar(Add, NumLVal); |
4356 | SizeLVals.push_back(NumLVal); |
4357 | } |
4358 | } |
4359 | for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { |
4360 | llvm::Value *Size = |
4361 | CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); |
4362 | Sizes.push_back(Size); |
4363 | } |
4364 | return Sizes; |
4365 | } |
4366 | |
4367 | void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF, |
4368 | QualType &KmpDependInfoTy, |
4369 | LValue PosLVal, |
4370 | const OMPTaskDataTy::DependData &Data, |
4371 | Address DependenciesArray) { |
4372 | assert(Data.DepKind == OMPC_DEPEND_depobj &&
4373 |        "Expected depobj dependency kind.");
4374 | llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); |
4375 | { |
4376 | OMPIteratorGeneratorScope IteratorScope( |
4377 | CGF, cast_or_null<OMPIteratorExpr>( |
4378 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4379 | : nullptr)); |
4380 | for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { |
4381 | const Expr *E = Data.DepExprs[I]; |
4382 | llvm::Value *NumDeps; |
4383 | LValue Base; |
4384 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4385 | std::tie(NumDeps, Base) = |
4386 | getDepobjElements(CGF, DepobjLVal, E->getExprLoc()); |
4387 | |
4388 | // Memcpy the dependency data.
4389 | llvm::Value *Size = CGF.Builder.CreateNUWMul( |
4390 | ElSize, |
4391 | CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); |
4392 | llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4393 | Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos); |
4394 | CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); |
4395 | |
4396 | // Increase pos. |
4397 | // pos += numDeps;
4398 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); |
4399 | CGF.EmitStoreOfScalar(Add, PosLVal); |
4400 | } |
4401 | } |
4402 | } |
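     | // Rough pseudo-code for the copy performed above (an illustration only;
     | // 'deps' is the destination array and 'obj' the depobj payload):
     | //   memcpy(&deps[pos], obj, numDeps * sizeof(kmp_depend_info));
     | //   pos += numDeps;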
4403 | |
4404 | std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( |
4405 | CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, |
4406 | SourceLocation Loc) { |
4407 | if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { |
4408 | return D.DepExprs.empty(); |
4409 | })) |
4410 | return std::make_pair(nullptr, Address::invalid()); |
4411 | // Process list of dependencies. |
4412 | ASTContext &C = CGM.getContext(); |
4413 | Address DependenciesArray = Address::invalid(); |
4414 | llvm::Value *NumOfElements = nullptr; |
4415 | unsigned NumDependencies = std::accumulate( |
4416 | Dependencies.begin(), Dependencies.end(), 0, |
4417 | [](unsigned V, const OMPTaskDataTy::DependData &D) { |
4418 | return D.DepKind == OMPC_DEPEND_depobj |
4419 | ? V |
4420 | : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); |
4421 | }); |
4422 | QualType FlagsTy; |
4423 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4424 | bool HasDepobjDeps = false; |
4425 | bool HasRegularWithIterators = false; |
4426 | llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4427 | llvm::Value *NumOfRegularWithIterators = |
4428 | llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4429 | // Calculate number of depobj dependencies and regular deps with the |
4430 | // iterators. |
4431 | for (const OMPTaskDataTy::DependData &D : Dependencies) { |
4432 | if (D.DepKind == OMPC_DEPEND_depobj) { |
4433 | SmallVector<llvm::Value *, 4> Sizes = |
4434 | emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); |
4435 | for (llvm::Value *Size : Sizes) { |
4436 | NumOfDepobjElements = |
4437 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); |
4438 | } |
4439 | HasDepobjDeps = true; |
4440 | continue; |
4441 | } |
4442 | // Include number of iterations, if any. |
4443 | |
4444 | if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { |
4445 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { |
4446 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4447 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); |
4448 | llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul( |
4449 | Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size())); |
4450 | NumOfRegularWithIterators = |
4451 | CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps); |
4452 | } |
4453 | HasRegularWithIterators = true; |
4454 | continue; |
4455 | } |
4456 | } |
4457 | |
4458 | QualType KmpDependInfoArrayTy; |
4459 | if (HasDepobjDeps || HasRegularWithIterators) { |
4460 | NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, |
4461 | /*isSigned=*/false); |
4462 | if (HasDepobjDeps) { |
4463 | NumOfElements = |
4464 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); |
4465 | } |
4466 | if (HasRegularWithIterators) { |
4467 | NumOfElements = |
4468 | CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); |
4469 | } |
4470 | auto *OVE = new (C) OpaqueValueExpr( |
4471 | Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), |
4472 | VK_PRValue); |
4473 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE, |
4474 | RValue::get(NumOfElements)); |
4475 | KmpDependInfoArrayTy = |
4476 | C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal, |
4477 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4478 | // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); |
4479 | // Properly emit variable-sized array. |
4480 | auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, |
4481 | ImplicitParamDecl::Other); |
4482 | CGF.EmitVarDecl(*PD); |
4483 | DependenciesArray = CGF.GetAddrOfLocalVar(PD); |
4484 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4485 | /*isSigned=*/false); |
4486 | } else { |
4487 | KmpDependInfoArrayTy = C.getConstantArrayType( |
4488 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, |
4489 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4490 | DependenciesArray = |
4491 | CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); |
4492 | DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); |
4493 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, |
4494 | /*isSigned=*/false); |
4495 | } |
4496 | unsigned Pos = 0; |
4497 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { |
4498 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4499 | Dependencies[I].IteratorExpr) |
4500 | continue; |
4501 | emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], |
4502 | DependenciesArray); |
4503 | } |
4504 | // Copy regular dependencies with iterators. |
4505 | LValue PosLVal = CGF.MakeAddrLValue( |
4506 | CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); |
4507 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4508 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { |
4509 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4510 | !Dependencies[I].IteratorExpr) |
4511 | continue; |
4512 | emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], |
4513 | DependenciesArray); |
4514 | } |
4515 | // Copy final depobj arrays without iterators. |
4516 | if (HasDepobjDeps) { |
4517 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { |
4518 | if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) |
4519 | continue; |
4520 | emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], |
4521 | DependenciesArray); |
4522 | } |
4523 | } |
4524 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4525 | DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty); |
4526 | return std::make_pair(NumOfElements, DependenciesArray); |
4527 | } |
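     | // Summary sketch (editorial, hedged): the dependency array built above is
     | // laid out as
     | //   [ regular deps | regular deps with iterators | depobj contents ]
     | // and the function returns the total element count together with a void
     | // pointer to the first element.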
4528 | |
4529 | Address CGOpenMPRuntime::emitDepobjDependClause( |
4530 | CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, |
4531 | SourceLocation Loc) { |
4532 | if (Dependencies.DepExprs.empty()) |
4533 | return Address::invalid(); |
4534 | // Process list of dependencies. |
4535 | ASTContext &C = CGM.getContext(); |
4536 | Address DependenciesArray = Address::invalid(); |
4537 | unsigned NumDependencies = Dependencies.DepExprs.size(); |
4538 | QualType FlagsTy; |
4539 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4540 | RecordDecl *KmpDependInfoRD = |
4541 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4542 | |
4543 | llvm::Value *Size; |
4544 | // Define type kmp_depend_info[<Dependencies.size()>]; |
4545 | // For depobj reserve one extra element to store the number of elements. |
4546 | // It is required to handle the depobj(x) update(in) construct.
4547 | // kmp_depend_info[<Dependencies.size()>] deps; |
4548 | llvm::Value *NumDepsVal; |
4549 | CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); |
4550 | if (const auto *IE = |
4551 | cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { |
4552 | NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); |
4553 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { |
4554 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4555 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
4556 | NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); |
4557 | } |
4558 | Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), |
4559 | NumDepsVal); |
4560 | CharUnits SizeInBytes = |
4561 | C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); |
4562 | llvm::Value *RecSize = CGM.getSize(SizeInBytes); |
4563 | Size = CGF.Builder.CreateNUWMul(Size, RecSize); |
4564 | NumDepsVal = |
4565 | CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); |
4566 | } else { |
4567 | QualType KmpDependInfoArrayTy = C.getConstantArrayType( |
4568 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), |
4569 | nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); |
4570 | CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); |
4571 | Size = CGM.getSize(Sz.alignTo(Align)); |
4572 | NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); |
4573 | } |
4574 | // Need to allocate in dynamic memory.
4575 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4576 | // Use default allocator. |
4577 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4578 | llvm::Value *Args[] = {ThreadID, Size, Allocator}; |
4579 | |
4580 | llvm::Value *Addr = |
4581 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4582 | CGM.getModule(), OMPRTL___kmpc_alloc), |
4583 | Args, ".dep.arr.addr"); |
4584 | llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy); |
4585 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4586 | Addr, KmpDependInfoLlvmTy->getPointerTo()); |
4587 | DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align); |
4588 | // Write the number of elements in the first element of the array for depobj.
4589 | LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); |
4590 | // deps[i].base_addr = NumDependencies; |
4591 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4592 | Base, |
4593 | *std::next(KmpDependInfoRD->field_begin(), |
4594 | static_cast<unsigned int>(RTLDependInfoFields::BaseAddr))); |
4595 | CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); |
4596 | llvm::PointerUnion<unsigned *, LValue *> Pos; |
4597 | unsigned Idx = 1; |
4598 | LValue PosLVal; |
4599 | if (Dependencies.IteratorExpr) { |
4600 | PosLVal = CGF.MakeAddrLValue( |
4601 | CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), |
4602 | C.getSizeType()); |
4603 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, |
4604 | /*IsInit=*/true); |
4605 | Pos = &PosLVal; |
4606 | } else { |
4607 | Pos = &Idx; |
4608 | } |
4609 | emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); |
4610 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4611 | CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy, |
4612 | CGF.Int8Ty); |
4613 | return DependenciesArray; |
4614 | } |
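     | // Layout of the buffer allocated above (illustrative only):
     | //   deps[0].base_addr = <number of entries>   // bookkeeping slot
     | //   deps[1..N]        = <actual kmp_depend_info records>
     | // The address handed back to the caller points at deps[1], one element past
     | // the bookkeeping slot.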
4615 | |
4616 | void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, |
4617 | SourceLocation Loc) { |
4618 | ASTContext &C = CGM.getContext(); |
4619 | QualType FlagsTy; |
4620 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4621 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4622 | DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>()); |
4623 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4624 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4625 | Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy), |
4626 | CGF.ConvertTypeForMem(KmpDependInfoTy)); |
4627 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4628 | Addr.getElementType(), Addr.getPointer(), |
4629 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4630 | DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, |
4631 | CGF.VoidPtrTy); |
4632 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4633 | // Use default allocator. |
4634 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4635 | llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; |
4636 | |
4637 | // __kmpc_free(gtid, addr, nullptr);
4638 | (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4639 | CGM.getModule(), OMPRTL___kmpc_free), |
4640 | Args); |
4641 | } |
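     | // Note (editorial): the GEP by -1 above steps back over the bookkeeping
     | // element written by emitDepobjDependClause, so __kmpc_free receives the
     | // same pointer that __kmpc_alloc originally returned.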
4642 | |
4643 | void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, |
4644 | OpenMPDependClauseKind NewDepKind, |
4645 | SourceLocation Loc) { |
4646 | ASTContext &C = CGM.getContext(); |
4647 | QualType FlagsTy; |
4648 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4649 | RecordDecl *KmpDependInfoRD = |
4650 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4651 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
4652 | llvm::Value *NumDeps; |
4653 | LValue Base; |
4654 | std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); |
4655 | |
4656 | Address Begin = Base.getAddress(CGF); |
4657 | // Cast from pointer to array type to pointer to single element. |
4658 | llvm::Value *End = CGF.Builder.CreateGEP( |
4659 | Begin.getElementType(), Begin.getPointer(), NumDeps); |
4660 | // The basic structure here is a while-do loop. |
4661 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); |
4662 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); |
4663 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
4664 | CGF.EmitBlock(BodyBB); |
4665 | llvm::PHINode *ElementPHI = |
4666 | CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); |
4667 | ElementPHI->addIncoming(Begin.getPointer(), EntryBB); |
4668 | Begin = Begin.withPointer(ElementPHI, KnownNonNull); |
4669 | Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), |
4670 | Base.getTBAAInfo()); |
4671 | // deps[i].flags = NewDepKind; |
4672 | RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); |
4673 | LValue FlagsLVal = CGF.EmitLValueForField( |
4674 | Base, *std::next(KmpDependInfoRD->field_begin(), |
4675 | static_cast<unsigned int>(RTLDependInfoFields::Flags))); |
4676 | CGF.EmitStoreOfScalar( |
4677 | llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)), |
4678 | FlagsLVal); |
4679 | |
4680 | // Shift the address forward by one element. |
4681 | Address ElementNext = |
4682 | CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); |
4683 | ElementPHI->addIncoming(ElementNext.getPointer(), |
4684 | CGF.Builder.GetInsertBlock()); |
4685 | llvm::Value *IsEmpty = |
4686 | CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); |
4687 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
4688 | // Done. |
4689 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
4690 | } |
4691 | |
4692 | void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, |
4693 | const OMPExecutableDirective &D, |
4694 | llvm::Function *TaskFunction, |
4695 | QualType SharedsTy, Address Shareds, |
4696 | const Expr *IfCond, |
4697 | const OMPTaskDataTy &Data) { |
4698 | if (!CGF.HaveInsertPoint()) |
4699 | return; |
4700 | |
4701 | TaskResultTy Result = |
4702 | emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); |
4703 | llvm::Value *NewTask = Result.NewTask; |
4704 | llvm::Function *TaskEntry = Result.TaskEntry; |
4705 | llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; |
4706 | LValue TDBase = Result.TDBase; |
4707 | const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; |
4708 | // Process list of dependences. |
4709 | Address DependenciesArray = Address::invalid(); |
4710 | llvm::Value *NumOfElements; |
4711 | std::tie(NumOfElements, DependenciesArray) = |
4712 | emitDependClause(CGF, Data.Dependences, Loc); |
4713 | |
4714 | // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() |
4715 | // libcall. |
4716 | // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, |
4717 | // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, |
4718 | // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence |
4719 | // list is not empty |
4720 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4721 | llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); |
4722 | llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; |
4723 | llvm::Value *DepTaskArgs[7]; |
4724 | if (!Data.Dependences.empty()) { |
4725 | DepTaskArgs[0] = UpLoc; |
4726 | DepTaskArgs[1] = ThreadID; |
4727 | DepTaskArgs[2] = NewTask; |
4728 | DepTaskArgs[3] = NumOfElements; |
4729 | DepTaskArgs[4] = DependenciesArray.getPointer(); |
4730 | DepTaskArgs[5] = CGF.Builder.getInt32(0); |
4731 | DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4732 | } |
4733 | auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, |
4734 | &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { |
4735 | if (!Data.Tied) { |
4736 | auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); |
4737 | LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); |
4738 | CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); |
4739 | } |
4740 | if (!Data.Dependences.empty()) { |
4741 | CGF.EmitRuntimeCall( |
4742 | OMPBuilder.getOrCreateRuntimeFunction( |
4743 | CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), |
4744 | DepTaskArgs); |
4745 | } else { |
4746 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4747 | CGM.getModule(), OMPRTL___kmpc_omp_task), |
4748 | TaskArgs); |
4749 | } |
4750 | // Check if parent region is untied and build return for untied task.
4751 | if (auto *Region = |
4752 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
4753 | Region->emitUntiedSwitch(CGF); |
4754 | }; |
4755 | |
4756 | llvm::Value *DepWaitTaskArgs[7]; |
4757 | if (!Data.Dependences.empty()) { |
4758 | DepWaitTaskArgs[0] = UpLoc; |
4759 | DepWaitTaskArgs[1] = ThreadID; |
4760 | DepWaitTaskArgs[2] = NumOfElements; |
4761 | DepWaitTaskArgs[3] = DependenciesArray.getPointer(); |
4762 | DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); |
4763 | DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4764 | DepWaitTaskArgs[6] = |
4765 | llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause); |
4766 | } |
4767 | auto &M = CGM.getModule(); |
4768 | auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, |
4769 | TaskEntry, &Data, &DepWaitTaskArgs, |
4770 | Loc](CodeGenFunction &CGF, PrePostActionTy &) { |
4771 | CodeGenFunction::RunCleanupsScope LocalScope(CGF); |
4772 | // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, |
4773 | // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 |
4774 | // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info |
4775 | // is specified. |
4776 | if (!Data.Dependences.empty()) |
4777 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4778 | M, OMPRTL___kmpc_omp_taskwait_deps_51), |
4779 | DepWaitTaskArgs); |
4780 | // Call proxy_task_entry(gtid, new_task); |
4781 | auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, |
4782 | Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4783 | Action.Enter(CGF); |
4784 | llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; |
4785 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, |
4786 | OutlinedFnArgs); |
4787 | }; |
4788 | |
4789 | // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, |
4790 | // kmp_task_t *new_task); |
4791 | // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, |
4792 | // kmp_task_t *new_task); |
4793 | RegionCodeGenTy RCG(CodeGen); |
4794 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
4795 | M, OMPRTL___kmpc_omp_task_begin_if0), |
4796 | TaskArgs, |
4797 | OMPBuilder.getOrCreateRuntimeFunction( |
4798 | M, OMPRTL___kmpc_omp_task_complete_if0), |
4799 | TaskArgs); |
4800 | RCG.setAction(Action); |
4801 | RCG(CGF); |
4802 | }; |
4803 | |
4804 | if (IfCond) { |
4805 | emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); |
4806 | } else { |
4807 | RegionCodeGenTy ThenRCG(ThenCodeGen); |
4808 | ThenRCG(CGF); |
4809 | } |
4810 | } |
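     | // For reference (a hedged summary, not normative): given a directive like
     | //   #pragma omp task if(cond) depend(inout : a)
     | // the 'then' path above emits __kmpc_omp_task_with_deps (or __kmpc_omp_task
     | // when there are no dependences), while the 'else' path emits
     | //   __kmpc_omp_taskwait_deps_51(...);    // wait for dependences, if any
     | //   __kmpc_omp_task_begin_if0(...);
     | //   proxy_task_entry(gtid, new_task);    // serialized task body
     | //   __kmpc_omp_task_complete_if0(...);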
4811 | |
4812 | void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, |
4813 | const OMPLoopDirective &D, |
4814 | llvm::Function *TaskFunction, |
4815 | QualType SharedsTy, Address Shareds, |
4816 | const Expr *IfCond, |
4817 | const OMPTaskDataTy &Data) { |
4818 | if (!CGF.HaveInsertPoint()) |
4819 | return; |
4820 | TaskResultTy Result = |
4821 | emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); |
4822 | // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() |
4823 | // libcall. |
4824 | // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int |
4825 | // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int |
4826 | // sched, kmp_uint64 grainsize, void *task_dup); |
4827 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4828 | llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); |
4829 | llvm::Value *IfVal; |
4830 | if (IfCond) { |
4831 | IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, |
4832 | /*isSigned=*/true); |
4833 | } else { |
4834 | IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); |
4835 | } |
4836 | |
4837 | LValue LBLVal = CGF.EmitLValueForField( |
4838 | Result.TDBase, |
4839 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); |
4840 | const auto *LBVar = |
4841 | cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); |
4842 | CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), |
4843 | LBLVal.getQuals(), |
4844 | /*IsInitializer=*/true); |
4845 | LValue UBLVal = CGF.EmitLValueForField( |
4846 | Result.TDBase, |
4847 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); |
4848 | const auto *UBVar = |
4849 | cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); |
4850 | CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), |
4851 | UBLVal.getQuals(), |
4852 | /*IsInitializer=*/true); |
4853 | LValue StLVal = CGF.EmitLValueForField( |
4854 | Result.TDBase, |
4855 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); |
4856 | const auto *StVar = |
4857 | cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); |
4858 | CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), |
4859 | StLVal.getQuals(), |
4860 | /*IsInitializer=*/true); |
4861 | // Store reductions address. |
4862 | LValue RedLVal = CGF.EmitLValueForField( |
4863 | Result.TDBase, |
4864 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); |
4865 | if (Data.Reductions) { |
4866 | CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); |
4867 | } else { |
4868 | CGF.EmitNullInitialization(RedLVal.getAddress(CGF), |
4869 | CGF.getContext().VoidPtrTy); |
4870 | } |
4871 | enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; |
4872 | llvm::Value *TaskArgs[] = { |
4873 | UpLoc, |
4874 | ThreadID, |
4875 | Result.NewTask, |
4876 | IfVal, |
4877 | LBLVal.getPointer(CGF), |
4878 | UBLVal.getPointer(CGF), |
4879 | CGF.EmitLoadOfScalar(StLVal, Loc), |
4880 | llvm::ConstantInt::getSigned( |
4881 | CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the compiler
4882 | llvm::ConstantInt::getSigned( |
4883 | CGF.IntTy, Data.Schedule.getPointer() |
4884 | ? Data.Schedule.getInt() ? NumTasks : Grainsize |
4885 | : NoSchedule), |
4886 | Data.Schedule.getPointer() |
4887 | ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, |
4888 | /*isSigned=*/false) |
4889 | : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), |
4890 | Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4891 | Result.TaskDupFn, CGF.VoidPtrTy) |
4892 | : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; |
4893 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4894 | CGM.getModule(), OMPRTL___kmpc_taskloop), |
4895 | TaskArgs); |
4896 | } |
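     | // Example mapping (illustrative, assuming the Schedule pair's int flag marks
     | // a num_tasks clause): '#pragma omp taskloop grainsize(4)' passes
     | // sched = Grainsize (1) with 4 as the kmp_uint64 value; 'num_tasks(8)' passes
     | // sched = NumTasks (2) with 8; with neither clause the call uses
     | // NoSchedule (0) and 0.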
4897 | |
4898 | /// Emit the reduction operation for each element of the array (required for
4899 | /// array sections): LHS op= RHS.
4900 | /// \param Type Type of array. |
4901 | /// \param LHSVar Variable on the left side of the reduction operation |
4902 | /// (references element of array in original variable). |
4903 | /// \param RHSVar Variable on the right side of the reduction operation |
4904 | /// (references element of array in original variable). |
4905 | /// \param RedOpGen Generator of reduction operation with use of LHSVar and |
4906 | /// RHSVar. |
4907 | static void EmitOMPAggregateReduction( |
4908 | CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, |
4909 | const VarDecl *RHSVar, |
4910 | const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, |
4911 | const Expr *, const Expr *)> &RedOpGen, |
4912 | const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, |
4913 | const Expr *UpExpr = nullptr) { |
4914 | // Perform element-by-element initialization. |
4915 | QualType ElementTy; |
4916 | Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); |
4917 | Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); |
4918 | |
4919 | // Drill down to the base element type on both arrays. |
4920 | const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); |
4921 | llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); |
4922 | |
4923 | llvm::Value *RHSBegin = RHSAddr.getPointer(); |
4924 | llvm::Value *LHSBegin = LHSAddr.getPointer(); |
4925 | // Cast from pointer to array type to pointer to single element. |
4926 | llvm::Value *LHSEnd = |
4927 | CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements); |
4928 | // The basic structure here is a while-do loop. |
4929 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); |
4930 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); |
4931 | llvm::Value *IsEmpty = |
4932 | CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); |
4933 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
4934 | |
4935 | // Enter the loop body, making that address the current address. |
4936 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
4937 | CGF.EmitBlock(BodyBB); |
4938 | |
4939 | CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); |
4940 | |
4941 | llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( |
4942 | RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); |
4943 | RHSElementPHI->addIncoming(RHSBegin, EntryBB); |
4944 | Address RHSElementCurrent( |
4945 | RHSElementPHI, RHSAddr.getElementType(), |
4946 | RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
4947 | |
4948 | llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( |
4949 | LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
4950 | LHSElementPHI->addIncoming(LHSBegin, EntryBB); |
4951 | Address LHSElementCurrent( |
4952 | LHSElementPHI, LHSAddr.getElementType(), |
4953 | LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
4954 | |
4955 | // Emit copy. |
4956 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
4957 | Scope.addPrivate(LHSVar, LHSElementCurrent); |
4958 | Scope.addPrivate(RHSVar, RHSElementCurrent); |
4959 | Scope.Privatize(); |
4960 | RedOpGen(CGF, XExpr, EExpr, UpExpr); |
4961 | Scope.ForceCleanup(); |
4962 | |
4963 | // Shift the address forward by one element. |
4964 | llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( |
4965 | LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1, |
4966 | "omp.arraycpy.dest.element"); |
4967 | llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( |
4968 | RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1, |
4969 | "omp.arraycpy.src.element"); |
4970 | // Check whether we've reached the end. |
4971 | llvm::Value *Done = |
4972 | CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); |
4973 | CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); |
4974 | LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); |
4975 | RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); |
4976 | |
4977 | // Done. |
4978 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
4979 | } |
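     | // Roughly equivalent pseudo-code for the loop emitted above (editorial):
     | //   for (lhs = lhsBegin, rhs = rhsBegin; lhs != lhsEnd; ++lhs, ++rhs)
     | //     RedOpGen(lhs, rhs);   // e.g. *lhs = RedOp(*lhs, *rhs)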
4980 | |
4981 | /// Emit reduction combiner. If the combiner is a simple expression emit it as |
4982 | /// is, otherwise consider it as combiner of UDR decl and emit it as a call of |
4983 | /// UDR combiner function. |
4984 | static void emitReductionCombiner(CodeGenFunction &CGF, |
4985 | const Expr *ReductionOp) { |
4986 | if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) |
4987 | if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) |
4988 | if (const auto *DRE = |
4989 | dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) |
4990 | if (const auto *DRD = |
4991 | dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { |
4992 | std::pair<llvm::Function *, llvm::Function *> Reduction = |
4993 | CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); |
4994 | RValue Func = RValue::get(Reduction.first); |
4995 | CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); |
4996 | CGF.EmitIgnoredExpr(ReductionOp); |
4997 | return; |
4998 | } |
4999 | CGF.EmitIgnoredExpr(ReductionOp); |
5000 | } |
5001 | |
5002 | llvm::Function *CGOpenMPRuntime::emitReductionFunction( |
5003 | SourceLocation Loc, llvm::Type *ArgsElemType, |
5004 | ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs, |
5005 | ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) { |
5006 | ASTContext &C = CGM.getContext(); |
5007 | |
5008 | // void reduction_func(void *LHSArg, void *RHSArg); |
5009 | FunctionArgList Args; |
5010 | ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
5011 | ImplicitParamDecl::Other); |
5012 | ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
5013 | ImplicitParamDecl::Other); |
5014 | Args.push_back(&LHSArg); |
5015 | Args.push_back(&RHSArg); |
5016 | const auto &CGFI = |
5017 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
5018 | std::string Name = getName({"omp", "reduction", "reduction_func"}); |
5019 | auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), |
5020 | llvm::GlobalValue::InternalLinkage, Name, |
5021 | &CGM.getModule()); |
5022 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
5023 | Fn->setDoesNotRecurse(); |
5024 | CodeGenFunction CGF(CGM); |
5025 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
5026 | |
5027 | // Dst = (void*[n])(LHSArg); |
5028 | // Src = (void*[n])(RHSArg); |
5029 | Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5030 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), |
5031 | ArgsElemType->getPointerTo()), |
5032 | ArgsElemType, CGF.getPointerAlign()); |
5033 | Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5034 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), |
5035 | ArgsElemType->getPointerTo()), |
5036 | ArgsElemType, CGF.getPointerAlign()); |
5037 | |
5038 | // ... |
5039 | // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); |
5040 | // ... |
5041 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
5042 | const auto *IPriv = Privates.begin(); |
5043 | unsigned Idx = 0; |
5044 | for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { |
5045 | const auto *RHSVar = |
5046 | cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); |
5047 | Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar)); |
5048 | const auto *LHSVar = |
5049 | cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); |
5050 | Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar)); |
5051 | QualType PrivTy = (*IPriv)->getType(); |
5052 | if (PrivTy->isVariablyModifiedType()) { |
5053 | // Get array size and emit VLA type. |
5054 | ++Idx; |
5055 | Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); |
5056 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); |
5057 | const VariableArrayType *VLA = |
5058 | CGF.getContext().getAsVariableArrayType(PrivTy); |
5059 | const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); |
5060 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
5061 | CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); |
5062 | CGF.EmitVariablyModifiedType(PrivTy); |
5063 | } |
5064 | } |
5065 | Scope.Privatize(); |
5066 | IPriv = Privates.begin(); |
5067 | const auto *ILHS = LHSExprs.begin(); |
5068 | const auto *IRHS = RHSExprs.begin(); |
5069 | for (const Expr *E : ReductionOps) { |
5070 | if ((*IPriv)->getType()->isArrayType()) { |
5071 | // Emit reduction for array section. |
5072 | const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
5073 | const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
5074 | EmitOMPAggregateReduction( |
5075 | CGF, (*IPriv)->getType(), LHSVar, RHSVar, |
5076 | [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { |
5077 | emitReductionCombiner(CGF, E); |
5078 | }); |
5079 | } else { |
5080 | // Emit reduction for array subscript or single variable. |
5081 | emitReductionCombiner(CGF, E); |
5082 | } |
5083 | ++IPriv; |
5084 | ++ILHS; |
5085 | ++IRHS; |
5086 | } |
5087 | Scope.ForceCleanup(); |
5088 | CGF.FinishFunction(); |
5089 | return Fn; |
5090 | } |
5091 | |
5092 | void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, |
5093 | const Expr *ReductionOp, |
5094 | const Expr *PrivateRef, |
5095 | const DeclRefExpr *LHS, |
5096 | const DeclRefExpr *RHS) { |
5097 | if (PrivateRef->getType()->isArrayType()) { |
5098 | // Emit reduction for array section. |
5099 | const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); |
5100 | const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); |
5101 | EmitOMPAggregateReduction( |
5102 | CGF, PrivateRef->getType(), LHSVar, RHSVar, |
5103 | [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { |
5104 | emitReductionCombiner(CGF, ReductionOp); |
5105 | }); |
5106 | } else { |
5107 | // Emit reduction for array subscript or single variable. |
5108 | emitReductionCombiner(CGF, ReductionOp); |
5109 | } |
5110 | } |
5111 | |
5112 | void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, |
5113 | ArrayRef<const Expr *> Privates, |
5114 | ArrayRef<const Expr *> LHSExprs, |
5115 | ArrayRef<const Expr *> RHSExprs, |
5116 | ArrayRef<const Expr *> ReductionOps, |
5117 | ReductionOptionsTy Options) { |
5118 | if (!CGF.HaveInsertPoint()) |
5119 | return; |
5120 | |
5121 | bool WithNowait = Options.WithNowait; |
5122 | bool SimpleReduction = Options.SimpleReduction; |
5123 | |
5124 | // The following code should be emitted for the reduction:
5125 | // |
5126 | // static kmp_critical_name lock = { 0 }; |
5127 | // |
5128 | // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { |
5129 | // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); |
5130 | // ... |
5131 | // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], |
5132 | // *(Type<n>-1*)rhs[<n>-1]); |
5133 | // } |
5134 | // |
5135 | // ... |
5136 | // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; |
5137 | // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), |
5138 | // RedList, reduce_func, &<lock>)) { |
5139 | // case 1: |
5140 | // ... |
5141 | // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); |
5142 | // ... |
5143 | // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); |
5144 | // break; |
5145 | // case 2: |
5146 | // ... |
5147 | // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); |
5148 | // ... |
5149 | // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] |
5150 | // break; |
5151 | // default:; |
5152 | // } |
5153 | // |
5154 | // If SimpleReduction is true, only the following code is generated:
5155 | // ... |
5156 | // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); |
5157 | // ... |
5158 | |
5159 | ASTContext &C = CGM.getContext(); |
5160 | |
5161 | if (SimpleReduction) { |
5162 | CodeGenFunction::RunCleanupsScope Scope(CGF); |
5163 | const auto *IPriv = Privates.begin(); |
5164 | const auto *ILHS = LHSExprs.begin(); |
5165 | const auto *IRHS = RHSExprs.begin(); |
5166 | for (const Expr *E : ReductionOps) { |
5167 | emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), |
5168 | cast<DeclRefExpr>(*IRHS)); |
5169 | ++IPriv; |
5170 | ++ILHS; |
5171 | ++IRHS; |
5172 | } |
5173 | return; |
5174 | } |
5175 | |
5176 | // 1. Build a list of reduction variables. |
5177 | // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; |
5178 | auto Size = RHSExprs.size(); |
5179 | for (const Expr *E : Privates) { |
5180 | if (E->getType()->isVariablyModifiedType()) |
5181 | // Reserve place for array size. |
5182 | ++Size; |
5183 | } |
5184 | llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); |
5185 | QualType ReductionArrayTy = |
5186 | C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, |
5187 | /*IndexTypeQuals=*/0); |
5188 | Address ReductionList = |
5189 | CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); |
5190 | const auto *IPriv = Privates.begin(); |
5191 | unsigned Idx = 0; |
5192 | for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { |
5193 | Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
5194 | CGF.Builder.CreateStore( |
5195 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5196 | CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), |
5197 | Elem); |
5198 | if ((*IPriv)->getType()->isVariablyModifiedType()) { |
5199 | // Store array size. |
5200 | ++Idx; |
5201 | Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
5202 | llvm::Value *Size = CGF.Builder.CreateIntCast( |
5203 | CGF.getVLASize( |
5204 | CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) |
5205 | .NumElts, |
5206 | CGF.SizeTy, /*isSigned=*/false); |
5207 | CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), |
5208 | Elem); |
5209 | } |
5210 | } |
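     | // Layout note (editorial): RedList[] holds a void* to each RHS (per-thread)
     | // reduction copy; for variably-modified types the following slot additionally
     | // stores the element count, encoded as a pointer via inttoptr.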
5211 | |
5212 | // 2. Emit reduce_func(). |
5213 | llvm::Function *ReductionFn = |
5214 | emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy), |
5215 | Privates, LHSExprs, RHSExprs, ReductionOps); |
5216 | |
5217 | // 3. Create static kmp_critical_name lock = { 0 }; |
5218 | std::string Name = getName({"reduction"}); |
5219 | llvm::Value *Lock = getCriticalRegionLock(Name); |
5220 | |
5221 | // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), |
5222 | // RedList, reduce_func, &<lock>); |
5223 | llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); |
5224 | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
5225 | llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); |
5226 | llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5227 | ReductionList.getPointer(), CGF.VoidPtrTy); |
5228 | llvm::Value *Args[] = { |
5229 | IdentTLoc, // ident_t *<loc> |
5230 | ThreadId, // i32 <gtid> |
5231 | CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> |
5232 | ReductionArrayTySize, // size_type sizeof(RedList) |
5233 | RL, // void *RedList |
5234 | ReductionFn, // void (*) (void *, void *) <reduce_func> |
5235 | Lock // kmp_critical_name *&<lock> |
5236 | }; |
5237 | llvm::Value *Res = CGF.EmitRuntimeCall( |
5238 | OMPBuilder.getOrCreateRuntimeFunction( |
5239 | CGM.getModule(), |
5240 | WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), |
5241 | Args); |
5242 | |
5243 | // 5. Build switch(res) |
5244 | llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); |
5245 | llvm::SwitchInst *SwInst = |
5246 | CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); |
5247 | |
5248 | // 6. Build case 1: |
5249 | // ... |
5250 | // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); |
5251 | // ... |
5252 | // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); |
5253 | // break; |
5254 | llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); |
5255 | SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); |
5256 | CGF.EmitBlock(Case1BB); |
5257 | |
5258 | // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); |
5259 | llvm::Value *EndArgs[] = { |
5260 | IdentTLoc, // ident_t *<loc> |
5261 | ThreadId, // i32 <gtid> |
5262 | Lock // kmp_critical_name *&<lock> |
5263 | }; |
5264 | auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( |
5265 | CodeGenFunction &CGF, PrePostActionTy &Action) { |
5266 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
5267 | const auto *IPriv = Privates.begin(); |
5268 | const auto *ILHS = LHSExprs.begin(); |
5269 | const auto *IRHS = RHSExprs.begin(); |
5270 | for (const Expr *E : ReductionOps) { |
5271 | RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), |
5272 | cast<DeclRefExpr>(*IRHS)); |
5273 | ++IPriv; |
5274 | ++ILHS; |
5275 | ++IRHS; |
5276 | } |
5277 | }; |
5278 | RegionCodeGenTy RCG(CodeGen); |
5279 | CommonActionTy Action( |
5280 | nullptr, std::nullopt, |
5281 | OMPBuilder.getOrCreateRuntimeFunction( |
5282 | CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait |
5283 | : OMPRTL___kmpc_end_reduce), |
5284 | EndArgs); |
5285 | RCG.setAction(Action); |
5286 | RCG(CGF); |
5287 | |
5288 | CGF.EmitBranch(DefaultBB); |
5289 | |
5290 | // 7. Build case 2: |
5291 | // ... |
5292 | // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); |
5293 | // ... |
5294 | // break; |
5295 | llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); |
5296 | SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); |
5297 | CGF.EmitBlock(Case2BB); |
5298 | |
5299 | auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( |
5300 | CodeGenFunction &CGF, PrePostActionTy &Action) { |
5301 | const auto *ILHS = LHSExprs.begin(); |
5302 | const auto *IRHS = RHSExprs.begin(); |
5303 | const auto *IPriv = Privates.begin(); |
5304 | for (const Expr *E : ReductionOps) { |
5305 | const Expr *XExpr = nullptr; |
5306 | const Expr *EExpr = nullptr; |
5307 | const Expr *UpExpr = nullptr; |
5308 | BinaryOperatorKind BO = BO_Comma; |
5309 | if (const auto *BO = dyn_cast<BinaryOperator>(E)) { |
5310 | if (BO->getOpcode() == BO_Assign) { |
5311 | XExpr = BO->getLHS(); |
5312 | UpExpr = BO->getRHS(); |
5313 | } |
5314 | } |
5315 | // Try to emit update expression as a simple atomic. |
5316 | const Expr *RHSExpr = UpExpr; |
5317 | if (RHSExpr) { |
5318 | // Analyze RHS part of the whole expression. |
5319 | if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( |
5320 | RHSExpr->IgnoreParenImpCasts())) { |
5321 | // If this is a conditional operator, analyze its condition for |
5322 | // min/max reduction operator. |
5323 | RHSExpr = ACO->getCond(); |
5324 | } |
5325 | if (const auto *BORHS = |
5326 | dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { |
5327 | EExpr = BORHS->getRHS(); |
5328 | BO = BORHS->getOpcode(); |
5329 | } |
5330 | } |
5331 | if (XExpr) { |
5332 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
5333 | auto &&AtomicRedGen = [BO, VD, |
5334 | Loc](CodeGenFunction &CGF, const Expr *XExpr, |
5335 | const Expr *EExpr, const Expr *UpExpr) { |
5336 | LValue X = CGF.EmitLValue(XExpr); |
5337 | RValue E; |
5338 | if (EExpr) |
5339 | E = CGF.EmitAnyExpr(EExpr); |
5340 | CGF.EmitOMPAtomicSimpleUpdateExpr( |
5341 | X, E, BO, /*IsXLHSInRHSPart=*/true, |
5342 | llvm::AtomicOrdering::Monotonic, Loc, |
5343 | [&CGF, UpExpr, VD, Loc](RValue XRValue) { |
5344 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
5345 | Address LHSTemp = CGF.CreateMemTemp(VD->getType()); |
5346 | CGF.emitOMPSimpleStore( |
5347 | CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, |
5348 | VD->getType().getNonReferenceType(), Loc); |
5349 | PrivateScope.addPrivate(VD, LHSTemp); |
5350 | (void)PrivateScope.Privatize(); |
5351 | return CGF.EmitAnyExpr(UpExpr); |
5352 | }); |
5353 | }; |
5354 | if ((*IPriv)->getType()->isArrayType()) { |
5355 | // Emit atomic reduction for array section. |
5356 | const auto *RHSVar = |
5357 | cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
5358 | EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, |
5359 | AtomicRedGen, XExpr, EExpr, UpExpr); |
5360 | } else { |
5361 | // Emit atomic reduction for array subscript or single variable. |
5362 | AtomicRedGen(CGF, XExpr, EExpr, UpExpr); |
5363 | } |
5364 | } else { |
5365 | // Emit as a critical region. |
5366 | auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, |
5367 | const Expr *, const Expr *) { |
5368 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
5369 | std::string Name = RT.getName({"atomic_reduction"}); |
5370 | RT.emitCriticalRegion( |
5371 | CGF, Name, |
5372 | [=](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5373 | Action.Enter(CGF); |
5374 | emitReductionCombiner(CGF, E); |
5375 | }, |
5376 | Loc); |
5377 | }; |
5378 | if ((*IPriv)->getType()->isArrayType()) { |
5379 | const auto *LHSVar = |
5380 | cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
5381 | const auto *RHSVar = |
5382 | cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
5383 | EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, |
5384 | CritRedGen); |
5385 | } else { |
5386 | CritRedGen(CGF, nullptr, nullptr, nullptr); |
5387 | } |
5388 | } |
5389 | ++ILHS; |
5390 | ++IRHS; |
5391 | ++IPriv; |
5392 | } |
5393 | }; |
5394 | RegionCodeGenTy AtomicRCG(AtomicCodeGen); |
5395 | if (!WithNowait) { |
5396 | // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); |
5397 | llvm::Value *EndArgs[] = { |
5398 | IdentTLoc, // ident_t *<loc> |
5399 | ThreadId, // i32 <gtid> |
5400 | Lock // kmp_critical_name *&<lock> |
5401 | }; |
5402 | CommonActionTy Action(nullptr, std::nullopt, |
5403 | OMPBuilder.getOrCreateRuntimeFunction( |
5404 | CGM.getModule(), OMPRTL___kmpc_end_reduce), |
5405 | EndArgs); |
5406 | AtomicRCG.setAction(Action); |
5407 | AtomicRCG(CGF); |
5408 | } else { |
5409 | AtomicRCG(CGF); |
5410 | } |
5411 | |
5412 | CGF.EmitBranch(DefaultBB); |
5413 | CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); |
5414 | } |
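     | // Hedged note: the switch above treats a return value of 1 from
     | // __kmpc_reduce{_nowait} as "this thread combines the values and then calls
     | // __kmpc_end_reduce{_nowait}", 2 as "use the atomic/critical fallback", and
     | // any other value as "nothing to do" (the default case).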
5415 | |
5416 | /// Generates unique name for artificial threadprivate variables. |
5417 | /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" |
5418 | static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, |
5419 | const Expr *Ref) { |
5420 | SmallString<256> Buffer; |
5421 | llvm::raw_svector_ostream Out(Buffer); |
5422 | const clang::DeclRefExpr *DE; |
5423 | const VarDecl *D = ::getBaseDecl(Ref, DE); |
5424 | if (!D) |
5425 | D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); |
5426 | D = D->getCanonicalDecl(); |
5427 | std::string Name = CGM.getOpenMPRuntime().getName( |
5428 | {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); |
5429 | Out << Prefix << Name << "_" |
5430 | << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); |
5431 | return std::string(Out.str()); |
5432 | } |
5433 | |
5434 | /// Emits reduction initializer function: |
5435 | /// \code |
5436 | /// void @.red_init(void* %arg, void* %orig) { |
5437 | /// %0 = bitcast void* %arg to <type>* |
5438 | /// store <type> <init>, <type>* %0 |
5439 | /// ret void |
5440 | /// } |
5441 | /// \endcode |
5442 | static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, |
5443 | SourceLocation Loc, |
5444 | ReductionCodeGen &RCG, unsigned N) { |
5445 | ASTContext &C = CGM.getContext(); |
5446 | QualType VoidPtrTy = C.VoidPtrTy; |
5447 | VoidPtrTy.addRestrict(); |
5448 | FunctionArgList Args; |
5449 | ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, |
5450 | ImplicitParamDecl::Other); |
5451 | ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy, |
5452 | ImplicitParamDecl::Other); |
5453 | Args.emplace_back(&Param); |
5454 | Args.emplace_back(&ParamOrig); |
5455 | const auto &FnInfo = |
5456 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
5457 | llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); |
5458 | std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""}); |
5459 | auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, |
5460 | Name, &CGM.getModule()); |
5461 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); |
5462 | Fn->setDoesNotRecurse(); |
5463 | CodeGenFunction CGF(CGM); |
5464 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); |
5465 | QualType PrivateType = RCG.getPrivateType(N); |
5466 | Address PrivateAddr = CGF.EmitLoadOfPointer( |
5467 | CGF.Builder.CreateElementBitCast( |
5468 | CGF.GetAddrOfLocalVar(&Param), |
5469 | CGF.ConvertTypeForMem(PrivateType)->getPointerTo()), |
5470 | C.getPointerType(PrivateType)->castAs<PointerType>()); |
5471 | llvm::Value *Size = nullptr; |
5472 | // If the size of the reduction item is non-constant, load it from global |
5473 | // threadprivate variable. |
5474 | if (RCG.getSizes(N).second) { |
5475 | Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( |
5476 | CGF, CGM.getContext().getSizeType(), |
5477 | generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); |
5478 | Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, |
5479 | CGM.getContext().getSizeType(), Loc); |
5480 | } |
5481 | RCG.emitAggregateType(CGF, N, Size); |
5482 | Address OrigAddr = Address::invalid(); |
5483 | // If the initializer uses the initializer from the declare reduction construct,
5484 | // emit a pointer to the address of the original reduction item (required by
5485 | // the reduction initializer).
5486 | if (RCG.usesReductionInitializer(N)) { |
5487 | Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig); |
5488 | OrigAddr = CGF.EmitLoadOfPointer( |
5489 | SharedAddr, |
5490 | CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr()); |
5491 | } |
5492 | // Emit the initializer: |
5493 | // %0 = bitcast void* %arg to <type>* |
5494 | // store <type> <init>, <type>* %0 |
5495 | RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr, |
5496 | [](CodeGenFunction &) { return false; }); |
5497 | CGF.FinishFunction(); |
5498 | return Fn; |
5499 | } |
5500 | |
5501 | /// Emits reduction combiner function: |
5502 | /// \code |
5503 | /// void @.red_comb(void* %arg0, void* %arg1) { |
5504 | /// %lhs = bitcast void* %arg0 to <type>* |
5505 | /// %rhs = bitcast void* %arg1 to <type>* |
5506 | /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs) |
5507 | /// store <type> %2, <type>* %lhs |
5508 | /// ret void |
5509 | /// } |
5510 | /// \endcode |
5511 | static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM, |
5512 | SourceLocation Loc, |
5513 | ReductionCodeGen &RCG, unsigned N, |
5514 | const Expr *ReductionOp, |
5515 | const Expr *LHS, const Expr *RHS, |
5516 | const Expr *PrivateRef) { |
5517 | ASTContext &C = CGM.getContext(); |
5518 | const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl()); |
5519 | const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl()); |
5520 | FunctionArgList Args; |
5521 | ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
5522 | C.VoidPtrTy, ImplicitParamDecl::Other); |
5523 | ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
5524 | ImplicitParamDecl::Other); |
5525 | Args.emplace_back(&ParamInOut); |
5526 | Args.emplace_back(&ParamIn); |
5527 | const auto &FnInfo = |
5528 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
5529 | llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); |
5530 | std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""}); |
5531 | auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, |
5532 | Name, &CGM.getModule()); |
5533 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); |
5534 | Fn->setDoesNotRecurse(); |
5535 | CodeGenFunction CGF(CGM); |
5536 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); |
5537 | llvm::Value *Size = nullptr; |
5538 | // If the size of the reduction item is non-constant, load it from global |
5539 | // threadprivate variable. |
5540 | if (RCG.getSizes(N).second) { |
5541 | Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( |
5542 | CGF, CGM.getContext().getSizeType(), |
5543 | generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); |
5544 | Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, |
5545 | CGM.getContext().getSizeType(), Loc); |
5546 | } |
5547 | RCG.emitAggregateType(CGF, N, Size); |
5548 | // Remap lhs and rhs variables to the addresses of the function arguments. |
5549 | // %lhs = bitcast void* %arg0 to <type>* |
5550 | // %rhs = bitcast void* %arg1 to <type>* |
5551 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
5552 | PrivateScope.addPrivate( |
5553 | LHSVD, |
5554 | // Pull out the pointer to the variable. |
5555 | CGF.EmitLoadOfPointer( |
5556 | CGF.Builder.CreateElementBitCast( |
5557 | CGF.GetAddrOfLocalVar(&ParamInOut), |
5558 | CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()), |
5559 | C.getPointerType(LHSVD->getType())->castAs<PointerType>())); |
5560 | PrivateScope.addPrivate( |
5561 | RHSVD, |
5562 | // Pull out the pointer to the variable. |
5563 | CGF.EmitLoadOfPointer( |
5564 | CGF.Builder.CreateElementBitCast( |
5565 | CGF.GetAddrOfLocalVar(&ParamIn), |
5566 | CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()), |
5567 | C.getPointerType(RHSVD->getType())->castAs<PointerType>())); |
5568 | PrivateScope.Privatize(); |
5569 | // Emit the combiner body: |
5570 |   // %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5571 | // store <type> %2, <type>* %lhs |
5572 | CGM.getOpenMPRuntime().emitSingleReductionCombiner( |
5573 | CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS), |
5574 | cast<DeclRefExpr>(RHS)); |
5575 | CGF.FinishFunction(); |
5576 | return Fn; |
5577 | } |
5578 | |
5579 | /// Emits reduction finalizer function: |
5580 | /// \code |
5581 | /// void @.red_fini(void* %arg) { |
5582 | /// %0 = bitcast void* %arg to <type>* |
5583 | /// <destroy>(<type>* %0) |
5584 | /// ret void |
5585 | /// } |
5586 | /// \endcode |
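     | ///
     | /// A finalizer is emitted only when the reduction item needs cleanups, e.g.
     | /// when its type has a non-trivial destructor; in that case <destroy> is, in
     | /// C++ terms (a sketch, for an item of class type T):
     | /// \code
     | ///   ((T *)arg)->~T();
     | /// \endcode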
5587 | static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM, |
5588 | SourceLocation Loc, |
5589 | ReductionCodeGen &RCG, unsigned N) { |
5590 | if (!RCG.needCleanups(N)) |
5591 | return nullptr; |
5592 | ASTContext &C = CGM.getContext(); |
5593 | FunctionArgList Args; |
5594 | ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
5595 | ImplicitParamDecl::Other); |
5596 | Args.emplace_back(&Param); |
5597 | const auto &FnInfo = |
5598 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
5599 | llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); |
5600 | std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""}); |
5601 | auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, |
5602 | Name, &CGM.getModule()); |
5603 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); |
5604 | Fn->setDoesNotRecurse(); |
5605 | CodeGenFunction CGF(CGM); |
5606 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc); |
5607 | Address PrivateAddr = CGF.EmitLoadOfPointer( |
5608 | CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>()); |
5609 | llvm::Value *Size = nullptr; |
5610 |   // If the size of the reduction item is non-constant, load it from the
5611 |   // global threadprivate variable.
5612 | if (RCG.getSizes(N).second) { |
5613 | Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate( |
5614 | CGF, CGM.getContext().getSizeType(), |
5615 | generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); |
5616 | Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false, |
5617 | CGM.getContext().getSizeType(), Loc); |
5618 | } |
5619 | RCG.emitAggregateType(CGF, N, Size); |
5620 | // Emit the finalizer body: |
5621 | // <destroy>(<type>* %0) |
5622 | RCG.emitCleanups(CGF, N, PrivateAddr); |
5623 | CGF.FinishFunction(Loc); |
5624 | return Fn; |
5625 | } |
5626 | |
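     | /// Illustrative source that is lowered through here (a sketch; 'sum' and
     | /// 'work' are placeholders):
     | /// \code
     | ///   int sum = 0;
     | ///   #pragma omp taskgroup task_reduction(+ : sum)
     | ///   {
     | ///     #pragma omp task in_reduction(+ : sum)
     | ///     sum += work();
     | ///   }
     | /// \endcode
     | /// One kmp_taskred_input_t descriptor is filled below for each reduction item
     | /// and the array is passed to __kmpc_taskred_init (or to
     | /// __kmpc_taskred_modifier_init when the 'task' reduction modifier is used).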
5627 | llvm::Value *CGOpenMPRuntime::emitTaskReductionInit( |
5628 | CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs, |
5629 | ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) { |
5630 | if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty()) |
5631 | return nullptr; |
5632 | |
5633 | // Build typedef struct: |
5634 | // kmp_taskred_input { |
5635 | // void *reduce_shar; // shared reduction item |
5636 | // void *reduce_orig; // original reduction item used for initialization |
5637 | // size_t reduce_size; // size of data item |
5638 | // void *reduce_init; // data initialization routine |
5639 | // void *reduce_fini; // data finalization routine |
5640 | // void *reduce_comb; // data combiner routine |
5641 | // kmp_task_red_flags_t flags; // flags for additional info from compiler |
5642 | // } kmp_taskred_input_t; |
5643 | ASTContext &C = CGM.getContext(); |
5644 | RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t"); |
5645 | RD->startDefinition(); |
5646 | const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
5647 | const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
5648 | const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType()); |
5649 | const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
5650 | const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
5651 | const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
5652 | const FieldDecl *FlagsFD = addFieldToRecordDecl( |
5653 | C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false)); |
5654 | RD->completeDefinition(); |
5655 | QualType RDType = C.getRecordType(RD); |
5656 | unsigned Size = Data.ReductionVars.size(); |
5657 | llvm::APInt ArraySize(/*numBits=*/64, Size); |
5658 | QualType ArrayRDType = C.getConstantArrayType( |
5659 | RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); |
5660 |   // kmp_taskred_input_t .rd_input.[Size];
5661 | Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input."); |
5662 | ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs, |
5663 | Data.ReductionCopies, Data.ReductionOps); |
5664 | for (unsigned Cnt = 0; Cnt < Size; ++Cnt) { |
5665 |     // kmp_taskred_input_t &ElemLVal = .rd_input.[Cnt];
5666 | llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0), |
5667 | llvm::ConstantInt::get(CGM.SizeTy, Cnt)}; |
5668 | llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP( |
5669 | TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs, |
5670 | /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc, |
5671 | ".rd_input.gep."); |
5672 | LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType); |
5673 | // ElemLVal.reduce_shar = &Shareds[Cnt]; |
5674 | LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD); |
5675 | RCG.emitSharedOrigLValue(CGF, Cnt); |
5676 | llvm::Value *CastedShared = |
5677 | CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF)); |
5678 | CGF.EmitStoreOfScalar(CastedShared, SharedLVal); |
5679 | // ElemLVal.reduce_orig = &Origs[Cnt]; |
5680 | LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD); |
5681 | llvm::Value *CastedOrig = |
5682 | CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF)); |
5683 | CGF.EmitStoreOfScalar(CastedOrig, OrigLVal); |
5684 | RCG.emitAggregateType(CGF, Cnt); |
5685 | llvm::Value *SizeValInChars; |
5686 | llvm::Value *SizeVal; |
5687 | std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt); |
5688 |     // We use delayed creation/initialization for VLAs and array sections. It is
5689 |     // required because the runtime does not provide a way to pass the sizes of
5690 |     // VLAs/array sections to the initializer/combiner/finalizer functions.
5691 |     // Instead, threadprivate global variables are used to store these values,
5692 |     // and the functions read them from there.
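     |     // For example (a sketch; 'a' and 'n' are placeholders):
     |     //   int a[n];
     |     //   #pragma omp taskgroup task_reduction(+ : a[0:n])
     |     // Here the item size is known only at run time; it is stored into the
     |     // artificial threadprivate "reduction_size" variable (see
     |     // emitTaskReductionFixups) and re-loaded inside the emitted
     |     // .red_init./.red_comb./.red_fini. functions.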
5693 | bool DelayedCreation = !!SizeVal; |
5694 | SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy, |
5695 | /*isSigned=*/false); |
5696 | LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD); |
5697 | CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal); |
5698 | // ElemLVal.reduce_init = init; |
5699 | LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD); |
5700 | llvm::Value *InitAddr = |
5701 | CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt)); |
5702 | CGF.EmitStoreOfScalar(InitAddr, InitLVal); |
5703 | // ElemLVal.reduce_fini = fini; |
5704 | LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD); |
5705 | llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt); |
5706 | llvm::Value *FiniAddr = Fini |
5707 | ? CGF.EmitCastToVoidPtr(Fini) |
5708 | : llvm::ConstantPointerNull::get(CGM.VoidPtrTy); |
5709 | CGF.EmitStoreOfScalar(FiniAddr, FiniLVal); |
5710 | // ElemLVal.reduce_comb = comb; |
5711 | LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD); |
5712 | llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction( |
5713 | CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt], |
5714 | RHSExprs[Cnt], Data.ReductionCopies[Cnt])); |
5715 | CGF.EmitStoreOfScalar(CombAddr, CombLVal); |
5716 | // ElemLVal.flags = 0; |
5717 | LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD); |
5718 | if (DelayedCreation) { |
5719 | CGF.EmitStoreOfScalar( |
5720 | llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true), |
5721 | FlagsLVal); |
5722 | } else |
5723 | CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF), |
5724 | FlagsLVal.getType()); |
5725 | } |
5726 | if (Data.IsReductionWithTaskMod) { |
5727 | // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int |
5728 | // is_ws, int num, void *data); |
5729 | llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); |
5730 | llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), |
5731 | CGM.IntTy, /*isSigned=*/true); |
5732 | llvm::Value *Args[] = { |
5733 | IdentTLoc, GTid, |
5734 | llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0, |
5735 | /*isSigned=*/true), |
5736 | llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), |
5737 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5738 | TaskRedInput.getPointer(), CGM.VoidPtrTy)}; |
5739 | return CGF.EmitRuntimeCall( |
5740 | OMPBuilder.getOrCreateRuntimeFunction( |
5741 | CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init), |
5742 | Args); |
5743 | } |
5744 | // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data); |
5745 | llvm::Value *Args[] = { |
5746 | CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy, |
5747 | /*isSigned=*/true), |
5748 | llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true), |
5749 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(), |
5750 | CGM.VoidPtrTy)}; |
5751 | return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
5752 | CGM.getModule(), OMPRTL___kmpc_taskred_init), |
5753 | Args); |
5754 | } |
5755 | |
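     | /// Finishes a reduction that uses the 'task' reduction modifier, e.g. (a
     | /// sketch; 'sum', 'n' and 'f' are placeholders):
     | /// \code
     | ///   int sum = 0;
     | ///   #pragma omp parallel for reduction(task, + : sum)
     | ///   for (int i = 0; i < n; ++i)
     | ///     sum += f(i);
     | /// \endcode
     | /// IsWorksharingReduction is forwarded as the runtime's 'is_ws' argument; the
     | /// matching init call is emitted in emitTaskReductionInit.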
5756 | void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF, |
5757 | SourceLocation Loc, |
5758 | bool IsWorksharingReduction) { |
5759 |   // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
5760 |   // int is_ws);
5761 | llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc); |
5762 | llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), |
5763 | CGM.IntTy, /*isSigned=*/true); |
5764 | llvm::Value *Args[] = {IdentTLoc, GTid, |
5765 | llvm::ConstantInt::get(CGM.IntTy, |
5766 | IsWorksharingReduction ? 1 : 0, |
5767 | /*isSigned=*/true)}; |
5768 | (void)CGF.EmitRuntimeCall( |
5769 | OMPBuilder.getOrCreateRuntimeFunction( |
5770 | CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini), |
5771 | Args); |
5772 | } |
5773 | |
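     | /// Stores the run-time size of a variably sized reduction item into the
     | /// artificial threadprivate "reduction_size" variable that the emitted
     | /// .red_init./.red_comb./.red_fini. functions re-load (see the note in
     | /// emitTaskReductionInit).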
5774 | void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF, |
5775 | SourceLocation Loc, |
5776 | ReductionCodeGen &RCG, |
5777 | unsigned N) { |
5778 | auto Sizes = RCG.getSizes(N); |
5779 |   // Emit the threadprivate global variable if the size is non-constant
5780 |   // (Sizes.second != nullptr).
5781 | if (Sizes.second) { |
5782 | llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy, |
5783 | /*isSigned=*/false); |
5784 | Address SizeAddr = getAddrOfArtificialThreadPrivate( |
5785 | CGF, CGM.getContext().getSizeType(), |
5786 | generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N))); |
5787 | CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false); |
5788 | } |
5789 | } |
5790 | |
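     | /// Returns the address of the thread-specific copy of a reduction item, e.g.
     | /// for accesses like (a sketch; 'sum' is a placeholder):
     | /// \code
     | ///   #pragma omp task in_reduction(+ : sum)
     | ///   sum += 1;   // 'sum' resolves to the address returned here
     | /// \endcode
     | /// ReductionsPtr is typically the value produced by the taskred init call and
     | /// SharedLVal is the original/shared item used as the lookup key.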
5791 | Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF, |
5792 | SourceLocation Loc, |
5793 | llvm::Value *ReductionsPtr, |
5794 | LValue SharedLVal) { |
5795 | // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void |
5796 | // *d); |
5797 | llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), |
5798 | CGM.IntTy, |
5799 | /*isSigned=*/true), |
5800 | ReductionsPtr, |
5801 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5802 | SharedLVal.getPointer(CGF), CGM.VoidPtrTy)}; |
5803 | return Address( |
5804 | CGF.EmitRuntimeCall( |
5805 | OMPBuilder.getOrCreateRuntimeFunction( |
5806 | CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data), |
5807 | Args), |
5808 | CGF.Int8Ty, SharedLVal.getAlignment()); |
5809 | } |
5810 | |
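     | /// Lowers '#pragma omp taskwait', e.g. (a sketch; 'x' is a placeholder):
     | /// \code
     | ///   #pragma omp taskwait
     | ///   #pragma omp taskwait depend(in : x)   // OpenMP 5.0 form with dependences
     | /// \endcode
     | /// With the OpenMPIRBuilder enabled and no dependences the call is produced
     | /// by OMPBuilder.createTaskwait; otherwise an explicit runtime call is
     | /// emitted below.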
5811 | void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc, |
5812 | const OMPTaskDataTy &Data) { |
5813 | if (!CGF.HaveInsertPoint()) |
5814 | return; |
5815 | |
5816 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) { |
5817 | // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder. |
5818 | OMPBuilder.createTaskwait(CGF.Builder); |
5819 | } else { |
5820 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
5821 | llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); |
5822 | auto |