File: | clang/lib/CodeGen/CGOpenMPRuntime.cpp |
Warning: | line 8004, column 9 2nd function call argument is an uninitialized value |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This provides a class for OpenMP runtime code generation. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "CGOpenMPRuntime.h" |
14 | #include "CGCXXABI.h" |
15 | #include "CGCleanup.h" |
16 | #include "CGRecordLayout.h" |
17 | #include "CodeGenFunction.h" |
18 | #include "clang/AST/Attr.h" |
19 | #include "clang/AST/Decl.h" |
20 | #include "clang/AST/OpenMPClause.h" |
21 | #include "clang/AST/StmtOpenMP.h" |
22 | #include "clang/AST/StmtVisitor.h" |
23 | #include "clang/Basic/BitmaskEnum.h" |
24 | #include "clang/Basic/FileManager.h" |
25 | #include "clang/Basic/OpenMPKinds.h" |
26 | #include "clang/Basic/SourceManager.h" |
27 | #include "clang/CodeGen/ConstantInitBuilder.h" |
28 | #include "llvm/ADT/ArrayRef.h" |
29 | #include "llvm/ADT/SetOperations.h" |
30 | #include "llvm/ADT/StringExtras.h" |
31 | #include "llvm/Bitcode/BitcodeReader.h" |
32 | #include "llvm/IR/Constants.h" |
33 | #include "llvm/IR/DerivedTypes.h" |
34 | #include "llvm/IR/GlobalValue.h" |
35 | #include "llvm/IR/Value.h" |
36 | #include "llvm/Support/AtomicOrdering.h" |
37 | #include "llvm/Support/Format.h" |
38 | #include "llvm/Support/raw_ostream.h" |
39 | #include <cassert> |
40 | #include <numeric> |
41 | |
42 | using namespace clang; |
43 | using namespace CodeGen; |
44 | using namespace llvm::omp; |
45 | |
46 | namespace { |
47 | /// Base class for handling code generation inside OpenMP regions. It records the region kind, the body code generator, the directive kind and the HasCancel flag passed to its constructors. |
48 | class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo { |
49 | public: |
50 | /// Kinds of OpenMP regions used in codegen; derived classes use this value in their classof() checks. |
51 | enum CGOpenMPRegionKind { |
52 | /// Region with outlined function for standalone 'parallel' |
53 | /// directive. |
54 | ParallelOutlinedRegion, |
55 | /// Region with outlined function for standalone 'task' directive. |
56 | TaskOutlinedRegion, |
57 | /// Region for constructs that do not require function outlining, |
58 | /// like 'for', 'sections', 'atomic' etc. directives. |
59 | InlinedRegion, |
60 | /// Region with outlined function for standalone 'target' directive. |
61 | TargetRegion, |
62 | }; |
63 | |
64 | CGOpenMPRegionInfo(const CapturedStmt &CS, |
65 | const CGOpenMPRegionKind RegionKind, |
66 | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
67 | bool HasCancel) |
68 | : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind), |
69 | CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {} |
70 | |
71 | CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind, |
72 | const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind, |
73 | bool HasCancel) |
74 | : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen), |
75 | Kind(Kind), HasCancel(HasCancel) {} |
76 | |
77 | /// Get the variable or parameter that stores the global thread id |
78 | /// inside the OpenMP construct; every derived region info must provide it. |
79 | virtual const VarDecl *getThreadIDVariable() const = 0; |
80 | |
81 | /// Emit the captured statement body (defined out of line). |
82 | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override; |
83 | |
84 | /// Get an LValue for the current ThreadID variable. |
85 | /// \return LValue for thread id variable. This LValue always has type int32*. |
86 | virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF); |
87 | |
88 | virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {} |
89 | |
90 | CGOpenMPRegionKind getRegionKind() const { return RegionKind; } |
91 | |
92 | OpenMPDirectiveKind getDirectiveKind() const { return Kind; } |
93 | |
94 | bool hasCancel() const { return HasCancel; } |
95 | |
96 | static bool classof(const CGCapturedStmtInfo *Info) { |
97 | return Info->getKind() == CR_OpenMP; |
98 | } |
99 | |
100 | ~CGOpenMPRegionInfo() override = default; |
101 | |
102 | protected: |
103 | CGOpenMPRegionKind RegionKind; |
104 | RegionCodeGenTy CodeGen; |
105 | OpenMPDirectiveKind Kind; |
106 | bool HasCancel; |
107 | }; |
108 | |
109 | /// API for captured statement code generation in outlined OpenMP regions (region kind ParallelOutlinedRegion); keeps the thread id variable and the helper name supplied by the caller. |
110 | class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
111 | public: |
112 | CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar, |
113 | const RegionCodeGenTy &CodeGen, |
114 | OpenMPDirectiveKind Kind, bool HasCancel, |
115 | StringRef HelperName) |
116 | : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind, |
117 | HasCancel), |
118 | ThreadIDVar(ThreadIDVar), HelperName(HelperName) { |
119 | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")((ThreadIDVar != nullptr && "No ThreadID in OpenMP region." ) ? static_cast<void> (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 119, __PRETTY_FUNCTION__)); |
120 | } |
121 | |
122 | /// Get the variable or parameter that stores the global thread id |
123 | /// inside the OpenMP construct. |
124 | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
125 | |
126 | /// Get the name of the capture helper, as passed to the constructor. |
127 | StringRef getHelperName() const override { return HelperName; } |
128 | |
129 | static bool classof(const CGCapturedStmtInfo *Info) { |
130 | return CGOpenMPRegionInfo::classof(Info) && |
131 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
132 | ParallelOutlinedRegion; |
133 | } |
134 | |
135 | private: |
136 | /// A variable or parameter storing global thread id for OpenMP |
137 | /// constructs. Asserted to be non-null in the constructor. |
138 | const VarDecl *ThreadIDVar; |
139 | StringRef HelperName; |
140 | }; |
141 | |
142 | /// API for captured statement code generation in OpenMP task regions (region kind TaskOutlinedRegion); untied-task switch emission is forwarded to the referenced UntiedTaskActionTy. |
143 | class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo { |
144 | public: |
145 | class UntiedTaskActionTy final : public PrePostActionTy { |
146 | bool Untied; |
147 | const VarDecl *PartIDVar; |
148 | const RegionCodeGenTy UntiedCodeGen; |
149 | llvm::SwitchInst *UntiedSwitch = nullptr; |
150 | |
151 | public: |
152 | UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar, |
153 | const RegionCodeGenTy &UntiedCodeGen) |
154 | : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {} |
155 | void Enter(CodeGenFunction &CGF) override { |
156 | if (Untied) { |
157 | // Emit task switching point: switch on the loaded part id; the default target '.untied.done.' branches to the return block and case 0 resumes at '.untied.jmp.'. |
158 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
159 | CGF.GetAddrOfLocalVar(PartIDVar), |
160 | PartIDVar->getType()->castAs<PointerType>()); |
161 | llvm::Value *Res = |
162 | CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation()); |
163 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done."); |
164 | UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB); |
165 | CGF.EmitBlock(DoneBB); |
166 | CGF.EmitBranchThroughCleanup(CGF.ReturnBlock); |
167 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
168 | UntiedSwitch->addCase(CGF.Builder.getInt32(0), |
169 | CGF.Builder.GetInsertBlock()); |
170 | emitUntiedSwitch(CGF); |
171 | } |
172 | } |
173 | void emitUntiedSwitch(CodeGenFunction &CGF) const { |
174 | if (Untied) { |
175 | LValue PartIdLVal = CGF.EmitLoadOfPointerLValue( |
176 | CGF.GetAddrOfLocalVar(PartIDVar), |
177 | PartIDVar->getType()->castAs<PointerType>()); |
178 | CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
179 | PartIdLVal); |
180 | UntiedCodeGen(CGF); |
181 | CodeGenFunction::JumpDest CurPoint = |
182 | CGF.getJumpDestInCurrentScope(".untied.next."); |
183 | CGF.EmitBranch(CGF.ReturnBlock.getBlock()); |
184 | CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp.")); |
185 | UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()), |
186 | CGF.Builder.GetInsertBlock()); |
187 | CGF.EmitBranchThroughCleanup(CurPoint); |
188 | CGF.EmitBlock(CurPoint.getBlock()); |
189 | } |
190 | } |
191 | unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); } |
192 | }; |
193 | CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS, |
194 | const VarDecl *ThreadIDVar, |
195 | const RegionCodeGenTy &CodeGen, |
196 | OpenMPDirectiveKind Kind, bool HasCancel, |
197 | const UntiedTaskActionTy &Action) |
198 | : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel), |
199 | ThreadIDVar(ThreadIDVar), Action(Action) { |
200 | assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.")((ThreadIDVar != nullptr && "No ThreadID in OpenMP region." ) ? static_cast<void> (0) : __assert_fail ("ThreadIDVar != nullptr && \"No ThreadID in OpenMP region.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 200, __PRETTY_FUNCTION__)); |
201 | } |
202 | |
203 | /// Get the variable or parameter that stores the global thread id |
204 | /// inside the OpenMP construct. |
205 | const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; } |
206 | |
207 | /// Get an LValue for the current ThreadID variable (defined out of line). |
208 | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override; |
209 | |
210 | /// Get the name of the capture helper; always ".omp_outlined." for this region kind. |
211 | StringRef getHelperName() const override { return ".omp_outlined."; } |
212 | |
213 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
214 | Action.emitUntiedSwitch(CGF); |
215 | } |
216 | |
217 | static bool classof(const CGCapturedStmtInfo *Info) { |
218 | return CGOpenMPRegionInfo::classof(Info) && |
219 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == |
220 | TaskOutlinedRegion; |
221 | } |
222 | |
223 | private: |
224 | /// A variable or parameter storing global thread id for OpenMP |
225 | /// constructs. Asserted to be non-null in the constructor. |
226 | const VarDecl *ThreadIDVar; |
227 | /// Action for emitting code for untied tasks. Held by reference, so the referenced action must outlive this object. |
228 | const UntiedTaskActionTy &Action; |
229 | }; |
230 | |
231 | /// API for inlined captured statement code generation in OpenMP |
232 | /// constructs. Most queries are delegated to the enclosing OpenMP region info when there is one. |
233 | class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo { |
234 | public: |
235 | CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI, |
236 | const RegionCodeGenTy &CodeGen, |
237 | OpenMPDirectiveKind Kind, bool HasCancel) |
238 | : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel), |
239 | OldCSI(OldCSI), |
240 | OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {} |
241 | |
242 | // Retrieve the value of the context parameter from the outer OpenMP region; unreachable without one. |
243 | llvm::Value *getContextValue() const override { |
244 | if (OuterRegionInfo) |
245 | return OuterRegionInfo->getContextValue(); |
246 | llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 246); |
247 | } |
248 | |
249 | void setContextValue(llvm::Value *V) override { |
250 | if (OuterRegionInfo) { |
251 | OuterRegionInfo->setContextValue(V); |
252 | return; |
253 | } |
254 | llvm_unreachable("No context value for inlined OpenMP region")::llvm::llvm_unreachable_internal("No context value for inlined OpenMP region" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 254); |
255 | } |
256 | |
257 | /// Look up the captured field decl for a variable in the outer OpenMP region. |
258 | const FieldDecl *lookup(const VarDecl *VD) const override { |
259 | if (OuterRegionInfo) |
260 | return OuterRegionInfo->lookup(VD); |
261 | // If there is no outer outlined region, there is no need to look up a list of |
262 | // captured variables; we can use the original one. |
263 | return nullptr; |
264 | } |
265 | |
266 | FieldDecl *getThisFieldDecl() const override { |
267 | if (OuterRegionInfo) |
268 | return OuterRegionInfo->getThisFieldDecl(); |
269 | return nullptr; |
270 | } |
271 | |
272 | /// Get the variable or parameter that stores the global thread id |
273 | /// inside the OpenMP construct, or nullptr when there is no outer OpenMP region. |
274 | const VarDecl *getThreadIDVariable() const override { |
275 | if (OuterRegionInfo) |
276 | return OuterRegionInfo->getThreadIDVariable(); |
277 | return nullptr; |
278 | } |
279 | |
280 | /// Get an LValue for the current ThreadID variable; unreachable without an outer OpenMP region. |
281 | LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override { |
282 | if (OuterRegionInfo) |
283 | return OuterRegionInfo->getThreadIDVariableLValue(CGF); |
284 | llvm_unreachable("No LValue for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No LValue for inlined OpenMP construct" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 284); |
285 | } |
286 | |
287 | /// Get the name of the capture helper from the previous captured statement info, whatever its kind (it need not be an OpenMP region). |
288 | StringRef getHelperName() const override { |
289 | if (auto *OuterCSI = getOldCSI()) |
290 | return OuterCSI->getHelperName(); |
291 | llvm_unreachable("No helper name for inlined OpenMP construct")::llvm::llvm_unreachable_internal("No helper name for inlined OpenMP construct" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 291); |
292 | } |
293 | |
294 | void emitUntiedSwitch(CodeGenFunction &CGF) override { |
295 | if (OuterRegionInfo) |
296 | OuterRegionInfo->emitUntiedSwitch(CGF); |
297 | } |
298 | |
299 | CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; } |
300 | |
301 | static bool classof(const CGCapturedStmtInfo *Info) { |
302 | return CGOpenMPRegionInfo::classof(Info) && |
303 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion; |
304 | } |
305 | |
306 | ~CGOpenMPInlinedRegionInfo() override = default; |
307 | |
308 | private: |
309 | /// Previous captured statement info (OldCSI) and its view as an OpenMP region (OuterRegionInfo, null when OldCSI is null or not an OpenMP region). |
310 | CodeGenFunction::CGCapturedStmtInfo *OldCSI; |
311 | CGOpenMPRegionInfo *OuterRegionInfo; |
312 | }; |
313 | |
314 | /// API for captured statement code generation in OpenMP target |
315 | /// constructs. For these captures, implicit parameters are used instead of the |
316 | /// captured fields. The name of the target region has to be unique in a given |
317 | /// application, so it is provided by the client, because only the client has |
318 | /// the information to generate it. |
319 | class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo { |
320 | public: |
321 | CGOpenMPTargetRegionInfo(const CapturedStmt &CS, |
322 | const RegionCodeGenTy &CodeGen, StringRef HelperName) |
323 | : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target, |
324 | /*HasCancel=*/false), |
325 | HelperName(HelperName) {} |
326 | |
327 | /// This is unused for target regions because each starts executing |
328 | /// with a single thread; always returns nullptr. |
329 | const VarDecl *getThreadIDVariable() const override { return nullptr; } |
330 | |
331 | /// Get the name of the capture helper, as passed to the constructor. |
332 | StringRef getHelperName() const override { return HelperName; } |
333 | |
334 | static bool classof(const CGCapturedStmtInfo *Info) { |
335 | return CGOpenMPRegionInfo::classof(Info) && |
336 | cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion; |
337 | } |
338 | |
339 | private: |
340 | StringRef HelperName; |
341 | }; |
342 | |
343 | static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) { |
344 | llvm_unreachable("No codegen for expressions")::llvm::llvm_unreachable_internal("No codegen for expressions" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 344); |
345 | } |
346 | /// API for generation of expressions captured in an innermost OpenMP |
347 | /// region. Uses EmptyCodeGen as its body generator, so it must never be asked to emit a body. |
348 | class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo { |
349 | public: |
350 | CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS) |
351 | : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen, |
352 | OMPD_unknown, |
353 | /*HasCancel=*/false), |
354 | PrivScope(CGF) { |
355 | // Make sure the globals captured in the provided statement are local by |
356 | // using the privatization logic. We assume the same variable is not |
357 | // captured more than once. Captures of local variables and parameters are skipped. |
358 | for (const auto &C : CS.captures()) { |
359 | if (!C.capturesVariable() && !C.capturesVariableByCopy()) |
360 | continue; |
361 | |
362 | const VarDecl *VD = C.getCapturedVar(); |
363 | if (VD->isLocalVarDeclOrParm()) |
364 | continue; |
365 | |
366 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
367 | /*RefersToEnclosingVariableOrCapture=*/false, |
368 | VD->getType().getNonReferenceType(), VK_LValue, |
369 | C.getLocation()); |
370 | PrivScope.addPrivate( |
371 | VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(CGF); }); |
372 | } |
373 | (void)PrivScope.Privatize(); |
374 | } |
375 | |
376 | /// Look up the captured field decl for a variable; the result of CGOpenMPInlinedRegionInfo::lookup is returned unchanged. |
377 | const FieldDecl *lookup(const VarDecl *VD) const override { |
378 | if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD)) |
379 | return FD; |
380 | return nullptr; |
381 | } |
382 | |
383 | /// Emit the captured statement body. Unreachable: expressions have no body. |
384 | void EmitBody(CodeGenFunction &CGF, const Stmt *S) override { |
385 | llvm_unreachable("No body for expressions")::llvm::llvm_unreachable_internal("No body for expressions", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 385); |
386 | } |
387 | |
388 | /// Get a variable or parameter for storing global thread id |
389 | /// inside OpenMP construct. Unreachable for expressions. |
390 | const VarDecl *getThreadIDVariable() const override { |
391 | llvm_unreachable("No thread id for expressions")::llvm::llvm_unreachable_internal("No thread id for expressions" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 391); |
392 | } |
393 | |
394 | /// Get the name of the capture helper. Unreachable for expressions. |
395 | StringRef getHelperName() const override { |
396 | llvm_unreachable("No helper name for expressions")::llvm::llvm_unreachable_internal("No helper name for expressions" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 396); |
397 | } |
398 | |
399 | static bool classof(const CGCapturedStmtInfo *Info) { return false; } |
400 | |
401 | private: |
402 | /// Private scope to capture global variables. |
403 | CodeGenFunction::OMPPrivateScope PrivScope; |
404 | }; |
405 | |
406 | /// RAII for emitting code of OpenMP constructs. The constructor installs a new CGOpenMPInlinedRegionInfo on the CodeGenFunction and stashes its lambda capture fields and block info; the destructor restores them. |
407 | class InlinedOpenMPRegionRAII { |
408 | CodeGenFunction &CGF; |
409 | llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields; |
410 | FieldDecl *LambdaThisCaptureField = nullptr; |
411 | const CodeGen::CGBlockInfo *BlockInfo = nullptr; |
412 | |
413 | public: |
414 | /// Constructs region for combined constructs. |
415 | /// \param CodeGen Code generation sequence for combined directives. Includes |
416 | /// a list of functions used for code generation of implicitly inlined |
417 | /// regions. |
418 | InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen, |
419 | OpenMPDirectiveKind Kind, bool HasCancel) |
420 | : CGF(CGF) { |
421 | // Start emission for the construct; the previous CapturedStmtInfo is kept inside the new region info. |
422 | CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo( |
423 | CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel); |
424 | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
425 | LambdaThisCaptureField = CGF.LambdaThisCaptureField; |
426 | CGF.LambdaThisCaptureField = nullptr; |
427 | BlockInfo = CGF.BlockInfo; |
428 | CGF.BlockInfo = nullptr; |
429 | } |
430 | |
431 | ~InlinedOpenMPRegionRAII() { |
432 | // Restore the original CapturedStmtInfo and delete the one installed by the constructor. |
433 | auto *OldCSI = |
434 | cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI(); |
435 | delete CGF.CapturedStmtInfo; |
436 | CGF.CapturedStmtInfo = OldCSI; |
437 | std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields); |
438 | CGF.LambdaThisCaptureField = LambdaThisCaptureField; |
439 | CGF.BlockInfo = BlockInfo; |
440 | } |
441 | }; |
442 | |
443 | /// Values for bit flags used in the ident_t to describe the fields. |
444 | /// All enumerators are named and described in accordance with the code |
445 | /// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h |
446 | enum OpenMPLocationFlags : unsigned { |
447 | /// Use trampoline for internal microtask. |
448 | OMP_IDENT_IMD = 0x01, |
449 | /// Use c-style ident structure. |
450 | OMP_IDENT_KMPC = 0x02, |
451 | /// Atomic reduction option for kmpc_reduce. |
452 | OMP_ATOMIC_REDUCE = 0x10, |
453 | /// Explicit 'barrier' directive. |
454 | OMP_IDENT_BARRIER_EXPL = 0x20, |
455 | /// Implicit barrier in code. |
456 | OMP_IDENT_BARRIER_IMPL = 0x40, |
457 | /// Implicit barrier in 'for' directive (same value as OMP_IDENT_BARRIER_IMPL). |
458 | OMP_IDENT_BARRIER_IMPL_FOR = 0x40, |
459 | /// Implicit barrier in 'sections' directive. |
460 | OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0, |
461 | /// Implicit barrier in 'single' directive. |
462 | OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140, |
463 | /// Call of __kmp_for_static_init for static loop. |
464 | OMP_IDENT_WORK_LOOP = 0x200, |
465 | /// Call of __kmp_for_static_init for sections. |
466 | OMP_IDENT_WORK_SECTIONS = 0x400, |
467 | /// Call of __kmp_for_static_init for distribute. |
468 | OMP_IDENT_WORK_DISTRIBUTE = 0x800, |
469 | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_IDENT_WORK_DISTRIBUTE |
470 | }; |
471 | |
472 | namespace { |
473 | LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()using ::llvm::BitmaskEnumDetail::operator~; using ::llvm::BitmaskEnumDetail ::operator|; using ::llvm::BitmaskEnumDetail::operator&; using ::llvm::BitmaskEnumDetail::operator^; using ::llvm::BitmaskEnumDetail ::operator|=; using ::llvm::BitmaskEnumDetail::operator&= ; using ::llvm::BitmaskEnumDetail::operator^=; |
474 | /// Values for bit flags marking which 'requires' clauses have been used. |
475 | enum OpenMPOffloadingRequiresDirFlags : int64_t { |
476 | /// Flag undefined. |
477 | OMP_REQ_UNDEFINED = 0x000, |
478 | /// No requires clause present. |
479 | OMP_REQ_NONE = 0x001, |
480 | /// reverse_offload clause. |
481 | OMP_REQ_REVERSE_OFFLOAD = 0x002, |
482 | /// unified_address clause. |
483 | OMP_REQ_UNIFIED_ADDRESS = 0x004, |
484 | /// unified_shared_memory clause. |
485 | OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008, |
486 | /// dynamic_allocators clause. |
487 | OMP_REQ_DYNAMIC_ALLOCATORS = 0x010, |
488 | LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)LLVM_BITMASK_LARGEST_ENUMERATOR = OMP_REQ_DYNAMIC_ALLOCATORS |
489 | }; |
490 | |
491 | enum OpenMPOffloadingReservedDeviceIDs { |
492 | /// Device ID used when the device was not defined; the runtime should get it |
493 | /// from the environment variables described in the spec. |
494 | OMP_DEVICEID_UNDEF = -1, |
495 | }; |
496 | } // anonymous namespace |
497 | |
498 | /// Field indices of the ident structure that describes a source location. |
499 | /// All descriptions are taken from |
500 | /// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h |
501 | /// Original structure: |
502 | /// typedef struct ident { |
503 | /// kmp_int32 reserved_1; /**< might be used in Fortran; |
504 | /// see above */ |
505 | /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; |
506 | /// KMP_IDENT_KMPC identifies this union |
507 | /// member */ |
508 | /// kmp_int32 reserved_2; /**< not really used in Fortran any more; |
509 | /// see above */ |
510 | ///#if USE_ITT_BUILD |
511 | /// /* but currently used for storing |
512 | /// region-specific ITT */ |
513 | /// /* contextual information. */ |
514 | ///#endif /* USE_ITT_BUILD */ |
515 | /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for |
516 | /// C++ */ |
517 | /// char const *psource; /**< String describing the source location. |
518 | /// The string is composed of semi-colon separated |
519 | /// fields which describe the source file, |
520 | /// the function and a pair of line numbers that |
521 | /// delimit the construct. |
522 | /// */ |
523 | /// } ident_t; |
524 | enum IdentFieldIndex { |
525 | /// Might be used in Fortran. |
526 | IdentField_Reserved_1, |
527 | /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member. |
528 | IdentField_Flags, |
529 | /// Not really used in Fortran any more. |
530 | IdentField_Reserved_2, |
531 | /// Source[4] in Fortran, do not use for C++. |
532 | IdentField_Reserved_3, |
533 | /// String describing the source location. The string is composed of |
534 | /// semi-colon separated fields which describe the source file, the function |
535 | /// and a pair of line numbers that delimit the construct. |
536 | IdentField_PSource |
537 | }; |
538 | |
539 | /// Schedule types for 'omp for' loops (these enumerators are taken from |
540 | /// the enum sched_type in kmp.h). |
541 | enum OpenMPSchedType { |
542 | /// Lower bound for default (unordered) versions. |
543 | OMP_sch_lower = 32, |
544 | OMP_sch_static_chunked = 33, |
545 | OMP_sch_static = 34, |
546 | OMP_sch_dynamic_chunked = 35, |
547 | OMP_sch_guided_chunked = 36, |
548 | OMP_sch_runtime = 37, |
549 | OMP_sch_auto = 38, |
550 | /// Static with chunk adjustment (e.g., simd). |
551 | OMP_sch_static_balanced_chunked = 45, |
552 | /// Lower bound for 'ordered' versions. |
553 | OMP_ord_lower = 64, |
554 | OMP_ord_static_chunked = 65, |
555 | OMP_ord_static = 66, |
556 | OMP_ord_dynamic_chunked = 67, |
557 | OMP_ord_guided_chunked = 68, |
558 | OMP_ord_runtime = 69, |
559 | OMP_ord_auto = 70, |
560 | OMP_sch_default = OMP_sch_static, |
561 | /// Schedule types for 'dist_schedule'. |
562 | OMP_dist_sch_static_chunked = 91, |
563 | OMP_dist_sch_static = 92, |
564 | /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. |
565 | /// Bit 29: set if the monotonic schedule modifier was present. |
566 | OMP_sch_modifier_monotonic = (1 << 29), |
567 | /// Bit 30: set if the nonmonotonic schedule modifier was present. |
568 | OMP_sch_modifier_nonmonotonic = (1 << 30), |
569 | }; |
570 | |
571 | /// Cleanup that runs the Exit hook of a pre/post action for an OpenMP |
572 | /// region when the cleanup is emitted; it does nothing if the CodeGenFunction has no insert point. |
573 | class CleanupTy final : public EHScopeStack::Cleanup { |
574 | PrePostActionTy *Action; |
575 | |
576 | public: |
577 | explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {} |
578 | void Emit(CodeGenFunction &CGF, Flags /*flags*/) override { |
579 | if (!CGF.HaveInsertPoint()) |
580 | return; |
581 | Action->Exit(CGF); |
582 | } |
583 | }; |
584 | |
585 | } // anonymous namespace |
586 | |
587 | void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const { // run the region callback inside its own cleanups scope |
588 | CodeGenFunction::RunCleanupsScope CleanupsScope(CGF); |
589 | if (!PrePostAction) { // no action attached: pass a default-constructed one |
590 | PrePostActionTy DefaultAction; |
591 | Callback(CodeGen, CGF, DefaultAction); |
592 | return; |
593 | } |
594 | CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction); // CleanupTy calls the action's Exit hook |
595 | Callback(CodeGen, CGF, *PrePostAction); |
596 | } |
597 | |
598 | /// Return the user-defined reduction declaration that \p ReductionOp calls, i.e. when it is a call |
599 | /// whose opaque-value callee wraps a reference to an OMPDeclareReductionDecl; nullptr otherwise. |
600 | static const OMPDeclareReductionDecl * |
601 | getReductionInit(const Expr *ReductionOp) { |
602 | const auto *Call = dyn_cast<CallExpr>(ReductionOp); |
603 | if (!Call) return nullptr; |
604 | const auto *Callee = dyn_cast<OpaqueValueExpr>(Call->getCallee()); |
605 | if (!Callee) return nullptr; |
606 | const auto *Ref = dyn_cast<DeclRefExpr>(Callee->getSourceExpr()->IgnoreImpCasts()); |
607 | if (!Ref) return nullptr; |
608 | return dyn_cast<OMPDeclareReductionDecl>(Ref->getDecl()); |
609 | } |
610 | |
611 | /// Initialize \p Private for a reduction item: when \p DRD has an initializer, emit \p InitOp with its |
612 | /// two operands bound to \p Private and \p Original; otherwise copy a null constant of type \p Ty into \p Private. |
613 | static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, |
614 | const Expr *InitOp, Address Private, |
615 | Address Original, QualType Ty) { |
616 | if (!DRD->getInitializer()) { |
617 | llvm::Constant *NullInit = CGF.CGM.EmitNullConstant(Ty); |
618 | std::string InitName = CGF.CGM.getOpenMPRuntime().getName({"init"}); |
619 | auto *InitGV = new llvm::GlobalVariable( |
620 | CGF.CGM.getModule(), NullInit->getType(), /*isConstant=*/true, |
621 | llvm::GlobalValue::PrivateLinkage, NullInit, InitName); |
622 | LValue InitLV = CGF.MakeNaturalAlignAddrLValue(InitGV, Ty); |
623 | RValue InitRVal; |
624 | switch (CGF.getEvaluationKind(Ty)) { |
625 | case TEK_Scalar: |
626 | InitRVal = CGF.EmitLoadOfLValue(InitLV, DRD->getLocation()); |
627 | break; |
628 | case TEK_Complex: |
629 | InitRVal = |
630 | RValue::getComplex(CGF.EmitLoadOfComplex(InitLV, DRD->getLocation())); |
631 | break; |
632 | case TEK_Aggregate: |
633 | InitRVal = RValue::getAggregate(InitLV.getAddress(CGF)); |
634 | break; |
635 | } |
636 | OpaqueValueExpr InitOVE(DRD->getLocation(), Ty, VK_RValue); |
637 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &InitOVE, InitRVal); |
638 | CGF.EmitAnyExprToMem(&InitOVE, Private, Ty.getQualifiers(), |
639 | /*IsInitializer=*/false); |
640 | } else { |
641 | std::pair<llvm::Function *, llvm::Function *> UDRFns = |
642 | CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); |
643 | const auto *InitCall = cast<CallExpr>(InitOp); |
644 | const auto *CalleeOVE = cast<OpaqueValueExpr>(InitCall->getCallee()); |
645 | const Expr *LHS = InitCall->getArg(/*Arg=*/0)->IgnoreParenImpCasts(); |
646 | const Expr *RHS = InitCall->getArg(/*Arg=*/1)->IgnoreParenImpCasts(); |
647 | const auto *LHSRef = |
648 | cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr()); |
649 | const auto *RHSRef = |
650 | cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr()); |
651 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
652 | PrivateScope.addPrivate(cast<VarDecl>(LHSRef->getDecl()), |
653 | [=]() { return Private; }); |
654 | PrivateScope.addPrivate(cast<VarDecl>(RHSRef->getDecl()), |
655 | [=]() { return Original; }); |
656 | (void)PrivateScope.Privatize(); |
657 | RValue InitFn = RValue::get(UDRFns.second); |
658 | CodeGenFunction::OpaqueValueMapping Map(CGF, CalleeOVE, InitFn); |
659 | CGF.EmitIgnoredExpr(InitOp); |
660 | } |
661 | } |
662 | |
663 | /// Emit initialization of arrays of complex types. |
664 | /// \param DestAddr Address of the array. |
665 | /// \param Type Type of array. |
666 | /// \param Init Initial expression of array. |
667 | /// \param SrcAddr Address of the original array. |
668 | static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, |
669 | QualType Type, bool EmitDeclareReductionInit, |
670 | const Expr *Init, |
671 | const OMPDeclareReductionDecl *DRD, |
672 | Address SrcAddr = Address::invalid()) { |
673 | // Perform element-by-element initialization. |
674 | QualType ElementTy; |
675 | |
676 | // Drill down to the base element type on both arrays. |
677 | const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); |
678 | llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr); |
679 | DestAddr = |
680 | CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType()); |
681 | if (DRD) |
682 | SrcAddr = |
683 | CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType()); |
684 | |
685 | llvm::Value *SrcBegin = nullptr; |
686 | if (DRD) |
687 | SrcBegin = SrcAddr.getPointer(); |
688 | llvm::Value *DestBegin = DestAddr.getPointer(); |
689 | // Cast from pointer to array type to pointer to single element. |
690 | llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements); |
691 | // The basic structure here is a while-do loop. |
692 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body"); |
693 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done"); |
694 | llvm::Value *IsEmpty = |
695 | CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty"); |
696 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
697 | |
698 | // Enter the loop body, making that address the current address. |
699 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
700 | CGF.EmitBlock(BodyBB); |
701 | |
702 | CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); |
703 | |
704 | llvm::PHINode *SrcElementPHI = nullptr; |
705 | Address SrcElementCurrent = Address::invalid(); |
706 | if (DRD) { |
707 | SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2, |
708 | "omp.arraycpy.srcElementPast"); |
709 | SrcElementPHI->addIncoming(SrcBegin, EntryBB); |
710 | SrcElementCurrent = |
711 | Address(SrcElementPHI, |
712 | SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
713 | } |
714 | llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI( |
715 | DestBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
716 | DestElementPHI->addIncoming(DestBegin, EntryBB); |
717 | Address DestElementCurrent = |
718 | Address(DestElementPHI, |
719 | DestAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
720 | |
721 | // Emit copy. |
722 | { |
723 | CodeGenFunction::RunCleanupsScope InitScope(CGF); |
724 | if (EmitDeclareReductionInit) { |
725 | emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent, |
726 | SrcElementCurrent, ElementTy); |
727 | } else |
728 | CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(), |
729 | /*IsInitializer=*/false); |
730 | } |
731 | |
732 | if (DRD) { |
733 | // Shift the address forward by one element. |
734 | llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32( |
735 | SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); |
736 | SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock()); |
737 | } |
738 | |
739 | // Shift the address forward by one element. |
740 | llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32( |
741 | DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); |
742 | // Check whether we've reached the end. |
743 | llvm::Value *Done = |
744 | CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done"); |
745 | CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); |
746 | DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock()); |
747 | |
748 | // Done. |
749 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
750 | } |
751 | |
752 | LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) { |
753 | return CGF.EmitOMPSharedLValue(E); |
754 | } |
755 | |
756 | LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF, |
757 | const Expr *E) { |
758 | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E)) |
759 | return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false); |
760 | return LValue(); |
761 | } |
762 | |
763 | void ReductionCodeGen::emitAggregateInitialization( |
764 | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, |
765 | const OMPDeclareReductionDecl *DRD) { |
766 | // Emit VarDecl with copy init for arrays. |
767 | // Get the address of the original variable captured in current |
768 | // captured region. |
769 | const auto *PrivateVD = |
770 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
771 | bool EmitDeclareReductionInit = |
772 | DRD && (DRD->getInitializer() || !PrivateVD->hasInit()); |
773 | EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(), |
774 | EmitDeclareReductionInit, |
775 | EmitDeclareReductionInit ? ClausesData[N].ReductionOp |
776 | : PrivateVD->getInit(), |
777 | DRD, SharedLVal.getAddress(CGF)); |
778 | } |
779 | |
780 | ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds, |
781 | ArrayRef<const Expr *> Origs, |
782 | ArrayRef<const Expr *> Privates, |
783 | ArrayRef<const Expr *> ReductionOps) { |
784 | ClausesData.reserve(Shareds.size()); |
785 | SharedAddresses.reserve(Shareds.size()); |
786 | Sizes.reserve(Shareds.size()); |
787 | BaseDecls.reserve(Shareds.size()); |
788 | const auto *IOrig = Origs.begin(); |
789 | const auto *IPriv = Privates.begin(); |
790 | const auto *IRed = ReductionOps.begin(); |
791 | for (const Expr *Ref : Shareds) { |
792 | ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed); |
793 | std::advance(IOrig, 1); |
794 | std::advance(IPriv, 1); |
795 | std::advance(IRed, 1); |
796 | } |
797 | } |
798 | |
799 | void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) { |
800 | assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&((SharedAddresses.size() == N && OrigAddresses.size() == N && "Number of generated lvalues must be exactly N." ) ? static_cast<void> (0) : __assert_fail ("SharedAddresses.size() == N && OrigAddresses.size() == N && \"Number of generated lvalues must be exactly N.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 801, __PRETTY_FUNCTION__)) |
801 | "Number of generated lvalues must be exactly N.")((SharedAddresses.size() == N && OrigAddresses.size() == N && "Number of generated lvalues must be exactly N." ) ? static_cast<void> (0) : __assert_fail ("SharedAddresses.size() == N && OrigAddresses.size() == N && \"Number of generated lvalues must be exactly N.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 801, __PRETTY_FUNCTION__)); |
802 | LValue First = emitSharedLValue(CGF, ClausesData[N].Shared); |
803 | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared); |
804 | SharedAddresses.emplace_back(First, Second); |
805 | if (ClausesData[N].Shared == ClausesData[N].Ref) { |
806 | OrigAddresses.emplace_back(First, Second); |
807 | } else { |
808 | LValue First = emitSharedLValue(CGF, ClausesData[N].Ref); |
809 | LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref); |
810 | OrigAddresses.emplace_back(First, Second); |
811 | } |
812 | } |
813 | |
814 | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) { |
815 | const auto *PrivateVD = |
816 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
817 | QualType PrivateType = PrivateVD->getType(); |
818 | bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref); |
819 | if (!PrivateType->isVariablyModifiedType()) { |
820 | Sizes.emplace_back( |
821 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()), |
822 | nullptr); |
823 | return; |
824 | } |
825 | llvm::Value *Size; |
826 | llvm::Value *SizeInChars; |
827 | auto *ElemType = |
828 | cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType()) |
829 | ->getElementType(); |
830 | auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType); |
831 | if (AsArraySection) { |
832 | Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF), |
833 | OrigAddresses[N].first.getPointer(CGF)); |
834 | Size = CGF.Builder.CreateNUWAdd( |
835 | Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1)); |
836 | SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf); |
837 | } else { |
838 | SizeInChars = |
839 | CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()); |
840 | Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf); |
841 | } |
842 | Sizes.emplace_back(SizeInChars, Size); |
843 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
844 | CGF, |
845 | cast<OpaqueValueExpr>( |
846 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
847 | RValue::get(Size)); |
848 | CGF.EmitVariablyModifiedType(PrivateType); |
849 | } |
850 | |
851 | void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N, |
852 | llvm::Value *Size) { |
853 | const auto *PrivateVD = |
854 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
855 | QualType PrivateType = PrivateVD->getType(); |
856 | if (!PrivateType->isVariablyModifiedType()) { |
857 | assert(!Size && !Sizes[N].second &&((!Size && !Sizes[N].second && "Size should be nullptr for non-variably modified reduction " "items.") ? static_cast<void> (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 859, __PRETTY_FUNCTION__)) |
858 | "Size should be nullptr for non-variably modified reduction "((!Size && !Sizes[N].second && "Size should be nullptr for non-variably modified reduction " "items.") ? static_cast<void> (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 859, __PRETTY_FUNCTION__)) |
859 | "items.")((!Size && !Sizes[N].second && "Size should be nullptr for non-variably modified reduction " "items.") ? static_cast<void> (0) : __assert_fail ("!Size && !Sizes[N].second && \"Size should be nullptr for non-variably modified reduction \" \"items.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 859, __PRETTY_FUNCTION__)); |
860 | return; |
861 | } |
862 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
863 | CGF, |
864 | cast<OpaqueValueExpr>( |
865 | CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()), |
866 | RValue::get(Size)); |
867 | CGF.EmitVariablyModifiedType(PrivateType); |
868 | } |
869 | |
870 | void ReductionCodeGen::emitInitialization( |
871 | CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, |
872 | llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) { |
873 | assert(SharedAddresses.size() > N && "No variable was generated")((SharedAddresses.size() > N && "No variable was generated" ) ? static_cast<void> (0) : __assert_fail ("SharedAddresses.size() > N && \"No variable was generated\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 873, __PRETTY_FUNCTION__)); |
874 | const auto *PrivateVD = |
875 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
876 | const OMPDeclareReductionDecl *DRD = |
877 | getReductionInit(ClausesData[N].ReductionOp); |
878 | QualType PrivateType = PrivateVD->getType(); |
879 | PrivateAddr = CGF.Builder.CreateElementBitCast( |
880 | PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); |
881 | QualType SharedType = SharedAddresses[N].first.getType(); |
882 | SharedLVal = CGF.MakeAddrLValue( |
883 | CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(CGF), |
884 | CGF.ConvertTypeForMem(SharedType)), |
885 | SharedType, SharedAddresses[N].first.getBaseInfo(), |
886 | CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType)); |
887 | if (CGF.getContext().getAsArrayType(PrivateVD->getType())) { |
888 | if (DRD && DRD->getInitializer()) |
889 | (void)DefaultInit(CGF); |
890 | emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD); |
891 | } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) { |
892 | (void)DefaultInit(CGF); |
893 | emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp, |
894 | PrivateAddr, SharedLVal.getAddress(CGF), |
895 | SharedLVal.getType()); |
896 | } else if (!DefaultInit(CGF) && PrivateVD->hasInit() && |
897 | !CGF.isTrivialInitializer(PrivateVD->getInit())) { |
898 | CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr, |
899 | PrivateVD->getType().getQualifiers(), |
900 | /*IsInitializer=*/false); |
901 | } |
902 | } |
903 | |
904 | bool ReductionCodeGen::needCleanups(unsigned N) { |
905 | const auto *PrivateVD = |
906 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
907 | QualType PrivateType = PrivateVD->getType(); |
908 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
909 | return DTorKind != QualType::DK_none; |
910 | } |
911 | |
912 | void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N, |
913 | Address PrivateAddr) { |
914 | const auto *PrivateVD = |
915 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl()); |
916 | QualType PrivateType = PrivateVD->getType(); |
917 | QualType::DestructionKind DTorKind = PrivateType.isDestructedType(); |
918 | if (needCleanups(N)) { |
919 | PrivateAddr = CGF.Builder.CreateElementBitCast( |
920 | PrivateAddr, CGF.ConvertTypeForMem(PrivateType)); |
921 | CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType); |
922 | } |
923 | } |
924 | |
925 | static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
926 | LValue BaseLV) { |
927 | BaseTy = BaseTy.getNonReferenceType(); |
928 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && |
929 | !CGF.getContext().hasSameType(BaseTy, ElTy)) { |
930 | if (const auto *PtrTy = BaseTy->getAs<PointerType>()) { |
931 | BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy); |
932 | } else { |
933 | LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy); |
934 | BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal); |
935 | } |
936 | BaseTy = BaseTy->getPointeeType(); |
937 | } |
938 | return CGF.MakeAddrLValue( |
939 | CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF), |
940 | CGF.ConvertTypeForMem(ElTy)), |
941 | BaseLV.getType(), BaseLV.getBaseInfo(), |
942 | CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType())); |
943 | } |
944 | |
945 | static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, |
946 | llvm::Type *BaseLVType, CharUnits BaseLVAlignment, |
947 | llvm::Value *Addr) { |
948 | Address Tmp = Address::invalid(); |
949 | Address TopTmp = Address::invalid(); |
950 | Address MostTopTmp = Address::invalid(); |
951 | BaseTy = BaseTy.getNonReferenceType(); |
952 | while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) && |
953 | !CGF.getContext().hasSameType(BaseTy, ElTy)) { |
954 | Tmp = CGF.CreateMemTemp(BaseTy); |
955 | if (TopTmp.isValid()) |
956 | CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp); |
957 | else |
958 | MostTopTmp = Tmp; |
959 | TopTmp = Tmp; |
960 | BaseTy = BaseTy->getPointeeType(); |
961 | } |
962 | llvm::Type *Ty = BaseLVType; |
963 | if (Tmp.isValid()) |
964 | Ty = Tmp.getElementType(); |
965 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty); |
966 | if (Tmp.isValid()) { |
967 | CGF.Builder.CreateStore(Addr, Tmp); |
968 | return MostTopTmp; |
969 | } |
970 | return Address(Addr, BaseLVAlignment); |
971 | } |
972 | |
973 | static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) { |
974 | const VarDecl *OrigVD = nullptr; |
975 | if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) { |
976 | const Expr *Base = OASE->getBase()->IgnoreParenImpCasts(); |
977 | while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base)) |
978 | Base = TempOASE->getBase()->IgnoreParenImpCasts(); |
979 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
980 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
981 | DE = cast<DeclRefExpr>(Base); |
982 | OrigVD = cast<VarDecl>(DE->getDecl()); |
983 | } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) { |
984 | const Expr *Base = ASE->getBase()->IgnoreParenImpCasts(); |
985 | while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base)) |
986 | Base = TempASE->getBase()->IgnoreParenImpCasts(); |
987 | DE = cast<DeclRefExpr>(Base); |
988 | OrigVD = cast<VarDecl>(DE->getDecl()); |
989 | } |
990 | return OrigVD; |
991 | } |
992 | |
993 | Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, |
994 | Address PrivateAddr) { |
995 | const DeclRefExpr *DE; |
996 | if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) { |
997 | BaseDecls.emplace_back(OrigVD); |
998 | LValue OriginalBaseLValue = CGF.EmitLValue(DE); |
999 | LValue BaseLValue = |
1000 | loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(), |
1001 | OriginalBaseLValue); |
1002 | llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff( |
1003 | BaseLValue.getPointer(CGF), SharedAddresses[N].first.getPointer(CGF)); |
1004 | llvm::Value *PrivatePointer = |
1005 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1006 | PrivateAddr.getPointer(), |
1007 | SharedAddresses[N].first.getAddress(CGF).getType()); |
1008 | llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment); |
1009 | return castToBase(CGF, OrigVD->getType(), |
1010 | SharedAddresses[N].first.getType(), |
1011 | OriginalBaseLValue.getAddress(CGF).getType(), |
1012 | OriginalBaseLValue.getAlignment(), Ptr); |
1013 | } |
1014 | BaseDecls.emplace_back( |
1015 | cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl())); |
1016 | return PrivateAddr; |
1017 | } |
1018 | |
1019 | bool ReductionCodeGen::usesReductionInitializer(unsigned N) const { |
1020 | const OMPDeclareReductionDecl *DRD = |
1021 | getReductionInit(ClausesData[N].ReductionOp); |
1022 | return DRD && DRD->getInitializer(); |
1023 | } |
1024 | |
1025 | LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) { |
1026 | return CGF.EmitLoadOfPointerLValue( |
1027 | CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1028 | getThreadIDVariable()->getType()->castAs<PointerType>()); |
1029 | } |
1030 | |
1031 | void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) { |
1032 | if (!CGF.HaveInsertPoint()) |
1033 | return; |
1034 | // 1.2.2 OpenMP Language Terminology |
1035 | // Structured block - An executable statement with a single entry at the |
1036 | // top and a single exit at the bottom. |
1037 | // The point of exit cannot be a branch out of the structured block. |
1038 | // longjmp() and throw() must not violate the entry/exit criteria. |
1039 | CGF.EHStack.pushTerminate(); |
1040 | CodeGen(CGF); |
1041 | CGF.EHStack.popTerminate(); |
1042 | } |
1043 | |
1044 | LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue( |
1045 | CodeGenFunction &CGF) { |
1046 | return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()), |
1047 | getThreadIDVariable()->getType(), |
1048 | AlignmentSource::Decl); |
1049 | } |
1050 | |
1051 | static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, |
1052 | QualType FieldTy) { |
1053 | auto *Field = FieldDecl::Create( |
1054 | C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy, |
1055 | C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()), |
1056 | /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); |
1057 | Field->setAccess(AS_public); |
1058 | DC->addDecl(Field); |
1059 | return Field; |
1060 | } |
1061 | |
1062 | CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, |
1063 | StringRef Separator) |
1064 | : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator), |
1065 | OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager(CGM) { |
1066 | KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); |
1067 | |
1068 | // Initialize Types used in OpenMPIRBuilder from OMPKinds.def |
1069 | OMPBuilder.initialize(); |
1070 | loadOffloadInfoMetadata(); |
1071 | } |
1072 | |
1073 | void CGOpenMPRuntime::clear() { |
1074 | InternalVars.clear(); |
1075 | // Clean non-target variable declarations possibly used only in debug info. |
1076 | for (const auto &Data : EmittedNonTargetVariables) { |
1077 | if (!Data.getValue().pointsToAliveValue()) |
1078 | continue; |
1079 | auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue()); |
1080 | if (!GV) |
1081 | continue; |
1082 | if (!GV->isDeclaration() || GV->getNumUses() > 0) |
1083 | continue; |
1084 | GV->eraseFromParent(); |
1085 | } |
1086 | } |
1087 | |
1088 | std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const { |
1089 | SmallString<128> Buffer; |
1090 | llvm::raw_svector_ostream OS(Buffer); |
1091 | StringRef Sep = FirstSeparator; |
1092 | for (StringRef Part : Parts) { |
1093 | OS << Sep << Part; |
1094 | Sep = Separator; |
1095 | } |
1096 | return std::string(OS.str()); |
1097 | } |
1098 | |
1099 | static llvm::Function * |
1100 | emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, |
1101 | const Expr *CombinerInitializer, const VarDecl *In, |
1102 | const VarDecl *Out, bool IsCombiner) { |
1103 | // void .omp_combiner.(Ty *in, Ty *out); |
1104 | ASTContext &C = CGM.getContext(); |
1105 | QualType PtrTy = C.getPointerType(Ty).withRestrict(); |
1106 | FunctionArgList Args; |
1107 | ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(), |
1108 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1109 | ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(), |
1110 | /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other); |
1111 | Args.push_back(&OmpOutParm); |
1112 | Args.push_back(&OmpInParm); |
1113 | const CGFunctionInfo &FnInfo = |
1114 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
1115 | llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo); |
1116 | std::string Name = CGM.getOpenMPRuntime().getName( |
1117 | {IsCombiner ? "omp_combiner" : "omp_initializer", ""}); |
1118 | auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage, |
1119 | Name, &CGM.getModule()); |
1120 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo); |
1121 | if (CGM.getLangOpts().Optimize) { |
1122 | Fn->removeFnAttr(llvm::Attribute::NoInline); |
1123 | Fn->removeFnAttr(llvm::Attribute::OptimizeNone); |
1124 | Fn->addFnAttr(llvm::Attribute::AlwaysInline); |
1125 | } |
1126 | CodeGenFunction CGF(CGM); |
1127 | // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions. |
1128 | // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions. |
1129 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(), |
1130 | Out->getLocation()); |
1131 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
1132 | Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm); |
1133 | Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() { |
1134 | return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>()) |
1135 | .getAddress(CGF); |
1136 | }); |
1137 | Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm); |
1138 | Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() { |
1139 | return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>()) |
1140 | .getAddress(CGF); |
1141 | }); |
1142 | (void)Scope.Privatize(); |
1143 | if (!IsCombiner && Out->hasInit() && |
1144 | !CGF.isTrivialInitializer(Out->getInit())) { |
1145 | CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out), |
1146 | Out->getType().getQualifiers(), |
1147 | /*IsInitializer=*/true); |
1148 | } |
1149 | if (CombinerInitializer) |
1150 | CGF.EmitIgnoredExpr(CombinerInitializer); |
1151 | Scope.ForceCleanup(); |
1152 | CGF.FinishFunction(); |
1153 | return Fn; |
1154 | } |
1155 | |
1156 | void CGOpenMPRuntime::emitUserDefinedReduction( |
1157 | CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) { |
1158 | if (UDRMap.count(D) > 0) |
1159 | return; |
1160 | llvm::Function *Combiner = emitCombinerOrInitializer( |
1161 | CGM, D->getType(), D->getCombiner(), |
1162 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()), |
1163 | cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()), |
1164 | /*IsCombiner=*/true); |
1165 | llvm::Function *Initializer = nullptr; |
1166 | if (const Expr *Init = D->getInitializer()) { |
1167 | Initializer = emitCombinerOrInitializer( |
1168 | CGM, D->getType(), |
1169 | D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init |
1170 | : nullptr, |
1171 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()), |
1172 | cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()), |
1173 | /*IsCombiner=*/false); |
1174 | } |
1175 | UDRMap.try_emplace(D, Combiner, Initializer); |
1176 | if (CGF) { |
1177 | auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn); |
1178 | Decls.second.push_back(D); |
1179 | } |
1180 | } |
1181 | |
1182 | std::pair<llvm::Function *, llvm::Function *> |
1183 | CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) { |
1184 | auto I = UDRMap.find(D); |
1185 | if (I != UDRMap.end()) |
1186 | return I->second; |
1187 | emitUserDefinedReduction(/*CGF=*/nullptr, D); |
1188 | return UDRMap.lookup(D); |
1189 | } |
1190 | |
1191 | namespace { |
1192 | // Temporary RAII solution to perform a push/pop stack event on the OpenMP IR |
1193 | // Builder if one is present. |
1194 | struct PushAndPopStackRAII { |
1195 | PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF, |
1196 | bool HasCancel) |
1197 | : OMPBuilder(OMPBuilder) { |
1198 | if (!OMPBuilder) |
1199 | return; |
1200 | |
1201 | // The following callback is the crucial part of clangs cleanup process. |
1202 | // |
1203 | // NOTE: |
1204 | // Once the OpenMPIRBuilder is used to create parallel regions (and |
1205 | // similar), the cancellation destination (Dest below) is determined via |
1206 | // IP. That means if we have variables to finalize we split the block at IP, |
1207 | // use the new block (=BB) as destination to build a JumpDest (via |
1208 | // getJumpDestInCurrentScope(BB)) which then is fed to |
1209 | // EmitBranchThroughCleanup. Furthermore, there will not be the need |
1210 | // to push & pop an FinalizationInfo object. |
1211 | // The FiniCB will still be needed but at the point where the |
1212 | // OpenMPIRBuilder is asked to construct a parallel (or similar) construct. |
1213 | auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) { |
1214 | assert(IP.getBlock()->end() == IP.getPoint() &&((IP.getBlock()->end() == IP.getPoint() && "Clang CG should cause non-terminated block!" ) ? static_cast<void> (0) : __assert_fail ("IP.getBlock()->end() == IP.getPoint() && \"Clang CG should cause non-terminated block!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1215, __PRETTY_FUNCTION__)) |
1215 | "Clang CG should cause non-terminated block!")((IP.getBlock()->end() == IP.getPoint() && "Clang CG should cause non-terminated block!" ) ? static_cast<void> (0) : __assert_fail ("IP.getBlock()->end() == IP.getPoint() && \"Clang CG should cause non-terminated block!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1215, __PRETTY_FUNCTION__)); |
1216 | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1217 | CGF.Builder.restoreIP(IP); |
1218 | CodeGenFunction::JumpDest Dest = |
1219 | CGF.getOMPCancelDestination(OMPD_parallel); |
1220 | CGF.EmitBranchThroughCleanup(Dest); |
1221 | }; |
1222 | |
1223 | // TODO: Remove this once we emit parallel regions through the |
1224 | // OpenMPIRBuilder as it can do this setup internally. |
1225 | llvm::OpenMPIRBuilder::FinalizationInfo FI( |
1226 | {FiniCB, OMPD_parallel, HasCancel}); |
1227 | OMPBuilder->pushFinalizationCB(std::move(FI)); |
1228 | } |
1229 | ~PushAndPopStackRAII() { |
1230 | if (OMPBuilder) |
1231 | OMPBuilder->popFinalizationCB(); |
1232 | } |
1233 | llvm::OpenMPIRBuilder *OMPBuilder; |
1234 | }; |
1235 | } // namespace |
1236 | |
1237 | static llvm::Function *emitParallelOrTeamsOutlinedFunction( |
1238 | CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, |
1239 | const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, |
1240 | const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) { |
1241 | assert(ThreadIDVar->getType()->isPointerType() &&((ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *" ) ? static_cast<void> (0) : __assert_fail ("ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 *\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1242, __PRETTY_FUNCTION__)) |
1242 | "thread id variable must be of type kmp_int32 *")((ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 *" ) ? static_cast<void> (0) : __assert_fail ("ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 *\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1242, __PRETTY_FUNCTION__)); |
1243 | CodeGenFunction CGF(CGM, true); |
1244 | bool HasCancel = false; |
1245 | if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D)) |
1246 | HasCancel = OPD->hasCancel(); |
1247 | else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D)) |
1248 | HasCancel = OPD->hasCancel(); |
1249 | else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D)) |
1250 | HasCancel = OPSD->hasCancel(); |
1251 | else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D)) |
1252 | HasCancel = OPFD->hasCancel(); |
1253 | else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D)) |
1254 | HasCancel = OPFD->hasCancel(); |
1255 | else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D)) |
1256 | HasCancel = OPFD->hasCancel(); |
1257 | else if (const auto *OPFD = |
1258 | dyn_cast<OMPTeamsDistributeParallelForDirective>(&D)) |
1259 | HasCancel = OPFD->hasCancel(); |
1260 | else if (const auto *OPFD = |
1261 | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D)) |
1262 | HasCancel = OPFD->hasCancel(); |
1263 | |
1264 | // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new |
1265 | // parallel region to make cancellation barriers work properly. |
1266 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1267 | PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel); |
1268 | CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind, |
1269 | HasCancel, OutlinedHelperName); |
1270 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1271 | return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc()); |
1272 | } |
1273 | |
1274 | llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction( |
1275 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1276 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1277 | const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel); |
1278 | return emitParallelOrTeamsOutlinedFunction( |
1279 | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1280 | } |
1281 | |
1282 | llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction( |
1283 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1284 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) { |
1285 | const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams); |
1286 | return emitParallelOrTeamsOutlinedFunction( |
1287 | CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen); |
1288 | } |
1289 | |
1290 | llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction( |
1291 | const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, |
1292 | const VarDecl *PartIDVar, const VarDecl *TaskTVar, |
1293 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1294 | bool Tied, unsigned &NumberOfParts) { |
1295 | auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF, |
1296 | PrePostActionTy &) { |
1297 | llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc()); |
1298 | llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc()); |
1299 | llvm::Value *TaskArgs[] = { |
1300 | UpLoc, ThreadID, |
1301 | CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar), |
1302 | TaskTVar->getType()->castAs<PointerType>()) |
1303 | .getPointer(CGF)}; |
1304 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1305 | CGM.getModule(), OMPRTL___kmpc_omp_task), |
1306 | TaskArgs); |
1307 | }; |
1308 | CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar, |
1309 | UntiedCodeGen); |
1310 | CodeGen.setAction(Action); |
1311 | assert(!ThreadIDVar->getType()->isPointerType() &&((!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks" ) ? static_cast<void> (0) : __assert_fail ("!ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 for tasks\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1312, __PRETTY_FUNCTION__)) |
1312 | "thread id variable must be of type kmp_int32 for tasks")((!ThreadIDVar->getType()->isPointerType() && "thread id variable must be of type kmp_int32 for tasks" ) ? static_cast<void> (0) : __assert_fail ("!ThreadIDVar->getType()->isPointerType() && \"thread id variable must be of type kmp_int32 for tasks\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1312, __PRETTY_FUNCTION__)); |
1313 | const OpenMPDirectiveKind Region = |
1314 | isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop |
1315 | : OMPD_task; |
1316 | const CapturedStmt *CS = D.getCapturedStmt(Region); |
1317 | bool HasCancel = false; |
1318 | if (const auto *TD = dyn_cast<OMPTaskDirective>(&D)) |
1319 | HasCancel = TD->hasCancel(); |
1320 | else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D)) |
1321 | HasCancel = TD->hasCancel(); |
1322 | else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D)) |
1323 | HasCancel = TD->hasCancel(); |
1324 | else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D)) |
1325 | HasCancel = TD->hasCancel(); |
1326 | |
1327 | CodeGenFunction CGF(CGM, true); |
1328 | CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, |
1329 | InnermostKind, HasCancel, Action); |
1330 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); |
1331 | llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS); |
1332 | if (!Tied) |
1333 | NumberOfParts = Action.getNumberOfParts(); |
1334 | return Res; |
1335 | } |
1336 | |
1337 | static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, |
1338 | const RecordDecl *RD, const CGRecordLayout &RL, |
1339 | ArrayRef<llvm::Constant *> Data) { |
1340 | llvm::StructType *StructTy = RL.getLLVMType(); |
1341 | unsigned PrevIdx = 0; |
1342 | ConstantInitBuilder CIBuilder(CGM); |
1343 | auto DI = Data.begin(); |
1344 | for (const FieldDecl *FD : RD->fields()) { |
1345 | unsigned Idx = RL.getLLVMFieldNo(FD); |
1346 | // Fill the alignment. |
1347 | for (unsigned I = PrevIdx; I < Idx; ++I) |
1348 | Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I))); |
1349 | PrevIdx = Idx + 1; |
1350 | Fields.add(*DI); |
1351 | ++DI; |
1352 | } |
1353 | } |
1354 | |
1355 | template <class... As> |
1356 | static llvm::GlobalVariable * |
1357 | createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, |
1358 | ArrayRef<llvm::Constant *> Data, const Twine &Name, |
1359 | As &&... Args) { |
1360 | const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); |
1361 | const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); |
1362 | ConstantInitBuilder CIBuilder(CGM); |
1363 | ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType()); |
1364 | buildStructValue(Fields, CGM, RD, RL, Data); |
1365 | return Fields.finishAndCreateGlobal( |
1366 | Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant, |
1367 | std::forward<As>(Args)...); |
1368 | } |
1369 | |
1370 | template <typename T> |
1371 | static void |
1372 | createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, |
1373 | ArrayRef<llvm::Constant *> Data, |
1374 | T &Parent) { |
1375 | const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl()); |
1376 | const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD); |
1377 | ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType()); |
1378 | buildStructValue(Fields, CGM, RD, RL, Data); |
1379 | Fields.finishAndAddTo(Parent); |
1380 | } |
1381 | |
1382 | void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF, |
1383 | bool AtCurrentPoint) { |
1384 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1385 | assert(!Elem.second.ServiceInsertPt && "Insert point is set already.")((!Elem.second.ServiceInsertPt && "Insert point is set already." ) ? static_cast<void> (0) : __assert_fail ("!Elem.second.ServiceInsertPt && \"Insert point is set already.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1385, __PRETTY_FUNCTION__)); |
1386 | |
1387 | llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); |
1388 | if (AtCurrentPoint) { |
1389 | Elem.second.ServiceInsertPt = new llvm::BitCastInst( |
1390 | Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock()); |
1391 | } else { |
1392 | Elem.second.ServiceInsertPt = |
1393 | new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt"); |
1394 | Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt); |
1395 | } |
1396 | } |
1397 | |
1398 | void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) { |
1399 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1400 | if (Elem.second.ServiceInsertPt) { |
1401 | llvm::Instruction *Ptr = Elem.second.ServiceInsertPt; |
1402 | Elem.second.ServiceInsertPt = nullptr; |
1403 | Ptr->eraseFromParent(); |
1404 | } |
1405 | } |
1406 | |
1407 | static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF, |
1408 | SourceLocation Loc, |
1409 | SmallString<128> &Buffer) { |
1410 | llvm::raw_svector_ostream OS(Buffer); |
1411 | // Build debug location |
1412 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1413 | OS << ";" << PLoc.getFilename() << ";"; |
1414 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1415 | OS << FD->getQualifiedNameAsString(); |
1416 | OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;"; |
1417 | return OS.str(); |
1418 | } |
1419 | |
1420 | llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF, |
1421 | SourceLocation Loc, |
1422 | unsigned Flags) { |
1423 | llvm::Constant *SrcLocStr; |
1424 | if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo || |
1425 | Loc.isInvalid()) { |
1426 | SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(); |
1427 | } else { |
1428 | std::string FunctionName = ""; |
1429 | if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) |
1430 | FunctionName = FD->getQualifiedNameAsString(); |
1431 | PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc); |
1432 | const char *FileName = PLoc.getFilename(); |
1433 | unsigned Line = PLoc.getLine(); |
1434 | unsigned Column = PLoc.getColumn(); |
1435 | SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName.c_str(), FileName, |
1436 | Line, Column); |
1437 | } |
1438 | unsigned Reserved2Flags = getDefaultLocationReserved2Flags(); |
1439 | return OMPBuilder.getOrCreateIdent(SrcLocStr, llvm::omp::IdentFlag(Flags), |
1440 | Reserved2Flags); |
1441 | } |
1442 | |
1443 | llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF, |
1444 | SourceLocation Loc) { |
1445 | assert(CGF.CurFn && "No function in current CodeGenFunction.")((CGF.CurFn && "No function in current CodeGenFunction." ) ? static_cast<void> (0) : __assert_fail ("CGF.CurFn && \"No function in current CodeGenFunction.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1445, __PRETTY_FUNCTION__)); |
1446 | // If the OpenMPIRBuilder is used we need to use it for all thread id calls as |
1447 | // the clang invariants used below might be broken. |
1448 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1449 | SmallString<128> Buffer; |
1450 | OMPBuilder.updateToLocation(CGF.Builder.saveIP()); |
1451 | auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr( |
1452 | getIdentStringFromSourceLocation(CGF, Loc, Buffer)); |
1453 | return OMPBuilder.getOrCreateThreadID( |
1454 | OMPBuilder.getOrCreateIdent(SrcLocStr)); |
1455 | } |
1456 | |
1457 | llvm::Value *ThreadID = nullptr; |
1458 | // Check whether we've already cached a load of the thread id in this |
1459 | // function. |
1460 | auto I = OpenMPLocThreadIDMap.find(CGF.CurFn); |
1461 | if (I != OpenMPLocThreadIDMap.end()) { |
1462 | ThreadID = I->second.ThreadID; |
1463 | if (ThreadID != nullptr) |
1464 | return ThreadID; |
1465 | } |
1466 | // If exceptions are enabled, do not use parameter to avoid possible crash. |
1467 | if (auto *OMPRegionInfo = |
1468 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) { |
1469 | if (OMPRegionInfo->getThreadIDVariable()) { |
1470 | // Check if this an outlined function with thread id passed as argument. |
1471 | LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF); |
1472 | llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent(); |
1473 | if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions || |
1474 | !CGF.getLangOpts().CXXExceptions || |
1475 | CGF.Builder.GetInsertBlock() == TopBlock || |
1476 | !isa<llvm::Instruction>(LVal.getPointer(CGF)) || |
1477 | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1478 | TopBlock || |
1479 | cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() == |
1480 | CGF.Builder.GetInsertBlock()) { |
1481 | ThreadID = CGF.EmitLoadOfScalar(LVal, Loc); |
1482 | // If value loaded in entry block, cache it and use it everywhere in |
1483 | // function. |
1484 | if (CGF.Builder.GetInsertBlock() == TopBlock) { |
1485 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1486 | Elem.second.ThreadID = ThreadID; |
1487 | } |
1488 | return ThreadID; |
1489 | } |
1490 | } |
1491 | } |
1492 | |
1493 | // This is not an outlined function region - need to call __kmpc_int32 |
1494 | // kmpc_global_thread_num(ident_t *loc). |
1495 | // Generate thread id value and cache this value for use across the |
1496 | // function. |
1497 | auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn); |
1498 | if (!Elem.second.ServiceInsertPt) |
1499 | setLocThreadIdInsertPt(CGF); |
1500 | CGBuilderTy::InsertPointGuard IPG(CGF.Builder); |
1501 | CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt); |
1502 | llvm::CallInst *Call = CGF.Builder.CreateCall( |
1503 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
1504 | OMPRTL___kmpc_global_thread_num), |
1505 | emitUpdateLocation(CGF, Loc)); |
1506 | Call->setCallingConv(CGF.getRuntimeCC()); |
1507 | Elem.second.ThreadID = Call; |
1508 | return Call; |
1509 | } |
1510 | |
1511 | void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) { |
1512 | assert(CGF.CurFn && "No function in current CodeGenFunction.")((CGF.CurFn && "No function in current CodeGenFunction." ) ? static_cast<void> (0) : __assert_fail ("CGF.CurFn && \"No function in current CodeGenFunction.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1512, __PRETTY_FUNCTION__)); |
1513 | if (OpenMPLocThreadIDMap.count(CGF.CurFn)) { |
1514 | clearLocThreadIdInsertPt(CGF); |
1515 | OpenMPLocThreadIDMap.erase(CGF.CurFn); |
1516 | } |
1517 | if (FunctionUDRMap.count(CGF.CurFn) > 0) { |
1518 | for(const auto *D : FunctionUDRMap[CGF.CurFn]) |
1519 | UDRMap.erase(D); |
1520 | FunctionUDRMap.erase(CGF.CurFn); |
1521 | } |
1522 | auto I = FunctionUDMMap.find(CGF.CurFn); |
1523 | if (I != FunctionUDMMap.end()) { |
1524 | for(const auto *D : I->second) |
1525 | UDMMap.erase(D); |
1526 | FunctionUDMMap.erase(I); |
1527 | } |
1528 | LastprivateConditionalToTypes.erase(CGF.CurFn); |
1529 | FunctionToUntiedTaskStackMap.erase(CGF.CurFn); |
1530 | } |
1531 | |
1532 | llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() { |
1533 | return OMPBuilder.IdentPtr; |
1534 | } |
1535 | |
1536 | llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() { |
1537 | if (!Kmpc_MicroTy) { |
1538 | // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...) |
1539 | llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty), |
1540 | llvm::PointerType::getUnqual(CGM.Int32Ty)}; |
1541 | Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true); |
1542 | } |
1543 | return llvm::PointerType::getUnqual(Kmpc_MicroTy); |
1544 | } |
1545 | |
1546 | llvm::FunctionCallee |
1547 | CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) { |
1548 | assert((IVSize == 32 || IVSize == 64) &&(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1549, __PRETTY_FUNCTION__)) |
1549 | "IV size is not compatible with the omp runtime")(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1549, __PRETTY_FUNCTION__)); |
1550 | StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4" |
1551 | : "__kmpc_for_static_init_4u") |
1552 | : (IVSigned ? "__kmpc_for_static_init_8" |
1553 | : "__kmpc_for_static_init_8u"); |
1554 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; |
1555 | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1556 | llvm::Type *TypeParams[] = { |
1557 | getIdentTyPointerTy(), // loc |
1558 | CGM.Int32Ty, // tid |
1559 | CGM.Int32Ty, // schedtype |
1560 | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1561 | PtrTy, // p_lower |
1562 | PtrTy, // p_upper |
1563 | PtrTy, // p_stride |
1564 | ITy, // incr |
1565 | ITy // chunk |
1566 | }; |
1567 | auto *FnTy = |
1568 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1569 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1570 | } |
1571 | |
1572 | llvm::FunctionCallee |
1573 | CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) { |
1574 | assert((IVSize == 32 || IVSize == 64) &&(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1575, __PRETTY_FUNCTION__)) |
1575 | "IV size is not compatible with the omp runtime")(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1575, __PRETTY_FUNCTION__)); |
1576 | StringRef Name = |
1577 | IVSize == 32 |
1578 | ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u") |
1579 | : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u"); |
1580 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; |
1581 | llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc |
1582 | CGM.Int32Ty, // tid |
1583 | CGM.Int32Ty, // schedtype |
1584 | ITy, // lower |
1585 | ITy, // upper |
1586 | ITy, // stride |
1587 | ITy // chunk |
1588 | }; |
1589 | auto *FnTy = |
1590 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); |
1591 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1592 | } |
1593 | |
1594 | llvm::FunctionCallee |
1595 | CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) { |
1596 | assert((IVSize == 32 || IVSize == 64) &&(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1597, __PRETTY_FUNCTION__)) |
1597 | "IV size is not compatible with the omp runtime")(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1597, __PRETTY_FUNCTION__)); |
1598 | StringRef Name = |
1599 | IVSize == 32 |
1600 | ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u") |
1601 | : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u"); |
1602 | llvm::Type *TypeParams[] = { |
1603 | getIdentTyPointerTy(), // loc |
1604 | CGM.Int32Ty, // tid |
1605 | }; |
1606 | auto *FnTy = |
1607 | llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false); |
1608 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1609 | } |
1610 | |
1611 | llvm::FunctionCallee |
1612 | CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) { |
1613 | assert((IVSize == 32 || IVSize == 64) &&(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1614, __PRETTY_FUNCTION__)) |
1614 | "IV size is not compatible with the omp runtime")(((IVSize == 32 || IVSize == 64) && "IV size is not compatible with the omp runtime" ) ? static_cast<void> (0) : __assert_fail ("(IVSize == 32 || IVSize == 64) && \"IV size is not compatible with the omp runtime\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1614, __PRETTY_FUNCTION__)); |
1615 | StringRef Name = |
1616 | IVSize == 32 |
1617 | ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u") |
1618 | : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u"); |
1619 | llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty; |
1620 | auto *PtrTy = llvm::PointerType::getUnqual(ITy); |
1621 | llvm::Type *TypeParams[] = { |
1622 | getIdentTyPointerTy(), // loc |
1623 | CGM.Int32Ty, // tid |
1624 | llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter |
1625 | PtrTy, // p_lower |
1626 | PtrTy, // p_upper |
1627 | PtrTy // p_stride |
1628 | }; |
1629 | auto *FnTy = |
1630 | llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false); |
1631 | return CGM.CreateRuntimeFunction(FnTy, Name); |
1632 | } |
1633 | |
1634 | /// Obtain information that uniquely identifies a target entry. This |
1635 | /// consists of the file and device IDs as well as line number associated with |
1636 | /// the relevant entry source location. |
1637 | static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, |
1638 | unsigned &DeviceID, unsigned &FileID, |
1639 | unsigned &LineNum) { |
1640 | SourceManager &SM = C.getSourceManager(); |
1641 | |
1642 | // The loc should be always valid and have a file ID (the user cannot use |
1643 | // #pragma directives in macros) |
1644 | |
1645 | assert(Loc.isValid() && "Source location is expected to be always valid.")((Loc.isValid() && "Source location is expected to be always valid." ) ? static_cast<void> (0) : __assert_fail ("Loc.isValid() && \"Source location is expected to be always valid.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1645, __PRETTY_FUNCTION__)); |
1646 | |
1647 | PresumedLoc PLoc = SM.getPresumedLoc(Loc); |
1648 | assert(PLoc.isValid() && "Source location is expected to be always valid.")((PLoc.isValid() && "Source location is expected to be always valid." ) ? static_cast<void> (0) : __assert_fail ("PLoc.isValid() && \"Source location is expected to be always valid.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1648, __PRETTY_FUNCTION__)); |
1649 | |
1650 | llvm::sys::fs::UniqueID ID; |
1651 | if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) |
1652 | SM.getDiagnostics().Report(diag::err_cannot_open_file) |
1653 | << PLoc.getFilename() << EC.message(); |
1654 | |
1655 | DeviceID = ID.getDevice(); |
1656 | FileID = ID.getFile(); |
1657 | LineNum = PLoc.getLine(); |
1658 | } |
1659 | |
1660 | Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) { |
1661 | if (CGM.getLangOpts().OpenMPSimd) |
1662 | return Address::invalid(); |
1663 | llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1664 | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1665 | if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link || |
1666 | (*Res == OMPDeclareTargetDeclAttr::MT_To && |
1667 | HasRequiresUnifiedSharedMemory))) { |
1668 | SmallString<64> PtrName; |
1669 | { |
1670 | llvm::raw_svector_ostream OS(PtrName); |
1671 | OS << CGM.getMangledName(GlobalDecl(VD)); |
1672 | if (!VD->isExternallyVisible()) { |
1673 | unsigned DeviceID, FileID, Line; |
1674 | getTargetEntryUniqueInfo(CGM.getContext(), |
1675 | VD->getCanonicalDecl()->getBeginLoc(), |
1676 | DeviceID, FileID, Line); |
1677 | OS << llvm::format("_%x", FileID); |
1678 | } |
1679 | OS << "_decl_tgt_ref_ptr"; |
1680 | } |
1681 | llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName); |
1682 | if (!Ptr) { |
1683 | QualType PtrTy = CGM.getContext().getPointerType(VD->getType()); |
1684 | Ptr = getOrCreateInternalVariable(CGM.getTypes().ConvertTypeForMem(PtrTy), |
1685 | PtrName); |
1686 | |
1687 | auto *GV = cast<llvm::GlobalVariable>(Ptr); |
1688 | GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage); |
1689 | |
1690 | if (!CGM.getLangOpts().OpenMPIsDevice) |
1691 | GV->setInitializer(CGM.GetAddrOfGlobal(VD)); |
1692 | registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr)); |
1693 | } |
1694 | return Address(Ptr, CGM.getContext().getDeclAlign(VD)); |
1695 | } |
1696 | return Address::invalid(); |
1697 | } |
1698 | |
1699 | llvm::Constant * |
1700 | CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) { |
1701 | assert(!CGM.getLangOpts().OpenMPUseTLS ||((!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo ().isTLSSupported()) ? static_cast<void> (0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1702, __PRETTY_FUNCTION__)) |
1702 | !CGM.getContext().getTargetInfo().isTLSSupported())((!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo ().isTLSSupported()) ? static_cast<void> (0) : __assert_fail ("!CGM.getLangOpts().OpenMPUseTLS || !CGM.getContext().getTargetInfo().isTLSSupported()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1702, __PRETTY_FUNCTION__)); |
1703 | // Lookup the entry, lazily creating it if necessary. |
1704 | std::string Suffix = getName({"cache", ""}); |
1705 | return getOrCreateInternalVariable( |
1706 | CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix)); |
1707 | } |
1708 | |
1709 | Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF, |
1710 | const VarDecl *VD, |
1711 | Address VDAddr, |
1712 | SourceLocation Loc) { |
1713 | if (CGM.getLangOpts().OpenMPUseTLS && |
1714 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1715 | return VDAddr; |
1716 | |
1717 | llvm::Type *VarTy = VDAddr.getElementType(); |
1718 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
1719 | CGF.Builder.CreatePointerCast(VDAddr.getPointer(), |
1720 | CGM.Int8PtrTy), |
1721 | CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)), |
1722 | getOrCreateThreadPrivateCache(VD)}; |
1723 | return Address(CGF.EmitRuntimeCall( |
1724 | OMPBuilder.getOrCreateRuntimeFunction( |
1725 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
1726 | Args), |
1727 | VDAddr.getAlignment()); |
1728 | } |
1729 | |
1730 | void CGOpenMPRuntime::emitThreadPrivateVarInit( |
1731 | CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, |
1732 | llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) { |
1733 | // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime |
1734 | // library. |
1735 | llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc); |
1736 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
1737 | CGM.getModule(), OMPRTL___kmpc_global_thread_num), |
1738 | OMPLoc); |
1739 | // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor) |
1740 | // to register constructor/destructor for variable. |
1741 | llvm::Value *Args[] = { |
1742 | OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy), |
1743 | Ctor, CopyCtor, Dtor}; |
1744 | CGF.EmitRuntimeCall( |
1745 | OMPBuilder.getOrCreateRuntimeFunction( |
1746 | CGM.getModule(), OMPRTL___kmpc_threadprivate_register), |
1747 | Args); |
1748 | } |
1749 | |
1750 | llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition( |
1751 | const VarDecl *VD, Address VDAddr, SourceLocation Loc, |
1752 | bool PerformInit, CodeGenFunction *CGF) { |
1753 | if (CGM.getLangOpts().OpenMPUseTLS && |
1754 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1755 | return nullptr; |
1756 | |
1757 | VD = VD->getDefinition(CGM.getContext()); |
1758 | if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) { |
1759 | QualType ASTTy = VD->getType(); |
1760 | |
1761 | llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr; |
1762 | const Expr *Init = VD->getAnyInitializer(); |
1763 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1764 | // Generate function that re-emits the declaration's initializer into the |
1765 | // threadprivate copy of the variable VD |
1766 | CodeGenFunction CtorCGF(CGM); |
1767 | FunctionArgList Args; |
1768 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1769 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1770 | ImplicitParamDecl::Other); |
1771 | Args.push_back(&Dst); |
1772 | |
1773 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1774 | CGM.getContext().VoidPtrTy, Args); |
1775 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1776 | std::string Name = getName({"__kmpc_global_ctor_", ""}); |
1777 | llvm::Function *Fn = |
1778 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1779 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI, |
1780 | Args, Loc, Loc); |
1781 | llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar( |
1782 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1783 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1784 | Address Arg = Address(ArgVal, VDAddr.getAlignment()); |
1785 | Arg = CtorCGF.Builder.CreateElementBitCast( |
1786 | Arg, CtorCGF.ConvertTypeForMem(ASTTy)); |
1787 | CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(), |
1788 | /*IsInitializer=*/true); |
1789 | ArgVal = CtorCGF.EmitLoadOfScalar( |
1790 | CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false, |
1791 | CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1792 | CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue); |
1793 | CtorCGF.FinishFunction(); |
1794 | Ctor = Fn; |
1795 | } |
1796 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1797 | // Generate function that emits destructor call for the threadprivate copy |
1798 | // of the variable VD |
1799 | CodeGenFunction DtorCGF(CGM); |
1800 | FunctionArgList Args; |
1801 | ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc, |
1802 | /*Id=*/nullptr, CGM.getContext().VoidPtrTy, |
1803 | ImplicitParamDecl::Other); |
1804 | Args.push_back(&Dst); |
1805 | |
1806 | const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration( |
1807 | CGM.getContext().VoidTy, Args); |
1808 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1809 | std::string Name = getName({"__kmpc_global_dtor_", ""}); |
1810 | llvm::Function *Fn = |
1811 | CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc); |
1812 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1813 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args, |
1814 | Loc, Loc); |
1815 | // Create a scope with an artificial location for the body of this function. |
1816 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1817 | llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar( |
1818 | DtorCGF.GetAddrOfLocalVar(&Dst), |
1819 | /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation()); |
1820 | DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy, |
1821 | DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1822 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1823 | DtorCGF.FinishFunction(); |
1824 | Dtor = Fn; |
1825 | } |
1826 | // Do not emit init function if it is not required. |
1827 | if (!Ctor && !Dtor) |
1828 | return nullptr; |
1829 | |
1830 | llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy}; |
1831 | auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs, |
1832 | /*isVarArg=*/false) |
1833 | ->getPointerTo(); |
1834 | // Copying constructor for the threadprivate variable. |
1835 | // Must be NULL - reserved by runtime, but currently it requires that this |
1836 | // parameter is always NULL. Otherwise it fires assertion. |
1837 | CopyCtor = llvm::Constant::getNullValue(CopyCtorTy); |
1838 | if (Ctor == nullptr) { |
1839 | auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy, |
1840 | /*isVarArg=*/false) |
1841 | ->getPointerTo(); |
1842 | Ctor = llvm::Constant::getNullValue(CtorTy); |
1843 | } |
1844 | if (Dtor == nullptr) { |
1845 | auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, |
1846 | /*isVarArg=*/false) |
1847 | ->getPointerTo(); |
1848 | Dtor = llvm::Constant::getNullValue(DtorTy); |
1849 | } |
1850 | if (!CGF) { |
1851 | auto *InitFunctionTy = |
1852 | llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false); |
1853 | std::string Name = getName({"__omp_threadprivate_init_", ""}); |
1854 | llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction( |
1855 | InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction()); |
1856 | CodeGenFunction InitCGF(CGM); |
1857 | FunctionArgList ArgList; |
1858 | InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction, |
1859 | CGM.getTypes().arrangeNullaryFunction(), ArgList, |
1860 | Loc, Loc); |
1861 | emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1862 | InitCGF.FinishFunction(); |
1863 | return InitFunction; |
1864 | } |
1865 | emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc); |
1866 | } |
1867 | return nullptr; |
1868 | } |
1869 | |
1870 | bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, |
1871 | llvm::GlobalVariable *Addr, |
1872 | bool PerformInit) { |
1873 | if (CGM.getLangOpts().OMPTargetTriples.empty() && |
1874 | !CGM.getLangOpts().OpenMPIsDevice) |
1875 | return false; |
1876 | Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res = |
1877 | OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); |
1878 | if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link || |
1879 | (*Res == OMPDeclareTargetDeclAttr::MT_To && |
1880 | HasRequiresUnifiedSharedMemory)) |
1881 | return CGM.getLangOpts().OpenMPIsDevice; |
1882 | VD = VD->getDefinition(CGM.getContext()); |
1883 | assert(VD && "Unknown VarDecl")((VD && "Unknown VarDecl") ? static_cast<void> ( 0) : __assert_fail ("VD && \"Unknown VarDecl\"", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 1883, __PRETTY_FUNCTION__)); |
1884 | |
1885 | if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second) |
1886 | return CGM.getLangOpts().OpenMPIsDevice; |
1887 | |
1888 | QualType ASTTy = VD->getType(); |
1889 | SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc(); |
1890 | |
1891 | // Produce the unique prefix to identify the new target regions. We use |
1892 | // the source location of the variable declaration which we know to not |
1893 | // conflict with any target region. |
1894 | unsigned DeviceID; |
1895 | unsigned FileID; |
1896 | unsigned Line; |
1897 | getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line); |
1898 | SmallString<128> Buffer, Out; |
1899 | { |
1900 | llvm::raw_svector_ostream OS(Buffer); |
1901 | OS << "__omp_offloading_" << llvm::format("_%x", DeviceID) |
1902 | << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line; |
1903 | } |
1904 | |
1905 | const Expr *Init = VD->getAnyInitializer(); |
1906 | if (CGM.getLangOpts().CPlusPlus && PerformInit) { |
1907 | llvm::Constant *Ctor; |
1908 | llvm::Constant *ID; |
1909 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1910 | // Generate function that re-emits the declaration's initializer into |
1911 | // the threadprivate copy of the variable VD |
1912 | CodeGenFunction CtorCGF(CGM); |
1913 | |
1914 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1915 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1916 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1917 | FTy, Twine(Buffer, "_ctor"), FI, Loc); |
1918 | auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF); |
1919 | CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1920 | FunctionArgList(), Loc, Loc); |
1921 | auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF); |
1922 | CtorCGF.EmitAnyExprToMem(Init, |
1923 | Address(Addr, CGM.getContext().getDeclAlign(VD)), |
1924 | Init->getType().getQualifiers(), |
1925 | /*IsInitializer=*/true); |
1926 | CtorCGF.FinishFunction(); |
1927 | Ctor = Fn; |
1928 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1929 | CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor)); |
1930 | } else { |
1931 | Ctor = new llvm::GlobalVariable( |
1932 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1933 | llvm::GlobalValue::PrivateLinkage, |
1934 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor")); |
1935 | ID = Ctor; |
1936 | } |
1937 | |
1938 | // Register the information for the entry associated with the constructor. |
1939 | Out.clear(); |
1940 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1941 | DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor, |
1942 | ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryCtor); |
1943 | } |
1944 | if (VD->getType().isDestructedType() != QualType::DK_none) { |
1945 | llvm::Constant *Dtor; |
1946 | llvm::Constant *ID; |
1947 | if (CGM.getLangOpts().OpenMPIsDevice) { |
1948 | // Generate function that emits destructor call for the threadprivate |
1949 | // copy of the variable VD |
1950 | CodeGenFunction DtorCGF(CGM); |
1951 | |
1952 | const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction(); |
1953 | llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); |
1954 | llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction( |
1955 | FTy, Twine(Buffer, "_dtor"), FI, Loc); |
1956 | auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF); |
1957 | DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, |
1958 | FunctionArgList(), Loc, Loc); |
1959 | // Create a scope with an artificial location for the body of this |
1960 | // function. |
1961 | auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF); |
1962 | DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)), |
1963 | ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()), |
1964 | DtorCGF.needsEHCleanup(ASTTy.isDestructedType())); |
1965 | DtorCGF.FinishFunction(); |
1966 | Dtor = Fn; |
1967 | ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy); |
1968 | CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor)); |
1969 | } else { |
1970 | Dtor = new llvm::GlobalVariable( |
1971 | CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true, |
1972 | llvm::GlobalValue::PrivateLinkage, |
1973 | llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor")); |
1974 | ID = Dtor; |
1975 | } |
1976 | // Register the information for the entry associated with the destructor. |
1977 | Out.clear(); |
1978 | OffloadEntriesInfoManager.registerTargetRegionEntryInfo( |
1979 | DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor, |
1980 | ID, OffloadEntriesInfoManagerTy::OMPTargetRegionEntryDtor); |
1981 | } |
1982 | return CGM.getLangOpts().OpenMPIsDevice; |
1983 | } |
1984 | |
1985 | Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, |
1986 | QualType VarType, |
1987 | StringRef Name) { |
1988 | std::string Suffix = getName({"artificial", ""}); |
1989 | llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType); |
1990 | llvm::Value *GAddr = |
1991 | getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix)); |
1992 | if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS && |
1993 | CGM.getTarget().isTLSSupported()) { |
1994 | cast<llvm::GlobalVariable>(GAddr)->setThreadLocal(/*Val=*/true); |
1995 | return Address(GAddr, CGM.getContext().getTypeAlignInChars(VarType)); |
1996 | } |
1997 | std::string CacheSuffix = getName({"cache", ""}); |
1998 | llvm::Value *Args[] = { |
1999 | emitUpdateLocation(CGF, SourceLocation()), |
2000 | getThreadID(CGF, SourceLocation()), |
2001 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy), |
2002 | CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy, |
2003 | /*isSigned=*/false), |
2004 | getOrCreateInternalVariable( |
2005 | CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))}; |
2006 | return Address( |
2007 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2008 | CGF.EmitRuntimeCall( |
2009 | OMPBuilder.getOrCreateRuntimeFunction( |
2010 | CGM.getModule(), OMPRTL___kmpc_threadprivate_cached), |
2011 | Args), |
2012 | VarLVType->getPointerTo(/*AddrSpace=*/0)), |
2013 | CGM.getContext().getTypeAlignInChars(VarType)); |
2014 | } |
2015 | |
2016 | void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond, |
2017 | const RegionCodeGenTy &ThenGen, |
2018 | const RegionCodeGenTy &ElseGen) { |
2019 | CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange()); |
2020 | |
2021 | // If the condition constant folds and can be elided, try to avoid emitting |
2022 | // the condition and the dead arm of the if/else. |
2023 | bool CondConstant; |
2024 | if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) { |
2025 | if (CondConstant) |
2026 | ThenGen(CGF); |
2027 | else |
2028 | ElseGen(CGF); |
2029 | return; |
2030 | } |
2031 | |
2032 | // Otherwise, the condition did not fold, or we couldn't elide it. Just |
2033 | // emit the conditional branch. |
2034 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2035 | llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else"); |
2036 | llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end"); |
2037 | CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0); |
2038 | |
2039 | // Emit the 'then' code. |
2040 | CGF.EmitBlock(ThenBlock); |
2041 | ThenGen(CGF); |
2042 | CGF.EmitBranch(ContBlock); |
2043 | // Emit the 'else' code if present. |
2044 | // There is no need to emit line number for unconditional branch. |
2045 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2046 | CGF.EmitBlock(ElseBlock); |
2047 | ElseGen(CGF); |
2048 | // There is no need to emit line number for unconditional branch. |
2049 | (void)ApplyDebugLocation::CreateEmpty(CGF); |
2050 | CGF.EmitBranch(ContBlock); |
2051 | // Emit the continuation block for code after the if. |
2052 | CGF.EmitBlock(ContBlock, /*IsFinished=*/true); |
2053 | } |
2054 | |
2055 | void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, |
2056 | llvm::Function *OutlinedFn, |
2057 | ArrayRef<llvm::Value *> CapturedVars, |
2058 | const Expr *IfCond) { |
2059 | if (!CGF.HaveInsertPoint()) |
2060 | return; |
2061 | llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); |
2062 | auto &M = CGM.getModule(); |
2063 | auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc, |
2064 | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2065 | // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn); |
2066 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2067 | llvm::Value *Args[] = { |
2068 | RTLoc, |
2069 | CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars |
2070 | CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())}; |
2071 | llvm::SmallVector<llvm::Value *, 16> RealArgs; |
2072 | RealArgs.append(std::begin(Args), std::end(Args)); |
2073 | RealArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2074 | |
2075 | llvm::FunctionCallee RTLFn = |
2076 | OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call); |
2077 | CGF.EmitRuntimeCall(RTLFn, RealArgs); |
2078 | }; |
2079 | auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc, |
2080 | this](CodeGenFunction &CGF, PrePostActionTy &) { |
2081 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
2082 | llvm::Value *ThreadID = RT.getThreadID(CGF, Loc); |
2083 | // Build calls: |
2084 | // __kmpc_serialized_parallel(&Loc, GTid); |
2085 | llvm::Value *Args[] = {RTLoc, ThreadID}; |
2086 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2087 | M, OMPRTL___kmpc_serialized_parallel), |
2088 | Args); |
2089 | |
2090 | // OutlinedFn(>id, &zero_bound, CapturedStruct); |
2091 | Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc); |
2092 | Address ZeroAddrBound = |
2093 | CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty, |
2094 | /*Name=*/".bound.zero.addr"); |
2095 | CGF.InitTempAlloca(ZeroAddrBound, CGF.Builder.getInt32(/*C*/ 0)); |
2096 | llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs; |
2097 | // ThreadId for serialized parallels is 0. |
2098 | OutlinedFnArgs.push_back(ThreadIDAddr.getPointer()); |
2099 | OutlinedFnArgs.push_back(ZeroAddrBound.getPointer()); |
2100 | OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end()); |
2101 | |
2102 | // Ensure we do not inline the function. This is trivially true for the ones |
2103 | // passed to __kmpc_fork_call but the ones calles in serialized regions |
2104 | // could be inlined. This is not a perfect but it is closer to the invariant |
2105 | // we want, namely, every data environment starts with a new function. |
2106 | // TODO: We should pass the if condition to the runtime function and do the |
2107 | // handling there. Much cleaner code. |
2108 | OutlinedFn->addFnAttr(llvm::Attribute::NoInline); |
2109 | RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs); |
2110 | |
2111 | // __kmpc_end_serialized_parallel(&Loc, GTid); |
2112 | llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID}; |
2113 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2114 | M, OMPRTL___kmpc_end_serialized_parallel), |
2115 | EndArgs); |
2116 | }; |
2117 | if (IfCond) { |
2118 | emitIfClause(CGF, IfCond, ThenGen, ElseGen); |
2119 | } else { |
2120 | RegionCodeGenTy ThenRCG(ThenGen); |
2121 | ThenRCG(CGF); |
2122 | } |
2123 | } |
2124 | |
2125 | // If we're inside an (outlined) parallel region, use the region info's |
2126 | // thread-ID variable (it is passed in a first argument of the outlined function |
2127 | // as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in |
2128 | // regular serial code region, get thread ID by calling kmp_int32 |
2129 | // kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and |
2130 | // return the address of that temp. |
2131 | Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF, |
2132 | SourceLocation Loc) { |
2133 | if (auto *OMPRegionInfo = |
2134 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2135 | if (OMPRegionInfo->getThreadIDVariable()) |
2136 | return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF); |
2137 | |
2138 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
2139 | QualType Int32Ty = |
2140 | CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true); |
2141 | Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp."); |
2142 | CGF.EmitStoreOfScalar(ThreadID, |
2143 | CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty)); |
2144 | |
2145 | return ThreadIDTemp; |
2146 | } |
2147 | |
2148 | llvm::Constant *CGOpenMPRuntime::getOrCreateInternalVariable( |
2149 | llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) { |
2150 | SmallString<256> Buffer; |
2151 | llvm::raw_svector_ostream Out(Buffer); |
2152 | Out << Name; |
2153 | StringRef RuntimeName = Out.str(); |
2154 | auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first; |
2155 | if (Elem.second) { |
2156 | assert(Elem.second->getType()->getPointerElementType() == Ty &&((Elem.second->getType()->getPointerElementType() == Ty && "OMP internal variable has different type than requested" ) ? static_cast<void> (0) : __assert_fail ("Elem.second->getType()->getPointerElementType() == Ty && \"OMP internal variable has different type than requested\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2157, __PRETTY_FUNCTION__)) |
2157 | "OMP internal variable has different type than requested")((Elem.second->getType()->getPointerElementType() == Ty && "OMP internal variable has different type than requested" ) ? static_cast<void> (0) : __assert_fail ("Elem.second->getType()->getPointerElementType() == Ty && \"OMP internal variable has different type than requested\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2157, __PRETTY_FUNCTION__)); |
2158 | return &*Elem.second; |
2159 | } |
2160 | |
2161 | return Elem.second = new llvm::GlobalVariable( |
2162 | CGM.getModule(), Ty, /*IsConstant*/ false, |
2163 | llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty), |
2164 | Elem.first(), /*InsertBefore=*/nullptr, |
2165 | llvm::GlobalValue::NotThreadLocal, AddressSpace); |
2166 | } |
2167 | |
2168 | llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) { |
2169 | std::string Prefix = Twine("gomp_critical_user_", CriticalName).str(); |
2170 | std::string Name = getName({Prefix, "var"}); |
2171 | return getOrCreateInternalVariable(KmpCriticalNameTy, Name); |
2172 | } |
2173 | |
2174 | namespace { |
2175 | /// Common pre(post)-action for different OpenMP constructs. |
2176 | class CommonActionTy final : public PrePostActionTy { |
2177 | llvm::FunctionCallee EnterCallee; |
2178 | ArrayRef<llvm::Value *> EnterArgs; |
2179 | llvm::FunctionCallee ExitCallee; |
2180 | ArrayRef<llvm::Value *> ExitArgs; |
2181 | bool Conditional; |
2182 | llvm::BasicBlock *ContBlock = nullptr; |
2183 | |
2184 | public: |
2185 | CommonActionTy(llvm::FunctionCallee EnterCallee, |
2186 | ArrayRef<llvm::Value *> EnterArgs, |
2187 | llvm::FunctionCallee ExitCallee, |
2188 | ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false) |
2189 | : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee), |
2190 | ExitArgs(ExitArgs), Conditional(Conditional) {} |
2191 | void Enter(CodeGenFunction &CGF) override { |
2192 | llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs); |
2193 | if (Conditional) { |
2194 | llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes); |
2195 | auto *ThenBlock = CGF.createBasicBlock("omp_if.then"); |
2196 | ContBlock = CGF.createBasicBlock("omp_if.end"); |
2197 | // Generate the branch (If-stmt) |
2198 | CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock); |
2199 | CGF.EmitBlock(ThenBlock); |
2200 | } |
2201 | } |
2202 | void Done(CodeGenFunction &CGF) { |
2203 | // Emit the rest of blocks/branches |
2204 | CGF.EmitBranch(ContBlock); |
2205 | CGF.EmitBlock(ContBlock, true); |
2206 | } |
2207 | void Exit(CodeGenFunction &CGF) override { |
2208 | CGF.EmitRuntimeCall(ExitCallee, ExitArgs); |
2209 | } |
2210 | }; |
2211 | } // anonymous namespace |
2212 | |
2213 | void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF, |
2214 | StringRef CriticalName, |
2215 | const RegionCodeGenTy &CriticalOpGen, |
2216 | SourceLocation Loc, const Expr *Hint) { |
2217 | // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]); |
2218 | // CriticalOpGen(); |
2219 | // __kmpc_end_critical(ident_t *, gtid, Lock); |
2220 | // Prepare arguments and build a call to __kmpc_critical |
2221 | if (!CGF.HaveInsertPoint()) |
2222 | return; |
2223 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2224 | getCriticalRegionLock(CriticalName)}; |
2225 | llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), |
2226 | std::end(Args)); |
2227 | if (Hint) { |
2228 | EnterArgs.push_back(CGF.Builder.CreateIntCast( |
2229 | CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false)); |
2230 | } |
2231 | CommonActionTy Action( |
2232 | OMPBuilder.getOrCreateRuntimeFunction( |
2233 | CGM.getModule(), |
2234 | Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical), |
2235 | EnterArgs, |
2236 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2237 | OMPRTL___kmpc_end_critical), |
2238 | Args); |
2239 | CriticalOpGen.setAction(Action); |
2240 | emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen); |
2241 | } |
2242 | |
2243 | void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF, |
2244 | const RegionCodeGenTy &MasterOpGen, |
2245 | SourceLocation Loc) { |
2246 | if (!CGF.HaveInsertPoint()) |
2247 | return; |
2248 | // if(__kmpc_master(ident_t *, gtid)) { |
2249 | // MasterOpGen(); |
2250 | // __kmpc_end_master(ident_t *, gtid); |
2251 | // } |
2252 | // Prepare arguments and build a call to __kmpc_master |
2253 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2254 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2255 | CGM.getModule(), OMPRTL___kmpc_master), |
2256 | Args, |
2257 | OMPBuilder.getOrCreateRuntimeFunction( |
2258 | CGM.getModule(), OMPRTL___kmpc_end_master), |
2259 | Args, |
2260 | /*Conditional=*/true); |
2261 | MasterOpGen.setAction(Action); |
2262 | emitInlinedDirective(CGF, OMPD_master, MasterOpGen); |
2263 | Action.Done(CGF); |
2264 | } |
2265 | |
2266 | void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF, |
2267 | SourceLocation Loc) { |
2268 | if (!CGF.HaveInsertPoint()) |
2269 | return; |
2270 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2271 | OMPBuilder.createTaskyield(CGF.Builder); |
2272 | } else { |
2273 | // Build call __kmpc_omp_taskyield(loc, thread_id, 0); |
2274 | llvm::Value *Args[] = { |
2275 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2276 | llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)}; |
2277 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2278 | CGM.getModule(), OMPRTL___kmpc_omp_taskyield), |
2279 | Args); |
2280 | } |
2281 | |
2282 | if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
2283 | Region->emitUntiedSwitch(CGF); |
2284 | } |
2285 | |
2286 | void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF, |
2287 | const RegionCodeGenTy &TaskgroupOpGen, |
2288 | SourceLocation Loc) { |
2289 | if (!CGF.HaveInsertPoint()) |
2290 | return; |
2291 | // __kmpc_taskgroup(ident_t *, gtid); |
2292 | // TaskgroupOpGen(); |
2293 | // __kmpc_end_taskgroup(ident_t *, gtid); |
2294 | // Prepare arguments and build a call to __kmpc_taskgroup |
2295 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2296 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2297 | CGM.getModule(), OMPRTL___kmpc_taskgroup), |
2298 | Args, |
2299 | OMPBuilder.getOrCreateRuntimeFunction( |
2300 | CGM.getModule(), OMPRTL___kmpc_end_taskgroup), |
2301 | Args); |
2302 | TaskgroupOpGen.setAction(Action); |
2303 | emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen); |
2304 | } |
2305 | |
2306 | /// Given an array of pointers to variables, project the address of a |
2307 | /// given variable. |
2308 | static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, |
2309 | unsigned Index, const VarDecl *Var) { |
2310 | // Pull out the pointer to the variable. |
2311 | Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index); |
2312 | llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr); |
2313 | |
2314 | Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); |
2315 | Addr = CGF.Builder.CreateElementBitCast( |
2316 | Addr, CGF.ConvertTypeForMem(Var->getType())); |
2317 | return Addr; |
2318 | } |
2319 | |
2320 | static llvm::Value *emitCopyprivateCopyFunction( |
2321 | CodeGenModule &CGM, llvm::Type *ArgsType, |
2322 | ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs, |
2323 | ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps, |
2324 | SourceLocation Loc) { |
2325 | ASTContext &C = CGM.getContext(); |
2326 | // void copy_func(void *LHSArg, void *RHSArg); |
2327 | FunctionArgList Args; |
2328 | ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2329 | ImplicitParamDecl::Other); |
2330 | ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
2331 | ImplicitParamDecl::Other); |
2332 | Args.push_back(&LHSArg); |
2333 | Args.push_back(&RHSArg); |
2334 | const auto &CGFI = |
2335 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
2336 | std::string Name = |
2337 | CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"}); |
2338 | auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), |
2339 | llvm::GlobalValue::InternalLinkage, Name, |
2340 | &CGM.getModule()); |
2341 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
2342 | Fn->setDoesNotRecurse(); |
2343 | CodeGenFunction CGF(CGM); |
2344 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
2345 | // Dest = (void*[n])(LHSArg); |
2346 | // Src = (void*[n])(RHSArg); |
2347 | Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2348 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), |
2349 | ArgsType), CGF.getPointerAlign()); |
2350 | Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2351 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), |
2352 | ArgsType), CGF.getPointerAlign()); |
2353 | // *(Type0*)Dst[0] = *(Type0*)Src[0]; |
2354 | // *(Type1*)Dst[1] = *(Type1*)Src[1]; |
2355 | // ... |
2356 | // *(Typen*)Dst[n] = *(Typen*)Src[n]; |
2357 | for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) { |
2358 | const auto *DestVar = |
2359 | cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()); |
2360 | Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar); |
2361 | |
2362 | const auto *SrcVar = |
2363 | cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()); |
2364 | Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar); |
2365 | |
2366 | const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl(); |
2367 | QualType Type = VD->getType(); |
2368 | CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]); |
2369 | } |
2370 | CGF.FinishFunction(); |
2371 | return Fn; |
2372 | } |
2373 | |
2374 | void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF, |
2375 | const RegionCodeGenTy &SingleOpGen, |
2376 | SourceLocation Loc, |
2377 | ArrayRef<const Expr *> CopyprivateVars, |
2378 | ArrayRef<const Expr *> SrcExprs, |
2379 | ArrayRef<const Expr *> DstExprs, |
2380 | ArrayRef<const Expr *> AssignmentOps) { |
2381 | if (!CGF.HaveInsertPoint()) |
2382 | return; |
2383 | assert(CopyprivateVars.size() == SrcExprs.size() &&((CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars .size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()) ? static_cast<void> (0) : __assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2385, __PRETTY_FUNCTION__)) |
2384 | CopyprivateVars.size() == DstExprs.size() &&((CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars .size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()) ? static_cast<void> (0) : __assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2385, __PRETTY_FUNCTION__)) |
2385 | CopyprivateVars.size() == AssignmentOps.size())((CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars .size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()) ? static_cast<void> (0) : __assert_fail ("CopyprivateVars.size() == SrcExprs.size() && CopyprivateVars.size() == DstExprs.size() && CopyprivateVars.size() == AssignmentOps.size()" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2385, __PRETTY_FUNCTION__)); |
2386 | ASTContext &C = CGM.getContext(); |
2387 | // int32 did_it = 0; |
2388 | // if(__kmpc_single(ident_t *, gtid)) { |
2389 | // SingleOpGen(); |
2390 | // __kmpc_end_single(ident_t *, gtid); |
2391 | // did_it = 1; |
2392 | // } |
2393 | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2394 | // <copy_func>, did_it); |
2395 | |
2396 | Address DidIt = Address::invalid(); |
2397 | if (!CopyprivateVars.empty()) { |
2398 | // int32 did_it = 0; |
2399 | QualType KmpInt32Ty = |
2400 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
2401 | DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it"); |
2402 | CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt); |
2403 | } |
2404 | // Prepare arguments and build a call to __kmpc_single |
2405 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2406 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2407 | CGM.getModule(), OMPRTL___kmpc_single), |
2408 | Args, |
2409 | OMPBuilder.getOrCreateRuntimeFunction( |
2410 | CGM.getModule(), OMPRTL___kmpc_end_single), |
2411 | Args, |
2412 | /*Conditional=*/true); |
2413 | SingleOpGen.setAction(Action); |
2414 | emitInlinedDirective(CGF, OMPD_single, SingleOpGen); |
2415 | if (DidIt.isValid()) { |
2416 | // did_it = 1; |
2417 | CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt); |
2418 | } |
2419 | Action.Done(CGF); |
2420 | // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>, |
2421 | // <copy_func>, did_it); |
2422 | if (DidIt.isValid()) { |
2423 | llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size()); |
2424 | QualType CopyprivateArrayTy = C.getConstantArrayType( |
2425 | C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, |
2426 | /*IndexTypeQuals=*/0); |
2427 | // Create a list of all private variables for copyprivate. |
2428 | Address CopyprivateList = |
2429 | CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list"); |
2430 | for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) { |
2431 | Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I); |
2432 | CGF.Builder.CreateStore( |
2433 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
2434 | CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF), |
2435 | CGF.VoidPtrTy), |
2436 | Elem); |
2437 | } |
2438 | // Build function that copies private values from single region to all other |
2439 | // threads in the corresponding parallel region. |
2440 | llvm::Value *CpyFn = emitCopyprivateCopyFunction( |
2441 | CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(), |
2442 | CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc); |
2443 | llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy); |
2444 | Address CL = |
2445 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList, |
2446 | CGF.VoidPtrTy); |
2447 | llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt); |
2448 | llvm::Value *Args[] = { |
2449 | emitUpdateLocation(CGF, Loc), // ident_t *<loc> |
2450 | getThreadID(CGF, Loc), // i32 <gtid> |
2451 | BufSize, // size_t <buf_size> |
2452 | CL.getPointer(), // void *<copyprivate list> |
2453 | CpyFn, // void (*) (void *, void *) <copy_func> |
2454 | DidItVal // i32 did_it |
2455 | }; |
2456 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2457 | CGM.getModule(), OMPRTL___kmpc_copyprivate), |
2458 | Args); |
2459 | } |
2460 | } |
2461 | |
2462 | void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF, |
2463 | const RegionCodeGenTy &OrderedOpGen, |
2464 | SourceLocation Loc, bool IsThreads) { |
2465 | if (!CGF.HaveInsertPoint()) |
2466 | return; |
2467 | // __kmpc_ordered(ident_t *, gtid); |
2468 | // OrderedOpGen(); |
2469 | // __kmpc_end_ordered(ident_t *, gtid); |
2470 | // Prepare arguments and build a call to __kmpc_ordered |
2471 | if (IsThreads) { |
2472 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2473 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
2474 | CGM.getModule(), OMPRTL___kmpc_ordered), |
2475 | Args, |
2476 | OMPBuilder.getOrCreateRuntimeFunction( |
2477 | CGM.getModule(), OMPRTL___kmpc_end_ordered), |
2478 | Args); |
2479 | OrderedOpGen.setAction(Action); |
2480 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2481 | return; |
2482 | } |
2483 | emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen); |
2484 | } |
2485 | |
2486 | unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) { |
2487 | unsigned Flags; |
2488 | if (Kind == OMPD_for) |
2489 | Flags = OMP_IDENT_BARRIER_IMPL_FOR; |
2490 | else if (Kind == OMPD_sections) |
2491 | Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS; |
2492 | else if (Kind == OMPD_single) |
2493 | Flags = OMP_IDENT_BARRIER_IMPL_SINGLE; |
2494 | else if (Kind == OMPD_barrier) |
2495 | Flags = OMP_IDENT_BARRIER_EXPL; |
2496 | else |
2497 | Flags = OMP_IDENT_BARRIER_IMPL; |
2498 | return Flags; |
2499 | } |
2500 | |
2501 | void CGOpenMPRuntime::getDefaultScheduleAndChunk( |
2502 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
2503 | OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const { |
2504 | // Check if the loop directive is actually a doacross loop directive. In this |
2505 | // case choose static, 1 schedule. |
2506 | if (llvm::any_of( |
2507 | S.getClausesOfKind<OMPOrderedClause>(), |
2508 | [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) { |
2509 | ScheduleKind = OMPC_SCHEDULE_static; |
2510 | // Chunk size is 1 in this case. |
2511 | llvm::APInt ChunkSize(32, 1); |
2512 | ChunkExpr = IntegerLiteral::Create( |
2513 | CGF.getContext(), ChunkSize, |
2514 | CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0), |
2515 | SourceLocation()); |
2516 | } |
2517 | } |
2518 | |
2519 | void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, |
2520 | OpenMPDirectiveKind Kind, bool EmitChecks, |
2521 | bool ForceSimpleCall) { |
2522 | // Check if we should use the OMPBuilder |
2523 | auto *OMPRegionInfo = |
2524 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo); |
2525 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2526 | CGF.Builder.restoreIP(OMPBuilder.createBarrier( |
2527 | CGF.Builder, Kind, ForceSimpleCall, EmitChecks)); |
2528 | return; |
2529 | } |
2530 | |
2531 | if (!CGF.HaveInsertPoint()) |
2532 | return; |
2533 | // Build call __kmpc_cancel_barrier(loc, thread_id); |
2534 | // Build call __kmpc_barrier(loc, thread_id); |
2535 | unsigned Flags = getDefaultFlagsForBarriers(Kind); |
2536 | // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc, |
2537 | // thread_id); |
2538 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags), |
2539 | getThreadID(CGF, Loc)}; |
2540 | if (OMPRegionInfo) { |
2541 | if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) { |
2542 | llvm::Value *Result = CGF.EmitRuntimeCall( |
2543 | OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), |
2544 | OMPRTL___kmpc_cancel_barrier), |
2545 | Args); |
2546 | if (EmitChecks) { |
2547 | // if (__kmpc_cancel_barrier()) { |
2548 | // exit from construct; |
2549 | // } |
2550 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit"); |
2551 | llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue"); |
2552 | llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result); |
2553 | CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB); |
2554 | CGF.EmitBlock(ExitBB); |
2555 | // exit from construct; |
2556 | CodeGenFunction::JumpDest CancelDestination = |
2557 | CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind()); |
2558 | CGF.EmitBranchThroughCleanup(CancelDestination); |
2559 | CGF.EmitBlock(ContBB, /*IsFinished=*/true); |
2560 | } |
2561 | return; |
2562 | } |
2563 | } |
2564 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2565 | CGM.getModule(), OMPRTL___kmpc_barrier), |
2566 | Args); |
2567 | } |
2568 | |
2569 | /// Map the OpenMP loop schedule to the runtime enumeration. |
2570 | static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, |
2571 | bool Chunked, bool Ordered) { |
2572 | switch (ScheduleKind) { |
2573 | case OMPC_SCHEDULE_static: |
2574 | return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked) |
2575 | : (Ordered ? OMP_ord_static : OMP_sch_static); |
2576 | case OMPC_SCHEDULE_dynamic: |
2577 | return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked; |
2578 | case OMPC_SCHEDULE_guided: |
2579 | return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked; |
2580 | case OMPC_SCHEDULE_runtime: |
2581 | return Ordered ? OMP_ord_runtime : OMP_sch_runtime; |
2582 | case OMPC_SCHEDULE_auto: |
2583 | return Ordered ? OMP_ord_auto : OMP_sch_auto; |
2584 | case OMPC_SCHEDULE_unknown: |
2585 | assert(!Chunked && "chunk was specified but schedule kind not known")((!Chunked && "chunk was specified but schedule kind not known" ) ? static_cast<void> (0) : __assert_fail ("!Chunked && \"chunk was specified but schedule kind not known\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2585, __PRETTY_FUNCTION__)); |
2586 | return Ordered ? OMP_ord_static : OMP_sch_static; |
2587 | } |
2588 | llvm_unreachable("Unexpected runtime schedule")::llvm::llvm_unreachable_internal("Unexpected runtime schedule" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2588); |
2589 | } |
2590 | |
2591 | /// Map the OpenMP distribute schedule to the runtime enumeration. |
2592 | static OpenMPSchedType |
2593 | getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) { |
2594 | // only static is allowed for dist_schedule |
2595 | return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static; |
2596 | } |
2597 | |
2598 | bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, |
2599 | bool Chunked) const { |
2600 | OpenMPSchedType Schedule = |
2601 | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2602 | return Schedule == OMP_sch_static; |
2603 | } |
2604 | |
2605 | bool CGOpenMPRuntime::isStaticNonchunked( |
2606 | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2607 | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2608 | return Schedule == OMP_dist_sch_static; |
2609 | } |
2610 | |
2611 | bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, |
2612 | bool Chunked) const { |
2613 | OpenMPSchedType Schedule = |
2614 | getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false); |
2615 | return Schedule == OMP_sch_static_chunked; |
2616 | } |
2617 | |
2618 | bool CGOpenMPRuntime::isStaticChunked( |
2619 | OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const { |
2620 | OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked); |
2621 | return Schedule == OMP_dist_sch_static_chunked; |
2622 | } |
2623 | |
2624 | bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const { |
2625 | OpenMPSchedType Schedule = |
2626 | getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false); |
2627 | assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here")((Schedule != OMP_sch_static_chunked && "cannot be chunked here" ) ? static_cast<void> (0) : __assert_fail ("Schedule != OMP_sch_static_chunked && \"cannot be chunked here\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2627, __PRETTY_FUNCTION__)); |
2628 | return Schedule != OMP_sch_static; |
2629 | } |
2630 | |
2631 | static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule, |
2632 | OpenMPScheduleClauseModifier M1, |
2633 | OpenMPScheduleClauseModifier M2) { |
2634 | int Modifier = 0; |
2635 | switch (M1) { |
2636 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2637 | Modifier = OMP_sch_modifier_monotonic; |
2638 | break; |
2639 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2640 | Modifier = OMP_sch_modifier_nonmonotonic; |
2641 | break; |
2642 | case OMPC_SCHEDULE_MODIFIER_simd: |
2643 | if (Schedule == OMP_sch_static_chunked) |
2644 | Schedule = OMP_sch_static_balanced_chunked; |
2645 | break; |
2646 | case OMPC_SCHEDULE_MODIFIER_last: |
2647 | case OMPC_SCHEDULE_MODIFIER_unknown: |
2648 | break; |
2649 | } |
2650 | switch (M2) { |
2651 | case OMPC_SCHEDULE_MODIFIER_monotonic: |
2652 | Modifier = OMP_sch_modifier_monotonic; |
2653 | break; |
2654 | case OMPC_SCHEDULE_MODIFIER_nonmonotonic: |
2655 | Modifier = OMP_sch_modifier_nonmonotonic; |
2656 | break; |
2657 | case OMPC_SCHEDULE_MODIFIER_simd: |
2658 | if (Schedule == OMP_sch_static_chunked) |
2659 | Schedule = OMP_sch_static_balanced_chunked; |
2660 | break; |
2661 | case OMPC_SCHEDULE_MODIFIER_last: |
2662 | case OMPC_SCHEDULE_MODIFIER_unknown: |
2663 | break; |
2664 | } |
2665 | // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription. |
2666 | // If the static schedule kind is specified or if the ordered clause is |
2667 | // specified, and if the nonmonotonic modifier is not specified, the effect is |
2668 | // as if the monotonic modifier is specified. Otherwise, unless the monotonic |
2669 | // modifier is specified, the effect is as if the nonmonotonic modifier is |
2670 | // specified. |
2671 | if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) { |
2672 | if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static || |
2673 | Schedule == OMP_sch_static_balanced_chunked || |
2674 | Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static || |
2675 | Schedule == OMP_dist_sch_static_chunked || |
2676 | Schedule == OMP_dist_sch_static)) |
2677 | Modifier = OMP_sch_modifier_nonmonotonic; |
2678 | } |
2679 | return Schedule | Modifier; |
2680 | } |
2681 | |
2682 | void CGOpenMPRuntime::emitForDispatchInit( |
2683 | CodeGenFunction &CGF, SourceLocation Loc, |
2684 | const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, |
2685 | bool Ordered, const DispatchRTInput &DispatchValues) { |
2686 | if (!CGF.HaveInsertPoint()) |
2687 | return; |
2688 | OpenMPSchedType Schedule = getRuntimeSchedule( |
2689 | ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered); |
2690 | assert(Ordered ||((Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)) ? static_cast<void> (0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2693, __PRETTY_FUNCTION__)) |
2691 | (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&((Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)) ? static_cast<void> (0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2693, __PRETTY_FUNCTION__)) |
2692 | Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&((Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)) ? static_cast<void> (0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2693, __PRETTY_FUNCTION__)) |
2693 | Schedule != OMP_sch_static_balanced_chunked))((Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)) ? static_cast<void> (0) : __assert_fail ("Ordered || (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked && Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked && Schedule != OMP_sch_static_balanced_chunked)" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2693, __PRETTY_FUNCTION__)); |
2694 | // Call __kmpc_dispatch_init( |
2695 | // ident_t *loc, kmp_int32 tid, kmp_int32 schedule, |
2696 | // kmp_int[32|64] lower, kmp_int[32|64] upper, |
2697 | // kmp_int[32|64] stride, kmp_int[32|64] chunk); |
2698 | |
2699 | // If the Chunk was not specified in the clause - use default value 1. |
2700 | llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk |
2701 | : CGF.Builder.getIntN(IVSize, 1); |
2702 | llvm::Value *Args[] = { |
2703 | emitUpdateLocation(CGF, Loc), |
2704 | getThreadID(CGF, Loc), |
2705 | CGF.Builder.getInt32(addMonoNonMonoModifier( |
2706 | CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type |
2707 | DispatchValues.LB, // Lower |
2708 | DispatchValues.UB, // Upper |
2709 | CGF.Builder.getIntN(IVSize, 1), // Stride |
2710 | Chunk // Chunk |
2711 | }; |
2712 | CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args); |
2713 | } |
2714 | |
2715 | static void emitForStaticInitCall( |
2716 | CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, |
2717 | llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, |
2718 | OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, |
2719 | const CGOpenMPRuntime::StaticRTInput &Values) { |
2720 | if (!CGF.HaveInsertPoint()) |
2721 | return; |
2722 | |
2723 | assert(!Values.Ordered)((!Values.Ordered) ? static_cast<void> (0) : __assert_fail ("!Values.Ordered", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2723, __PRETTY_FUNCTION__)); |
2724 | assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||((Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked ) ? static_cast<void> (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2728, __PRETTY_FUNCTION__)) |
2725 | Schedule == OMP_sch_static_balanced_chunked ||((Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked ) ? static_cast<void> (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2728, __PRETTY_FUNCTION__)) |
2726 | Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||((Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked ) ? static_cast<void> (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2728, __PRETTY_FUNCTION__)) |
2727 | Schedule == OMP_dist_sch_static ||((Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked ) ? static_cast<void> (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2728, __PRETTY_FUNCTION__)) |
2728 | Schedule == OMP_dist_sch_static_chunked)((Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked ) ? static_cast<void> (0) : __assert_fail ("Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static || Schedule == OMP_dist_sch_static_chunked" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2728, __PRETTY_FUNCTION__)); |
2729 | |
2730 | // Call __kmpc_for_static_init( |
2731 | // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype, |
2732 | // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower, |
2733 | // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride, |
2734 | // kmp_int[32|64] incr, kmp_int[32|64] chunk); |
2735 | llvm::Value *Chunk = Values.Chunk; |
2736 | if (Chunk == nullptr) { |
2737 | assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||(((Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && "expected static non-chunked schedule" ) ? static_cast<void> (0) : __assert_fail ("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2739, __PRETTY_FUNCTION__)) |
2738 | Schedule == OMP_dist_sch_static) &&(((Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && "expected static non-chunked schedule" ) ? static_cast<void> (0) : __assert_fail ("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2739, __PRETTY_FUNCTION__)) |
2739 | "expected static non-chunked schedule")(((Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && "expected static non-chunked schedule" ) ? static_cast<void> (0) : __assert_fail ("(Schedule == OMP_sch_static || Schedule == OMP_ord_static || Schedule == OMP_dist_sch_static) && \"expected static non-chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2739, __PRETTY_FUNCTION__)); |
2740 | // If the Chunk was not specified in the clause - use default value 1. |
2741 | Chunk = CGF.Builder.getIntN(Values.IVSize, 1); |
2742 | } else { |
2743 | assert((Schedule == OMP_sch_static_chunked ||(((Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked ) && "expected static chunked schedule") ? static_cast <void> (0) : __assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2747, __PRETTY_FUNCTION__)) |
2744 | Schedule == OMP_sch_static_balanced_chunked ||(((Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked ) && "expected static chunked schedule") ? static_cast <void> (0) : __assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2747, __PRETTY_FUNCTION__)) |
2745 | Schedule == OMP_ord_static_chunked ||(((Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked ) && "expected static chunked schedule") ? static_cast <void> (0) : __assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2747, __PRETTY_FUNCTION__)) |
2746 | Schedule == OMP_dist_sch_static_chunked) &&(((Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked ) && "expected static chunked schedule") ? static_cast <void> (0) : __assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2747, __PRETTY_FUNCTION__)) |
2747 | "expected static chunked schedule")(((Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked ) && "expected static chunked schedule") ? static_cast <void> (0) : __assert_fail ("(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static_balanced_chunked || Schedule == OMP_ord_static_chunked || Schedule == OMP_dist_sch_static_chunked) && \"expected static chunked schedule\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2747, __PRETTY_FUNCTION__)); |
2748 | } |
2749 | llvm::Value *Args[] = { |
2750 | UpdateLocation, |
2751 | ThreadId, |
2752 | CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1, |
2753 | M2)), // Schedule type |
2754 | Values.IL.getPointer(), // &isLastIter |
2755 | Values.LB.getPointer(), // &LB |
2756 | Values.UB.getPointer(), // &UB |
2757 | Values.ST.getPointer(), // &Stride |
2758 | CGF.Builder.getIntN(Values.IVSize, 1), // Incr |
2759 | Chunk // Chunk |
2760 | }; |
2761 | CGF.EmitRuntimeCall(ForStaticInitFunction, Args); |
2762 | } |
2763 | |
2764 | void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF, |
2765 | SourceLocation Loc, |
2766 | OpenMPDirectiveKind DKind, |
2767 | const OpenMPScheduleTy &ScheduleKind, |
2768 | const StaticRTInput &Values) { |
2769 | OpenMPSchedType ScheduleNum = getRuntimeSchedule( |
2770 | ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered); |
2771 | assert(isOpenMPWorksharingDirective(DKind) &&((isOpenMPWorksharingDirective(DKind) && "Expected loop-based or sections-based directive." ) ? static_cast<void> (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2772, __PRETTY_FUNCTION__)) |
2772 | "Expected loop-based or sections-based directive.")((isOpenMPWorksharingDirective(DKind) && "Expected loop-based or sections-based directive." ) ? static_cast<void> (0) : __assert_fail ("isOpenMPWorksharingDirective(DKind) && \"Expected loop-based or sections-based directive.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2772, __PRETTY_FUNCTION__)); |
2773 | llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc, |
2774 | isOpenMPLoopDirective(DKind) |
2775 | ? OMP_IDENT_WORK_LOOP |
2776 | : OMP_IDENT_WORK_SECTIONS); |
2777 | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2778 | llvm::FunctionCallee StaticInitFunction = |
2779 | createForStaticInitFunction(Values.IVSize, Values.IVSigned); |
2780 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2781 | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2782 | ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values); |
2783 | } |
2784 | |
2785 | void CGOpenMPRuntime::emitDistributeStaticInit( |
2786 | CodeGenFunction &CGF, SourceLocation Loc, |
2787 | OpenMPDistScheduleClauseKind SchedKind, |
2788 | const CGOpenMPRuntime::StaticRTInput &Values) { |
2789 | OpenMPSchedType ScheduleNum = |
2790 | getRuntimeSchedule(SchedKind, Values.Chunk != nullptr); |
2791 | llvm::Value *UpdatedLocation = |
2792 | emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE); |
2793 | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
2794 | llvm::FunctionCallee StaticInitFunction = |
2795 | createForStaticInitFunction(Values.IVSize, Values.IVSigned); |
2796 | emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction, |
2797 | ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown, |
2798 | OMPC_SCHEDULE_MODIFIER_unknown, Values); |
2799 | } |
2800 | |
2801 | void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, |
2802 | SourceLocation Loc, |
2803 | OpenMPDirectiveKind DKind) { |
2804 | if (!CGF.HaveInsertPoint()) |
2805 | return; |
2806 | // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); |
2807 | llvm::Value *Args[] = { |
2808 | emitUpdateLocation(CGF, Loc, |
2809 | isOpenMPDistributeDirective(DKind) |
2810 | ? OMP_IDENT_WORK_DISTRIBUTE |
2811 | : isOpenMPLoopDirective(DKind) |
2812 | ? OMP_IDENT_WORK_LOOP |
2813 | : OMP_IDENT_WORK_SECTIONS), |
2814 | getThreadID(CGF, Loc)}; |
2815 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc); |
2816 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2817 | CGM.getModule(), OMPRTL___kmpc_for_static_fini), |
2818 | Args); |
2819 | } |
2820 | |
2821 | void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF, |
2822 | SourceLocation Loc, |
2823 | unsigned IVSize, |
2824 | bool IVSigned) { |
2825 | if (!CGF.HaveInsertPoint()) |
2826 | return; |
2827 | // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid); |
2828 | llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)}; |
2829 | CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args); |
2830 | } |
2831 | |
2832 | llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF, |
2833 | SourceLocation Loc, unsigned IVSize, |
2834 | bool IVSigned, Address IL, |
2835 | Address LB, Address UB, |
2836 | Address ST) { |
2837 | // Call __kmpc_dispatch_next( |
2838 | // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, |
2839 | // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper, |
2840 | // kmp_int[32|64] *p_stride); |
2841 | llvm::Value *Args[] = { |
2842 | emitUpdateLocation(CGF, Loc), |
2843 | getThreadID(CGF, Loc), |
2844 | IL.getPointer(), // &isLastIter |
2845 | LB.getPointer(), // &Lower |
2846 | UB.getPointer(), // &Upper |
2847 | ST.getPointer() // &Stride |
2848 | }; |
2849 | llvm::Value *Call = |
2850 | CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args); |
2851 | return CGF.EmitScalarConversion( |
2852 | Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1), |
2853 | CGF.getContext().BoolTy, Loc); |
2854 | } |
2855 | |
2856 | void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF, |
2857 | llvm::Value *NumThreads, |
2858 | SourceLocation Loc) { |
2859 | if (!CGF.HaveInsertPoint()) |
2860 | return; |
2861 | // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads) |
2862 | llvm::Value *Args[] = { |
2863 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2864 | CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)}; |
2865 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2866 | CGM.getModule(), OMPRTL___kmpc_push_num_threads), |
2867 | Args); |
2868 | } |
2869 | |
2870 | void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF, |
2871 | ProcBindKind ProcBind, |
2872 | SourceLocation Loc) { |
2873 | if (!CGF.HaveInsertPoint()) |
2874 | return; |
2875 | assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.")((ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value." ) ? static_cast<void> (0) : __assert_fail ("ProcBind != OMP_PROC_BIND_unknown && \"Unsupported proc_bind value.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2875, __PRETTY_FUNCTION__)); |
2876 | // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind) |
2877 | llvm::Value *Args[] = { |
2878 | emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), |
2879 | llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)}; |
2880 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2881 | CGM.getModule(), OMPRTL___kmpc_push_proc_bind), |
2882 | Args); |
2883 | } |
2884 | |
2885 | void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>, |
2886 | SourceLocation Loc, llvm::AtomicOrdering AO) { |
2887 | if (CGF.CGM.getLangOpts().OpenMPIRBuilder) { |
2888 | OMPBuilder.createFlush(CGF.Builder); |
2889 | } else { |
2890 | if (!CGF.HaveInsertPoint()) |
2891 | return; |
2892 | // Build call void __kmpc_flush(ident_t *loc) |
2893 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
2894 | CGM.getModule(), OMPRTL___kmpc_flush), |
2895 | emitUpdateLocation(CGF, Loc)); |
2896 | } |
2897 | } |
2898 | |
namespace {
/// Field indexes for the kmp_task_t type. The enumerators are numbered
/// consecutively from zero in declaration order.
enum KmpTaskTFields {
  /// Shared variables list.
  KmpTaskTShareds,
  /// The task routine.
  KmpTaskTRoutine,
  /// Partition id, used for untied tasks.
  KmpTaskTPartId,
  /// Function that calls the destructors of private variables.
  Data1,
  /// Priority of the task.
  Data2,
  /// Lower bound (taskloops only).
  KmpTaskTLowerBound,
  /// Upper bound (taskloops only).
  KmpTaskTUpperBound,
  /// Stride (taskloops only).
  KmpTaskTStride,
  /// Is-last-iteration flag (taskloops only).
  KmpTaskTLastIter,
  /// Reduction data (taskloops only).
  KmpTaskTReductions,
};
} // anonymous namespace
2924 | |
2925 | bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const { |
2926 | return OffloadEntriesTargetRegion.empty() && |
2927 | OffloadEntriesDeviceGlobalVar.empty(); |
2928 | } |
2929 | |
2930 | /// Initialize target region entry. |
2931 | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
2932 | initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, |
2933 | StringRef ParentName, unsigned LineNum, |
2934 | unsigned Order) { |
2935 | assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "((CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation.") ? static_cast <void> (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2937, __PRETTY_FUNCTION__)) |
2936 | "only required for the device "((CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation.") ? static_cast <void> (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2937, __PRETTY_FUNCTION__)) |
2937 | "code generation.")((CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation.") ? static_cast <void> (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2937, __PRETTY_FUNCTION__)); |
2938 | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = |
2939 | OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr, |
2940 | OMPTargetRegionEntryTargetRegion); |
2941 | ++OffloadingEntriesNum; |
2942 | } |
2943 | |
2944 | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
2945 | registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, |
2946 | StringRef ParentName, unsigned LineNum, |
2947 | llvm::Constant *Addr, llvm::Constant *ID, |
2948 | OMPTargetRegionEntryKind Flags) { |
2949 | // If we are emitting code for a target, the entry is already initialized, |
2950 | // only has to be registered. |
2951 | if (CGM.getLangOpts().OpenMPIsDevice) { |
2952 | // This could happen if the device compilation is invoked standalone. |
2953 | if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) |
2954 | initializeTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, |
2955 | OffloadingEntriesNum); |
2956 | auto &Entry = |
2957 | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum]; |
2958 | Entry.setAddress(Addr); |
2959 | Entry.setID(ID); |
2960 | Entry.setFlags(Flags); |
2961 | } else { |
2962 | if (Flags == |
2963 | OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion && |
2964 | hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum, |
2965 | /*IgnoreAddressId*/ true)) |
2966 | return; |
2967 | assert(!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&((!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum ) && "Target region entry already registered!") ? static_cast <void> (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2968, __PRETTY_FUNCTION__)) |
2968 | "Target region entry already registered!")((!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum ) && "Target region entry already registered!") ? static_cast <void> (0) : __assert_fail ("!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) && \"Target region entry already registered!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 2968, __PRETTY_FUNCTION__)); |
2969 | OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags); |
2970 | OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry; |
2971 | ++OffloadingEntriesNum; |
2972 | } |
2973 | } |
2974 | |
2975 | bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo( |
2976 | unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, |
2977 | bool IgnoreAddressId) const { |
2978 | auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID); |
2979 | if (PerDevice == OffloadEntriesTargetRegion.end()) |
2980 | return false; |
2981 | auto PerFile = PerDevice->second.find(FileID); |
2982 | if (PerFile == PerDevice->second.end()) |
2983 | return false; |
2984 | auto PerParentName = PerFile->second.find(ParentName); |
2985 | if (PerParentName == PerFile->second.end()) |
2986 | return false; |
2987 | auto PerLine = PerParentName->second.find(LineNum); |
2988 | if (PerLine == PerParentName->second.end()) |
2989 | return false; |
2990 | // Fail if this entry is already registered. |
2991 | if (!IgnoreAddressId && |
2992 | (PerLine->second.getAddress() || PerLine->second.getID())) |
2993 | return false; |
2994 | return true; |
2995 | } |
2996 | |
2997 | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo( |
2998 | const OffloadTargetRegionEntryInfoActTy &Action) { |
2999 | // Scan all target region entries and perform the provided action. |
3000 | for (const auto &D : OffloadEntriesTargetRegion) |
3001 | for (const auto &F : D.second) |
3002 | for (const auto &P : F.second) |
3003 | for (const auto &L : P.second) |
3004 | Action(D.first, F.first, P.first(), L.first, L.second); |
3005 | } |
3006 | |
3007 | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3008 | initializeDeviceGlobalVarEntryInfo(StringRef Name, |
3009 | OMPTargetGlobalVarEntryKind Flags, |
3010 | unsigned Order) { |
3011 | assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "((CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation.") ? static_cast <void> (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3013, __PRETTY_FUNCTION__)) |
3012 | "only required for the device "((CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation.") ? static_cast <void> (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3013, __PRETTY_FUNCTION__)) |
3013 | "code generation.")((CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is " "only required for the device " "code generation.") ? static_cast <void> (0) : __assert_fail ("CGM.getLangOpts().OpenMPIsDevice && \"Initialization of entries is \" \"only required for the device \" \"code generation.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3013, __PRETTY_FUNCTION__)); |
3014 | OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags); |
3015 | ++OffloadingEntriesNum; |
3016 | } |
3017 | |
3018 | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3019 | registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, |
3020 | CharUnits VarSize, |
3021 | OMPTargetGlobalVarEntryKind Flags, |
3022 | llvm::GlobalValue::LinkageTypes Linkage) { |
3023 | if (CGM.getLangOpts().OpenMPIsDevice) { |
3024 | // This could happen if the device compilation is invoked standalone. |
3025 | if (!hasDeviceGlobalVarEntryInfo(VarName)) |
3026 | initializeDeviceGlobalVarEntryInfo(VarName, Flags, OffloadingEntriesNum); |
3027 | auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; |
3028 | assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&(((!Entry.getAddress() || Entry.getAddress() == Addr) && "Resetting with the new address.") ? static_cast<void> (0) : __assert_fail ("(!Entry.getAddress() || Entry.getAddress() == Addr) && \"Resetting with the new address.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3029, __PRETTY_FUNCTION__)) |
3029 | "Resetting with the new address.")(((!Entry.getAddress() || Entry.getAddress() == Addr) && "Resetting with the new address.") ? static_cast<void> (0) : __assert_fail ("(!Entry.getAddress() || Entry.getAddress() == Addr) && \"Resetting with the new address.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3029, __PRETTY_FUNCTION__)); |
3030 | if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) { |
3031 | if (Entry.getVarSize().isZero()) { |
3032 | Entry.setVarSize(VarSize); |
3033 | Entry.setLinkage(Linkage); |
3034 | } |
3035 | return; |
3036 | } |
3037 | Entry.setVarSize(VarSize); |
3038 | Entry.setLinkage(Linkage); |
3039 | Entry.setAddress(Addr); |
3040 | } else { |
3041 | if (hasDeviceGlobalVarEntryInfo(VarName)) { |
3042 | auto &Entry = OffloadEntriesDeviceGlobalVar[VarName]; |
3043 | assert(Entry.isValid() && Entry.getFlags() == Flags &&((Entry.isValid() && Entry.getFlags() == Flags && "Entry not initialized!") ? static_cast<void> (0) : __assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3044, __PRETTY_FUNCTION__)) |
3044 | "Entry not initialized!")((Entry.isValid() && Entry.getFlags() == Flags && "Entry not initialized!") ? static_cast<void> (0) : __assert_fail ("Entry.isValid() && Entry.getFlags() == Flags && \"Entry not initialized!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3044, __PRETTY_FUNCTION__)); |
3045 | assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&(((!Entry.getAddress() || Entry.getAddress() == Addr) && "Resetting with the new address.") ? static_cast<void> (0) : __assert_fail ("(!Entry.getAddress() || Entry.getAddress() == Addr) && \"Resetting with the new address.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3046, __PRETTY_FUNCTION__)) |
3046 | "Resetting with the new address.")(((!Entry.getAddress() || Entry.getAddress() == Addr) && "Resetting with the new address.") ? static_cast<void> (0) : __assert_fail ("(!Entry.getAddress() || Entry.getAddress() == Addr) && \"Resetting with the new address.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3046, __PRETTY_FUNCTION__)); |
3047 | if (Entry.getVarSize().isZero()) { |
3048 | Entry.setVarSize(VarSize); |
3049 | Entry.setLinkage(Linkage); |
3050 | } |
3051 | return; |
3052 | } |
3053 | OffloadEntriesDeviceGlobalVar.try_emplace( |
3054 | VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage); |
3055 | ++OffloadingEntriesNum; |
3056 | } |
3057 | } |
3058 | |
3059 | void CGOpenMPRuntime::OffloadEntriesInfoManagerTy:: |
3060 | actOnDeviceGlobalVarEntriesInfo( |
3061 | const OffloadDeviceGlobalVarEntryInfoActTy &Action) { |
3062 | // Scan all target region entries and perform the provided action. |
3063 | for (const auto &E : OffloadEntriesDeviceGlobalVar) |
3064 | Action(E.getKey(), E.getValue()); |
3065 | } |
3066 | |
3067 | void CGOpenMPRuntime::createOffloadEntry( |
3068 | llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, |
3069 | llvm::GlobalValue::LinkageTypes Linkage) { |
3070 | StringRef Name = Addr->getName(); |
3071 | llvm::Module &M = CGM.getModule(); |
3072 | llvm::LLVMContext &C = M.getContext(); |
3073 | |
3074 | // Create constant string with the name. |
3075 | llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name); |
3076 | |
3077 | std::string StringName = getName({"omp_offloading", "entry_name"}); |
3078 | auto *Str = new llvm::GlobalVariable( |
3079 | M, StrPtrInit->getType(), /*isConstant=*/true, |
3080 | llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName); |
3081 | Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); |
3082 | |
3083 | llvm::Constant *Data[] = { |
3084 | llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(ID, CGM.VoidPtrTy), |
3085 | llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast(Str, CGM.Int8PtrTy), |
3086 | llvm::ConstantInt::get(CGM.SizeTy, Size), |
3087 | llvm::ConstantInt::get(CGM.Int32Ty, Flags), |
3088 | llvm::ConstantInt::get(CGM.Int32Ty, 0)}; |
3089 | std::string EntryName = getName({"omp_offloading", "entry", ""}); |
3090 | llvm::GlobalVariable *Entry = createGlobalStruct( |
3091 | CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data, |
3092 | Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage); |
3093 | |
3094 | // The entry has to be created in the section the linker expects it to be. |
3095 | Entry->setSection("omp_offloading_entries"); |
3096 | } |
3097 | |
3098 | void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { |
3099 | // Emit the offloading entries and metadata so that the device codegen side |
3100 | // can easily figure out what to emit. The produced metadata looks like |
3101 | // this: |
3102 | // |
3103 | // !omp_offload.info = !{!1, ...} |
3104 | // |
3105 | // Right now we only generate metadata for function that contain target |
3106 | // regions. |
3107 | |
3108 | // If we are in simd mode or there are no entries, we don't need to do |
3109 | // anything. |
3110 | if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) |
3111 | return; |
3112 | |
3113 | llvm::Module &M = CGM.getModule(); |
3114 | llvm::LLVMContext &C = M.getContext(); |
3115 | SmallVector<std::tuple<const OffloadEntriesInfoManagerTy::OffloadEntryInfo *, |
3116 | SourceLocation, StringRef>, |
3117 | 16> |
3118 | OrderedEntries(OffloadEntriesInfoManager.size()); |
3119 | llvm::SmallVector<StringRef, 16> ParentFunctions( |
3120 | OffloadEntriesInfoManager.size()); |
3121 | |
3122 | // Auxiliary methods to create metadata values and strings. |
3123 | auto &&GetMDInt = [this](unsigned V) { |
3124 | return llvm::ConstantAsMetadata::get( |
3125 | llvm::ConstantInt::get(CGM.Int32Ty, V)); |
3126 | }; |
3127 | |
3128 | auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); }; |
3129 | |
3130 | // Create the offloading info metadata node. |
3131 | llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info"); |
3132 | |
3133 | // Create function that emits metadata for each target region entry; |
3134 | auto &&TargetRegionMetadataEmitter = |
3135 | [this, &C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, |
3136 | &GetMDString]( |
3137 | unsigned DeviceID, unsigned FileID, StringRef ParentName, |
3138 | unsigned Line, |
3139 | const OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion &E) { |
3140 | // Generate metadata for target regions. Each entry of this metadata |
3141 | // contains: |
3142 | // - Entry 0 -> Kind of this type of metadata (0). |
3143 | // - Entry 1 -> Device ID of the file where the entry was identified. |
3144 | // - Entry 2 -> File ID of the file where the entry was identified. |
3145 | // - Entry 3 -> Mangled name of the function where the entry was |
3146 | // identified. |
3147 | // - Entry 4 -> Line in the file where the entry was identified. |
3148 | // - Entry 5 -> Order the entry was created. |
3149 | // The first element of the metadata node is the kind. |
3150 | llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID), |
3151 | GetMDInt(FileID), GetMDString(ParentName), |
3152 | GetMDInt(Line), GetMDInt(E.getOrder())}; |
3153 | |
3154 | SourceLocation Loc; |
3155 | for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(), |
3156 | E = CGM.getContext().getSourceManager().fileinfo_end(); |
3157 | I != E; ++I) { |
3158 | if (I->getFirst()->getUniqueID().getDevice() == DeviceID && |
3159 | I->getFirst()->getUniqueID().getFile() == FileID) { |
3160 | Loc = CGM.getContext().getSourceManager().translateFileLineCol( |
3161 | I->getFirst(), Line, 1); |
3162 | break; |
3163 | } |
3164 | } |
3165 | // Save this entry in the right position of the ordered entries array. |
3166 | OrderedEntries[E.getOrder()] = std::make_tuple(&E, Loc, ParentName); |
3167 | ParentFunctions[E.getOrder()] = ParentName; |
3168 | |
3169 | // Add metadata to the named metadata node. |
3170 | MD->addOperand(llvm::MDNode::get(C, Ops)); |
3171 | }; |
3172 | |
3173 | OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( |
3174 | TargetRegionMetadataEmitter); |
3175 | |
3176 | // Create function that emits metadata for each device global variable entry; |
3177 | auto &&DeviceGlobalVarMetadataEmitter = |
3178 | [&C, &OrderedEntries, &GetMDInt, &GetMDString, |
3179 | MD](StringRef MangledName, |
3180 | const OffloadEntriesInfoManagerTy::OffloadEntryInfoDeviceGlobalVar |
3181 | &E) { |
3182 | // Generate metadata for global variables. Each entry of this metadata |
3183 | // contains: |
3184 | // - Entry 0 -> Kind of this type of metadata (1). |
3185 | // - Entry 1 -> Mangled name of the variable. |
3186 | // - Entry 2 -> Declare target kind. |
3187 | // - Entry 3 -> Order the entry was created. |
3188 | // The first element of the metadata node is the kind. |
3189 | llvm::Metadata *Ops[] = { |
3190 | GetMDInt(E.getKind()), GetMDString(MangledName), |
3191 | GetMDInt(E.getFlags()), GetMDInt(E.getOrder())}; |
3192 | |
3193 | // Save this entry in the right position of the ordered entries array. |
3194 | OrderedEntries[E.getOrder()] = |
3195 | std::make_tuple(&E, SourceLocation(), MangledName); |
3196 | |
3197 | // Add metadata to the named metadata node. |
3198 | MD->addOperand(llvm::MDNode::get(C, Ops)); |
3199 | }; |
3200 | |
3201 | OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( |
3202 | DeviceGlobalVarMetadataEmitter); |
3203 | |
3204 | for (const auto &E : OrderedEntries) { |
3205 | assert(std::get<0>(E) && "All ordered entries must exist!")((std::get<0>(E) && "All ordered entries must exist!" ) ? static_cast<void> (0) : __assert_fail ("std::get<0>(E) && \"All ordered entries must exist!\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3205, __PRETTY_FUNCTION__)); |
3206 | if (const auto *CE = |
3207 | dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>( |
3208 | std::get<0>(E))) { |
3209 | if (!CE->getID() || !CE->getAddress()) { |
3210 | // Do not blame the entry if the parent funtion is not emitted. |
3211 | StringRef FnName = ParentFunctions[CE->getOrder()]; |
3212 | if (!CGM.GetGlobalValue(FnName)) |
3213 | continue; |
3214 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3215 | DiagnosticsEngine::Error, |
3216 | "Offloading entry for target region in %0 is incorrect: either the " |
3217 | "address or the ID is invalid."); |
3218 | CGM.getDiags().Report(std::get<1>(E), DiagID) << FnName; |
3219 | continue; |
3220 | } |
3221 | createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0, |
3222 | CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage); |
3223 | } else if (const auto *CE = dyn_cast<OffloadEntriesInfoManagerTy:: |
3224 | OffloadEntryInfoDeviceGlobalVar>( |
3225 | std::get<0>(E))) { |
3226 | OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind Flags = |
3227 | static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( |
3228 | CE->getFlags()); |
3229 | switch (Flags) { |
3230 | case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryTo: { |
3231 | if (CGM.getLangOpts().OpenMPIsDevice && |
3232 | CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory()) |
3233 | continue; |
3234 | if (!CE->getAddress()) { |
3235 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3236 | DiagnosticsEngine::Error, "Offloading entry for declare target " |
3237 | "variable %0 is incorrect: the " |
3238 | "address is invalid."); |
3239 | CGM.getDiags().Report(std::get<1>(E), DiagID) << std::get<2>(E); |
3240 | continue; |
3241 | } |
3242 | // The vaiable has no definition - no need to add the entry. |
3243 | if (CE->getVarSize().isZero()) |
3244 | continue; |
3245 | break; |
3246 | } |
3247 | case OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryLink: |
3248 | assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||((((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress ()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress ())) && "Declaret target link address is set.") ? static_cast <void> (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3250, __PRETTY_FUNCTION__)) |
3249 | (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&((((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress ()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress ())) && "Declaret target link address is set.") ? static_cast <void> (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3250, __PRETTY_FUNCTION__)) |
3250 | "Declaret target link address is set.")((((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress ()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress ())) && "Declaret target link address is set.") ? static_cast <void> (0) : __assert_fail ("((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) || (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) && \"Declaret target link address is set.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3250, __PRETTY_FUNCTION__)); |
3251 | if (CGM.getLangOpts().OpenMPIsDevice) |
3252 | continue; |
3253 | if (!CE->getAddress()) { |
3254 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3255 | DiagnosticsEngine::Error, |
3256 | "Offloading entry for declare target variable is incorrect: the " |
3257 | "address is invalid."); |
3258 | CGM.getDiags().Report(DiagID); |
3259 | continue; |
3260 | } |
3261 | break; |
3262 | } |
3263 | createOffloadEntry(CE->getAddress(), CE->getAddress(), |
3264 | CE->getVarSize().getQuantity(), Flags, |
3265 | CE->getLinkage()); |
3266 | } else { |
3267 | llvm_unreachable("Unsupported entry kind.")::llvm::llvm_unreachable_internal("Unsupported entry kind.", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3267); |
3268 | } |
3269 | } |
3270 | } |
3271 | |
3272 | /// Loads all the offload entries information from the host IR |
3273 | /// metadata. |
3274 | void CGOpenMPRuntime::loadOffloadInfoMetadata() { |
3275 | // If we are in target mode, load the metadata from the host IR. This code has |
3276 | // to match the metadaata creation in createOffloadEntriesAndInfoMetadata(). |
3277 | |
3278 | if (!CGM.getLangOpts().OpenMPIsDevice) |
3279 | return; |
3280 | |
3281 | if (CGM.getLangOpts().OMPHostIRFile.empty()) |
3282 | return; |
3283 | |
3284 | auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile); |
3285 | if (auto EC = Buf.getError()) { |
3286 | CGM.getDiags().Report(diag::err_cannot_open_file) |
3287 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3288 | return; |
3289 | } |
3290 | |
3291 | llvm::LLVMContext C; |
3292 | auto ME = expectedToErrorOrAndEmitErrors( |
3293 | C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C)); |
3294 | |
3295 | if (auto EC = ME.getError()) { |
3296 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
3297 | DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'"); |
3298 | CGM.getDiags().Report(DiagID) |
3299 | << CGM.getLangOpts().OMPHostIRFile << EC.message(); |
3300 | return; |
3301 | } |
3302 | |
3303 | llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info"); |
3304 | if (!MD) |
3305 | return; |
3306 | |
3307 | for (llvm::MDNode *MN : MD->operands()) { |
3308 | auto &&GetMDInt = [MN](unsigned Idx) { |
3309 | auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx)); |
3310 | return cast<llvm::ConstantInt>(V->getValue())->getZExtValue(); |
3311 | }; |
3312 | |
3313 | auto &&GetMDString = [MN](unsigned Idx) { |
3314 | auto *V = cast<llvm::MDString>(MN->getOperand(Idx)); |
3315 | return V->getString(); |
3316 | }; |
3317 | |
3318 | switch (GetMDInt(0)) { |
3319 | default: |
3320 | llvm_unreachable("Unexpected metadata!")::llvm::llvm_unreachable_internal("Unexpected metadata!", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3320); |
3321 | break; |
3322 | case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: |
3323 | OffloadingEntryInfoTargetRegion: |
3324 | OffloadEntriesInfoManager.initializeTargetRegionEntryInfo( |
3325 | /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2), |
3326 | /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4), |
3327 | /*Order=*/GetMDInt(5)); |
3328 | break; |
3329 | case OffloadEntriesInfoManagerTy::OffloadEntryInfo:: |
3330 | OffloadingEntryInfoDeviceGlobalVar: |
3331 | OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo( |
3332 | /*MangledName=*/GetMDString(1), |
3333 | static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>( |
3334 | /*Flags=*/GetMDInt(2)), |
3335 | /*Order=*/GetMDInt(3)); |
3336 | break; |
3337 | } |
3338 | } |
3339 | } |
3340 | |
3341 | void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { |
3342 | if (!KmpRoutineEntryPtrTy) { |
3343 | // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type. |
3344 | ASTContext &C = CGM.getContext(); |
3345 | QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy}; |
3346 | FunctionProtoType::ExtProtoInfo EPI; |
3347 | KmpRoutineEntryPtrQTy = C.getPointerType( |
3348 | C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI)); |
3349 | KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy); |
3350 | } |
3351 | } |
3352 | |
3353 | QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() { |
3354 | // Make sure the type of the entry is already created. This is the type we |
3355 | // have to create: |
3356 | // struct __tgt_offload_entry{ |
3357 | // void *addr; // Pointer to the offload entry info. |
3358 | // // (function or global) |
3359 | // char *name; // Name of the function or global. |
3360 | // size_t size; // Size of the entry info (0 if it a function). |
3361 | // int32_t flags; // Flags associated with the entry, e.g. 'link'. |
3362 | // int32_t reserved; // Reserved, to use by the runtime library. |
3363 | // }; |
3364 | if (TgtOffloadEntryQTy.isNull()) { |
3365 | ASTContext &C = CGM.getContext(); |
3366 | RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry"); |
3367 | RD->startDefinition(); |
3368 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3369 | addFieldToRecordDecl(C, RD, C.getPointerType(C.CharTy)); |
3370 | addFieldToRecordDecl(C, RD, C.getSizeType()); |
3371 | addFieldToRecordDecl( |
3372 | C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); |
3373 | addFieldToRecordDecl( |
3374 | C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true)); |
3375 | RD->completeDefinition(); |
3376 | RD->addAttr(PackedAttr::CreateImplicit(C)); |
3377 | TgtOffloadEntryQTy = C.getRecordType(RD); |
3378 | } |
3379 | return TgtOffloadEntryQTy; |
3380 | } |
3381 | |
3382 | namespace { |
3383 | struct PrivateHelpersTy { |
3384 | PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original, |
3385 | const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit) |
3386 | : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy), |
3387 | PrivateElemInit(PrivateElemInit) {} |
3388 | PrivateHelpersTy(const VarDecl *Original) : Original(Original) {} |
3389 | const Expr *OriginalRef = nullptr; |
3390 | const VarDecl *Original = nullptr; |
3391 | const VarDecl *PrivateCopy = nullptr; |
3392 | const VarDecl *PrivateElemInit = nullptr; |
3393 | bool isLocalPrivate() const { |
3394 | return !OriginalRef && !PrivateCopy && !PrivateElemInit; |
3395 | } |
3396 | }; |
3397 | typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy; |
3398 | } // anonymous namespace |
3399 | |
3400 | static bool isAllocatableDecl(const VarDecl *VD) { |
3401 | const VarDecl *CVD = VD->getCanonicalDecl(); |
3402 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
3403 | return false; |
3404 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
3405 | // Use the default allocation. |
3406 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
3407 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
3408 | !AA->getAllocator()); |
3409 | } |
3410 | |
3411 | static RecordDecl * |
3412 | createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) { |
3413 | if (!Privates.empty()) { |
3414 | ASTContext &C = CGM.getContext(); |
3415 | // Build struct .kmp_privates_t. { |
3416 | // /* private vars */ |
3417 | // }; |
3418 | RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t"); |
3419 | RD->startDefinition(); |
3420 | for (const auto &Pair : Privates) { |
3421 | const VarDecl *VD = Pair.second.Original; |
3422 | QualType Type = VD->getType().getNonReferenceType(); |
3423 | // If the private variable is a local variable with lvalue ref type, |
3424 | // allocate the pointer instead of the pointee type. |
3425 | if (Pair.second.isLocalPrivate()) { |
3426 | if (VD->getType()->isLValueReferenceType()) |
3427 | Type = C.getPointerType(Type); |
3428 | if (isAllocatableDecl(VD)) |
3429 | Type = C.getPointerType(Type); |
3430 | } |
3431 | FieldDecl *FD = addFieldToRecordDecl(C, RD, Type); |
3432 | if (VD->hasAttrs()) { |
3433 | for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()), |
3434 | E(VD->getAttrs().end()); |
3435 | I != E; ++I) |
3436 | FD->addAttr(*I); |
3437 | } |
3438 | } |
3439 | RD->completeDefinition(); |
3440 | return RD; |
3441 | } |
3442 | return nullptr; |
3443 | } |
3444 | |
3445 | static RecordDecl * |
3446 | createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, |
3447 | QualType KmpInt32Ty, |
3448 | QualType KmpRoutineEntryPointerQTy) { |
3449 | ASTContext &C = CGM.getContext(); |
3450 | // Build struct kmp_task_t { |
3451 | // void * shareds; |
3452 | // kmp_routine_entry_t routine; |
3453 | // kmp_int32 part_id; |
3454 | // kmp_cmplrdata_t data1; |
3455 | // kmp_cmplrdata_t data2; |
3456 | // For taskloops additional fields: |
3457 | // kmp_uint64 lb; |
3458 | // kmp_uint64 ub; |
3459 | // kmp_int64 st; |
3460 | // kmp_int32 liter; |
3461 | // void * reductions; |
3462 | // }; |
3463 | RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union); |
3464 | UD->startDefinition(); |
3465 | addFieldToRecordDecl(C, UD, KmpInt32Ty); |
3466 | addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy); |
3467 | UD->completeDefinition(); |
3468 | QualType KmpCmplrdataTy = C.getRecordType(UD); |
3469 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t"); |
3470 | RD->startDefinition(); |
3471 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3472 | addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); |
3473 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3474 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3475 | addFieldToRecordDecl(C, RD, KmpCmplrdataTy); |
3476 | if (isOpenMPTaskLoopDirective(Kind)) { |
3477 | QualType KmpUInt64Ty = |
3478 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0); |
3479 | QualType KmpInt64Ty = |
3480 | CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1); |
3481 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3482 | addFieldToRecordDecl(C, RD, KmpUInt64Ty); |
3483 | addFieldToRecordDecl(C, RD, KmpInt64Ty); |
3484 | addFieldToRecordDecl(C, RD, KmpInt32Ty); |
3485 | addFieldToRecordDecl(C, RD, C.VoidPtrTy); |
3486 | } |
3487 | RD->completeDefinition(); |
3488 | return RD; |
3489 | } |
3490 | |
3491 | static RecordDecl * |
3492 | createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, |
3493 | ArrayRef<PrivateDataTy> Privates) { |
3494 | ASTContext &C = CGM.getContext(); |
3495 | // Build struct kmp_task_t_with_privates { |
3496 | // kmp_task_t task_data; |
3497 | // .kmp_privates_t. privates; |
3498 | // }; |
3499 | RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates"); |
3500 | RD->startDefinition(); |
3501 | addFieldToRecordDecl(C, RD, KmpTaskTQTy); |
3502 | if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) |
3503 | addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD)); |
3504 | RD->completeDefinition(); |
3505 | return RD; |
3506 | } |
3507 | |
3508 | /// Emit a proxy function which accepts kmp_task_t as the second |
3509 | /// argument. |
3510 | /// \code |
3511 | /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) { |
3512 | /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt, |
3513 | /// For taskloops: |
3514 | /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3515 | /// tt->reductions, tt->shareds); |
3516 | /// return 0; |
3517 | /// } |
3518 | /// \endcode |
3519 | static llvm::Function * |
3520 | emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, |
3521 | OpenMPDirectiveKind Kind, QualType KmpInt32Ty, |
3522 | QualType KmpTaskTWithPrivatesPtrQTy, |
3523 | QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, |
3524 | QualType SharedsPtrTy, llvm::Function *TaskFunction, |
3525 | llvm::Value *TaskPrivatesMap) { |
3526 | ASTContext &C = CGM.getContext(); |
3527 | FunctionArgList Args; |
3528 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3529 | ImplicitParamDecl::Other); |
3530 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3531 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3532 | ImplicitParamDecl::Other); |
3533 | Args.push_back(&GtidArg); |
3534 | Args.push_back(&TaskTypeArg); |
3535 | const auto &TaskEntryFnInfo = |
3536 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3537 | llvm::FunctionType *TaskEntryTy = |
3538 | CGM.getTypes().GetFunctionType(TaskEntryFnInfo); |
3539 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""}); |
3540 | auto *TaskEntry = llvm::Function::Create( |
3541 | TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3542 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo); |
3543 | TaskEntry->setDoesNotRecurse(); |
3544 | CodeGenFunction CGF(CGM); |
3545 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args, |
3546 | Loc, Loc); |
3547 | |
3548 | // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map, |
3549 | // tt, |
3550 | // For taskloops: |
3551 | // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter, |
3552 | // tt->task_data.shareds); |
3553 | llvm::Value *GtidParam = CGF.EmitLoadOfScalar( |
3554 | CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc); |
3555 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3556 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3557 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3558 | const auto *KmpTaskTWithPrivatesQTyRD = |
3559 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3560 | LValue Base = |
3561 | CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3562 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
3563 | auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); |
3564 | LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI); |
3565 | llvm::Value *PartidParam = PartIdLVal.getPointer(CGF); |
3566 | |
3567 | auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds); |
3568 | LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI); |
3569 | llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3570 | CGF.EmitLoadOfScalar(SharedsLVal, Loc), |
3571 | CGF.ConvertTypeForMem(SharedsPtrTy)); |
3572 | |
3573 | auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1); |
3574 | llvm::Value *PrivatesParam; |
3575 | if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) { |
3576 | LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI); |
3577 | PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3578 | PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy); |
3579 | } else { |
3580 | PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
3581 | } |
3582 | |
3583 | llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam, |
3584 | TaskPrivatesMap, |
3585 | CGF.Builder |
3586 | .CreatePointerBitCastOrAddrSpaceCast( |
3587 | TDBase.getAddress(CGF), CGF.VoidPtrTy) |
3588 | .getPointer()}; |
3589 | SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs), |
3590 | std::end(CommonArgs)); |
3591 | if (isOpenMPTaskLoopDirective(Kind)) { |
3592 | auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound); |
3593 | LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI); |
3594 | llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc); |
3595 | auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound); |
3596 | LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI); |
3597 | llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc); |
3598 | auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride); |
3599 | LValue StLVal = CGF.EmitLValueForField(Base, *StFI); |
3600 | llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc); |
3601 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3602 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3603 | llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc); |
3604 | auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions); |
3605 | LValue RLVal = CGF.EmitLValueForField(Base, *RFI); |
3606 | llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc); |
3607 | CallArgs.push_back(LBParam); |
3608 | CallArgs.push_back(UBParam); |
3609 | CallArgs.push_back(StParam); |
3610 | CallArgs.push_back(LIParam); |
3611 | CallArgs.push_back(RParam); |
3612 | } |
3613 | CallArgs.push_back(SharedsParam); |
3614 | |
3615 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction, |
3616 | CallArgs); |
3617 | CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)), |
3618 | CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty)); |
3619 | CGF.FinishFunction(); |
3620 | return TaskEntry; |
3621 | } |
3622 | |
3623 | static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM, |
3624 | SourceLocation Loc, |
3625 | QualType KmpInt32Ty, |
3626 | QualType KmpTaskTWithPrivatesPtrQTy, |
3627 | QualType KmpTaskTWithPrivatesQTy) { |
3628 | ASTContext &C = CGM.getContext(); |
3629 | FunctionArgList Args; |
3630 | ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty, |
3631 | ImplicitParamDecl::Other); |
3632 | ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3633 | KmpTaskTWithPrivatesPtrQTy.withRestrict(), |
3634 | ImplicitParamDecl::Other); |
3635 | Args.push_back(&GtidArg); |
3636 | Args.push_back(&TaskTypeArg); |
3637 | const auto &DestructorFnInfo = |
3638 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args); |
3639 | llvm::FunctionType *DestructorFnTy = |
3640 | CGM.getTypes().GetFunctionType(DestructorFnInfo); |
3641 | std::string Name = |
3642 | CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""}); |
3643 | auto *DestructorFn = |
3644 | llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage, |
3645 | Name, &CGM.getModule()); |
3646 | CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn, |
3647 | DestructorFnInfo); |
3648 | DestructorFn->setDoesNotRecurse(); |
3649 | CodeGenFunction CGF(CGM); |
3650 | CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo, |
3651 | Args, Loc, Loc); |
3652 | |
3653 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3654 | CGF.GetAddrOfLocalVar(&TaskTypeArg), |
3655 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3656 | const auto *KmpTaskTWithPrivatesQTyRD = |
3657 | cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl()); |
3658 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3659 | Base = CGF.EmitLValueForField(Base, *FI); |
3660 | for (const auto *Field : |
3661 | cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) { |
3662 | if (QualType::DestructionKind DtorKind = |
3663 | Field->getType().isDestructedType()) { |
3664 | LValue FieldLValue = CGF.EmitLValueForField(Base, Field); |
3665 | CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType()); |
3666 | } |
3667 | } |
3668 | CGF.FinishFunction(); |
3669 | return DestructorFn; |
3670 | } |
3671 | |
3672 | /// Emit a privates mapping function for correct handling of private and |
3673 | /// firstprivate variables. |
3674 | /// \code |
3675 | /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1> |
3676 | /// **noalias priv1,..., <tyn> **noalias privn) { |
3677 | /// *priv1 = &.privates.priv1; |
3678 | /// ...; |
3679 | /// *privn = &.privates.privn; |
3680 | /// } |
3681 | /// \endcode |
3682 | static llvm::Value * |
3683 | emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, |
3684 | const OMPTaskDataTy &Data, QualType PrivatesQTy, |
3685 | ArrayRef<PrivateDataTy> Privates) { |
3686 | ASTContext &C = CGM.getContext(); |
3687 | FunctionArgList Args; |
3688 | ImplicitParamDecl TaskPrivatesArg( |
3689 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3690 | C.getPointerType(PrivatesQTy).withConst().withRestrict(), |
3691 | ImplicitParamDecl::Other); |
3692 | Args.push_back(&TaskPrivatesArg); |
3693 | llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos; |
3694 | unsigned Counter = 1; |
3695 | for (const Expr *E : Data.PrivateVars) { |
3696 | Args.push_back(ImplicitParamDecl::Create( |
3697 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3698 | C.getPointerType(C.getPointerType(E->getType())) |
3699 | .withConst() |
3700 | .withRestrict(), |
3701 | ImplicitParamDecl::Other)); |
3702 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3703 | PrivateVarsPos[VD] = Counter; |
3704 | ++Counter; |
3705 | } |
3706 | for (const Expr *E : Data.FirstprivateVars) { |
3707 | Args.push_back(ImplicitParamDecl::Create( |
3708 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3709 | C.getPointerType(C.getPointerType(E->getType())) |
3710 | .withConst() |
3711 | .withRestrict(), |
3712 | ImplicitParamDecl::Other)); |
3713 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3714 | PrivateVarsPos[VD] = Counter; |
3715 | ++Counter; |
3716 | } |
3717 | for (const Expr *E : Data.LastprivateVars) { |
3718 | Args.push_back(ImplicitParamDecl::Create( |
3719 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3720 | C.getPointerType(C.getPointerType(E->getType())) |
3721 | .withConst() |
3722 | .withRestrict(), |
3723 | ImplicitParamDecl::Other)); |
3724 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
3725 | PrivateVarsPos[VD] = Counter; |
3726 | ++Counter; |
3727 | } |
3728 | for (const VarDecl *VD : Data.PrivateLocals) { |
3729 | QualType Ty = VD->getType().getNonReferenceType(); |
3730 | if (VD->getType()->isLValueReferenceType()) |
3731 | Ty = C.getPointerType(Ty); |
3732 | if (isAllocatableDecl(VD)) |
3733 | Ty = C.getPointerType(Ty); |
3734 | Args.push_back(ImplicitParamDecl::Create( |
3735 | C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3736 | C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(), |
3737 | ImplicitParamDecl::Other)); |
3738 | PrivateVarsPos[VD] = Counter; |
3739 | ++Counter; |
3740 | } |
3741 | const auto &TaskPrivatesMapFnInfo = |
3742 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3743 | llvm::FunctionType *TaskPrivatesMapTy = |
3744 | CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo); |
3745 | std::string Name = |
3746 | CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""}); |
3747 | auto *TaskPrivatesMap = llvm::Function::Create( |
3748 | TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name, |
3749 | &CGM.getModule()); |
3750 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap, |
3751 | TaskPrivatesMapFnInfo); |
3752 | if (CGM.getLangOpts().Optimize) { |
3753 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline); |
3754 | TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone); |
3755 | TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline); |
3756 | } |
3757 | CodeGenFunction CGF(CGM); |
3758 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap, |
3759 | TaskPrivatesMapFnInfo, Args, Loc, Loc); |
3760 | |
3761 | // *privi = &.privates.privi; |
3762 | LValue Base = CGF.EmitLoadOfPointerLValue( |
3763 | CGF.GetAddrOfLocalVar(&TaskPrivatesArg), |
3764 | TaskPrivatesArg.getType()->castAs<PointerType>()); |
3765 | const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl()); |
3766 | Counter = 0; |
3767 | for (const FieldDecl *Field : PrivatesQTyRD->fields()) { |
3768 | LValue FieldLVal = CGF.EmitLValueForField(Base, Field); |
3769 | const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]]; |
3770 | LValue RefLVal = |
3771 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); |
3772 | LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue( |
3773 | RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>()); |
3774 | CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal); |
3775 | ++Counter; |
3776 | } |
3777 | CGF.FinishFunction(); |
3778 | return TaskPrivatesMap; |
3779 | } |
3780 | |
3781 | /// Emit initialization for private variables in task-based directives. |
3782 | static void emitPrivatesInit(CodeGenFunction &CGF, |
3783 | const OMPExecutableDirective &D, |
3784 | Address KmpTaskSharedsPtr, LValue TDBase, |
3785 | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3786 | QualType SharedsTy, QualType SharedsPtrTy, |
3787 | const OMPTaskDataTy &Data, |
3788 | ArrayRef<PrivateDataTy> Privates, bool ForDup) { |
3789 | ASTContext &C = CGF.getContext(); |
3790 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
3791 | LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI); |
3792 | OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind()) |
3793 | ? OMPD_taskloop |
3794 | : OMPD_task; |
3795 | const CapturedStmt &CS = *D.getCapturedStmt(Kind); |
3796 | CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS); |
3797 | LValue SrcBase; |
3798 | bool IsTargetTask = |
3799 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) || |
3800 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()); |
3801 | // For target-based directives skip 4 firstprivate arrays BasePointersArray, |
3802 | // PointersArray, SizesArray, and MappersArray. The original variables for |
3803 | // these arrays are not captured and we get their addresses explicitly. |
3804 | if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) || |
3805 | (IsTargetTask && KmpTaskSharedsPtr.isValid())) { |
3806 | SrcBase = CGF.MakeAddrLValue( |
3807 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
3808 | KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)), |
3809 | SharedsTy); |
3810 | } |
3811 | FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin(); |
3812 | for (const PrivateDataTy &Pair : Privates) { |
3813 | // Do not initialize private locals. |
3814 | if (Pair.second.isLocalPrivate()) { |
3815 | ++FI; |
3816 | continue; |
3817 | } |
3818 | const VarDecl *VD = Pair.second.PrivateCopy; |
3819 | const Expr *Init = VD->getAnyInitializer(); |
3820 | if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) && |
3821 | !CGF.isTrivialInitializer(Init)))) { |
3822 | LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI); |
3823 | if (const VarDecl *Elem = Pair.second.PrivateElemInit) { |
3824 | const VarDecl *OriginalVD = Pair.second.Original; |
3825 | // Check if the variable is the target-based BasePointersArray, |
3826 | // PointersArray, SizesArray, or MappersArray. |
3827 | LValue SharedRefLValue; |
3828 | QualType Type = PrivateLValue.getType(); |
3829 | const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD); |
3830 | if (IsTargetTask && !SharedField) { |
3831 | assert(isa<ImplicitParamDecl>(OriginalVD) &&((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)) |
3832 | isa<CapturedDecl>(OriginalVD->getDeclContext()) &&((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)) |
3833 | cast<CapturedDecl>(OriginalVD->getDeclContext())((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)) |
3834 | ->getNumParams() == 0 &&((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)) |
3835 | isa<TranslationUnitDecl>(((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)) |
3836 | cast<CapturedDecl>(OriginalVD->getDeclContext())((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)) |
3837 | ->getDeclContext()) &&((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)) |
3838 | "Expected artificial target data variable.")((isa<ImplicitParamDecl>(OriginalVD) && isa< CapturedDecl>(OriginalVD->getDeclContext()) && cast <CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams () == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl >(OriginalVD->getDeclContext()) ->getDeclContext()) && "Expected artificial target data variable.") ? static_cast< void> (0) : __assert_fail ("isa<ImplicitParamDecl>(OriginalVD) && isa<CapturedDecl>(OriginalVD->getDeclContext()) && cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getNumParams() == 0 && isa<TranslationUnitDecl>( cast<CapturedDecl>(OriginalVD->getDeclContext()) ->getDeclContext()) && \"Expected artificial target data variable.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3838, __PRETTY_FUNCTION__)); |
3839 | SharedRefLValue = |
3840 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type); |
3841 | } else if (ForDup) { |
3842 | SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField); |
3843 | SharedRefLValue = CGF.MakeAddrLValue( |
3844 | Address(SharedRefLValue.getPointer(CGF), |
3845 | C.getDeclAlign(OriginalVD)), |
3846 | SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl), |
3847 | SharedRefLValue.getTBAAInfo()); |
3848 | } else if (CGF.LambdaCaptureFields.count( |
3849 | Pair.second.Original->getCanonicalDecl()) > 0 || |
3850 | dyn_cast_or_null<BlockDecl>(CGF.CurCodeDecl)) { |
3851 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3852 | } else { |
3853 | // Processing for implicitly captured variables. |
3854 | InlinedOpenMPRegionRAII Region( |
3855 | CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown, |
3856 | /*HasCancel=*/false); |
3857 | SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef); |
3858 | } |
3859 | if (Type->isArrayType()) { |
3860 | // Initialize firstprivate array. |
3861 | if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) { |
3862 | // Perform simple memcpy. |
3863 | CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type); |
3864 | } else { |
3865 | // Initialize firstprivate array using element-by-element |
3866 | // initialization. |
3867 | CGF.EmitOMPAggregateAssign( |
3868 | PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF), |
3869 | Type, |
3870 | [&CGF, Elem, Init, &CapturesInfo](Address DestElement, |
3871 | Address SrcElement) { |
3872 | // Clean up any temporaries needed by the initialization. |
3873 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3874 | InitScope.addPrivate( |
3875 | Elem, [SrcElement]() -> Address { return SrcElement; }); |
3876 | (void)InitScope.Privatize(); |
3877 | // Emit initialization for single element. |
3878 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII( |
3879 | CGF, &CapturesInfo); |
3880 | CGF.EmitAnyExprToMem(Init, DestElement, |
3881 | Init->getType().getQualifiers(), |
3882 | /*IsInitializer=*/false); |
3883 | }); |
3884 | } |
3885 | } else { |
3886 | CodeGenFunction::OMPPrivateScope InitScope(CGF); |
3887 | InitScope.addPrivate(Elem, [SharedRefLValue, &CGF]() -> Address { |
3888 | return SharedRefLValue.getAddress(CGF); |
3889 | }); |
3890 | (void)InitScope.Privatize(); |
3891 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo); |
3892 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, |
3893 | /*capturedByInit=*/false); |
3894 | } |
3895 | } else { |
3896 | CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false); |
3897 | } |
3898 | } |
3899 | ++FI; |
3900 | } |
3901 | } |
3902 | |
3903 | /// Check if duplication function is required for taskloops. |
3904 | static bool checkInitIsRequired(CodeGenFunction &CGF, |
3905 | ArrayRef<PrivateDataTy> Privates) { |
3906 | bool InitRequired = false; |
3907 | for (const PrivateDataTy &Pair : Privates) { |
3908 | if (Pair.second.isLocalPrivate()) |
3909 | continue; |
3910 | const VarDecl *VD = Pair.second.PrivateCopy; |
3911 | const Expr *Init = VD->getAnyInitializer(); |
3912 | InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) && |
3913 | !CGF.isTrivialInitializer(Init)); |
3914 | if (InitRequired) |
3915 | break; |
3916 | } |
3917 | return InitRequired; |
3918 | } |
3919 | |
3920 | |
3921 | /// Emit task_dup function (for initialization of |
3922 | /// private/firstprivate/lastprivate vars and last_iter flag) |
3923 | /// \code |
3924 | /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int |
3925 | /// lastpriv) { |
3926 | /// // setup lastprivate flag |
3927 | /// task_dst->last = lastpriv; |
3928 | /// // could be constructor calls here... |
3929 | /// } |
3930 | /// \endcode |
3931 | static llvm::Value * |
3932 | emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, |
3933 | const OMPExecutableDirective &D, |
3934 | QualType KmpTaskTWithPrivatesPtrQTy, |
3935 | const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
3936 | const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, |
3937 | QualType SharedsPtrTy, const OMPTaskDataTy &Data, |
3938 | ArrayRef<PrivateDataTy> Privates, bool WithLastIter) { |
3939 | ASTContext &C = CGM.getContext(); |
3940 | FunctionArgList Args; |
3941 | ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3942 | KmpTaskTWithPrivatesPtrQTy, |
3943 | ImplicitParamDecl::Other); |
3944 | ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, |
3945 | KmpTaskTWithPrivatesPtrQTy, |
3946 | ImplicitParamDecl::Other); |
3947 | ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy, |
3948 | ImplicitParamDecl::Other); |
3949 | Args.push_back(&DstArg); |
3950 | Args.push_back(&SrcArg); |
3951 | Args.push_back(&LastprivArg); |
3952 | const auto &TaskDupFnInfo = |
3953 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
3954 | llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo); |
3955 | std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""}); |
3956 | auto *TaskDup = llvm::Function::Create( |
3957 | TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule()); |
3958 | CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo); |
3959 | TaskDup->setDoesNotRecurse(); |
3960 | CodeGenFunction CGF(CGM); |
3961 | CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc, |
3962 | Loc); |
3963 | |
3964 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3965 | CGF.GetAddrOfLocalVar(&DstArg), |
3966 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3967 | // task_dst->liter = lastpriv; |
3968 | if (WithLastIter) { |
3969 | auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter); |
3970 | LValue Base = CGF.EmitLValueForField( |
3971 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3972 | LValue LILVal = CGF.EmitLValueForField(Base, *LIFI); |
3973 | llvm::Value *Lastpriv = CGF.EmitLoadOfScalar( |
3974 | CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc); |
3975 | CGF.EmitStoreOfScalar(Lastpriv, LILVal); |
3976 | } |
3977 | |
3978 | // Emit initial values for private copies (if any). |
3979 | assert(!Privates.empty())((!Privates.empty()) ? static_cast<void> (0) : __assert_fail ("!Privates.empty()", "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 3979, __PRETTY_FUNCTION__)); |
3980 | Address KmpTaskSharedsPtr = Address::invalid(); |
3981 | if (!Data.FirstprivateVars.empty()) { |
3982 | LValue TDBase = CGF.EmitLoadOfPointerLValue( |
3983 | CGF.GetAddrOfLocalVar(&SrcArg), |
3984 | KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>()); |
3985 | LValue Base = CGF.EmitLValueForField( |
3986 | TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
3987 | KmpTaskSharedsPtr = Address( |
3988 | CGF.EmitLoadOfScalar(CGF.EmitLValueForField( |
3989 | Base, *std::next(KmpTaskTQTyRD->field_begin(), |
3990 | KmpTaskTShareds)), |
3991 | Loc), |
3992 | CGM.getNaturalTypeAlignment(SharedsTy)); |
3993 | } |
3994 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD, |
3995 | SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true); |
3996 | CGF.FinishFunction(); |
3997 | return TaskDup; |
3998 | } |
3999 | |
4000 | /// Checks if destructor function is required to be generated. |
4001 | /// \return true if cleanups are required, false otherwise. |
4002 | static bool |
4003 | checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD, |
4004 | ArrayRef<PrivateDataTy> Privates) { |
4005 | for (const PrivateDataTy &P : Privates) { |
4006 | if (P.second.isLocalPrivate()) |
4007 | continue; |
4008 | QualType Ty = P.second.Original->getType().getNonReferenceType(); |
4009 | if (Ty.isDestructedType()) |
4010 | return true; |
4011 | } |
4012 | return false; |
4013 | } |
4014 | |
4015 | namespace { |
4016 | /// Loop generator for OpenMP iterator expression. |
4017 | class OMPIteratorGeneratorScope final |
4018 | : public CodeGenFunction::OMPPrivateScope { |
4019 | CodeGenFunction &CGF; |
4020 | const OMPIteratorExpr *E = nullptr; |
4021 | SmallVector<CodeGenFunction::JumpDest, 4> ContDests; |
4022 | SmallVector<CodeGenFunction::JumpDest, 4> ExitDests; |
4023 | OMPIteratorGeneratorScope() = delete; |
4024 | OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete; |
4025 | |
4026 | public: |
4027 | OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E) |
4028 | : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) { |
4029 | if (!E) |
4030 | return; |
4031 | SmallVector<llvm::Value *, 4> Uppers; |
4032 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { |
4033 | Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper)); |
4034 | const auto *VD = cast<VarDecl>(E->getIteratorDecl(I)); |
4035 | addPrivate(VD, [&CGF, VD]() { |
4036 | return CGF.CreateMemTemp(VD->getType(), VD->getName()); |
4037 | }); |
4038 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
4039 | addPrivate(HelperData.CounterVD, [&CGF, &HelperData]() { |
4040 | return CGF.CreateMemTemp(HelperData.CounterVD->getType(), |
4041 | "counter.addr"); |
4042 | }); |
4043 | } |
4044 | Privatize(); |
4045 | |
4046 | for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) { |
4047 | const OMPIteratorHelperData &HelperData = E->getHelper(I); |
4048 | LValue CLVal = |
4049 | CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD), |
4050 | HelperData.CounterVD->getType()); |
4051 | // Counter = 0; |
4052 | CGF.EmitStoreOfScalar( |
4053 | llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0), |
4054 | CLVal); |
4055 | CodeGenFunction::JumpDest &ContDest = |
4056 | ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont")); |
4057 | CodeGenFunction::JumpDest &ExitDest = |
4058 | ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit")); |
4059 | // N = <number-of_iterations>; |
4060 | llvm::Value *N = Uppers[I]; |
4061 | // cont: |
4062 | // if (Counter < N) goto body; else goto exit; |
4063 | CGF.EmitBlock(ContDest.getBlock()); |
4064 | auto *CVal = |
4065 | CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation()); |
4066 | llvm::Value *Cmp = |
4067 | HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType() |
4068 | ? CGF.Builder.CreateICmpSLT(CVal, N) |
4069 | : CGF.Builder.CreateICmpULT(CVal, N); |
4070 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body"); |
4071 | CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock()); |
4072 | // body: |
4073 | CGF.EmitBlock(BodyBB); |
4074 | // Iteri = Begini + Counter * Stepi; |
4075 | CGF.EmitIgnoredExpr(HelperData.Update); |
4076 | } |
4077 | } |
4078 | ~OMPIteratorGeneratorScope() { |
4079 | if (!E) |
4080 | return; |
4081 | for (unsigned I = E->numOfIterators(); I > 0; --I) { |
4082 | // Counter = Counter + 1; |
4083 | const OMPIteratorHelperData &HelperData = E->getHelper(I - 1); |
4084 | CGF.EmitIgnoredExpr(HelperData.CounterUpdate); |
4085 | // goto cont; |
4086 | CGF.EmitBranchThroughCleanup(ContDests[I - 1]); |
4087 | // exit: |
4088 | CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1); |
4089 | } |
4090 | } |
4091 | }; |
4092 | } // namespace |
4093 | |
4094 | static std::pair<llvm::Value *, llvm::Value *> |
4095 | getPointerAndSize(CodeGenFunction &CGF, const Expr *E) { |
4096 | const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E); |
4097 | llvm::Value *Addr; |
4098 | if (OASE) { |
4099 | const Expr *Base = OASE->getBase(); |
4100 | Addr = CGF.EmitScalarExpr(Base); |
4101 | } else { |
4102 | Addr = CGF.EmitLValue(E).getPointer(CGF); |
4103 | } |
4104 | llvm::Value *SizeVal; |
4105 | QualType Ty = E->getType(); |
4106 | if (OASE) { |
4107 | SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType()); |
4108 | for (const Expr *SE : OASE->getDimensions()) { |
4109 | llvm::Value *Sz = CGF.EmitScalarExpr(SE); |
4110 | Sz = CGF.EmitScalarConversion( |
4111 | Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc()); |
4112 | SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz); |
4113 | } |
4114 | } else if (const auto *ASE = |
4115 | dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) { |
4116 | LValue UpAddrLVal = |
4117 | CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false); |
4118 | llvm::Value *UpAddr = |
4119 | CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(CGF), /*Idx0=*/1); |
4120 | llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy); |
4121 | llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy); |
4122 | SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr); |
4123 | } else { |
4124 | SizeVal = CGF.getTypeSize(Ty); |
4125 | } |
4126 | return std::make_pair(Addr, SizeVal); |
4127 | } |
4128 | |
4129 | /// Builds kmp_depend_info, if it is not built yet, and builds flags type. |
4130 | static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) { |
4131 | QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false); |
4132 | if (KmpTaskAffinityInfoTy.isNull()) { |
4133 | RecordDecl *KmpAffinityInfoRD = |
4134 | C.buildImplicitRecord("kmp_task_affinity_info_t"); |
4135 | KmpAffinityInfoRD->startDefinition(); |
4136 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType()); |
4137 | addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType()); |
4138 | addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy); |
4139 | KmpAffinityInfoRD->completeDefinition(); |
4140 | KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD); |
4141 | } |
4142 | } |
4143 | |
4144 | CGOpenMPRuntime::TaskResultTy |
4145 | CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, |
4146 | const OMPExecutableDirective &D, |
4147 | llvm::Function *TaskFunction, QualType SharedsTy, |
4148 | Address Shareds, const OMPTaskDataTy &Data) { |
4149 | ASTContext &C = CGM.getContext(); |
4150 | llvm::SmallVector<PrivateDataTy, 4> Privates; |
4151 | // Aggregate privates and sort them by the alignment. |
4152 | const auto *I = Data.PrivateCopies.begin(); |
4153 | for (const Expr *E : Data.PrivateVars) { |
4154 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4155 | Privates.emplace_back( |
4156 | C.getDeclAlign(VD), |
4157 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4158 | /*PrivateElemInit=*/nullptr)); |
4159 | ++I; |
4160 | } |
4161 | I = Data.FirstprivateCopies.begin(); |
4162 | const auto *IElemInitRef = Data.FirstprivateInits.begin(); |
4163 | for (const Expr *E : Data.FirstprivateVars) { |
4164 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4165 | Privates.emplace_back( |
4166 | C.getDeclAlign(VD), |
4167 | PrivateHelpersTy( |
4168 | E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4169 | cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))); |
4170 | ++I; |
4171 | ++IElemInitRef; |
4172 | } |
4173 | I = Data.LastprivateCopies.begin(); |
4174 | for (const Expr *E : Data.LastprivateVars) { |
4175 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
4176 | Privates.emplace_back( |
4177 | C.getDeclAlign(VD), |
4178 | PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()), |
4179 | /*PrivateElemInit=*/nullptr)); |
4180 | ++I; |
4181 | } |
4182 | for (const VarDecl *VD : Data.PrivateLocals) { |
4183 | if (isAllocatableDecl(VD)) |
4184 | Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD)); |
4185 | else |
4186 | Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD)); |
4187 | } |
4188 | llvm::stable_sort(Privates, |
4189 | [](const PrivateDataTy &L, const PrivateDataTy &R) { |
4190 | return L.first > R.first; |
4191 | }); |
4192 | QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
4193 | // Build type kmp_routine_entry_t (if not built yet). |
4194 | emitKmpRoutineEntryT(KmpInt32Ty); |
4195 | // Build type kmp_task_t (if not built yet). |
4196 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) { |
4197 | if (SavedKmpTaskloopTQTy.isNull()) { |
4198 | SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
4199 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
4200 | } |
4201 | KmpTaskTQTy = SavedKmpTaskloopTQTy; |
4202 | } else { |
4203 | assert((D.getDirectiveKind() == OMPD_task ||(((D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective (D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective (D.getDirectiveKind())) && "Expected taskloop, task or target directive" ) ? static_cast<void> (0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4206, __PRETTY_FUNCTION__)) |
4204 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||(((D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective (D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective (D.getDirectiveKind())) && "Expected taskloop, task or target directive" ) ? static_cast<void> (0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4206, __PRETTY_FUNCTION__)) |
4205 | isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&(((D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective (D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective (D.getDirectiveKind())) && "Expected taskloop, task or target directive" ) ? static_cast<void> (0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4206, __PRETTY_FUNCTION__)) |
4206 | "Expected taskloop, task or target directive")(((D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective (D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective (D.getDirectiveKind())) && "Expected taskloop, task or target directive" ) ? static_cast<void> (0) : __assert_fail ("(D.getDirectiveKind() == OMPD_task || isOpenMPTargetExecutionDirective(D.getDirectiveKind()) || isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) && \"Expected taskloop, task or target directive\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4206, __PRETTY_FUNCTION__)); |
4207 | if (SavedKmpTaskTQTy.isNull()) { |
4208 | SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl( |
4209 | CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy)); |
4210 | } |
4211 | KmpTaskTQTy = SavedKmpTaskTQTy; |
4212 | } |
4213 | const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl()); |
4214 | // Build particular struct kmp_task_t for the given task. |
4215 | const RecordDecl *KmpTaskTWithPrivatesQTyRD = |
4216 | createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates); |
4217 | QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD); |
4218 | QualType KmpTaskTWithPrivatesPtrQTy = |
4219 | C.getPointerType(KmpTaskTWithPrivatesQTy); |
4220 | llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy); |
4221 | llvm::Type *KmpTaskTWithPrivatesPtrTy = |
4222 | KmpTaskTWithPrivatesTy->getPointerTo(); |
4223 | llvm::Value *KmpTaskTWithPrivatesTySize = |
4224 | CGF.getTypeSize(KmpTaskTWithPrivatesQTy); |
4225 | QualType SharedsPtrTy = C.getPointerType(SharedsTy); |
4226 | |
4227 | // Emit initial values for private copies (if any). |
4228 | llvm::Value *TaskPrivatesMap = nullptr; |
4229 | llvm::Type *TaskPrivatesMapTy = |
4230 | std::next(TaskFunction->arg_begin(), 3)->getType(); |
4231 | if (!Privates.empty()) { |
4232 | auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin()); |
4233 | TaskPrivatesMap = |
4234 | emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates); |
4235 | TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4236 | TaskPrivatesMap, TaskPrivatesMapTy); |
4237 | } else { |
4238 | TaskPrivatesMap = llvm::ConstantPointerNull::get( |
4239 | cast<llvm::PointerType>(TaskPrivatesMapTy)); |
4240 | } |
4241 | // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid, |
4242 | // kmp_task_t *tt); |
4243 | llvm::Function *TaskEntry = emitProxyTaskFunction( |
4244 | CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
4245 | KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction, |
4246 | TaskPrivatesMap); |
4247 | |
4248 | // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid, |
4249 | // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, |
4250 | // kmp_routine_entry_t *task_entry); |
4251 | // Task flags. Format is taken from |
4252 | // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h, |
4253 | // description of kmp_tasking_flags struct. |
4254 | enum { |
4255 | TiedFlag = 0x1, |
4256 | FinalFlag = 0x2, |
4257 | DestructorsFlag = 0x8, |
4258 | PriorityFlag = 0x20, |
4259 | DetachableFlag = 0x40, |
4260 | }; |
4261 | unsigned Flags = Data.Tied ? TiedFlag : 0; |
4262 | bool NeedsCleanup = false; |
4263 | if (!Privates.empty()) { |
4264 | NeedsCleanup = |
4265 | checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates); |
4266 | if (NeedsCleanup) |
4267 | Flags = Flags | DestructorsFlag; |
4268 | } |
4269 | if (Data.Priority.getInt()) |
4270 | Flags = Flags | PriorityFlag; |
4271 | if (D.hasClausesOfKind<OMPDetachClause>()) |
4272 | Flags = Flags | DetachableFlag; |
4273 | llvm::Value *TaskFlags = |
4274 | Data.Final.getPointer() |
4275 | ? CGF.Builder.CreateSelect(Data.Final.getPointer(), |
4276 | CGF.Builder.getInt32(FinalFlag), |
4277 | CGF.Builder.getInt32(/*C=*/0)) |
4278 | : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0); |
4279 | TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags)); |
4280 | llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy)); |
4281 | SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc), |
4282 | getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize, |
4283 | SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4284 | TaskEntry, KmpRoutineEntryPtrTy)}; |
4285 | llvm::Value *NewTask; |
4286 | if (D.hasClausesOfKind<OMPNowaitClause>()) { |
4287 | // Check if we have any device clause associated with the directive. |
4288 | const Expr *Device = nullptr; |
4289 | if (auto *C = D.getSingleClause<OMPDeviceClause>()) |
4290 | Device = C->getDevice(); |
4291 | // Emit device ID if any otherwise use default value. |
4292 | llvm::Value *DeviceID; |
4293 | if (Device) |
4294 | DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device), |
4295 | CGF.Int64Ty, /*isSigned=*/true); |
4296 | else |
4297 | DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF); |
4298 | AllocArgs.push_back(DeviceID); |
4299 | NewTask = CGF.EmitRuntimeCall( |
4300 | OMPBuilder.getOrCreateRuntimeFunction( |
4301 | CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc), |
4302 | AllocArgs); |
4303 | } else { |
4304 | NewTask = |
4305 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4306 | CGM.getModule(), OMPRTL___kmpc_omp_task_alloc), |
4307 | AllocArgs); |
4308 | } |
4309 | // Emit detach clause initialization. |
4310 | // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid, |
4311 | // task_descriptor); |
4312 | if (const auto *DC = D.getSingleClause<OMPDetachClause>()) { |
4313 | const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts(); |
4314 | LValue EvtLVal = CGF.EmitLValue(Evt); |
4315 | |
4316 | // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref, |
4317 | // int gtid, kmp_task_t *task); |
4318 | llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc()); |
4319 | llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc()); |
4320 | Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false); |
4321 | llvm::Value *EvtVal = CGF.EmitRuntimeCall( |
4322 | OMPBuilder.getOrCreateRuntimeFunction( |
4323 | CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event), |
4324 | {Loc, Tid, NewTask}); |
4325 | EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(), |
4326 | Evt->getExprLoc()); |
4327 | CGF.EmitStoreOfScalar(EvtVal, EvtLVal); |
4328 | } |
4329 | // Process affinity clauses. |
4330 | if (D.hasClausesOfKind<OMPAffinityClause>()) { |
4331 | // Process list of affinity data. |
4332 | ASTContext &C = CGM.getContext(); |
4333 | Address AffinitiesArray = Address::invalid(); |
4334 | // Calculate number of elements to form the array of affinity data. |
4335 | llvm::Value *NumOfElements = nullptr; |
4336 | unsigned NumAffinities = 0; |
4337 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4338 | if (const Expr *Modifier = C->getModifier()) { |
4339 | const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()); |
4340 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { |
4341 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4342 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
4343 | NumOfElements = |
4344 | NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz; |
4345 | } |
4346 | } else { |
4347 | NumAffinities += C->varlist_size(); |
4348 | } |
4349 | } |
4350 | getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy); |
4351 | // Fields ids in kmp_task_affinity_info record. |
4352 | enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags }; |
4353 | |
4354 | QualType KmpTaskAffinityInfoArrayTy; |
4355 | if (NumOfElements) { |
4356 | NumOfElements = CGF.Builder.CreateNUWAdd( |
4357 | llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements); |
4358 | OpaqueValueExpr OVE( |
4359 | Loc, |
4360 | C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0), |
4361 | VK_RValue); |
4362 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, |
4363 | RValue::get(NumOfElements)); |
4364 | KmpTaskAffinityInfoArrayTy = |
4365 | C.getVariableArrayType(KmpTaskAffinityInfoTy, &OVE, ArrayType::Normal, |
4366 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4367 | // Properly emit variable-sized array. |
4368 | auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy, |
4369 | ImplicitParamDecl::Other); |
4370 | CGF.EmitVarDecl(*PD); |
4371 | AffinitiesArray = CGF.GetAddrOfLocalVar(PD); |
4372 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4373 | /*isSigned=*/false); |
4374 | } else { |
4375 | KmpTaskAffinityInfoArrayTy = C.getConstantArrayType( |
4376 | KmpTaskAffinityInfoTy, |
4377 | llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr, |
4378 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4379 | AffinitiesArray = |
4380 | CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr"); |
4381 | AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0); |
4382 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities, |
4383 | /*isSigned=*/false); |
4384 | } |
4385 | |
4386 | const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl(); |
4387 | // Fill array by elements without iterators. |
4388 | unsigned Pos = 0; |
4389 | bool HasIterator = false; |
4390 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4391 | if (C->getModifier()) { |
4392 | HasIterator = true; |
4393 | continue; |
4394 | } |
4395 | for (const Expr *E : C->varlists()) { |
4396 | llvm::Value *Addr; |
4397 | llvm::Value *Size; |
4398 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4399 | LValue Base = |
4400 | CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos), |
4401 | KmpTaskAffinityInfoTy); |
4402 | // affs[i].base_addr = &<Affinities[i].second>; |
4403 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4404 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4405 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4406 | BaseAddrLVal); |
4407 | // affs[i].len = sizeof(<Affinities[i].second>); |
4408 | LValue LenLVal = CGF.EmitLValueForField( |
4409 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4410 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4411 | ++Pos; |
4412 | } |
4413 | } |
4414 | LValue PosLVal; |
4415 | if (HasIterator) { |
4416 | PosLVal = CGF.MakeAddrLValue( |
4417 | CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"), |
4418 | C.getSizeType()); |
4419 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4420 | } |
4421 | // Process elements with iterators. |
4422 | for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) { |
4423 | const Expr *Modifier = C->getModifier(); |
4424 | if (!Modifier) |
4425 | continue; |
4426 | OMPIteratorGeneratorScope IteratorScope( |
4427 | CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts())); |
4428 | for (const Expr *E : C->varlists()) { |
4429 | llvm::Value *Addr; |
4430 | llvm::Value *Size; |
4431 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4432 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4433 | LValue Base = CGF.MakeAddrLValue( |
4434 | Address(CGF.Builder.CreateGEP(AffinitiesArray.getPointer(), Idx), |
4435 | AffinitiesArray.getAlignment()), |
4436 | KmpTaskAffinityInfoTy); |
4437 | // affs[i].base_addr = &<Affinities[i].second>; |
4438 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4439 | Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr)); |
4440 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4441 | BaseAddrLVal); |
4442 | // affs[i].len = sizeof(<Affinities[i].second>); |
4443 | LValue LenLVal = CGF.EmitLValueForField( |
4444 | Base, *std::next(KmpAffinityInfoRD->field_begin(), Len)); |
4445 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4446 | Idx = CGF.Builder.CreateNUWAdd( |
4447 | Idx, llvm::ConstantInt::get(Idx->getType(), 1)); |
4448 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4449 | } |
4450 | } |
4451 | // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, |
4452 | // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 |
4453 | // naffins, kmp_task_affinity_info_t *affin_list); |
4454 | llvm::Value *LocRef = emitUpdateLocation(CGF, Loc); |
4455 | llvm::Value *GTid = getThreadID(CGF, Loc); |
4456 | llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4457 | AffinitiesArray.getPointer(), CGM.VoidPtrTy); |
4458 | // FIXME: Emit the function and ignore its result for now unless the |
4459 | // runtime function is properly implemented. |
4460 | (void)CGF.EmitRuntimeCall( |
4461 | OMPBuilder.getOrCreateRuntimeFunction( |
4462 | CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity), |
4463 | {LocRef, GTid, NewTask, NumOfElements, AffinListPtr}); |
4464 | } |
4465 | llvm::Value *NewTaskNewTaskTTy = |
4466 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4467 | NewTask, KmpTaskTWithPrivatesPtrTy); |
4468 | LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy, |
4469 | KmpTaskTWithPrivatesQTy); |
4470 | LValue TDBase = |
4471 | CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin()); |
4472 | // Fill the data in the resulting kmp_task_t record. |
4473 | // Copy shareds if there are any. |
4474 | Address KmpTaskSharedsPtr = Address::invalid(); |
4475 | if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) { |
4476 | KmpTaskSharedsPtr = |
4477 | Address(CGF.EmitLoadOfScalar( |
4478 | CGF.EmitLValueForField( |
4479 | TDBase, *std::next(KmpTaskTQTyRD->field_begin(), |
4480 | KmpTaskTShareds)), |
4481 | Loc), |
4482 | CGM.getNaturalTypeAlignment(SharedsTy)); |
4483 | LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy); |
4484 | LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy); |
4485 | CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap); |
4486 | } |
4487 | // Emit initial values for private copies (if any). |
4488 | TaskResultTy Result; |
4489 | if (!Privates.empty()) { |
4490 | emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD, |
4491 | SharedsTy, SharedsPtrTy, Data, Privates, |
4492 | /*ForDup=*/false); |
4493 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && |
4494 | (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) { |
4495 | Result.TaskDupFn = emitTaskDupFunction( |
4496 | CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD, |
4497 | KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates, |
4498 | /*WithLastIter=*/!Data.LastprivateVars.empty()); |
4499 | } |
4500 | } |
4501 | // Fields of union "kmp_cmplrdata_t" for destructors and priority. |
4502 | enum { Priority = 0, Destructors = 1 }; |
4503 | // Provide pointer to function with destructors for privates. |
4504 | auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1); |
4505 | const RecordDecl *KmpCmplrdataUD = |
4506 | (*FI)->getType()->getAsUnionType()->getDecl(); |
4507 | if (NeedsCleanup) { |
4508 | llvm::Value *DestructorFn = emitDestructorsFunction( |
4509 | CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy, |
4510 | KmpTaskTWithPrivatesQTy); |
4511 | LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI); |
4512 | LValue DestructorsLV = CGF.EmitLValueForField( |
4513 | Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors)); |
4514 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4515 | DestructorFn, KmpRoutineEntryPtrTy), |
4516 | DestructorsLV); |
4517 | } |
4518 | // Set priority. |
4519 | if (Data.Priority.getInt()) { |
4520 | LValue Data2LV = CGF.EmitLValueForField( |
4521 | TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2)); |
4522 | LValue PriorityLV = CGF.EmitLValueForField( |
4523 | Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority)); |
4524 | CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV); |
4525 | } |
4526 | Result.NewTask = NewTask; |
4527 | Result.TaskEntry = TaskEntry; |
4528 | Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy; |
4529 | Result.TDBase = TDBase; |
4530 | Result.KmpTaskTQTyRD = KmpTaskTQTyRD; |
4531 | return Result; |
4532 | } |
4533 | |
namespace {
/// Dependence kinds in the numeric encoding passed to the runtime library.
enum RTLDependenceKindTy {
  DepIn = 0x1,
  DepInOut = 0x3,
  DepMutexInOutSet = 0x4,
};
/// Positions of the fields inside the kmp_depend_info record.
enum RTLDependInfoFieldsTy {
  BaseAddr = 0,
  Len = 1,
  Flags = 2,
};
} // namespace
4544 | |
4545 | /// Translates internal dependency kind into the runtime kind. |
4546 | static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) { |
4547 | RTLDependenceKindTy DepKind; |
4548 | switch (K) { |
4549 | case OMPC_DEPEND_in: |
4550 | DepKind = DepIn; |
4551 | break; |
4552 | // Out and InOut dependencies must use the same code. |
4553 | case OMPC_DEPEND_out: |
4554 | case OMPC_DEPEND_inout: |
4555 | DepKind = DepInOut; |
4556 | break; |
4557 | case OMPC_DEPEND_mutexinoutset: |
4558 | DepKind = DepMutexInOutSet; |
4559 | break; |
4560 | case OMPC_DEPEND_source: |
4561 | case OMPC_DEPEND_sink: |
4562 | case OMPC_DEPEND_depobj: |
4563 | case OMPC_DEPEND_unknown: |
4564 | llvm_unreachable("Unknown task dependence type")::llvm::llvm_unreachable_internal("Unknown task dependence type" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4564); |
4565 | } |
4566 | return DepKind; |
4567 | } |
4568 | |
4569 | /// Builds kmp_depend_info, if it is not built yet, and builds flags type. |
4570 | static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy, |
4571 | QualType &FlagsTy) { |
4572 | FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false); |
4573 | if (KmpDependInfoTy.isNull()) { |
4574 | RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info"); |
4575 | KmpDependInfoRD->startDefinition(); |
4576 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType()); |
4577 | addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType()); |
4578 | addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy); |
4579 | KmpDependInfoRD->completeDefinition(); |
4580 | KmpDependInfoTy = C.getRecordType(KmpDependInfoRD); |
4581 | } |
4582 | } |
4583 | |
4584 | std::pair<llvm::Value *, LValue> |
4585 | CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal, |
4586 | SourceLocation Loc) { |
4587 | ASTContext &C = CGM.getContext(); |
4588 | QualType FlagsTy; |
4589 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4590 | RecordDecl *KmpDependInfoRD = |
4591 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4592 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4593 | DepobjLVal.getAddress(CGF), |
4594 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
4595 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4596 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4597 | Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); |
4598 | Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), |
4599 | Base.getTBAAInfo()); |
4600 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4601 | Addr.getPointer(), |
4602 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4603 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4604 | Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, |
4605 | Base.getBaseInfo(), Base.getTBAAInfo()); |
4606 | // NumDeps = deps[i].base_addr; |
4607 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4608 | NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4609 | llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc); |
4610 | return std::make_pair(NumDeps, Base); |
4611 | } |
4612 | |
4613 | static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4614 | llvm::PointerUnion<unsigned *, LValue *> Pos, |
4615 | const OMPTaskDataTy::DependData &Data, |
4616 | Address DependenciesArray) { |
4617 | CodeGenModule &CGM = CGF.CGM; |
4618 | ASTContext &C = CGM.getContext(); |
4619 | QualType FlagsTy; |
4620 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4621 | RecordDecl *KmpDependInfoRD = |
4622 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4623 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
4624 | |
4625 | OMPIteratorGeneratorScope IteratorScope( |
4626 | CGF, cast_or_null<OMPIteratorExpr>( |
4627 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4628 | : nullptr)); |
4629 | for (const Expr *E : Data.DepExprs) { |
4630 | llvm::Value *Addr; |
4631 | llvm::Value *Size; |
4632 | std::tie(Addr, Size) = getPointerAndSize(CGF, E); |
4633 | LValue Base; |
4634 | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4635 | Base = CGF.MakeAddrLValue( |
4636 | CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy); |
4637 | } else { |
4638 | LValue &PosLVal = *Pos.get<LValue *>(); |
4639 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4640 | Base = CGF.MakeAddrLValue( |
4641 | Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Idx), |
4642 | DependenciesArray.getAlignment()), |
4643 | KmpDependInfoTy); |
4644 | } |
4645 | // deps[i].base_addr = &<Dependencies[i].second>; |
4646 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4647 | Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4648 | CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy), |
4649 | BaseAddrLVal); |
4650 | // deps[i].len = sizeof(<Dependencies[i].second>); |
4651 | LValue LenLVal = CGF.EmitLValueForField( |
4652 | Base, *std::next(KmpDependInfoRD->field_begin(), Len)); |
4653 | CGF.EmitStoreOfScalar(Size, LenLVal); |
4654 | // deps[i].flags = <Dependencies[i].first>; |
4655 | RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind); |
4656 | LValue FlagsLVal = CGF.EmitLValueForField( |
4657 | Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); |
4658 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), |
4659 | FlagsLVal); |
4660 | if (unsigned *P = Pos.dyn_cast<unsigned *>()) { |
4661 | ++(*P); |
4662 | } else { |
4663 | LValue &PosLVal = *Pos.get<LValue *>(); |
4664 | llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4665 | Idx = CGF.Builder.CreateNUWAdd(Idx, |
4666 | llvm::ConstantInt::get(Idx->getType(), 1)); |
4667 | CGF.EmitStoreOfScalar(Idx, PosLVal); |
4668 | } |
4669 | } |
4670 | } |
4671 | |
4672 | static SmallVector<llvm::Value *, 4> |
4673 | emitDepobjElementsSizes(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4674 | const OMPTaskDataTy::DependData &Data) { |
4675 | assert(Data.DepKind == OMPC_DEPEND_depobj &&((Data.DepKind == OMPC_DEPEND_depobj && "Expected depobj dependecy kind." ) ? static_cast<void> (0) : __assert_fail ("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4676, __PRETTY_FUNCTION__)) |
4676 | "Expected depobj dependecy kind.")((Data.DepKind == OMPC_DEPEND_depobj && "Expected depobj dependecy kind." ) ? static_cast<void> (0) : __assert_fail ("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4676, __PRETTY_FUNCTION__)); |
4677 | SmallVector<llvm::Value *, 4> Sizes; |
4678 | SmallVector<LValue, 4> SizeLVals; |
4679 | ASTContext &C = CGF.getContext(); |
4680 | QualType FlagsTy; |
4681 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4682 | RecordDecl *KmpDependInfoRD = |
4683 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4684 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4685 | llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); |
4686 | { |
4687 | OMPIteratorGeneratorScope IteratorScope( |
4688 | CGF, cast_or_null<OMPIteratorExpr>( |
4689 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4690 | : nullptr)); |
4691 | for (const Expr *E : Data.DepExprs) { |
4692 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4693 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4694 | DepobjLVal.getAddress(CGF), |
4695 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
4696 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4697 | Base.getAddress(CGF), KmpDependInfoPtrT); |
4698 | Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), |
4699 | Base.getTBAAInfo()); |
4700 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4701 | Addr.getPointer(), |
4702 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4703 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4704 | Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, |
4705 | Base.getBaseInfo(), Base.getTBAAInfo()); |
4706 | // NumDeps = deps[i].base_addr; |
4707 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4708 | NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4709 | llvm::Value *NumDeps = |
4710 | CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); |
4711 | LValue NumLVal = CGF.MakeAddrLValue( |
4712 | CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"), |
4713 | C.getUIntPtrType()); |
4714 | CGF.InitTempAlloca(NumLVal.getAddress(CGF), |
4715 | llvm::ConstantInt::get(CGF.IntPtrTy, 0)); |
4716 | llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc()); |
4717 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps); |
4718 | CGF.EmitStoreOfScalar(Add, NumLVal); |
4719 | SizeLVals.push_back(NumLVal); |
4720 | } |
4721 | } |
4722 | for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) { |
4723 | llvm::Value *Size = |
4724 | CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc()); |
4725 | Sizes.push_back(Size); |
4726 | } |
4727 | return Sizes; |
4728 | } |
4729 | |
4730 | static void emitDepobjElements(CodeGenFunction &CGF, QualType &KmpDependInfoTy, |
4731 | LValue PosLVal, |
4732 | const OMPTaskDataTy::DependData &Data, |
4733 | Address DependenciesArray) { |
4734 | assert(Data.DepKind == OMPC_DEPEND_depobj &&((Data.DepKind == OMPC_DEPEND_depobj && "Expected depobj dependecy kind." ) ? static_cast<void> (0) : __assert_fail ("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4735, __PRETTY_FUNCTION__)) |
4735 | "Expected depobj dependecy kind.")((Data.DepKind == OMPC_DEPEND_depobj && "Expected depobj dependecy kind." ) ? static_cast<void> (0) : __assert_fail ("Data.DepKind == OMPC_DEPEND_depobj && \"Expected depobj dependecy kind.\"" , "/build/llvm-toolchain-snapshot-13~++20210301100612+564f5b0734bd/clang/lib/CodeGen/CGOpenMPRuntime.cpp" , 4735, __PRETTY_FUNCTION__)); |
4736 | ASTContext &C = CGF.getContext(); |
4737 | QualType FlagsTy; |
4738 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4739 | RecordDecl *KmpDependInfoRD = |
4740 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4741 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
4742 | llvm::Type *KmpDependInfoPtrT = CGF.ConvertTypeForMem(KmpDependInfoPtrTy); |
4743 | llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy); |
4744 | { |
4745 | OMPIteratorGeneratorScope IteratorScope( |
4746 | CGF, cast_or_null<OMPIteratorExpr>( |
4747 | Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts() |
4748 | : nullptr)); |
4749 | for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) { |
4750 | const Expr *E = Data.DepExprs[I]; |
4751 | LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts()); |
4752 | LValue Base = CGF.EmitLoadOfPointerLValue( |
4753 | DepobjLVal.getAddress(CGF), |
4754 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
4755 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4756 | Base.getAddress(CGF), KmpDependInfoPtrT); |
4757 | Base = CGF.MakeAddrLValue(Addr, KmpDependInfoTy, Base.getBaseInfo(), |
4758 | Base.getTBAAInfo()); |
4759 | |
4760 | // Get number of elements in a single depobj. |
4761 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
4762 | Addr.getPointer(), |
4763 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
4764 | LValue NumDepsBase = CGF.MakeAddrLValue( |
4765 | Address(DepObjAddr, Addr.getAlignment()), KmpDependInfoTy, |
4766 | Base.getBaseInfo(), Base.getTBAAInfo()); |
4767 | // NumDeps = deps[i].base_addr; |
4768 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4769 | NumDepsBase, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4770 | llvm::Value *NumDeps = |
4771 | CGF.EmitLoadOfScalar(BaseAddrLVal, E->getExprLoc()); |
4772 | |
4773 | // memcopy dependency data. |
4774 | llvm::Value *Size = CGF.Builder.CreateNUWMul( |
4775 | ElSize, |
4776 | CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false)); |
4777 | llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc()); |
4778 | Address DepAddr = |
4779 | Address(CGF.Builder.CreateGEP(DependenciesArray.getPointer(), Pos), |
4780 | DependenciesArray.getAlignment()); |
4781 | CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size); |
4782 | |
4783 | // Increase pos. |
4784 | // pos += size; |
4785 | llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps); |
4786 | CGF.EmitStoreOfScalar(Add, PosLVal); |
4787 | } |
4788 | } |
4789 | } |
4790 | |
4791 | std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause( |
4792 | CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies, |
4793 | SourceLocation Loc) { |
4794 | if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) { |
4795 | return D.DepExprs.empty(); |
4796 | })) |
4797 | return std::make_pair(nullptr, Address::invalid()); |
4798 | // Process list of dependencies. |
4799 | ASTContext &C = CGM.getContext(); |
4800 | Address DependenciesArray = Address::invalid(); |
4801 | llvm::Value *NumOfElements = nullptr; |
4802 | unsigned NumDependencies = std::accumulate( |
4803 | Dependencies.begin(), Dependencies.end(), 0, |
4804 | [](unsigned V, const OMPTaskDataTy::DependData &D) { |
4805 | return D.DepKind == OMPC_DEPEND_depobj |
4806 | ? V |
4807 | : (V + (D.IteratorExpr ? 0 : D.DepExprs.size())); |
4808 | }); |
4809 | QualType FlagsTy; |
4810 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4811 | bool HasDepobjDeps = false; |
4812 | bool HasRegularWithIterators = false; |
4813 | llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0); |
4814 | llvm::Value *NumOfRegularWithIterators = |
4815 | llvm::ConstantInt::get(CGF.IntPtrTy, 1); |
4816 | // Calculate number of depobj dependecies and regular deps with the iterators. |
4817 | for (const OMPTaskDataTy::DependData &D : Dependencies) { |
4818 | if (D.DepKind == OMPC_DEPEND_depobj) { |
4819 | SmallVector<llvm::Value *, 4> Sizes = |
4820 | emitDepobjElementsSizes(CGF, KmpDependInfoTy, D); |
4821 | for (llvm::Value *Size : Sizes) { |
4822 | NumOfDepobjElements = |
4823 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size); |
4824 | } |
4825 | HasDepobjDeps = true; |
4826 | continue; |
4827 | } |
4828 | // Include number of iterations, if any. |
4829 | if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) { |
4830 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { |
4831 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4832 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false); |
4833 | NumOfRegularWithIterators = |
4834 | CGF.Builder.CreateNUWMul(NumOfRegularWithIterators, Sz); |
4835 | } |
4836 | HasRegularWithIterators = true; |
4837 | continue; |
4838 | } |
4839 | } |
4840 | |
4841 | QualType KmpDependInfoArrayTy; |
4842 | if (HasDepobjDeps || HasRegularWithIterators) { |
4843 | NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies, |
4844 | /*isSigned=*/false); |
4845 | if (HasDepobjDeps) { |
4846 | NumOfElements = |
4847 | CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements); |
4848 | } |
4849 | if (HasRegularWithIterators) { |
4850 | NumOfElements = |
4851 | CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements); |
4852 | } |
4853 | OpaqueValueExpr OVE(Loc, |
4854 | C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0), |
4855 | VK_RValue); |
4856 | CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, |
4857 | RValue::get(NumOfElements)); |
4858 | KmpDependInfoArrayTy = |
4859 | C.getVariableArrayType(KmpDependInfoTy, &OVE, ArrayType::Normal, |
4860 | /*IndexTypeQuals=*/0, SourceRange(Loc, Loc)); |
4861 | // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy); |
4862 | // Properly emit variable-sized array. |
4863 | auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy, |
4864 | ImplicitParamDecl::Other); |
4865 | CGF.EmitVarDecl(*PD); |
4866 | DependenciesArray = CGF.GetAddrOfLocalVar(PD); |
4867 | NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty, |
4868 | /*isSigned=*/false); |
4869 | } else { |
4870 | KmpDependInfoArrayTy = C.getConstantArrayType( |
4871 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr, |
4872 | ArrayType::Normal, /*IndexTypeQuals=*/0); |
4873 | DependenciesArray = |
4874 | CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr"); |
4875 | DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0); |
4876 | NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies, |
4877 | /*isSigned=*/false); |
4878 | } |
4879 | unsigned Pos = 0; |
4880 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { |
4881 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4882 | Dependencies[I].IteratorExpr) |
4883 | continue; |
4884 | emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I], |
4885 | DependenciesArray); |
4886 | } |
4887 | // Copy regular dependecies with iterators. |
4888 | LValue PosLVal = CGF.MakeAddrLValue( |
4889 | CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType()); |
4890 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal); |
4891 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { |
4892 | if (Dependencies[I].DepKind == OMPC_DEPEND_depobj || |
4893 | !Dependencies[I].IteratorExpr) |
4894 | continue; |
4895 | emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I], |
4896 | DependenciesArray); |
4897 | } |
4898 | // Copy final depobj arrays without iterators. |
4899 | if (HasDepobjDeps) { |
4900 | for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) { |
4901 | if (Dependencies[I].DepKind != OMPC_DEPEND_depobj) |
4902 | continue; |
4903 | emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I], |
4904 | DependenciesArray); |
4905 | } |
4906 | } |
4907 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4908 | DependenciesArray, CGF.VoidPtrTy); |
4909 | return std::make_pair(NumOfElements, DependenciesArray); |
4910 | } |
4911 | |
4912 | Address CGOpenMPRuntime::emitDepobjDependClause( |
4913 | CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies, |
4914 | SourceLocation Loc) { |
4915 | if (Dependencies.DepExprs.empty()) |
4916 | return Address::invalid(); |
4917 | // Process list of dependencies. |
4918 | ASTContext &C = CGM.getContext(); |
4919 | Address DependenciesArray = Address::invalid(); |
4920 | unsigned NumDependencies = Dependencies.DepExprs.size(); |
4921 | QualType FlagsTy; |
4922 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
4923 | RecordDecl *KmpDependInfoRD = |
4924 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
4925 | |
4926 | llvm::Value *Size; |
4927 | // Define type kmp_depend_info[<Dependencies.size()>]; |
4928 | // For depobj reserve one extra element to store the number of elements. |
4929 | // It is required to handle depobj(x) update(in) construct. |
4930 | // kmp_depend_info[<Dependencies.size()>] deps; |
4931 | llvm::Value *NumDepsVal; |
4932 | CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy); |
4933 | if (const auto *IE = |
4934 | cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) { |
4935 | NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1); |
4936 | for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) { |
4937 | llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper); |
4938 | Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false); |
4939 | NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz); |
4940 | } |
4941 | Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1), |
4942 | NumDepsVal); |
4943 | CharUnits SizeInBytes = |
4944 | C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align); |
4945 | llvm::Value *RecSize = CGM.getSize(SizeInBytes); |
4946 | Size = CGF.Builder.CreateNUWMul(Size, RecSize); |
4947 | NumDepsVal = |
4948 | CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false); |
4949 | } else { |
4950 | QualType KmpDependInfoArrayTy = C.getConstantArrayType( |
4951 | KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1), |
4952 | nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); |
4953 | CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy); |
4954 | Size = CGM.getSize(Sz.alignTo(Align)); |
4955 | NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies); |
4956 | } |
4957 | // Need to allocate on the dynamic memory. |
4958 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
4959 | // Use default allocator. |
4960 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
4961 | llvm::Value *Args[] = {ThreadID, Size, Allocator}; |
4962 | |
4963 | llvm::Value *Addr = |
4964 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
4965 | CGM.getModule(), OMPRTL___kmpc_alloc), |
4966 | Args, ".dep.arr.addr"); |
4967 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4968 | Addr, CGF.ConvertTypeForMem(KmpDependInfoTy)->getPointerTo()); |
4969 | DependenciesArray = Address(Addr, Align); |
4970 | // Write number of elements in the first element of array for depobj. |
4971 | LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy); |
4972 | // deps[i].base_addr = NumDependencies; |
4973 | LValue BaseAddrLVal = CGF.EmitLValueForField( |
4974 | Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr)); |
4975 | CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal); |
4976 | llvm::PointerUnion<unsigned *, LValue *> Pos; |
4977 | unsigned Idx = 1; |
4978 | LValue PosLVal; |
4979 | if (Dependencies.IteratorExpr) { |
4980 | PosLVal = CGF.MakeAddrLValue( |
4981 | CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"), |
4982 | C.getSizeType()); |
4983 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal, |
4984 | /*IsInit=*/true); |
4985 | Pos = &PosLVal; |
4986 | } else { |
4987 | Pos = &Idx; |
4988 | } |
4989 | emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray); |
4990 | DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
4991 | CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy); |
4992 | return DependenciesArray; |
4993 | } |
4994 | |
4995 | void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal, |
4996 | SourceLocation Loc) { |
4997 | ASTContext &C = CGM.getContext(); |
4998 | QualType FlagsTy; |
4999 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
5000 | LValue Base = CGF.EmitLoadOfPointerLValue( |
5001 | DepobjLVal.getAddress(CGF), |
5002 | C.getPointerType(C.VoidPtrTy).castAs<PointerType>()); |
5003 | QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy); |
5004 | Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5005 | Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy)); |
5006 | llvm::Value *DepObjAddr = CGF.Builder.CreateGEP( |
5007 | Addr.getPointer(), |
5008 | llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true)); |
5009 | DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr, |
5010 | CGF.VoidPtrTy); |
5011 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
5012 | // Use default allocator. |
5013 | llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
5014 | llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator}; |
5015 | |
5016 | // _kmpc_free(gtid, addr, nullptr); |
5017 | (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
5018 | CGM.getModule(), OMPRTL___kmpc_free), |
5019 | Args); |
5020 | } |
5021 | |
5022 | void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal, |
5023 | OpenMPDependClauseKind NewDepKind, |
5024 | SourceLocation Loc) { |
5025 | ASTContext &C = CGM.getContext(); |
5026 | QualType FlagsTy; |
5027 | getDependTypes(C, KmpDependInfoTy, FlagsTy); |
5028 | RecordDecl *KmpDependInfoRD = |
5029 | cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl()); |
5030 | llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy); |
5031 | llvm::Value *NumDeps; |
5032 | LValue Base; |
5033 | std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc); |
5034 | |
5035 | Address Begin = Base.getAddress(CGF); |
5036 | // Cast from pointer to array type to pointer to single element. |
5037 | llvm::Value *End = CGF.Builder.CreateGEP(Begin.getPointer(), NumDeps); |
5038 | // The basic structure here is a while-do loop. |
5039 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body"); |
5040 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done"); |
5041 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
5042 | CGF.EmitBlock(BodyBB); |
5043 | llvm::PHINode *ElementPHI = |
5044 | CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast"); |
5045 | ElementPHI->addIncoming(Begin.getPointer(), EntryBB); |
5046 | Begin = Address(ElementPHI, Begin.getAlignment()); |
5047 | Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(), |
5048 | Base.getTBAAInfo()); |
5049 | // deps[i].flags = NewDepKind; |
5050 | RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind); |
5051 | LValue FlagsLVal = CGF.EmitLValueForField( |
5052 | Base, *std::next(KmpDependInfoRD->field_begin(), Flags)); |
5053 | CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind), |
5054 | FlagsLVal); |
5055 | |
5056 | // Shift the address forward by one element. |
5057 | Address ElementNext = |
5058 | CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext"); |
5059 | ElementPHI->addIncoming(ElementNext.getPointer(), |
5060 | CGF.Builder.GetInsertBlock()); |
5061 | llvm::Value *IsEmpty = |
5062 | CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty"); |
5063 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
5064 | // Done. |
5065 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
5066 | } |
5067 | |
5068 | void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, |
5069 | const OMPExecutableDirective &D, |
5070 | llvm::Function *TaskFunction, |
5071 | QualType SharedsTy, Address Shareds, |
5072 | const Expr *IfCond, |
5073 | const OMPTaskDataTy &Data) { |
5074 | if (!CGF.HaveInsertPoint()) |
5075 | return; |
5076 | |
5077 | TaskResultTy Result = |
5078 | emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); |
5079 | llvm::Value *NewTask = Result.NewTask; |
5080 | llvm::Function *TaskEntry = Result.TaskEntry; |
5081 | llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy; |
5082 | LValue TDBase = Result.TDBase; |
5083 | const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD; |
5084 | // Process list of dependences. |
5085 | Address DependenciesArray = Address::invalid(); |
5086 | llvm::Value *NumOfElements; |
5087 | std::tie(NumOfElements, DependenciesArray) = |
5088 | emitDependClause(CGF, Data.Dependences, Loc); |
5089 | |
5090 | // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() |
5091 | // libcall. |
5092 | // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid, |
5093 | // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list, |
5094 | // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence |
5095 | // list is not empty |
5096 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
5097 | llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); |
5098 | llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask }; |
5099 | llvm::Value *DepTaskArgs[7]; |
5100 | if (!Data.Dependences.empty()) { |
5101 | DepTaskArgs[0] = UpLoc; |
5102 | DepTaskArgs[1] = ThreadID; |
5103 | DepTaskArgs[2] = NewTask; |
5104 | DepTaskArgs[3] = NumOfElements; |
5105 | DepTaskArgs[4] = DependenciesArray.getPointer(); |
5106 | DepTaskArgs[5] = CGF.Builder.getInt32(0); |
5107 | DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
5108 | } |
5109 | auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs, |
5110 | &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) { |
5111 | if (!Data.Tied) { |
5112 | auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId); |
5113 | LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI); |
5114 | CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal); |
5115 | } |
5116 | if (!Data.Dependences.empty()) { |
5117 | CGF.EmitRuntimeCall( |
5118 | OMPBuilder.getOrCreateRuntimeFunction( |
5119 | CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps), |
5120 | DepTaskArgs); |
5121 | } else { |
5122 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
5123 | CGM.getModule(), OMPRTL___kmpc_omp_task), |
5124 | TaskArgs); |
5125 | } |
5126 | // Check if parent region is untied and build return for untied task; |
5127 | if (auto *Region = |
5128 | dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) |
5129 | Region->emitUntiedSwitch(CGF); |
5130 | }; |
5131 | |
5132 | llvm::Value *DepWaitTaskArgs[6]; |
5133 | if (!Data.Dependences.empty()) { |
5134 | DepWaitTaskArgs[0] = UpLoc; |
5135 | DepWaitTaskArgs[1] = ThreadID; |
5136 | DepWaitTaskArgs[2] = NumOfElements; |
5137 | DepWaitTaskArgs[3] = DependenciesArray.getPointer(); |
5138 | DepWaitTaskArgs[4] = CGF.Builder.getInt32(0); |
5139 | DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy); |
5140 | } |
5141 | auto &M = CGM.getModule(); |
5142 | auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy, |
5143 | TaskEntry, &Data, &DepWaitTaskArgs, |
5144 | Loc](CodeGenFunction &CGF, PrePostActionTy &) { |
5145 | CodeGenFunction::RunCleanupsScope LocalScope(CGF); |
5146 | // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid, |
5147 | // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 |
5148 | // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info |
5149 | // is specified. |
5150 | if (!Data.Dependences.empty()) |
5151 | CGF.EmitRuntimeCall( |
5152 | OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps), |
5153 | DepWaitTaskArgs); |
5154 | // Call proxy_task_entry(gtid, new_task); |
5155 | auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy, |
5156 | Loc](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5157 | Action.Enter(CGF); |
5158 | llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy}; |
5159 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry, |
5160 | OutlinedFnArgs); |
5161 | }; |
5162 | |
5163 | // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid, |
5164 | // kmp_task_t *new_task); |
5165 | // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid, |
5166 | // kmp_task_t *new_task); |
5167 | RegionCodeGenTy RCG(CodeGen); |
5168 | CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction( |
5169 | M, OMPRTL___kmpc_omp_task_begin_if0), |
5170 | TaskArgs, |
5171 | OMPBuilder.getOrCreateRuntimeFunction( |
5172 | M, OMPRTL___kmpc_omp_task_complete_if0), |
5173 | TaskArgs); |
5174 | RCG.setAction(Action); |
5175 | RCG(CGF); |
5176 | }; |
5177 | |
5178 | if (IfCond) { |
5179 | emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen); |
5180 | } else { |
5181 | RegionCodeGenTy ThenRCG(ThenCodeGen); |
5182 | ThenRCG(CGF); |
5183 | } |
5184 | } |
5185 | |
5186 | void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, |
5187 | const OMPLoopDirective &D, |
5188 | llvm::Function *TaskFunction, |
5189 | QualType SharedsTy, Address Shareds, |
5190 | const Expr *IfCond, |
5191 | const OMPTaskDataTy &Data) { |
5192 | if (!CGF.HaveInsertPoint()) |
5193 | return; |
5194 | TaskResultTy Result = |
5195 | emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data); |
5196 | // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc() |
5197 | // libcall. |
5198 | // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int |
5199 | // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int |
5200 | // sched, kmp_uint64 grainsize, void *task_dup); |
5201 | llvm::Value *ThreadID = getThreadID(CGF, Loc); |
5202 | llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc); |
5203 | llvm::Value *IfVal; |
5204 | if (IfCond) { |
5205 | IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy, |
5206 | /*isSigned=*/true); |
5207 | } else { |
5208 | IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1); |
5209 | } |
5210 | |
5211 | LValue LBLVal = CGF.EmitLValueForField( |
5212 | Result.TDBase, |
5213 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound)); |
5214 | const auto *LBVar = |
5215 | cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl()); |
5216 | CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF), |
5217 | LBLVal.getQuals(), |
5218 | /*IsInitializer=*/true); |
5219 | LValue UBLVal = CGF.EmitLValueForField( |
5220 | Result.TDBase, |
5221 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound)); |
5222 | const auto *UBVar = |
5223 | cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl()); |
5224 | CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF), |
5225 | UBLVal.getQuals(), |
5226 | /*IsInitializer=*/true); |
5227 | LValue StLVal = CGF.EmitLValueForField( |
5228 | Result.TDBase, |
5229 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride)); |
5230 | const auto *StVar = |
5231 | cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl()); |
5232 | CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF), |
5233 | StLVal.getQuals(), |
5234 | /*IsInitializer=*/true); |
5235 | // Store reductions address. |
5236 | LValue RedLVal = CGF.EmitLValueForField( |
5237 | Result.TDBase, |
5238 | *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions)); |
5239 | if (Data.Reductions) { |
5240 | CGF.EmitStoreOfScalar(Data.Reductions, RedLVal); |
5241 | } else { |
5242 | CGF.EmitNullInitialization(RedLVal.getAddress(CGF), |
5243 | CGF.getContext().VoidPtrTy); |
5244 | } |
5245 | enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 }; |
5246 | llvm::Value *TaskArgs[] = { |
5247 | UpLoc, |
5248 | ThreadID, |
5249 | Result.NewTask, |
5250 | IfVal, |
5251 | LBLVal.getPointer(CGF), |
5252 | UBLVal.getPointer(CGF), |
5253 | CGF.EmitLoadOfScalar(StLVal, Loc), |
5254 | llvm::ConstantInt::getSigned( |
5255 | CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler |
5256 | llvm::ConstantInt::getSigned( |
5257 | CGF.IntTy, Data.Schedule.getPointer() |
5258 | ? Data.Schedule.getInt() ? NumTasks : Grainsize |
5259 | : NoSchedule), |
5260 | Data.Schedule.getPointer() |
5261 | ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty, |
5262 | /*isSigned=*/false) |
5263 | : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0), |
5264 | Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5265 | Result.TaskDupFn, CGF.VoidPtrTy) |
5266 | : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)}; |
5267 | CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( |
5268 | CGM.getModule(), OMPRTL___kmpc_taskloop), |
5269 | TaskArgs); |
5270 | } |
5271 | |
5272 | /// Emit reduction operation for each element of array (required for |
5273 | /// array sections) LHS op = RHS. |
5274 | /// \param Type Type of array. |
5275 | /// \param LHSVar Variable on the left side of the reduction operation |
5276 | /// (references element of array in original variable). |
5277 | /// \param RHSVar Variable on the right side of the reduction operation |
5278 | /// (references element of array in original variable). |
5279 | /// \param RedOpGen Generator of reduction operation with use of LHSVar and |
5280 | /// RHSVar. |
5281 | static void EmitOMPAggregateReduction( |
5282 | CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, |
5283 | const VarDecl *RHSVar, |
5284 | const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *, |
5285 | const Expr *, const Expr *)> &RedOpGen, |
5286 | const Expr *XExpr = nullptr, const Expr *EExpr = nullptr, |
5287 | const Expr *UpExpr = nullptr) { |
5288 | // Perform element-by-element initialization. |
5289 | QualType ElementTy; |
5290 | Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar); |
5291 | Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar); |
5292 | |
5293 | // Drill down to the base element type on both arrays. |
5294 | const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe(); |
5295 | llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr); |
5296 | |
5297 | llvm::Value *RHSBegin = RHSAddr.getPointer(); |
5298 | llvm::Value *LHSBegin = LHSAddr.getPointer(); |
5299 | // Cast from pointer to array type to pointer to single element. |
5300 | llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements); |
5301 | // The basic structure here is a while-do loop. |
5302 | llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body"); |
5303 | llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done"); |
5304 | llvm::Value *IsEmpty = |
5305 | CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty"); |
5306 | CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB); |
5307 | |
5308 | // Enter the loop body, making that address the current address. |
5309 | llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock(); |
5310 | CGF.EmitBlock(BodyBB); |
5311 | |
5312 | CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy); |
5313 | |
5314 | llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI( |
5315 | RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast"); |
5316 | RHSElementPHI->addIncoming(RHSBegin, EntryBB); |
5317 | Address RHSElementCurrent = |
5318 | Address(RHSElementPHI, |
5319 | RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
5320 | |
5321 | llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI( |
5322 | LHSBegin->getType(), 2, "omp.arraycpy.destElementPast"); |
5323 | LHSElementPHI->addIncoming(LHSBegin, EntryBB); |
5324 | Address LHSElementCurrent = |
5325 | Address(LHSElementPHI, |
5326 | LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize)); |
5327 | |
5328 | // Emit copy. |
5329 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
5330 | Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; }); |
5331 | Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; }); |
5332 | Scope.Privatize(); |
5333 | RedOpGen(CGF, XExpr, EExpr, UpExpr); |
5334 | Scope.ForceCleanup(); |
5335 | |
5336 | // Shift the address forward by one element. |
5337 | llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32( |
5338 | LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element"); |
5339 | llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32( |
5340 | RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element"); |
5341 | // Check whether we've reached the end. |
5342 | llvm::Value *Done = |
5343 | CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done"); |
5344 | CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB); |
5345 | LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock()); |
5346 | RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock()); |
5347 | |
5348 | // Done. |
5349 | CGF.EmitBlock(DoneBB, /*IsFinished=*/true); |
5350 | } |
5351 | |
5352 | /// Emit reduction combiner \p ReductionOp into \p CGF. If the combiner is a simple expression emit it as |
5353 | /// is, otherwise (a CallExpr whose OpaqueValueExpr callee refers to an OMPDeclareReductionDecl) bind that opaque callee to the |
5354 | /// first function returned by getUserDefinedReduction() and emit the call. The expression result is ignored in both cases. |
5355 | static void emitReductionCombiner(CodeGenFunction &CGF, |
5356 | const Expr *ReductionOp) { |
5357 | if (const auto *CE = dyn_cast<CallExpr>(ReductionOp)) |
5358 | if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee())) |
5359 | if (const auto *DRE = |
5360 | dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts())) |
5361 | if (const auto *DRD = |
5362 | dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) { |
5363 | std::pair<llvm::Function *, llvm::Function *> Reduction = |
5364 | CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD); |
5365 | RValue Func = RValue::get(Reduction.first); |
5366 | CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func); |
5367 | CGF.EmitIgnoredExpr(ReductionOp); |
5368 | return; |
5369 | } |
5370 | CGF.EmitIgnoredExpr(ReductionOp); |
5371 | } |
5372 | |
5373 | llvm::Function *CGOpenMPRuntime::emitReductionFunction( |
5374 | SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates, |
5375 | ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs, |
5376 | ArrayRef<const Expr *> ReductionOps) { |
5377 | ASTContext &C = CGM.getContext(); |
5378 | |
5379 | // Create and return an internal-linkage helper with the signature: void reduction_func(void *LHSArg, void *RHSArg); |
5380 | FunctionArgList Args; |
5381 | ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
5382 | ImplicitParamDecl::Other); |
5383 | ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy, |
5384 | ImplicitParamDecl::Other); |
5385 | Args.push_back(&LHSArg); |
5386 | Args.push_back(&RHSArg); |
5387 | const auto &CGFI = |
5388 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args); |
5389 | std::string Name = getName({"omp", "reduction", "reduction_func"}); |
5390 | auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI), |
5391 | llvm::GlobalValue::InternalLinkage, Name, |
5392 | &CGM.getModule()); |
5393 | CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI); |
5394 | Fn->setDoesNotRecurse(); |
5395 | CodeGenFunction CGF(CGM); |
5396 | CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc); |
5397 | |
5398 | // Dst = (void*[n])(LHSArg); i.e. the loaded LHSArg pointer cast to ArgsType. |
5399 | // Src = (void*[n])(RHSArg); i.e. the loaded RHSArg pointer cast to ArgsType. |
5400 | Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5401 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)), |
5402 | ArgsType), CGF.getPointerAlign()); |
5403 | Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5404 | CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)), |
5405 | ArgsType), CGF.getPointerAlign()); |
5406 | |
5407 | // Register a private address for every LHS/RHS variable, obtained via emitAddrOfVarFromArray(CGF, <array>, Idx, <var>), so that |
5408 | // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]); |
5409 | // can be emitted below. Idx may run ahead of I: a variably modified private consumes one extra array slot. |
5410 | CodeGenFunction::OMPPrivateScope Scope(CGF); |
5411 | auto IPriv = Privates.begin(); |
5412 | unsigned Idx = 0; |
5413 | for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) { |
5414 | const auto *RHSVar = |
5415 | cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()); |
5416 | Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() { |
5417 | return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar); |
5418 | }); |
5419 | const auto *LHSVar = |
5420 | cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()); |
5421 | Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() { |
5422 | return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar); |
5423 | }); |
5424 | QualType PrivTy = (*IPriv)->getType(); |
5425 | if (PrivTy->isVariablyModifiedType()) { |
5426 | // Get array size and emit VLA type: the next slot of the LHS array holds the size encoded as a pointer; it is converted with ptrtoint and bound to the VLA size expression. |
5427 | ++Idx; |
5428 | Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx); |
5429 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem); |
5430 | const VariableArrayType *VLA = |
5431 | CGF.getContext().getAsVariableArrayType(PrivTy); |
5432 | const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr()); |
5433 | CodeGenFunction::OpaqueValueMapping OpaqueMap( |
5434 | CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy))); |
5435 | CGF.EmitVariablyModifiedType(PrivTy); |
5436 | } |
5437 | } |
5438 | Scope.Privatize(); |
5439 | IPriv = Privates.begin(); |
5440 | auto ILHS = LHSExprs.begin(); |
5441 | auto IRHS = RHSExprs.begin(); |
5442 | for (const Expr *E : ReductionOps) { |
5443 | if ((*IPriv)->getType()->isArrayType()) { |
5444 | // Emit reduction for array section: the combiner is run through EmitOMPAggregateReduction on the LHS/RHS variables. |
5445 | const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
5446 | const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
5447 | EmitOMPAggregateReduction( |
5448 | CGF, (*IPriv)->getType(), LHSVar, RHSVar, |
5449 | [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { |
5450 | emitReductionCombiner(CGF, E); |
5451 | }); |
5452 | } else { |
5453 | // Emit reduction for array subscript or single variable: the combiner is emitted once, directly. |
5454 | emitReductionCombiner(CGF, E); |
5455 | } |
5456 | ++IPriv; |
5457 | ++ILHS; |
5458 | ++IRHS; |
5459 | } |
5460 | Scope.ForceCleanup(); |
5461 | CGF.FinishFunction(); |
5462 | return Fn; |
5463 | } |
5464 | |
5465 | void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF, |
5466 | const Expr *ReductionOp, |
5467 | const Expr *PrivateRef, |
5468 | const DeclRefExpr *LHS, |
5469 | const DeclRefExpr *RHS) { |
5470 | if (PrivateRef->getType()->isArrayType()) { |
5471 | // Emit reduction for array section: PrivateRef has array type, so ReductionOp is run through EmitOMPAggregateReduction on the variables named by LHS and RHS. |
5472 | const auto *LHSVar = cast<VarDecl>(LHS->getDecl()); |
5473 | const auto *RHSVar = cast<VarDecl>(RHS->getDecl()); |
5474 | EmitOMPAggregateReduction( |
5475 | CGF, PrivateRef->getType(), LHSVar, RHSVar, |
5476 | [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) { |
5477 | emitReductionCombiner(CGF, ReductionOp); |
5478 | }); |
5479 | } else { |
5480 | // Emit reduction for array subscript or single variable: ReductionOp is emitted once, directly; LHS and RHS are not consulted. |
5481 | emitReductionCombiner(CGF, ReductionOp); |
5482 | } |
5483 | } |
5484 | |
5485 | void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, |
5486 | ArrayRef<const Expr *> Privates, |
5487 | ArrayRef<const Expr *> LHSExprs, |
5488 | ArrayRef<const Expr *> RHSExprs, |
5489 | ArrayRef<const Expr *> ReductionOps, |
5490 | ReductionOptionsTy Options) { |
5491 | if (!CGF.HaveInsertPoint()) |
5492 | return; |
5493 | |
5494 | bool WithNowait = Options.WithNowait; |
5495 | bool SimpleReduction = Options.SimpleReduction; |
5496 | |
5497 | // Next code should be emitted for reduction (nothing is emitted when CGF has no insert point): |
5498 | // |
5499 | // static kmp_critical_name lock = { 0 }; |
5500 | // |
5501 | // void reduce_func(void *lhs[<n>], void *rhs[<n>]) { |
5502 | // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]); |
5503 | // ... |
5504 | // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1], |
5505 | // *(Type<n>-1*)rhs[<n>-1]); |
5506 | // } |
5507 | // |
5508 | // ... |
5509 | // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]}; |
5510 | // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), |
5511 | // RedList, reduce_func, &<lock>)) { |
5512 | // case 1: |
5513 | // ... |
5514 | // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); |
5515 | // ... |
5516 | // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); |
5517 | // break; |
5518 | // case 2: |
5519 | // ... |
5520 | // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); |
5521 | // ... |
5522 | // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);] |
5523 | // break; |
5524 | // default:; |
5525 | // } |
5526 | // |
5527 | // if SimpleReduction is true, only the next code is generated (no reduction list, runtime call or switch): |
5528 | // ... |
5529 | // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); |
5530 | // ... |
5531 | |
5532 | ASTContext &C = CGM.getContext(); |
5533 | |
5534 | if (SimpleReduction) { |
5535 | CodeGenFunction::RunCleanupsScope Scope(CGF); |
5536 | auto IPriv = Privates.begin(); |
5537 | auto ILHS = LHSExprs.begin(); |
5538 | auto IRHS = RHSExprs.begin(); |
5539 | for (const Expr *E : ReductionOps) { |
5540 | emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), |
5541 | cast<DeclRefExpr>(*IRHS)); |
5542 | ++IPriv; |
5543 | ++ILHS; |
5544 | ++IRHS; |
5545 | } |
5546 | return; |
5547 | } |
5548 | |
5549 | // 1. Build a list of reduction variables (one slot per RHS expression, plus one per variably modified private). |
5550 | // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]}; |
5551 | auto Size = RHSExprs.size(); |
5552 | for (const Expr *E : Privates) { |
5553 | if (E->getType()->isVariablyModifiedType()) |
5554 | // Reserve place for array size (one additional slot in the list). |
5555 | ++Size; |
5556 | } |
5557 | llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size); |
5558 | QualType ReductionArrayTy = |
5559 | C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal, |
5560 | /*IndexTypeQuals=*/0); |
5561 | Address ReductionList = |
5562 | CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list"); |
5563 | auto IPriv = Privates.begin(); |
5564 | unsigned Idx = 0; |
5565 | for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) { |
5566 | Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
5567 | CGF.Builder.CreateStore( |
5568 | CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5569 | CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy), |
5570 | Elem); |
5571 | if ((*IPriv)->getType()->isVariablyModifiedType()) { |
5572 | // Store array size: the VLA element count, cast to size_t and encoded as a void* in the following slot. |
5573 | ++Idx; |
5574 | Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx); |
5575 | llvm::Value *Size = CGF.Builder.CreateIntCast( |
5576 | CGF.getVLASize( |
5577 | CGF.getContext().getAsVariableArrayType((*IPriv)->getType())) |
5578 | .NumElts, |
5579 | CGF.SizeTy, /*isSigned=*/false); |
5580 | CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy), |
5581 | Elem); |
5582 | } |
5583 | } |
5584 | |
5585 | // 2. Emit reduce_func(), whose arguments are cast to a pointer to the reduction list type. |
5586 | llvm::Function *ReductionFn = emitReductionFunction( |
5587 | Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates, |
5588 | LHSExprs, RHSExprs, ReductionOps); |
5589 | |
5590 | // 3. Create static kmp_critical_name lock = { 0 }; |
5591 | std::string Name = getName({"reduction"}); |
5592 | llvm::Value *Lock = getCriticalRegionLock(Name); |
5593 | |
5594 | // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList), |
5595 | // RedList, reduce_func, &<lock>); here <n> is RHSExprs.size(), not the slot count of RedList. |
5596 | llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE); |
5597 | llvm::Value *ThreadId = getThreadID(CGF, Loc); |
5598 | llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy); |
5599 | llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
5600 | ReductionList.getPointer(), CGF.VoidPtrTy); |
5601 | llvm::Value *Args[] = { |
5602 | IdentTLoc, // ident_t *<loc> |
5603 | ThreadId, // i32 <gtid> |
5604 | CGF.Builder.getInt32(RHSExprs.size()), // i32 <n> |
5605 | ReductionArrayTySize, // size_type sizeof(RedList) |
5606 | RL, // void *RedList |
5607 | ReductionFn, // void (*) (void *, void *) <reduce_func> |
5608 | Lock // kmp_critical_name *&<lock> |
5609 | }; |
5610 | llvm::Value *Res = CGF.EmitRuntimeCall( |
5611 | OMPBuilder.getOrCreateRuntimeFunction( |
5612 | CGM.getModule(), |
5613 | WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce), |
5614 | Args); |
5615 | |
5616 | // 5. Build switch(res); any value other than 1 or 2 goes to the default block. |
5617 | llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default"); |
5618 | llvm::SwitchInst *SwInst = |
5619 | CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2); |
5620 | |
5621 | // 6. Build case 1: |
5622 | // ... |
5623 | // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]); |
5624 | // ... |
5625 | // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); |
5626 | // break; |
5627 | llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1"); |
5628 | SwInst->addCase(CGF.Builder.getInt32(1), Case1BB); |
5629 | CGF.EmitBlock(Case1BB); |
5630 | |
5631 | // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>); it is attached as the exit callee of the region action below. |
5632 | llvm::Value *EndArgs[] = { |
5633 | IdentTLoc, // ident_t *<loc> |
5634 | ThreadId, // i32 <gtid> |
5635 | Lock // kmp_critical_name *&<lock> |
5636 | }; |
5637 | auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps]( |
5638 | CodeGenFunction &CGF, PrePostActionTy &Action) { |
5639 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
5640 | auto IPriv = Privates.begin(); |
5641 | auto ILHS = LHSExprs.begin(); |
5642 | auto IRHS = RHSExprs.begin(); |
5643 | for (const Expr *E : ReductionOps) { |
5644 | RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS), |
5645 | cast<DeclRefExpr>(*IRHS)); |
5646 | ++IPriv; |
5647 | ++ILHS; |
5648 | ++IRHS; |
5649 | } |
5650 | }; |
5651 | RegionCodeGenTy RCG(CodeGen); |
5652 | CommonActionTy Action( |
5653 | nullptr, llvm::None, |
5654 | OMPBuilder.getOrCreateRuntimeFunction( |
5655 | CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait |
5656 | : OMPRTL___kmpc_end_reduce), |
5657 | EndArgs); |
5658 | RCG.setAction(Action); |
5659 | RCG(CGF); |
5660 | |
5661 | CGF.EmitBranch(DefaultBB); |
5662 | |
5663 | // 7. Build case 2: |
5664 | // ... |
5665 | // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i])); |
5666 | // ... |
5667 | // break; |
5668 | llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2"); |
5669 | SwInst->addCase(CGF.Builder.getInt32(2), Case2BB); |
5670 | CGF.EmitBlock(Case2BB); |
5671 | |
5672 | auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps]( |
5673 | CodeGenFunction &CGF, PrePostActionTy &Action) { |
5674 | auto ILHS = LHSExprs.begin(); |
5675 | auto IRHS = RHSExprs.begin(); |
5676 | auto IPriv = Privates.begin(); |
5677 | for (const Expr *E : ReductionOps) { |
5678 | const Expr *XExpr = nullptr; |
5679 | const Expr *EExpr = nullptr; |
5680 | const Expr *UpExpr = nullptr; |
5681 | BinaryOperatorKind BO = BO_Comma; |
5682 | if (const auto *BO = dyn_cast<BinaryOperator>(E)) { |
5683 | if (BO->getOpcode() == BO_Assign) { |
5684 | XExpr = BO->getLHS(); |
5685 | UpExpr = BO->getRHS(); |
5686 | } |
5687 | } |
5688 | // Try to emit update expression as a simple atomic: look for a binary operator on the right-hand side of the assignment. |
5689 | const Expr *RHSExpr = UpExpr; |
5690 | if (RHSExpr) { |
5691 | // Analyze RHS part of the whole expression. |
5692 | if (const auto *ACO = dyn_cast<AbstractConditionalOperator>( |
5693 | RHSExpr->IgnoreParenImpCasts())) { |
5694 | // If this is a conditional operator, analyze its condition for |
5695 | // min/max reduction operator. |
5696 | RHSExpr = ACO->getCond(); |
5697 | } |
5698 | if (const auto *BORHS = |
5699 | dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) { |
5700 | EExpr = BORHS->getRHS(); |
5701 | BO = BORHS->getOpcode(); |
5702 | } |
5703 | } |
5704 | if (XExpr) { |
5705 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
5706 | auto &&AtomicRedGen = [BO, VD, |
5707 | Loc](CodeGenFunction &CGF, const Expr *XExpr, |
5708 | const Expr *EExpr, const Expr *UpExpr) { |
5709 | LValue X = CGF.EmitLValue(XExpr); |
5710 | RValue E; |
5711 | if (EExpr) |
5712 | E = CGF.EmitAnyExpr(EExpr); |
5713 | CGF.EmitOMPAtomicSimpleUpdateExpr( |
5714 | X, E, BO, /*IsXLHSInRHSPart=*/true, |
5715 | llvm::AtomicOrdering::Monotonic, Loc, |
5716 | [&CGF, UpExpr, VD, Loc](RValue XRValue) { |
5717 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
5718 | PrivateScope.addPrivate( |
5719 | VD, [&CGF, VD, XRValue, Loc]() { |
5720 | Address LHSTemp = CGF.CreateMemTemp(VD->getType()); |
5721 | CGF.emitOMPSimpleStore( |
5722 | CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, |
5723 | VD->getType().getNonReferenceType(), Loc); |
5724 | return LHSTemp; |
5725 | }); |
5726 | (void)PrivateScope.Privatize(); |
5727 | return CGF.EmitAnyExpr(UpExpr); |
5728 | }); |
5729 | }; |
5730 | if ((*IPriv)->getType()->isArrayType()) { |
5731 | // Emit atomic reduction for array section. |
5732 | const auto *RHSVar = |
5733 | cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
5734 | EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar, |
5735 | AtomicRedGen, XExpr, EExpr, UpExpr); |
5736 | } else { |
5737 | // Emit atomic reduction for array subscript or single variable. |
5738 | AtomicRedGen(CGF, XExpr, EExpr, UpExpr); |
5739 | } |
5740 | } else { |
5741 | // Emit as a critical region: taken when the reduction op is not a BO_Assign binary operator, so XExpr was never set. |
5742 | auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *, |
5743 | const Expr *, const Expr *) { |
5744 | CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime(); |
5745 | std::string Name = RT.getName({"atomic_reduction"}); |
5746 | RT.emitCriticalRegion( |
5747 | CGF, Name, |
5748 | [=](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5749 | Action.Enter(CGF); |
5750 | emitReductionCombiner(CGF, E); |
5751 | }, |
5752 | Loc); |
5753 | }; |
5754 | if ((*IPriv)->getType()->isArrayType()) { |
5755 | const auto *LHSVar = |
5756 | cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl()); |
5757 | const auto *RHSVar = |
5758 | cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl()); |
5759 | EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar, |
5760 | CritRedGen); |
5761 | } else { |
5762 | CritRedGen(CGF, nullptr, nullptr, nullptr); |
5763 | } |
5764 | } |
5765 | ++ILHS; |
5766 | ++IRHS; |
5767 | ++IPriv; |
5768 | } |
5769 | }; |
5770 | RegionCodeGenTy AtomicRCG(AtomicCodeGen); |
5771 | if (!WithNowait) { |
5772 | // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>); only in the non-nowait case, the nowait case runs the region with no action. |
5773 | llvm::Value *EndArgs[] = { |
5774 | IdentTLoc, // ident_t *<loc> |
5775 | ThreadId, // i32 <gtid> |
5776 | Lock // kmp_critical_name *&<lock> |
5777 | }; |
5778 | CommonActionTy Action(nullptr, llvm::None, |
5779 | OMPBuilder.getOrCreateRuntimeFunction( |
5780 | CGM.getModule(), OMPRTL___kmpc_end_reduce), |
5781 | EndArgs); |
5782 | AtomicRCG.setAction(Action); |
5783 | AtomicRCG(CGF); |
5784 | } else { |
5785 | AtomicRCG(CGF); |
5786 | } |
5787 | |
5788 | CGF.EmitBranch(DefaultBB); |
5789 | CGF.EmitBlock(DefaultBB, /*IsFinished=*/true); |
5790 | } |
5791 | |
5792 | /// Generates unique name for artificial threadprivate variables, derived from the canonical declaration of the variable that \p Ref refers to. |
5793 | /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>" (the plain name is used instead of the mangled one for local variables and parameters). NOTE(review): no "." is written explicitly here; presumably getName() supplies it - confirm. |
5794 | static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, |
5795 | const Expr *Ref) { |
5796 | SmallString<256> Buffer; |
5797 | llvm::raw_svector_ostream Out(Buffer); |
5798 | const clang::DeclRefExpr *DE; |
5799 | const VarDecl *D = ::getBaseDecl(Ref, DE); |
5800 | if (!D) |
5801 | D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl()); |
5802 | D = D->getCanonicalDecl(); |
5803 | std::string Name = CGM.getOpenMPRuntime().getName( |
5804 | {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)}); |
5805 | Out << Prefix << Name << "_" |
5806 | << D->getCanonicalDecl()->getBeginLoc().getRawEncoding(); |
5807 | return std::string(Out.str()); |
5808 | } |
5809 | |
5810 | /// Emits reduction initializer function: |
5811 | /// \code |
5812 | /// void @.red_init(void* %arg, void* %orig) { |
5813 | /// %0 = bitcast void* %arg to <type>* |
5814 | /// store <type> <init>, <type>* %0 |
5815 | /// ret void |
5816 | /// } |
5817 | /// \endcode |
5818 | static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM, |
5819 | SourceLocation Loc, |
5820 | ReductionCodeGen &RCG, unsigned N) { |
5821 | ASTContext &C = CGM.getContext(); |
5822 |