clang  5.0.0
CGOpenMPRuntime.cpp
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This provides a class for OpenMP runtime code generation.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGCleanup.h"
16 #include "CGOpenMPRuntime.h"
17 #include "CodeGenFunction.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/Bitcode/BitcodeReader.h"
23 #include "llvm/IR/CallSite.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// \brief Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38  /// \brief Kinds of OpenMP regions used in codegen.
39  enum CGOpenMPRegionKind {
40  /// \brief Region with outlined function for standalone 'parallel'
41  /// directive.
42  ParallelOutlinedRegion,
43  /// \brief Region with outlined function for standalone 'task' directive.
44  TaskOutlinedRegion,
45  /// \brief Region for constructs that do not require function outlining,
46  /// like 'for', 'sections', 'atomic' etc. directives.
47  InlinedRegion,
48  /// \brief Region with outlined function for standalone 'target' directive.
49  TargetRegion,
50  };
51 
52  CGOpenMPRegionInfo(const CapturedStmt &CS,
53  const CGOpenMPRegionKind RegionKind,
54  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
55  bool HasCancel)
56  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
60  const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
61  bool HasCancel)
62  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63  Kind(Kind), HasCancel(HasCancel) {}
64 
65  /// \brief Get a variable or parameter for storing global thread id
66  /// inside OpenMP construct.
67  virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69  /// \brief Emit the captured statement body.
70  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72  /// \brief Get an LValue for the current ThreadID variable.
73  /// \return LValue for thread id variable. This LValue always has type int32*.
74  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82  bool hasCancel() const { return HasCancel; }
83 
84  static bool classof(const CGCapturedStmtInfo *Info) {
85  return Info->getKind() == CR_OpenMP;
86  }
87 
88  ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91  CGOpenMPRegionKind RegionKind;
92  RegionCodeGenTy CodeGen;
93  OpenMPDirectiveKind Kind;
94  bool HasCancel;
95 };
96 
97 /// \brief API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101  const RegionCodeGenTy &CodeGen,
102  OpenMPDirectiveKind Kind, bool HasCancel,
103  StringRef HelperName)
104  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105  HasCancel),
106  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108  }
109 
110  /// \brief Get a variable or parameter for storing global thread id
111  /// inside OpenMP construct.
112  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114  /// \brief Get the name of the capture helper.
115  StringRef getHelperName() const override { return HelperName; }
116 
117  static bool classof(const CGCapturedStmtInfo *Info) {
118  return CGOpenMPRegionInfo::classof(Info) &&
119  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120  ParallelOutlinedRegion;
121  }
122 
123 private:
124  /// \brief A variable or parameter storing global thread id for OpenMP
125  /// constructs.
126  const VarDecl *ThreadIDVar;
127  StringRef HelperName;
128 };
129 
130 /// \brief API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133  class UntiedTaskActionTy final : public PrePostActionTy {
134  bool Untied;
135  const VarDecl *PartIDVar;
136  const RegionCodeGenTy UntiedCodeGen;
137  llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139  public:
140  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141  const RegionCodeGenTy &UntiedCodeGen)
142  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143  void Enter(CodeGenFunction &CGF) override {
144  if (Untied) {
145  // Emit task switching point.
146  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
147  CGF.GetAddrOfLocalVar(PartIDVar),
148  PartIDVar->getType()->castAs<PointerType>());
149  auto *Res = CGF.EmitLoadOfScalar(PartIdLVal, SourceLocation());
150  auto *DoneBB = CGF.createBasicBlock(".untied.done.");
151  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
152  CGF.EmitBlock(DoneBB);
153  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
154  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
155  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
156  CGF.Builder.GetInsertBlock());
157  emitUntiedSwitch(CGF);
158  }
159  }
160  void emitUntiedSwitch(CodeGenFunction &CGF) const {
161  if (Untied) {
162  auto PartIdLVal = CGF.EmitLoadOfPointerLValue(
163  CGF.GetAddrOfLocalVar(PartIDVar),
164  PartIDVar->getType()->castAs<PointerType>());
165  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
166  PartIdLVal);
167  UntiedCodeGen(CGF);
168  CodeGenFunction::JumpDest CurPoint =
169  CGF.getJumpDestInCurrentScope(".untied.next.");
170  CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
171  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
172  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
173  CGF.Builder.GetInsertBlock());
174  CGF.EmitBranchThroughCleanup(CurPoint);
175  CGF.EmitBlock(CurPoint.getBlock());
176  }
177  }
178  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
179  };
180  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
181  const VarDecl *ThreadIDVar,
182  const RegionCodeGenTy &CodeGen,
183  OpenMPDirectiveKind Kind, bool HasCancel,
184  const UntiedTaskActionTy &Action)
185  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
186  ThreadIDVar(ThreadIDVar), Action(Action) {
187  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
188  }
189 
190  /// \brief Get a variable or parameter for storing global thread id
191  /// inside OpenMP construct.
192  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
193 
194  /// \brief Get an LValue for the current ThreadID variable.
195  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
196 
197  /// \brief Get the name of the capture helper.
198  StringRef getHelperName() const override { return ".omp_outlined."; }
199 
200  void emitUntiedSwitch(CodeGenFunction &CGF) override {
201  Action.emitUntiedSwitch(CGF);
202  }
203 
204  static bool classof(const CGCapturedStmtInfo *Info) {
205  return CGOpenMPRegionInfo::classof(Info) &&
206  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
207  TaskOutlinedRegion;
208  }
209 
210 private:
211  /// \brief A variable or parameter storing global thread id for OpenMP
212  /// constructs.
213  const VarDecl *ThreadIDVar;
214  /// Action for emitting code for untied tasks.
215  const UntiedTaskActionTy &Action;
216 };
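// Illustrative sketch (not part of the original source): for an untied task,
// the UntiedTaskActionTy above shapes the outlined body roughly as
//
//   switch (*part_id) {           // created in Enter()
//   case 0: goto .untied.jmp.0;   // first switching point
//   case 1: goto .untied.jmp.1;   // added by a later emitUntiedSwitch()
//   default: goto .untied.done.;
//   }
//
// and each emitUntiedSwitch() stores the next case number into *part_id before
// branching out, so a re-entered task resumes at the following switching point.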
217 
218 /// \brief API for inlined captured statement code generation in OpenMP
219 /// constructs.
220 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
221 public:
222  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
223  const RegionCodeGenTy &CodeGen,
224  OpenMPDirectiveKind Kind, bool HasCancel)
225  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
226  OldCSI(OldCSI),
227  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
228 
229  // \brief Retrieve the value of the context parameter.
230  llvm::Value *getContextValue() const override {
231  if (OuterRegionInfo)
232  return OuterRegionInfo->getContextValue();
233  llvm_unreachable("No context value for inlined OpenMP region");
234  }
235 
236  void setContextValue(llvm::Value *V) override {
237  if (OuterRegionInfo) {
238  OuterRegionInfo->setContextValue(V);
239  return;
240  }
241  llvm_unreachable("No context value for inlined OpenMP region");
242  }
243 
244  /// \brief Lookup the captured field decl for a variable.
245  const FieldDecl *lookup(const VarDecl *VD) const override {
246  if (OuterRegionInfo)
247  return OuterRegionInfo->lookup(VD);
248  // If there is no outer outlined region, there is no need to look up in a
249  // list of captured variables; we can use the original one.
250  return nullptr;
251  }
252 
253  FieldDecl *getThisFieldDecl() const override {
254  if (OuterRegionInfo)
255  return OuterRegionInfo->getThisFieldDecl();
256  return nullptr;
257  }
258 
259  /// \brief Get a variable or parameter for storing global thread id
260  /// inside OpenMP construct.
261  const VarDecl *getThreadIDVariable() const override {
262  if (OuterRegionInfo)
263  return OuterRegionInfo->getThreadIDVariable();
264  return nullptr;
265  }
266 
267  /// \brief Get the name of the capture helper.
268  StringRef getHelperName() const override {
269  if (auto *OuterRegionInfo = getOldCSI())
270  return OuterRegionInfo->getHelperName();
271  llvm_unreachable("No helper name for inlined OpenMP construct");
272  }
273 
274  void emitUntiedSwitch(CodeGenFunction &CGF) override {
275  if (OuterRegionInfo)
276  OuterRegionInfo->emitUntiedSwitch(CGF);
277  }
278 
279  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
280 
281  static bool classof(const CGCapturedStmtInfo *Info) {
282  return CGOpenMPRegionInfo::classof(Info) &&
283  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
284  }
285 
286  ~CGOpenMPInlinedRegionInfo() override = default;
287 
288 private:
289  /// \brief CodeGen info about outer OpenMP region.
290  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
291  CGOpenMPRegionInfo *OuterRegionInfo;
292 };
293 
294 /// \brief API for captured statement code generation in OpenMP target
295 /// constructs. For these captures, implicit parameters are used instead of the
296 /// captured fields. The name of the target region has to be unique in a given
297 /// application so it is provided by the client, because only the client has
298 /// the information to generate that.
299 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
300 public:
301  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
302  const RegionCodeGenTy &CodeGen, StringRef HelperName)
303  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
304  /*HasCancel=*/false),
305  HelperName(HelperName) {}
306 
307  /// \brief This is unused for target regions because each starts executing
308  /// with a single thread.
309  const VarDecl *getThreadIDVariable() const override { return nullptr; }
310 
311  /// \brief Get the name of the capture helper.
312  StringRef getHelperName() const override { return HelperName; }
313 
314  static bool classof(const CGCapturedStmtInfo *Info) {
315  return CGOpenMPRegionInfo::classof(Info) &&
316  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
317  }
318 
319 private:
320  StringRef HelperName;
321 };
322 
323 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
324  llvm_unreachable("No codegen for expressions");
325 }
326 /// \brief API for generation of expressions captured in an innermost OpenMP
327 /// region.
328 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
329 public:
330  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
331  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
332  OMPD_unknown,
333  /*HasCancel=*/false),
334  PrivScope(CGF) {
335  // Make sure the globals captured in the provided statement are local by
336  // using the privatization logic. We assume the same variable is not
337  // captured more than once.
338  for (auto &C : CS.captures()) {
339  if (!C.capturesVariable() && !C.capturesVariableByCopy())
340  continue;
341 
342  const VarDecl *VD = C.getCapturedVar();
343  if (VD->isLocalVarDeclOrParm())
344  continue;
345 
346  DeclRefExpr DRE(const_cast<VarDecl *>(VD),
347  /*RefersToEnclosingVariableOrCapture=*/false,
348  VD->getType().getNonReferenceType(), VK_LValue,
349  SourceLocation());
350  PrivScope.addPrivate(VD, [&CGF, &DRE]() -> Address {
351  return CGF.EmitLValue(&DRE).getAddress();
352  });
353  }
354  (void)PrivScope.Privatize();
355  }
356 
357  /// \brief Lookup the captured field decl for a variable.
358  const FieldDecl *lookup(const VarDecl *VD) const override {
359  if (auto *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
360  return FD;
361  return nullptr;
362  }
363 
364  /// \brief Emit the captured statement body.
365  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
366  llvm_unreachable("No body for expressions");
367  }
368 
369  /// \brief Get a variable or parameter for storing global thread id
370  /// inside OpenMP construct.
371  const VarDecl *getThreadIDVariable() const override {
372  llvm_unreachable("No thread id for expressions");
373  }
374 
375  /// \brief Get the name of the capture helper.
376  StringRef getHelperName() const override {
377  llvm_unreachable("No helper name for expressions");
378  }
379 
380  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
381 
382 private:
383  /// Private scope to capture global variables.
384  CodeGenFunction::OMPPrivateScope PrivScope;
385 };
386 
387 /// \brief RAII for emitting code of OpenMP constructs.
388 class InlinedOpenMPRegionRAII {
389  CodeGenFunction &CGF;
390  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
391  FieldDecl *LambdaThisCaptureField = nullptr;
392 
393 public:
394  /// \brief Constructs region for combined constructs.
395  /// \param CodeGen Code generation sequence for combined directives. Includes
396  /// a list of functions used for code generation of implicitly inlined
397  /// regions.
398  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
399  OpenMPDirectiveKind Kind, bool HasCancel)
400  : CGF(CGF) {
401  // Start emission for the construct.
402  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
403  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
404  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
405  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
406  CGF.LambdaThisCaptureField = nullptr;
407  }
408 
409  ~InlinedOpenMPRegionRAII() {
410  // Restore original CapturedStmtInfo only if we're done with code emission.
411  auto *OldCSI =
412  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
413  delete CGF.CapturedStmtInfo;
414  CGF.CapturedStmtInfo = OldCSI;
415  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
416  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
417  }
418 };
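// Typical usage, as a minimal sketch (the enclosing block is illustrative, not
// a verbatim excerpt from this file):
//
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CodeGen(CGF); // body is emitted with the inlined-region CapturedStmtInfo
//   }               // destructor restores the previous CapturedStmtInfo and
//                   // the saved lambda capture fields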
419 
420 /// \brief Values for bit flags used in the ident_t to describe the fields.
421 /// All enumerated elements are named and described in accordance with the code
422 /// from http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
423 enum OpenMPLocationFlags {
424  /// \brief Use trampoline for internal microtask.
425  OMP_IDENT_IMD = 0x01,
426  /// \brief Use c-style ident structure.
427  OMP_IDENT_KMPC = 0x02,
428  /// \brief Atomic reduction option for kmpc_reduce.
429  OMP_ATOMIC_REDUCE = 0x10,
430  /// \brief Explicit 'barrier' directive.
431  OMP_IDENT_BARRIER_EXPL = 0x20,
432  /// \brief Implicit barrier in code.
433  OMP_IDENT_BARRIER_IMPL = 0x40,
434  /// \brief Implicit barrier in 'for' directive.
435  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
436  /// \brief Implicit barrier in 'sections' directive.
437  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
438  /// \brief Implicit barrier in 'single' directive.
439  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140
440 };
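// Illustrative example: emitUpdateLocation() below always ORs OMP_IDENT_KMPC
// into the flags it is given, so the ident_t for an explicit barrier carries
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_EXPL == 0x02 | 0x20 == 0x22
// in its 'flags' field.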
441 
442 /// \brief Describes ident structure that describes a source location.
443 /// All descriptions are taken from
444 /// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h
445 /// Original structure:
446 /// typedef struct ident {
447 /// kmp_int32 reserved_1; /**< might be used in Fortran;
448 /// see above */
449 /// kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags;
450 /// KMP_IDENT_KMPC identifies this union
451 /// member */
452 /// kmp_int32 reserved_2; /**< not really used in Fortran any more;
453 /// see above */
454 ///#if USE_ITT_BUILD
455 /// /* but currently used for storing
456 /// region-specific ITT */
457 /// /* contextual information. */
458 ///#endif /* USE_ITT_BUILD */
459 /// kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for
460 /// C++ */
461 /// char const *psource; /**< String describing the source location.
462 /// The string is composed of semi-colon separated
463 // fields which describe the source file,
464 /// the function and a pair of line numbers that
465 /// delimit the construct.
466 /// */
467 /// } ident_t;
468 enum IdentFieldIndex {
469  /// \brief might be used in Fortran
470  IdentField_Reserved_1,
471  /// \brief OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
472  IdentField_Flags,
473  /// \brief Not really used in Fortran any more
474  IdentField_Reserved_2,
475  /// \brief Source[4] in Fortran, do not use for C++
476  IdentField_Reserved_3,
477  /// \brief String describing the source location. The string is composed of
478  /// semi-colon separated fields which describe the source file, the function
479  /// and a pair of line numbers that delimit the construct.
480  IdentField_PSource
481 };
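// For reference, a sketch of the default location that
// getOrCreateDefaultLocation() below produces when no debug info is available
// (IR shown schematically; the psource string is the documented
// ";unknown;unknown;0;0;;" placeholder and flags is OMP_IDENT_KMPC):
//
//   @.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
//   @0 = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0,
//                                                 i8* getelementptr (...) }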
482 
483 /// \brief Schedule types for 'omp for' loops (these enumerators are taken from
484 /// the enum sched_type in kmp.h).
486  /// \brief Lower bound for default (unordered) versions.
494  /// static with chunk adjustment (e.g., simd)
496  /// \brief Lower bound for 'ordered' versions.
505  /// \brief dist_schedule types
508  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
509  /// Set if the monotonic schedule modifier was present.
511  /// Set if the nonmonotonic schedule modifier was present.
513 };
514 
516  /// \brief Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
517  /// kmpc_micro microtask, ...);
519  /// \brief Call to void *__kmpc_threadprivate_cached(ident_t *loc,
520  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
522  /// \brief Call to void __kmpc_threadprivate_register( ident_t *,
523  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
525  // Call to __kmpc_int32 kmpc_global_thread_num(ident_t *loc);
527  // Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
528  // kmp_critical_name *crit);
530  // Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
531  // global_tid, kmp_critical_name *crit, uintptr_t hint);
533  // Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
534  // kmp_critical_name *crit);
536  // Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
537  // global_tid);
539  // Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
541  // Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
543  // Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
544  // global_tid);
546  // Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
547  // global_tid);
549  // Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
550  // kmp_int32 num_threads);
552  // Call to void __kmpc_flush(ident_t *loc);
554  // Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
556  // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
558  // Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
559  // int end_part);
561  // Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
563  // Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
565  // Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
566  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
567  // kmp_routine_entry_t *task_entry);
569  // Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
570  // new_task);
572  // Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
573  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
574  // kmp_int32 didit);
576  // Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
577  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
578  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
580  // Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
581  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
582  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
583  // *lck);
585  // Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
586  // kmp_critical_name *lck);
588  // Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
589  // kmp_critical_name *lck);
591  // Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
592  // kmp_task_t * new_task);
594  // Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
595  // kmp_task_t * new_task);
597  // Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
599  // Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
601  // Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
602  // global_tid);
604  // Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
606  // Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
608  // Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
609  // int proc_bind);
611  // Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
612  // gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
613  // *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
615  // Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
616  // gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
617  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
619  // Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
620  // global_tid, kmp_int32 cncl_kind);
622  // Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
623  // kmp_int32 cncl_kind);
625  // Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
626  // kmp_int32 num_teams, kmp_int32 thread_limit);
628  // Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
629  // microtask, ...);
631  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
632  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
633  // sched, kmp_uint64 grainsize, void *task_dup);
635  // Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
636  // num_dims, struct kmp_dim *dims);
638  // Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
640  // Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
641  // *vec);
643  // Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
644  // *vec);
646  // Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
647  // *data);
649  // Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
650  // *d);
652 
653  //
654  // Offloading related calls
655  //
656  // Call to int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
657  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
658  // *arg_types);
660  // Call to int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
661  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
662  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
664  // Call to void __tgt_register_lib(__tgt_bin_desc *desc);
666  // Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
668  // Call to void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
669  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
671  // Call to void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
672  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
674  // Call to void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
675  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
677 };
678 
679 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
680 /// region.
681 class CleanupTy final : public EHScopeStack::Cleanup {
682  PrePostActionTy *Action;
683 
684 public:
685  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
686  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
687  if (!CGF.HaveInsertPoint())
688  return;
689  Action->Exit(CGF);
690  }
691 };
692 
693 } // anonymous namespace
694 
695 void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
696  CodeGenFunction::RunCleanupsScope Scope(CGF);
697  if (PrePostAction) {
698  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
699  Callback(CodeGen, CGF, *PrePostAction);
700  } else {
701  PrePostActionTy Action;
702  Callback(CodeGen, CGF, Action);
703  }
704 }
705 
706 /// Check if the combiner is a call to UDR combiner and if it is so return the
707 /// UDR decl used for reduction.
708 static const OMPDeclareReductionDecl *
709 getReductionInit(const Expr *ReductionOp) {
710  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
711  if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
712  if (auto *DRE =
713  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
714  if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
715  return DRD;
716  return nullptr;
717 }
718 
719 static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
720  const OMPDeclareReductionDecl *DRD,
721  const Expr *InitOp,
722  Address Private, Address Original,
723  QualType Ty) {
724  if (DRD->getInitializer()) {
725  std::pair<llvm::Function *, llvm::Function *> Reduction =
726  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
727  auto *CE = cast<CallExpr>(InitOp);
728  auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
729  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
730  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
731  auto *LHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
732  auto *RHSDRE = cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
733  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
734  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
735  [=]() -> Address { return Private; });
736  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
737  [=]() -> Address { return Original; });
738  (void)PrivateScope.Privatize();
739  RValue Func = RValue::get(Reduction.second);
740  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
741  CGF.EmitIgnoredExpr(InitOp);
742  } else {
743  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
744  auto *GV = new llvm::GlobalVariable(
745  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
746  llvm::GlobalValue::PrivateLinkage, Init, ".init");
747  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
748  RValue InitRVal;
749  switch (CGF.getEvaluationKind(Ty)) {
750  case TEK_Scalar:
751  InitRVal = CGF.EmitLoadOfLValue(LV, SourceLocation());
752  break;
753  case TEK_Complex:
754  InitRVal =
755  RValue::getComplex(CGF.EmitLoadOfComplex(LV, SourceLocation()));
756  break;
757  case TEK_Aggregate:
758  InitRVal = RValue::getAggregate(LV.getAddress());
759  break;
760  }
761  OpaqueValueExpr OVE(SourceLocation(), Ty, VK_RValue);
762  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
763  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
764  /*IsInitializer=*/false);
765  }
766 }
767 
768 /// \brief Emit initialization of arrays of complex types.
769 /// \param DestAddr Address of the array.
770 /// \param Type Type of array.
771 /// \param Init Initial expression of array.
772 /// \param SrcAddr Address of the original array.
773 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
774  QualType Type, const Expr *Init,
775  const OMPDeclareReductionDecl *DRD,
776  Address SrcAddr = Address::invalid()) {
777  // Perform element-by-element initialization.
778  QualType ElementTy;
779 
780  // Drill down to the base element type on both arrays.
781  auto ArrayTy = Type->getAsArrayTypeUnsafe();
782  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
783  DestAddr =
784  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
785  if (DRD)
786  SrcAddr =
787  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
788 
789  llvm::Value *SrcBegin = nullptr;
790  if (DRD)
791  SrcBegin = SrcAddr.getPointer();
792  auto DestBegin = DestAddr.getPointer();
793  // Cast from pointer to array type to pointer to single element.
794  auto DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
795  // The basic structure here is a while-do loop.
796  auto BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
797  auto DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
798  auto IsEmpty =
799  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
800  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
801 
802  // Enter the loop body, making that address the current address.
803  auto EntryBB = CGF.Builder.GetInsertBlock();
804  CGF.EmitBlock(BodyBB);
805 
806  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
807 
808  llvm::PHINode *SrcElementPHI = nullptr;
809  Address SrcElementCurrent = Address::invalid();
810  if (DRD) {
811  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
812  "omp.arraycpy.srcElementPast");
813  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
814  SrcElementCurrent =
815  Address(SrcElementPHI,
816  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
817  }
818  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
819  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
820  DestElementPHI->addIncoming(DestBegin, EntryBB);
821  Address DestElementCurrent =
822  Address(DestElementPHI,
823  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
824 
825  // Emit copy.
826  {
827  CodeGenFunction::RunCleanupsScope InitScope(CGF);
828  if (DRD && (DRD->getInitializer() || !Init)) {
829  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
830  SrcElementCurrent, ElementTy);
831  } else
832  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
833  /*IsInitializer=*/false);
834  }
835 
836  if (DRD) {
837  // Shift the address forward by one element.
838  auto SrcElementNext = CGF.Builder.CreateConstGEP1_32(
839  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
840  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
841  }
842 
843  // Shift the address forward by one element.
844  auto DestElementNext = CGF.Builder.CreateConstGEP1_32(
845  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
846  // Check whether we've reached the end.
847  auto Done =
848  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
849  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
850  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
851 
852  // Done.
853  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
854 }
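// The control flow emitted above is, schematically:
//
//   entry:                 if (dest.begin == dest.end) goto omp.arrayinit.done;
//   omp.arrayinit.body:    PHI over the current dest (and, for UDRs, src)
//                          element; emit the per-element initializer; advance
//                          the element pointers; loop until dest.end is hit;
//   omp.arrayinit.done:    continue with the rest of the function.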
855 
856 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
857  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
858  return CGF.EmitOMPArraySectionExpr(OASE);
859  if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(E))
860  return CGF.EmitLValue(ASE);
861  auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
862  DeclRefExpr DRE(const_cast<VarDecl *>(OrigVD),
863  CGF.CapturedStmtInfo &&
864  CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
865  E->getType(), VK_LValue, E->getExprLoc());
866  // Store the address of the original variable associated with the LHS
867  // implicit variable.
868  return CGF.EmitLValue(&DRE);
869 }
870 
871 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
872  const Expr *E) {
873  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
874  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
875  return LValue();
876 }
877 
878 void ReductionCodeGen::emitAggregateInitialization(
879  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
880  const OMPDeclareReductionDecl *DRD) {
881  // Emit VarDecl with copy init for arrays.
882  // Get the address of the original variable captured in current
883  // captured region.
884  auto *PrivateVD =
885  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
886  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
887  DRD ? ClausesData[N].ReductionOp : PrivateVD->getInit(),
888  DRD, SharedLVal.getAddress());
889 }
890 
891 ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
892  ArrayRef<const Expr *> Privates,
893  ArrayRef<const Expr *> ReductionOps) {
894  ClausesData.reserve(Shareds.size());
895  SharedAddresses.reserve(Shareds.size());
896  Sizes.reserve(Shareds.size());
897  BaseDecls.reserve(Shareds.size());
898  auto IPriv = Privates.begin();
899  auto IRed = ReductionOps.begin();
900  for (const auto *Ref : Shareds) {
901  ClausesData.emplace_back(Ref, *IPriv, *IRed);
902  std::advance(IPriv, 1);
903  std::advance(IRed, 1);
904  }
905 }
906 
907 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
908  assert(SharedAddresses.size() == N &&
909  "Number of generated lvalues must be exactly N.");
910  SharedAddresses.emplace_back(emitSharedLValue(CGF, ClausesData[N].Ref),
911  emitSharedLValueUB(CGF, ClausesData[N].Ref));
912 }
913 
914 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
915  auto *PrivateVD =
916  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
917  QualType PrivateType = PrivateVD->getType();
918  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
919  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
920  Sizes.emplace_back(
921  CGF.getTypeSize(
922  SharedAddresses[N].first.getType().getNonReferenceType()),
923  nullptr);
924  return;
925  }
926  llvm::Value *Size;
927  llvm::Value *SizeInChars;
928  llvm::Type *ElemType =
929  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
930  ->getElementType();
931  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
932  if (AsArraySection) {
933  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
934  SharedAddresses[N].first.getPointer());
935  Size = CGF.Builder.CreateNUWAdd(
936  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
937  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
938  } else {
939  SizeInChars = CGF.getTypeSize(
940  SharedAddresses[N].first.getType().getNonReferenceType());
941  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
942  }
943  Sizes.emplace_back(SizeInChars, Size);
944  CodeGenFunction::OpaqueValueMapping OpaqueMap(
945  CGF,
946  cast<OpaqueValueExpr>(
947  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
948  RValue::get(Size));
949  CGF.EmitVariablyModifiedType(PrivateType);
950 }
951 
952 void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
953  llvm::Value *Size) {
954  auto *PrivateVD =
955  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
956  QualType PrivateType = PrivateVD->getType();
957  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
958  if (!AsArraySection && !PrivateType->isVariablyModifiedType()) {
959  assert(!Size && !Sizes[N].second &&
960  "Size should be nullptr for non-variably modified redution "
961  "items.");
962  return;
963  }
964  CodeGenFunction::OpaqueValueMapping OpaqueMap(
965  CGF,
966  cast<OpaqueValueExpr>(
967  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
968  RValue::get(Size));
969  CGF.EmitVariablyModifiedType(PrivateType);
970 }
971 
972 void ReductionCodeGen::emitInitialization(
973  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
974  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
975  assert(SharedAddresses.size() > N && "No variable was generated");
976  auto *PrivateVD =
977  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
978  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
979  QualType PrivateType = PrivateVD->getType();
980  PrivateAddr = CGF.Builder.CreateElementBitCast(
981  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
982  QualType SharedType = SharedAddresses[N].first.getType();
983  SharedLVal = CGF.MakeAddrLValue(
984  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
985  CGF.ConvertTypeForMem(SharedType)),
986  SharedType, SharedAddresses[N].first.getBaseInfo());
987  if (isa<OMPArraySectionExpr>(ClausesData[N].Ref) ||
988  CGF.getContext().getAsArrayType(PrivateVD->getType())) {
989  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
990  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
991  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
992  PrivateAddr, SharedLVal.getAddress(),
993  SharedLVal.getType());
994  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
995  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
996  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
997  PrivateVD->getType().getQualifiers(),
998  /*IsInitializer=*/false);
999  }
1000 }
1001 
1002 bool ReductionCodeGen::needCleanups(unsigned N) {
1003  auto *PrivateVD =
1004  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1005  QualType PrivateType = PrivateVD->getType();
1006  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1007  return DTorKind != QualType::DK_none;
1008 }
1009 
1010 void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
1011  Address PrivateAddr) {
1012  auto *PrivateVD =
1013  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1014  QualType PrivateType = PrivateVD->getType();
1015  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1016  if (needCleanups(N)) {
1017  PrivateAddr = CGF.Builder.CreateElementBitCast(
1018  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1019  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1020  }
1021 }
1022 
1023 static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1024  LValue BaseLV) {
1025  BaseTy = BaseTy.getNonReferenceType();
1026  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1027  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1028  if (auto *PtrTy = BaseTy->getAs<PointerType>())
1029  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1030  else {
1031  BaseLV = CGF.EmitLoadOfReferenceLValue(BaseLV.getAddress(),
1032  BaseTy->castAs<ReferenceType>());
1033  }
1034  BaseTy = BaseTy->getPointeeType();
1035  }
1036  return CGF.MakeAddrLValue(
1037  CGF.Builder.CreateElementBitCast(BaseLV.getAddress(),
1038  CGF.ConvertTypeForMem(ElTy)),
1039  BaseLV.getType(), BaseLV.getBaseInfo());
1040 }
1041 
1042 static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
1043  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1044  llvm::Value *Addr) {
1045  Address Tmp = Address::invalid();
1046  Address TopTmp = Address::invalid();
1047  Address MostTopTmp = Address::invalid();
1048  BaseTy = BaseTy.getNonReferenceType();
1049  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1050  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1051  Tmp = CGF.CreateMemTemp(BaseTy);
1052  if (TopTmp.isValid())
1053  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1054  else
1055  MostTopTmp = Tmp;
1056  TopTmp = Tmp;
1057  BaseTy = BaseTy->getPointeeType();
1058  }
1059  llvm::Type *Ty = BaseLVType;
1060  if (Tmp.isValid())
1061  Ty = Tmp.getElementType();
1062  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1063  if (Tmp.isValid()) {
1064  CGF.Builder.CreateStore(Addr, Tmp);
1065  return MostTopTmp;
1066  }
1067  return Address(Addr, BaseLVAlignment);
1068 }
1069 
1070 Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
1071  Address PrivateAddr) {
1072  const DeclRefExpr *DE;
1073  const VarDecl *OrigVD = nullptr;
1074  if (auto *OASE = dyn_cast<OMPArraySectionExpr>(ClausesData[N].Ref)) {
1075  auto *Base = OASE->getBase()->IgnoreParenImpCasts();
1076  while (auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1077  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1078  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1079  Base = TempASE->getBase()->IgnoreParenImpCasts();
1080  DE = cast<DeclRefExpr>(Base);
1081  OrigVD = cast<VarDecl>(DE->getDecl());
1082  } else if (auto *ASE = dyn_cast<ArraySubscriptExpr>(ClausesData[N].Ref)) {
1083  auto *Base = ASE->getBase()->IgnoreParenImpCasts();
1084  while (auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1085  Base = TempASE->getBase()->IgnoreParenImpCasts();
1086  DE = cast<DeclRefExpr>(Base);
1087  OrigVD = cast<VarDecl>(DE->getDecl());
1088  }
1089  if (OrigVD) {
1090  BaseDecls.emplace_back(OrigVD);
1091  auto OriginalBaseLValue = CGF.EmitLValue(DE);
1092  LValue BaseLValue =
1093  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1094  OriginalBaseLValue);
1095  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1096  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1097  llvm::Value *Ptr =
1098  CGF.Builder.CreateGEP(PrivateAddr.getPointer(), Adjustment);
1099  return castToBase(CGF, OrigVD->getType(),
1100  SharedAddresses[N].first.getType(),
1101  OriginalBaseLValue.getPointer()->getType(),
1102  OriginalBaseLValue.getAlignment(), Ptr);
1103  }
1104  BaseDecls.emplace_back(
1105  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1106  return PrivateAddr;
1107 }
1108 
1109 bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
1110  auto *DRD = getReductionInit(ClausesData[N].ReductionOp);
1111  return DRD && DRD->getInitializer();
1112 }
1113 
1114 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1115  return CGF.EmitLoadOfPointerLValue(
1116  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1117  getThreadIDVariable()->getType()->castAs<PointerType>());
1118 }
1119 
1120 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1121  if (!CGF.HaveInsertPoint())
1122  return;
1123  // 1.2.2 OpenMP Language Terminology
1124  // Structured block - An executable statement with a single entry at the
1125  // top and a single exit at the bottom.
1126  // The point of exit cannot be a branch out of the structured block.
1127  // longjmp() and throw() must not violate the entry/exit criteria.
1128  CGF.EHStack.pushTerminate();
1129  CodeGen(CGF);
1130  CGF.EHStack.popTerminate();
1131 }
1132 
1133 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1134  CodeGenFunction &CGF) {
1135  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1136  getThreadIDVariable()->getType(),
1137  AlignmentSource::Decl);
1138 }
1139 
1140 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
1141  : CGM(CGM), OffloadEntriesInfoManager(CGM) {
1142  IdentTy = llvm::StructType::create(
1143  "ident_t", CGM.Int32Ty /* reserved_1 */, CGM.Int32Ty /* flags */,
1144  CGM.Int32Ty /* reserved_2 */, CGM.Int32Ty /* reserved_3 */,
1145  CGM.Int8PtrTy /* psource */);
1146  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1147 
1148  loadOffloadInfoMetadata();
1149 }
1150 
1151 void CGOpenMPRuntime::clear() {
1152  InternalVars.clear();
1153 }
1154 
1155 static llvm::Function *
1156 emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
1157  const Expr *CombinerInitializer, const VarDecl *In,
1158  const VarDecl *Out, bool IsCombiner) {
1159  // void .omp_combiner.(Ty *in, Ty *out);
1160  auto &C = CGM.getContext();
1161  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1162  FunctionArgList Args;
1163  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1164  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1165  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1166  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1167  Args.push_back(&OmpOutParm);
1168  Args.push_back(&OmpInParm);
1169  auto &FnInfo =
1170  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
1171  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1172  auto *Fn = llvm::Function::Create(
1173  FnTy, llvm::GlobalValue::InternalLinkage,
1174  IsCombiner ? ".omp_combiner." : ".omp_initializer.", &CGM.getModule());
1175  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
1176  Fn->removeFnAttr(llvm::Attribute::NoInline);
1177  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1178  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1179  CodeGenFunction CGF(CGM);
1180  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1181  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1182  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
1183  CodeGenFunction::OMPPrivateScope Scope(CGF);
1184  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1185  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() -> Address {
1186  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1187  .getAddress();
1188  });
1189  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1190  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() -> Address {
1191  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1192  .getAddress();
1193  });
1194  (void)Scope.Privatize();
1195  CGF.EmitIgnoredExpr(CombinerInitializer);
1196  Scope.ForceCleanup();
1197  CGF.FinishFunction();
1198  return Fn;
1199 }
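// A minimal source-level sketch of what this helper serves ('MyType' and
// 'merge' are hypothetical names, not taken from this file):
//
//   #pragma omp declare reduction(merge : MyType : omp_out = omp_out + omp_in) \
//       initializer(omp_priv = MyType())
//
// The combiner expression becomes an internal ".omp_combiner." function whose
// two pointer parameters stand for 'omp_out' and 'omp_in'; the initializer
// expression becomes ".omp_initializer." over 'omp_priv' and 'omp_orig'
// (see emitUserDefinedReduction below).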
1200 
1201 void CGOpenMPRuntime::emitUserDefinedReduction(
1202  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1203  if (UDRMap.count(D) > 0)
1204  return;
1205  auto &C = CGM.getContext();
1206  if (!In || !Out) {
1207  In = &C.Idents.get("omp_in");
1208  Out = &C.Idents.get("omp_out");
1209  }
1210  llvm::Function *Combiner = emitCombinerOrInitializer(
1211  CGM, D->getType(), D->getCombiner(), cast<VarDecl>(D->lookup(In).front()),
1212  cast<VarDecl>(D->lookup(Out).front()),
1213  /*IsCombiner=*/true);
1214  llvm::Function *Initializer = nullptr;
1215  if (auto *Init = D->getInitializer()) {
1216  if (!Priv || !Orig) {
1217  Priv = &C.Idents.get("omp_priv");
1218  Orig = &C.Idents.get("omp_orig");
1219  }
1220  Initializer = emitCombinerOrInitializer(
1221  CGM, D->getType(), Init, cast<VarDecl>(D->lookup(Orig).front()),
1222  cast<VarDecl>(D->lookup(Priv).front()),
1223  /*IsCombiner=*/false);
1224  }
1225  UDRMap.insert(std::make_pair(D, std::make_pair(Combiner, Initializer)));
1226  if (CGF) {
1227  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1228  Decls.second.push_back(D);
1229  }
1230 }
1231 
1232 std::pair<llvm::Function *, llvm::Function *>
1233 CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1234  auto I = UDRMap.find(D);
1235  if (I != UDRMap.end())
1236  return I->second;
1237  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1238  return UDRMap.lookup(D);
1239 }
1240 
1241 // Layout information for ident_t.
1242 static CharUnits getIdentAlign(CodeGenModule &CGM) {
1243  return CGM.getPointerAlign();
1244 }
1245 static CharUnits getIdentSize(CodeGenModule &CGM) {
1246  assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign()));
1247  return CharUnits::fromQuantity(16) + CGM.getPointerSize();
1248 }
1249 static CharUnits getOffsetOfIdentField(IdentFieldIndex Field) {
1250  // All the fields except the last are i32, so this works beautifully.
1251  return unsigned(Field) * CharUnits::fromQuantity(4);
1252 }
1253 static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr,
1254  IdentFieldIndex Field,
1255  const llvm::Twine &Name = "") {
1256  auto Offset = getOffsetOfIdentField(Field);
1257  return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
1258 }
1259 
1260 static llvm::Value *emitParallelOrTeamsOutlinedFunction(
1261  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1262  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1263  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1264  assert(ThreadIDVar->getType()->isPointerType() &&
1265  "thread id variable must be of type kmp_int32 *");
1266  CodeGenFunction CGF(CGM, true);
1267  bool HasCancel = false;
1268  if (auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1269  HasCancel = OPD->hasCancel();
1270  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1271  HasCancel = OPSD->hasCancel();
1272  else if (auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1273  HasCancel = OPFD->hasCancel();
1274  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1275  HasCancel, OutlinedHelperName);
1276  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1277  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1278 }
1279 
1280 llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
1281  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1282  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1283  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1284  return emitParallelOrTeamsOutlinedFunction(
1285  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1286 }
1287 
1288 llvm::Value *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1289  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1290  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1291  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1292  return emitParallelOrTeamsOutlinedFunction(
1293  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1294 }
1295 
1296 llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
1297  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1298  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1299  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1300  bool Tied, unsigned &NumberOfParts) {
1301  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1302  PrePostActionTy &) {
1303  auto *ThreadID = getThreadID(CGF, D.getLocStart());
1304  auto *UpLoc = emitUpdateLocation(CGF, D.getLocStart());
1305  llvm::Value *TaskArgs[] = {
1306  UpLoc, ThreadID,
1307  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1308  TaskTVar->getType()->castAs<PointerType>())
1309  .getPointer()};
1310  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1311  };
1312  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1313  UntiedCodeGen);
1314  CodeGen.setAction(Action);
1315  assert(!ThreadIDVar->getType()->isPointerType() &&
1316  "thread id variable must be of type kmp_int32 for tasks");
1317  auto *CS = cast<CapturedStmt>(D.getAssociatedStmt());
1318  auto *TD = dyn_cast<OMPTaskDirective>(&D);
1319  CodeGenFunction CGF(CGM, true);
1320  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1321  InnermostKind,
1322  TD ? TD->hasCancel() : false, Action);
1323  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1324  auto *Res = CGF.GenerateCapturedStmtFunction(*CS);
1325  if (!Tied)
1326  NumberOfParts = Action.getNumberOfParts();
1327  return Res;
1328 }
1329 
1330 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1331  CharUnits Align = getIdentAlign(CGM);
1332  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
1333  if (!Entry) {
1334  if (!DefaultOpenMPPSource) {
1335  // Initialize default location for psource field of ident_t structure of
1336  // all ident_t objects. Format is ";file;function;line;column;;".
1337  // Taken from
1338  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
1339  DefaultOpenMPPSource =
1340  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1341  DefaultOpenMPPSource =
1342  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1343  }
1344 
1345  ConstantInitBuilder builder(CGM);
1346  auto fields = builder.beginStruct(IdentTy);
1347  fields.addInt(CGM.Int32Ty, 0);
1348  fields.addInt(CGM.Int32Ty, Flags);
1349  fields.addInt(CGM.Int32Ty, 0);
1350  fields.addInt(CGM.Int32Ty, 0);
1351  fields.add(DefaultOpenMPPSource);
1352  auto DefaultOpenMPLocation =
1353  fields.finishAndCreateGlobal("", Align, /*isConstant*/ true,
1354  llvm::GlobalValue::PrivateLinkage);
1355  DefaultOpenMPLocation->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
1356 
1357  OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
1358  }
1359  return Address(Entry, Align);
1360 }
1361 
1362 llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
1363  SourceLocation Loc,
1364  unsigned Flags) {
1365  Flags |= OMP_IDENT_KMPC;
1366  // If no debug info is generated - return global default location.
1367  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1368  Loc.isInvalid())
1369  return getOrCreateDefaultLocation(Flags).getPointer();
1370 
1371  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1372 
1373  Address LocValue = Address::invalid();
1374  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1375  if (I != OpenMPLocThreadIDMap.end())
1376  LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
1377 
1378  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1379  // GetOpenMPThreadID was called before this routine.
1380  if (!LocValue.isValid()) {
1381  // Generate "ident_t .kmpc_loc.addr;"
1382  Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
1383  ".kmpc_loc.addr");
1384  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1385  Elem.second.DebugLoc = AI.getPointer();
1386  LocValue = AI;
1387 
1388  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1389  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1390  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1391  CGM.getSize(getIdentSize(CGF.CGM)));
1392  }
1393 
1394  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1395  Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
1396 
1397  auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1398  if (OMPDebugLoc == nullptr) {
1399  SmallString<128> Buffer2;
1400  llvm::raw_svector_ostream OS2(Buffer2);
1401  // Build debug location
1402  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1403  OS2 << ";" << PLoc.getFilename() << ";";
1404  if (const FunctionDecl *FD =
1405  dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl)) {
1406  OS2 << FD->getQualifiedNameAsString();
1407  }
1408  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1409  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1410  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1411  }
1412  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1413  CGF.Builder.CreateStore(OMPDebugLoc, PSource);
1414 
1415  // Our callers always pass this to a runtime function, so for
1416  // convenience, go ahead and return a naked pointer.
1417  return LocValue.getPointer();
1418 }
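// Example: for a construct at line 10, column 3 of test.c inside a function
// foo (hypothetical names), the string stored into psource would be
//   ";test.c;foo;10;3;;"
// matching the ";file;function;line;column;;" format described above.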
1419 
1420 llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1421  SourceLocation Loc) {
1422  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1423 
1424  llvm::Value *ThreadID = nullptr;
1425  // Check whether we've already cached a load of the thread id in this
1426  // function.
1427  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1428  if (I != OpenMPLocThreadIDMap.end()) {
1429  ThreadID = I->second.ThreadID;
1430  if (ThreadID != nullptr)
1431  return ThreadID;
1432  }
1433  if (auto *OMPRegionInfo =
1434  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1435  if (OMPRegionInfo->getThreadIDVariable()) {
1436  // Check if this is an outlined function with thread id passed as argument.
1437  auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1438  ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
1439  // If value loaded in entry block, cache it and use it everywhere in
1440  // function.
1441  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1442  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1443  Elem.second.ThreadID = ThreadID;
1444  }
1445  return ThreadID;
1446  }
1447  }
1448 
1449  // This is not an outlined function region - need to call __kmpc_int32
1450  // kmpc_global_thread_num(ident_t *loc).
1451  // Generate thread id value and cache this value for use across the
1452  // function.
1453  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1454  CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
1455  ThreadID =
1456  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
1457  emitUpdateLocation(CGF, Loc));
1458  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1459  Elem.second.ThreadID = ThreadID;
1460  return ThreadID;
1461 }
1462 
1463 void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1464  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1465  if (OpenMPLocThreadIDMap.count(CGF.CurFn))
1466  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1467  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1468  for(auto *D : FunctionUDRMap[CGF.CurFn]) {
1469  UDRMap.erase(D);
1470  }
1471  FunctionUDRMap.erase(CGF.CurFn);
1472  }
1473 }
1474 
1475 llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
1476  if (!IdentTy) {
1477  }
1478  return llvm::PointerType::getUnqual(IdentTy);
1479 }
1480 
1481 llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
1482  if (!Kmpc_MicroTy) {
1483  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1484  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1485  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1486  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1487  }
1488  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1489 }
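// For reference, a sketch of how this type is used: outlined parallel regions
// are passed to __kmpc_fork_call as kmpc_micro entry points, i.e. functions of
// the form
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       /* captured arguments... */);
// which matches the variadic microtask type built above.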
1490 
1491 llvm::Constant *
1492 CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1493  llvm::Constant *RTLFn = nullptr;
1494  switch (static_cast<OpenMPRTLFunction>(Function)) {
1495  case OMPRTL__kmpc_fork_call: {
1496  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1497  // microtask, ...);
1498  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1499  getKmpc_MicroPointerTy()};
1500  llvm::FunctionType *FnTy =
1501  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1502  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1503  break;
1504  }
1505  case OMPRTL__kmpc_global_thread_num: {
1506  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1507  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1508  llvm::FunctionType *FnTy =
1509  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1510  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1511  break;
1512  }
1513  case OMPRTL__kmpc_threadprivate_cached: {
1514  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1515  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1516  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1517  CGM.VoidPtrTy, CGM.SizeTy,
1518  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1519  llvm::FunctionType *FnTy =
1520  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1521  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1522  break;
1523  }
1524  case OMPRTL__kmpc_critical: {
1525  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1526  // kmp_critical_name *crit);
1527  llvm::Type *TypeParams[] = {
1528  getIdentTyPointerTy(), CGM.Int32Ty,
1529  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1530  llvm::FunctionType *FnTy =
1531  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1532  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1533  break;
1534  }
1535  case OMPRTL__kmpc_critical_with_hint: {
1536  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1537  // kmp_critical_name *crit, uintptr_t hint);
1538  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1539  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1540  CGM.IntPtrTy};
1541  llvm::FunctionType *FnTy =
1542  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1543  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1544  break;
1545  }
1546  case OMPRTL__kmpc_threadprivate_register: {
1547  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1548  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1549  // typedef void *(*kmpc_ctor)(void *);
1550  auto KmpcCtorTy =
1551  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1552  /*isVarArg*/ false)->getPointerTo();
1553  // typedef void *(*kmpc_cctor)(void *, void *);
1554  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1555  auto KmpcCopyCtorTy =
1556  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1557  /*isVarArg*/ false)->getPointerTo();
1558  // typedef void (*kmpc_dtor)(void *);
1559  auto KmpcDtorTy =
1560  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1561  ->getPointerTo();
1562  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1563  KmpcCopyCtorTy, KmpcDtorTy};
1564  auto FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1565  /*isVarArg*/ false);
1566  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1567  break;
1568  }
1569  case OMPRTL__kmpc_end_critical: {
1570  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1571  // kmp_critical_name *crit);
1572  llvm::Type *TypeParams[] = {
1573  getIdentTyPointerTy(), CGM.Int32Ty,
1574  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1575  llvm::FunctionType *FnTy =
1576  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1577  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1578  break;
1579  }
1580  case OMPRTL__kmpc_cancel_barrier: {
1581  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1582  // global_tid);
1583  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1584  llvm::FunctionType *FnTy =
1585  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1586  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1587  break;
1588  }
1589  case OMPRTL__kmpc_barrier: {
1590  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1591  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1592  llvm::FunctionType *FnTy =
1593  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1594  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1595  break;
1596  }
1597  case OMPRTL__kmpc_for_static_fini: {
1598  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1599  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1600  llvm::FunctionType *FnTy =
1601  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1602  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1603  break;
1604  }
1605  case OMPRTL__kmpc_push_num_threads: {
1606  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1607  // kmp_int32 num_threads)
1608  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1609  CGM.Int32Ty};
1610  llvm::FunctionType *FnTy =
1611  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1612  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1613  break;
1614  }
1615  case OMPRTL__kmpc_serialized_parallel: {
1616  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1617  // global_tid);
1618  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1619  llvm::FunctionType *FnTy =
1620  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1621  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1622  break;
1623  }
1624  case OMPRTL__kmpc_end_serialized_parallel: {
1625  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1626  // global_tid);
1627  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1628  llvm::FunctionType *FnTy =
1629  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1630  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1631  break;
1632  }
1633  case OMPRTL__kmpc_flush: {
1634  // Build void __kmpc_flush(ident_t *loc);
1635  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1636  llvm::FunctionType *FnTy =
1637  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1638  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1639  break;
1640  }
1641  case OMPRTL__kmpc_master: {
1642  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1643  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1644  llvm::FunctionType *FnTy =
1645  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1646  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1647  break;
1648  }
1649  case OMPRTL__kmpc_end_master: {
1650  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1651  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1652  llvm::FunctionType *FnTy =
1653  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1654  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1655  break;
1656  }
1657  case OMPRTL__kmpc_omp_taskyield: {
1658  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1659  // int end_part);
1660  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1661  llvm::FunctionType *FnTy =
1662  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1663  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1664  break;
1665  }
1666  case OMPRTL__kmpc_single: {
1667  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1668  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1669  llvm::FunctionType *FnTy =
1670  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1671  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1672  break;
1673  }
1674  case OMPRTL__kmpc_end_single: {
1675  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1676  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1677  llvm::FunctionType *FnTy =
1678  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1679  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1680  break;
1681  }
1682  case OMPRTL__kmpc_omp_task_alloc: {
1683  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1684  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1685  // kmp_routine_entry_t *task_entry);
1686  assert(KmpRoutineEntryPtrTy != nullptr &&
1687  "Type kmp_routine_entry_t must be created.");
1688  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1689  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1690  // Return void * and then cast to particular kmp_task_t type.
1691  llvm::FunctionType *FnTy =
1692  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1693  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1694  break;
1695  }
1696  case OMPRTL__kmpc_omp_task: {
1697  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1698  // *new_task);
1699  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1700  CGM.VoidPtrTy};
1701  llvm::FunctionType *FnTy =
1702  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1703  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1704  break;
1705  }
1706  case OMPRTL__kmpc_copyprivate: {
1707  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1708  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1709  // kmp_int32 didit);
1710  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1711  auto *CpyFnTy =
1712  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1713  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1714  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1715  CGM.Int32Ty};
1716  llvm::FunctionType *FnTy =
1717  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1718  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1719  break;
1720  }
1721  case OMPRTL__kmpc_reduce: {
1722  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1723  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1724  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1725  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1726  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1727  /*isVarArg=*/false);
1728  llvm::Type *TypeParams[] = {
1729  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1730  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1731  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1732  llvm::FunctionType *FnTy =
1733  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1734  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1735  break;
1736  }
1737  case OMPRTL__kmpc_reduce_nowait: {
1738  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1739  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1740  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1741  // *lck);
1742  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1743  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1744  /*isVarArg=*/false);
1745  llvm::Type *TypeParams[] = {
1746  getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty, CGM.SizeTy,
1747  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1748  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1749  llvm::FunctionType *FnTy =
1750  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1751  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1752  break;
1753  }
1754  case OMPRTL__kmpc_end_reduce: {
1755  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
1756  // kmp_critical_name *lck);
1757  llvm::Type *TypeParams[] = {
1758  getIdentTyPointerTy(), CGM.Int32Ty,
1759  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1760  llvm::FunctionType *FnTy =
1761  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1762  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
1763  break;
1764  }
1765  case OMPRTL__kmpc_end_reduce_nowait: {
1766  // Build void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
1767  // kmp_critical_name *lck);
1768  llvm::Type *TypeParams[] = {
1769  getIdentTyPointerTy(), CGM.Int32Ty,
1770  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1771  llvm::FunctionType *FnTy =
1772  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1773  RTLFn =
1774  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
1775  break;
1776  }
1777  case OMPRTL__kmpc_omp_task_begin_if0: {
1778  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
1779  // kmp_task_t *new_task);
1780  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1781  CGM.VoidPtrTy};
1782  llvm::FunctionType *FnTy =
1783  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1784  RTLFn =
1785  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
1786  break;
1787  }
1788  case OMPRTL__kmpc_omp_task_complete_if0: {
1789  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
1790  // kmp_task_t *new_task);
1791  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1792  CGM.VoidPtrTy};
1793  llvm::FunctionType *FnTy =
1794  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1795  RTLFn = CGM.CreateRuntimeFunction(FnTy,
1796  /*Name=*/"__kmpc_omp_task_complete_if0");
1797  break;
1798  }
1799  case OMPRTL__kmpc_ordered: {
1800  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
1801  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1802  llvm::FunctionType *FnTy =
1803  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1804  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
1805  break;
1806  }
1807  case OMPRTL__kmpc_end_ordered: {
1808  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
1809  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1810  llvm::FunctionType *FnTy =
1811  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1812  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
1813  break;
1814  }
1815  case OMPRTL__kmpc_omp_taskwait: {
1816  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
1817  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1818  llvm::FunctionType *FnTy =
1819  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1820  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
1821  break;
1822  }
1823  case OMPRTL__kmpc_taskgroup: {
1824  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
1825  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1826  llvm::FunctionType *FnTy =
1827  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1828  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
1829  break;
1830  }
1831  case OMPRTL__kmpc_end_taskgroup: {
1832  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
1833  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1834  llvm::FunctionType *FnTy =
1835  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1836  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
1837  break;
1838  }
1839  case OMPRTL__kmpc_push_proc_bind: {
1840  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
1841  // int proc_bind)
1842  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1843  llvm::FunctionType *FnTy =
1844  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1845  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
1846  break;
1847  }
1848  case OMPRTL__kmpc_omp_task_with_deps: {
1849  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
1850  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
1851  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
1852  llvm::Type *TypeParams[] = {
1853  getIdentTyPointerTy(), CGM.Int32Ty, CGM.VoidPtrTy, CGM.Int32Ty,
1854  CGM.VoidPtrTy, CGM.Int32Ty, CGM.VoidPtrTy};
1855  llvm::FunctionType *FnTy =
1856  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1857  RTLFn =
1858  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
1859  break;
1860  }
1861  case OMPRTL__kmpc_omp_wait_deps: {
1862  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
1863  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
1864  // kmp_depend_info_t *noalias_dep_list);
1865  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1866  CGM.Int32Ty, CGM.VoidPtrTy,
1867  CGM.Int32Ty, CGM.VoidPtrTy};
1868  llvm::FunctionType *FnTy =
1869  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1870  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
1871  break;
1872  }
1873  case OMPRTL__kmpc_cancellationpoint: {
1874  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
1875  // global_tid, kmp_int32 cncl_kind)
1876  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1877  llvm::FunctionType *FnTy =
1878  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1879  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
1880  break;
1881  }
1882  case OMPRTL__kmpc_cancel: {
1883  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
1884  // kmp_int32 cncl_kind)
1885  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1886  llvm::FunctionType *FnTy =
1887  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1888  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
1889  break;
1890  }
1891  case OMPRTL__kmpc_push_num_teams: {
1892  // Build void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
1893  // kmp_int32 num_teams, kmp_int32 num_threads);
1894  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1895  CGM.Int32Ty};
1896  llvm::FunctionType *FnTy =
1897  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1898  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
1899  break;
1900  }
1901  case OMPRTL__kmpc_fork_teams: {
1902  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
1903  // microtask, ...);
1904  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1905  getKmpc_MicroPointerTy()};
1906  llvm::FunctionType *FnTy =
1907  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1908  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
1909  break;
1910  }
1911  case OMPRTL__kmpc_taskloop: {
1912  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
1913  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
1914  // sched, kmp_uint64 grainsize, void *task_dup);
1915  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1916  CGM.IntTy,
1917  CGM.VoidPtrTy,
1918  CGM.IntTy,
1919  CGM.Int64Ty->getPointerTo(),
1920  CGM.Int64Ty->getPointerTo(),
1921  CGM.Int64Ty,
1922  CGM.IntTy,
1923  CGM.IntTy,
1924  CGM.Int64Ty,
1925  CGM.VoidPtrTy};
1926  llvm::FunctionType *FnTy =
1927  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1928  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
1929  break;
1930  }
1931  case OMPRTL__kmpc_doacross_init: {
1932  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
1933  // num_dims, struct kmp_dim *dims);
1934  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
1935  CGM.Int32Ty,
1936  CGM.Int32Ty,
1937  CGM.VoidPtrTy};
1938  llvm::FunctionType *FnTy =
1939  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1940  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
1941  break;
1942  }
1943  case OMPRTL__kmpc_doacross_fini: {
1944  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
1945  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1946  llvm::FunctionType *FnTy =
1947  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1948  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
1949  break;
1950  }
1951  case OMPRTL__kmpc_doacross_post: {
1952  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
1953  // *vec);
1954  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1955  CGM.Int64Ty->getPointerTo()};
1956  llvm::FunctionType *FnTy =
1957  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1958  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
1959  break;
1960  }
1961  case OMPRTL__kmpc_doacross_wait: {
1962  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
1963  // *vec);
1964  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1965  CGM.Int64Ty->getPointerTo()};
1966  llvm::FunctionType *FnTy =
1967  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1968  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
1969  break;
1970  }
1971  case OMPRTL__kmpc_task_reduction_init: {
1972  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
1973  // *data);
1974  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
1975  llvm::FunctionType *FnTy =
1976  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1977  RTLFn =
1978  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
1979  break;
1980  }
1981  case OMPRTL__kmpc_task_reduction_get_th_data: {
1982  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
1983  // *d);
1984  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
1985  llvm::FunctionType *FnTy =
1986  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1987  RTLFn = CGM.CreateRuntimeFunction(
1988  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
1989  break;
1990  }
1991  case OMPRTL__tgt_target: {
1992  // Build int32_t __tgt_target(int32_t device_id, void *host_ptr, int32_t
1993  // arg_num, void** args_base, void **args, size_t *arg_sizes, int32_t
1994  // *arg_types);
1995  llvm::Type *TypeParams[] = {CGM.Int32Ty,
1996  CGM.VoidPtrTy,
1997  CGM.Int32Ty,
1998  CGM.VoidPtrPtrTy,
1999  CGM.VoidPtrPtrTy,
2000  CGM.SizeTy->getPointerTo(),
2001  CGM.Int32Ty->getPointerTo()};
2002  llvm::FunctionType *FnTy =
2003  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2004  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2005  break;
2006  }
2007  case OMPRTL__tgt_target_teams: {
2008  // Build int32_t __tgt_target_teams(int32_t device_id, void *host_ptr,
2009  // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
2010  // int32_t *arg_types, int32_t num_teams, int32_t thread_limit);
2011  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2012  CGM.VoidPtrTy,
2013  CGM.Int32Ty,
2014  CGM.VoidPtrPtrTy,
2015  CGM.VoidPtrPtrTy,
2016  CGM.SizeTy->getPointerTo(),
2017  CGM.Int32Ty->getPointerTo(),
2018  CGM.Int32Ty,
2019  CGM.Int32Ty};
2020  llvm::FunctionType *FnTy =
2021  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2022  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2023  break;
2024  }
2025  case OMPRTL__tgt_register_lib: {
2026  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2027  QualType ParamTy =
2028  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2029  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2030  llvm::FunctionType *FnTy =
2031  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2032  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2033  break;
2034  }
2035  case OMPRTL__tgt_unregister_lib: {
2036  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2037  QualType ParamTy =
2038  CGM.getContext().getPointerType(getTgtBinaryDescriptorQTy());
2039  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2040  llvm::FunctionType *FnTy =
2041  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2042  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2043  break;
2044  }
2045  case OMPRTL__tgt_target_data_begin: {
2046  // Build void __tgt_target_data_begin(int32_t device_id, int32_t arg_num,
2047  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2048  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2049  CGM.Int32Ty,
2050  CGM.VoidPtrPtrTy,
2051  CGM.VoidPtrPtrTy,
2052  CGM.SizeTy->getPointerTo(),
2053  CGM.Int32Ty->getPointerTo()};
2054  llvm::FunctionType *FnTy =
2055  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2056  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2057  break;
2058  }
2059  case OMPRTL__tgt_target_data_end: {
2060  // Build void __tgt_target_data_end(int32_t device_id, int32_t arg_num,
2061  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2062  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2063  CGM.Int32Ty,
2064  CGM.VoidPtrPtrTy,
2065  CGM.VoidPtrPtrTy,
2066  CGM.SizeTy->getPointerTo(),
2067  CGM.Int32Ty->getPointerTo()};
2068  llvm::FunctionType *FnTy =
2069  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2070  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2071  break;
2072  }
2073  case OMPRTL__tgt_target_data_update: {
2074  // Build void __tgt_target_data_update(int32_t device_id, int32_t arg_num,
2075  // void** args_base, void **args, size_t *arg_sizes, int32_t *arg_types);
2076  llvm::Type *TypeParams[] = {CGM.Int32Ty,
2077  CGM.Int32Ty,
2078  CGM.VoidPtrPtrTy,
2079  CGM.VoidPtrPtrTy,
2080  CGM.SizeTy->getPointerTo(),
2081  CGM.Int32Ty->getPointerTo()};
2082  llvm::FunctionType *FnTy =
2083  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2084  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2085  break;
2086  }
2087  }
2088  assert(RTLFn && "Unable to find OpenMP runtime function");
2089  return RTLFn;
2090 }
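// For illustration, a '#pragma omp parallel' region is lowered with the
// __kmpc_fork_call entry declared above, roughly as:
//
//   void .omp_outlined.(kmp_int32 *gtid, kmp_int32 *btid, /* captures */ ...);
//   __kmpc_fork_call(&loc, /*argc=*/N, (kmpc_micro)&.omp_outlined.,
//                    capture1, ..., captureN);
//
// The outlined-function name and capture list are hypothetical; the actual
// emission of this call sequence is done in emitParallelCall further below.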
2091 
2092 llvm::Constant *CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize,
2093  bool IVSigned) {
2094  assert((IVSize == 32 || IVSize == 64) &&
2095  "IV size is not compatible with the omp runtime");
2096  auto Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2097  : "__kmpc_for_static_init_4u")
2098  : (IVSigned ? "__kmpc_for_static_init_8"
2099  : "__kmpc_for_static_init_8u");
2100  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2101  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2102  llvm::Type *TypeParams[] = {
2103  getIdentTyPointerTy(), // loc
2104  CGM.Int32Ty, // tid
2105  CGM.Int32Ty, // schedtype
2106  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2107  PtrTy, // p_lower
2108  PtrTy, // p_upper
2109  PtrTy, // p_stride
2110  ITy, // incr
2111  ITy // chunk
2112  };
2113  llvm::FunctionType *FnTy =
2114  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2115  return CGM.CreateRuntimeFunction(FnTy, Name);
2116 }
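// For illustration, the name selection above picks one of four runtime entries
// depending on the induction variable:
//
//   IVSize == 32, IVSigned == true   -> "__kmpc_for_static_init_4"
//   IVSize == 32, IVSigned == false  -> "__kmpc_for_static_init_4u"
//   IVSize == 64, IVSigned == true   -> "__kmpc_for_static_init_8"
//   IVSize == 64, IVSigned == false  -> "__kmpc_for_static_init_8u"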
2117 
2118 llvm::Constant *CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize,
2119  bool IVSigned) {
2120  assert((IVSize == 32 || IVSize == 64) &&
2121  "IV size is not compatible with the omp runtime");
2122  auto Name =
2123  IVSize == 32
2124  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2125  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2126  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2127  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2128  CGM.Int32Ty, // tid
2129  CGM.Int32Ty, // schedtype
2130  ITy, // lower
2131  ITy, // upper
2132  ITy, // stride
2133  ITy // chunk
2134  };
2135  llvm::FunctionType *FnTy =
2136  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2137  return CGM.CreateRuntimeFunction(FnTy, Name);
2138 }
2139 
2140 llvm::Constant *CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize,
2141  bool IVSigned) {
2142  assert((IVSize == 32 || IVSize == 64) &&
2143  "IV size is not compatible with the omp runtime");
2144  auto Name =
2145  IVSize == 32
2146  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2147  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2148  llvm::Type *TypeParams[] = {
2149  getIdentTyPointerTy(), // loc
2150  CGM.Int32Ty, // tid
2151  };
2152  llvm::FunctionType *FnTy =
2153  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2154  return CGM.CreateRuntimeFunction(FnTy, Name);
2155 }
2156 
2157 llvm::Constant *CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize,
2158  bool IVSigned) {
2159  assert((IVSize == 32 || IVSize == 64) &&
2160  "IV size is not compatible with the omp runtime");
2161  auto Name =
2162  IVSize == 32
2163  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2164  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2165  auto ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2166  auto PtrTy = llvm::PointerType::getUnqual(ITy);
2167  llvm::Type *TypeParams[] = {
2168  getIdentTyPointerTy(), // loc
2169  CGM.Int32Ty, // tid
2170  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2171  PtrTy, // p_lower
2172  PtrTy, // p_upper
2173  PtrTy // p_stride
2174  };
2175  llvm::FunctionType *FnTy =
2176  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2177  return CGM.CreateRuntimeFunction(FnTy, Name);
2178 }
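// For illustration, the dispatch entries created above are used in a loop of
// the following shape (a sketch of the emitted control flow, not literal IR,
// shown for a 32-bit signed induction variable):
//
//   __kmpc_dispatch_init_4(&loc, tid, schedule, lb, ub, stride, chunk);
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &stride)) {
//     for (i = lb; i <= ub; ++i)
//       body(i);
//   }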
2179 
2180 llvm::Constant *
2181 CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
2182  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2183  !CGM.getContext().getTargetInfo().isTLSSupported());
2184  // Lookup the entry, lazily creating it if necessary.
2185  return getOrCreateInternalVariable(CGM.Int8PtrPtrTy,
2186  Twine(CGM.getMangledName(VD)) + ".cache.");
2187 }
2188 
2189 Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
2190  const VarDecl *VD,
2191  Address VDAddr,
2192  SourceLocation Loc) {
2193  if (CGM.getLangOpts().OpenMPUseTLS &&
2194  CGM.getContext().getTargetInfo().isTLSSupported())
2195  return VDAddr;
2196 
2197  auto VarTy = VDAddr.getElementType();
2198  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2199  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2200  CGM.Int8PtrTy),
2201  CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
2202  getOrCreateThreadPrivateCache(VD)};
2203  return Address(CGF.EmitRuntimeCall(
2204  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2205  VDAddr.getAlignment());
2206 }
2207 
2208 void CGOpenMPRuntime::emitThreadPrivateVarInit(
2209  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2210  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2211  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2212  // library.
2213  auto OMPLoc = emitUpdateLocation(CGF, Loc);
2214  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_global_thread_num),
2215  OMPLoc);
2216  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2217  // to register constructor/destructor for variable.
2218  llvm::Value *Args[] = {OMPLoc,
2219  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2220  CGM.VoidPtrTy),
2221  Ctor, CopyCtor, Dtor};
2222  CGF.EmitRuntimeCall(
2223  createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
2224 }
2225 
2226 llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
2227  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2228  bool PerformInit, CodeGenFunction *CGF) {
2229  if (CGM.getLangOpts().OpenMPUseTLS &&
2230  CGM.getContext().getTargetInfo().isTLSSupported())
2231  return nullptr;
2232 
2233  VD = VD->getDefinition(CGM.getContext());
2234  if (VD && ThreadPrivateWithDefinition.count(VD) == 0) {
2235  ThreadPrivateWithDefinition.insert(VD);
2236  QualType ASTTy = VD->getType();
2237 
2238  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2239  auto Init = VD->getAnyInitializer();
2240  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2241  // Generate function that re-emits the declaration's initializer into the
2242  // threadprivate copy of the variable VD
2243  CodeGenFunction CtorCGF(CGM);
2244  FunctionArgList Args;
2245  ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
2246  ImplicitParamDecl::Other);
2247  Args.push_back(&Dst);
2248 
2249  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2250  CGM.getContext().VoidPtrTy, Args);
2251  auto FTy = CGM.getTypes().GetFunctionType(FI);
2252  auto Fn = CGM.CreateGlobalInitOrDestructFunction(
2253  FTy, ".__kmpc_global_ctor_.", FI, Loc);
2254  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2255  Args, SourceLocation());
2256  auto ArgVal = CtorCGF.EmitLoadOfScalar(
2257  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2258  CGM.getContext().VoidPtrTy, Dst.getLocation());
2259  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2260  Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
2261  CtorCGF.ConvertTypeForMem(ASTTy));
2262  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2263  /*IsInitializer=*/true);
2264  ArgVal = CtorCGF.EmitLoadOfScalar(
2265  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2266  CGM.getContext().VoidPtrTy, Dst.getLocation());
2267  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2268  CtorCGF.FinishFunction();
2269  Ctor = Fn;
2270  }
2271  if (VD->getType().isDestructedType() != QualType::DK_none) {
2272  // Generate function that emits destructor call for the threadprivate copy
2273  // of the variable VD
2274  CodeGenFunction DtorCGF(CGM);
2275  FunctionArgList Args;
2276  ImplicitParamDecl Dst(CGM.getContext(), CGM.getContext().VoidPtrTy,
2277  ImplicitParamDecl::Other);
2278  Args.push_back(&Dst);
2279 
2280  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2281  CGM.getContext().VoidTy, Args);
2282  auto FTy = CGM.getTypes().GetFunctionType(FI);
2283  auto Fn = CGM.CreateGlobalInitOrDestructFunction(
2284  FTy, ".__kmpc_global_dtor_.", FI, Loc);
2285  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2286  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2287  SourceLocation());
2288  // Create a scope with an artificial location for the body of this function.
2289  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2290  auto ArgVal = DtorCGF.EmitLoadOfScalar(
2291  DtorCGF.GetAddrOfLocalVar(&Dst),
2292  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2293  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2294  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2295  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2296  DtorCGF.FinishFunction();
2297  Dtor = Fn;
2298  }
2299  // Do not emit init function if it is not required.
2300  if (!Ctor && !Dtor)
2301  return nullptr;
2302 
2303  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2304  auto CopyCtorTy =
2305  llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2306  /*isVarArg=*/false)->getPointerTo();
2307  // Copying constructor for the threadprivate variable.
2308  // Must be NULL - reserved by the runtime, which currently requires this
2309  // parameter to always be NULL; otherwise it fires an assertion.
2310  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2311  if (Ctor == nullptr) {
2312  auto CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2313  /*isVarArg=*/false)->getPointerTo();
2314  Ctor = llvm::Constant::getNullValue(CtorTy);
2315  }
2316  if (Dtor == nullptr) {
2317  auto DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2318  /*isVarArg=*/false)->getPointerTo();
2319  Dtor = llvm::Constant::getNullValue(DtorTy);
2320  }
2321  if (!CGF) {
2322  auto InitFunctionTy =
2323  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2324  auto InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2325  InitFunctionTy, ".__omp_threadprivate_init_.",
2326  CGM.getTypes().arrangeNullaryFunction());
2327  CodeGenFunction InitCGF(CGM);
2328  FunctionArgList ArgList;
2329  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2330  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2331  Loc);
2332  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2333  InitCGF.FinishFunction();
2334  return InitFunction;
2335  }
2336  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2337  }
2338  return nullptr;
2339 }
2340 
2341 Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
2342  QualType VarType,
2343  StringRef Name) {
2344  llvm::Twine VarName(Name, ".artificial.");
2345  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2346  llvm::Value *GAddr = getOrCreateInternalVariable(VarLVType, VarName);
2347  llvm::Value *Args[] = {
2348  emitUpdateLocation(CGF, SourceLocation()),
2349  getThreadID(CGF, SourceLocation()),
2350  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
2351  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2352  /*IsSigned=*/false),
2353  getOrCreateInternalVariable(CGM.VoidPtrPtrTy, VarName + ".cache.")};
2354  return Address(
2355  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2356  CGF.EmitRuntimeCall(
2357  createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
2358  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2359  CGM.getPointerAlign());
2360 }
2361 
2362 /// \brief Emits code for OpenMP 'if' clause using specified \a CodeGen
2363 /// function. Here is the logic:
2364 /// if (Cond) {
2365 /// ThenGen();
2366 /// } else {
2367 /// ElseGen();
2368 /// }
2369 void CGOpenMPRuntime::emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
2370  const RegionCodeGenTy &ThenGen,
2371  const RegionCodeGenTy &ElseGen) {
2372  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2373 
2374  // If the condition constant folds and can be elided, try to avoid emitting
2375  // the condition and the dead arm of the if/else.
2376  bool CondConstant;
2377  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2378  if (CondConstant)
2379  ThenGen(CGF);
2380  else
2381  ElseGen(CGF);
2382  return;
2383  }
2384 
2385  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2386  // emit the conditional branch.
2387  auto ThenBlock = CGF.createBasicBlock("omp_if.then");
2388  auto ElseBlock = CGF.createBasicBlock("omp_if.else");
2389  auto ContBlock = CGF.createBasicBlock("omp_if.end");
2390  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2391 
2392  // Emit the 'then' code.
2393  CGF.EmitBlock(ThenBlock);
2394  ThenGen(CGF);
2395  CGF.EmitBranch(ContBlock);
2396  // Emit the 'else' code if present.
2397  // There is no need to emit line number for unconditional branch.
2398  (void)ApplyDebugLocation::CreateEmpty(CGF);
2399  CGF.EmitBlock(ElseBlock);
2400  ElseGen(CGF);
2401  // There is no need to emit line number for unconditional branch.
2402  (void)ApplyDebugLocation::CreateEmpty(CGF);
2403  CGF.EmitBranch(ContBlock);
2404  // Emit the continuation block for code after the if.
2405  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2406 }
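// For illustration: for '#pragma omp parallel if(0)' the condition constant-folds
// to false, so only ElseGen (the serialized path) is emitted and no branch is
// generated; for a non-constant condition such as 'if(n > 1)' both arms are
// emitted behind the omp_if.then / omp_if.else blocks created above. The
// directives and condition shown here are hypothetical examples.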
2407 
2408 void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
2409  llvm::Value *OutlinedFn,
2410  ArrayRef<llvm::Value *> CapturedVars,
2411  const Expr *IfCond) {
2412  if (!CGF.HaveInsertPoint())
2413  return;
2414  auto *RTLoc = emitUpdateLocation(CGF, Loc);
2415  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2416  PrePostActionTy &) {
2417  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2418  auto &RT = CGF.CGM.getOpenMPRuntime();
2419  llvm::Value *Args[] = {
2420  RTLoc,
2421  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2422  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2423  llvm::SmallVector<llvm::Value *, 16> RealArgs;
2424  RealArgs.append(std::begin(Args), std::end(Args));
2425  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2426 
2427  auto RTLFn = RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2428  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2429  };
2430  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2431  PrePostActionTy &) {
2432  auto &RT = CGF.CGM.getOpenMPRuntime();
2433  auto ThreadID = RT.getThreadID(CGF, Loc);
2434  // Build calls:
2435  // __kmpc_serialized_parallel(&Loc, GTid);
2436  llvm::Value *Args[] = {RTLoc, ThreadID};
2437  CGF.EmitRuntimeCall(
2438  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2439 
2440  // OutlinedFn(&GTid, &zero, CapturedStruct);
2441  auto ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
2442  Address ZeroAddr =
2443  CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
2444  /*Name*/ ".zero.addr");
2445  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2446  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2447  OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
2448  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2449  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2450  CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
2451 
2452  // __kmpc_end_serialized_parallel(&Loc, GTid);
2453  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2454  CGF.EmitRuntimeCall(
2455  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2456  EndArgs);
2457  };
2458  if (IfCond)
2459  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
2460  else {
2461  RegionCodeGenTy ThenRCG(ThenGen);
2462  ThenRCG(CGF);
2463  }
2464 }
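// For illustration, the two code-generation variants above correspond roughly to
// the following call sequences (a sketch; casts and temporaries are emitted as
// shown in the lambdas above):
//
//   // IfCond absent or true:
//   __kmpc_fork_call(&loc, argc, (kmpc_micro)outlined, captured vars...);
//
//   // IfCond present and false:
//   gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined(&gtid, &zero, captured vars...);
//   __kmpc_end_serialized_parallel(&loc, gtid);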
2465 
2466 // If we're inside an (outlined) parallel region, use the region info's
2467 // thread-ID variable (it is passed as the first argument of the outlined function
2468 // as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel region but in
2469 // regular serial code, get the thread ID by calling kmp_int32
2470 // __kmpc_global_thread_num(ident_t *loc), stash that thread ID in a temporary and
2471 // return the address of the temporary.
2472 Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
2473  SourceLocation Loc) {
2474  if (auto *OMPRegionInfo =
2475  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2476  if (OMPRegionInfo->getThreadIDVariable())
2477  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
2478 
2479  auto ThreadID = getThreadID(CGF, Loc);
2480  auto Int32Ty =
2481  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
2482  auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
2483  CGF.EmitStoreOfScalar(ThreadID,
2484  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
2485 
2486  return ThreadIDTemp;
2487 }
2488 
2489 llvm::Constant *
2490 CGOpenMPRuntime::getOrCreateInternalVariable(llvm::Type *Ty,
2491  const llvm::Twine &Name) {
2492  SmallString<256> Buffer;
2493  llvm::raw_svector_ostream Out(Buffer);
2494  Out << Name;
2495  auto RuntimeName = Out.str();
2496  auto &Elem = *InternalVars.insert(std::make_pair(RuntimeName, nullptr)).first;
2497  if (Elem.second) {
2498  assert(Elem.second->getType()->getPointerElementType() == Ty &&
2499  "OMP internal variable has different type than requested");
2500  return &*Elem.second;
2501  }
2502 
2503  return Elem.second = new llvm::GlobalVariable(
2504  CGM.getModule(), Ty, /*IsConstant*/ false,
2505  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
2506  Elem.first());
2507 }
2508 
2509 llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
2510  llvm::Twine Name(".gomp_critical_user_", CriticalName);
2511  return getOrCreateInternalVariable(KmpCriticalNameTy, Name.concat(".var"));
2512 }
2513 
2514 namespace {
2515 /// Common pre(post)-action for different OpenMP constructs.
2516 class CommonActionTy final : public PrePostActionTy {
2517  llvm::Value *EnterCallee;
2518  ArrayRef<llvm::Value *> EnterArgs;
2519  llvm::Value *ExitCallee;
2520  ArrayRef<llvm::Value *> ExitArgs;
2521  bool Conditional;
2522  llvm::BasicBlock *ContBlock = nullptr;
2523 
2524 public:
2525  CommonActionTy(llvm::Value *EnterCallee, ArrayRef<llvm::Value *> EnterArgs,
2526  llvm::Value *ExitCallee, ArrayRef<llvm::Value *> ExitArgs,
2527  bool Conditional = false)
2528  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
2529  ExitArgs(ExitArgs), Conditional(Conditional) {}
2530  void Enter(CodeGenFunction &CGF) override {
2531  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
2532  if (Conditional) {
2533  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
2534  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
2535  ContBlock = CGF.createBasicBlock("omp_if.end");
2536  // Generate the branch (If-stmt)
2537  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
2538  CGF.EmitBlock(ThenBlock);
2539  }
2540  }
2541  void Done(CodeGenFunction &CGF) {
2542  // Emit the rest of blocks/branches
2543  CGF.EmitBranch(ContBlock);
2544  CGF.EmitBlock(ContBlock, true);
2545  }
2546  void Exit(CodeGenFunction &CGF) override {
2547  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
2548  }
2549 };
2550 } // anonymous namespace
2551 
2552 void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
2553  StringRef CriticalName,
2554  const RegionCodeGenTy &CriticalOpGen,
2555  SourceLocation Loc, const Expr *Hint) {
2556  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
2557  // CriticalOpGen();
2558  // __kmpc_end_critical(ident_t *, gtid, Lock);
2559  // Prepare arguments and build a call to __kmpc_critical
2560  if (!CGF.HaveInsertPoint())
2561  return;
2562  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2563  getCriticalRegionLock(CriticalName)};
2564  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
2565  std::end(Args));
2566  if (Hint) {
2567  EnterArgs.push_back(CGF.Builder.CreateIntCast(
2568  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
2569  }
2570  CommonActionTy Action(
2571  createRuntimeFunction(Hint ? OMPRTL__kmpc_critical_with_hint
2572  : OMPRTL__kmpc_critical),
2573  EnterArgs, createRuntimeFunction(OMPRTL__kmpc_end_critical), Args);
2574  CriticalOpGen.setAction(Action);
2575  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
2576 }
2577 
2578 void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
2579  const RegionCodeGenTy &MasterOpGen,
2580  SourceLocation Loc) {
2581  if (!CGF.HaveInsertPoint())
2582  return;
2583  // if(__kmpc_master(ident_t *, gtid)) {
2584  // MasterOpGen();
2585  // __kmpc_end_master(ident_t *, gtid);
2586  // }
2587  // Prepare arguments and build a call to __kmpc_master
2588  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2589  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
2590  createRuntimeFunction(OMPRTL__kmpc_end_master), Args,
2591  /*Conditional=*/true);
2592  MasterOpGen.setAction(Action);
2593  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
2594  Action.Done(CGF);
2595 }
2596 
2597 void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
2598  SourceLocation Loc) {
2599  if (!CGF.HaveInsertPoint())
2600  return;
2601  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
2602  llvm::Value *Args[] = {
2603  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2604  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
2605  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskyield), Args);
2606  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
2607  Region->emitUntiedSwitch(CGF);
2608 }
2609 
2610 void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
2611  const RegionCodeGenTy &TaskgroupOpGen,
2612  SourceLocation Loc) {
2613  if (!CGF.HaveInsertPoint())
2614  return;
2615  // __kmpc_taskgroup(ident_t *, gtid);
2616  // TaskgroupOpGen();
2617  // __kmpc_end_taskgroup(ident_t *, gtid);
2618  // Prepare arguments and build a call to __kmpc_taskgroup
2619  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2620  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
2621  createRuntimeFunction(OMPRTL__kmpc_end_taskgroup),
2622  Args);
2623  TaskgroupOpGen.setAction(Action);
2624  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
2625 }
2626 
2627 /// Given an array of pointers to variables, project the address of a
2628 /// given variable.
2629 static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2630  unsigned Index, const VarDecl *Var) {
2631  // Pull out the pointer to the variable.
2632  Address PtrAddr =
2633  CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize());
2634  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
2635 
2636  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
2637  Addr = CGF.Builder.CreateElementBitCast(
2638  Addr, CGF.ConvertTypeForMem(Var->getType()));
2639  return Addr;
2640 }
2641 
2642 static llvm::Value *emitCopyprivateCopyFunction(
2643  CodeGenModule &CGM, llvm::Type *ArgsType,
2644  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
2645  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps) {
2646  auto &C = CGM.getContext();
2647  // void copy_func(void *LHSArg, void *RHSArg);
2648  FunctionArgList Args;
2649  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2650  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
2651  Args.push_back(&LHSArg);
2652  Args.push_back(&RHSArg);
2653  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
2654  auto *Fn = llvm::Function::Create(
2656  ".omp.copyprivate.copy_func", &CGM.getModule());
2657  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
2658  CodeGenFunction CGF(CGM);
2659  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
2660  // Dest = (void*[n])(LHSArg);
2661  // Src = (void*[n])(RHSArg);
2662  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2663  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
2664  ArgsType), CGF.getPointerAlign());
2665  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2666  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
2667  ArgsType), CGF.getPointerAlign());
2668  // *(Type0*)Dst[0] = *(Type0*)Src[0];
2669  // *(Type1*)Dst[1] = *(Type1*)Src[1];
2670  // ...
2671  // *(Typen*)Dst[n] = *(Typen*)Src[n];
2672  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
2673  auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
2674  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
2675 
2676  auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
2677  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
2678 
2679  auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
2680  QualType Type = VD->getType();
2681  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
2682  }
2683  CGF.FinishFunction();
2684  return Fn;
2685 }
2686 
2687 void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
2688  const RegionCodeGenTy &SingleOpGen,
2689  SourceLocation Loc,
2690  ArrayRef<const Expr *> CopyprivateVars,
2691  ArrayRef<const Expr *> SrcExprs,
2692  ArrayRef<const Expr *> DstExprs,
2693  ArrayRef<const Expr *> AssignmentOps) {
2694  if (!CGF.HaveInsertPoint())
2695  return;
2696  assert(CopyprivateVars.size() == SrcExprs.size() &&
2697  CopyprivateVars.size() == DstExprs.size() &&
2698  CopyprivateVars.size() == AssignmentOps.size());
2699  auto &C = CGM.getContext();
2700  // int32 did_it = 0;
2701  // if(__kmpc_single(ident_t *, gtid)) {
2702  // SingleOpGen();
2703  // __kmpc_end_single(ident_t *, gtid);
2704  // did_it = 1;
2705  // }
2706  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2707  // <copy_func>, did_it);
2708 
2709  Address DidIt = Address::invalid();
2710  if (!CopyprivateVars.empty()) {
2711  // int32 did_it = 0;
2712  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
2713  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
2714  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
2715  }
2716  // Prepare arguments and build a call to __kmpc_single
2717  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2718  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
2719  createRuntimeFunction(OMPRTL__kmpc_end_single), Args,
2720  /*Conditional=*/true);
2721  SingleOpGen.setAction(Action);
2722  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
2723  if (DidIt.isValid()) {
2724  // did_it = 1;
2725  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
2726  }
2727  Action.Done(CGF);
2728  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
2729  // <copy_func>, did_it);
2730  if (DidIt.isValid()) {
2731  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
2732  auto CopyprivateArrayTy =
2733  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
2734  /*IndexTypeQuals=*/0);
2735  // Create a list of all private variables for copyprivate.
2736  Address CopyprivateList =
2737  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
2738  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
2739  Address Elem = CGF.Builder.CreateConstArrayGEP(
2740  CopyprivateList, I, CGF.getPointerSize());
2741  CGF.Builder.CreateStore(
2742  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
2743  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
2744  Elem);
2745  }
2746  // Build function that copies private values from single region to all other
2747  // threads in the corresponding parallel region.
2748  auto *CpyFn = emitCopyprivateCopyFunction(
2749  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
2750  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
2751  auto *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
2752  Address CL =
2753  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
2754  CGF.VoidPtrTy);
2755  auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
2756  llvm::Value *Args[] = {
2757  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
2758  getThreadID(CGF, Loc), // i32 <gtid>
2759  BufSize, // size_t <buf_size>
2760  CL.getPointer(), // void *<copyprivate list>
2761  CpyFn, // void (*) (void *, void *) <copy_func>
2762  DidItVal // i32 did_it
2763  };
2764  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_copyprivate), Args);
2765  }
2766 }
2767 
2768 void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2769  const RegionCodeGenTy &OrderedOpGen,
2770  SourceLocation Loc, bool IsThreads) {
2771  if (!CGF.HaveInsertPoint())
2772  return;
2773  // __kmpc_ordered(ident_t *, gtid);
2774  // OrderedOpGen();
2775  // __kmpc_end_ordered(ident_t *, gtid);
2776  // Prepare arguments and build a call to __kmpc_ordered
2777  if (IsThreads) {
2778  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2779  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
2780  createRuntimeFunction(OMPRTL__kmpc_end_ordered),
2781  Args);
2782  OrderedOpGen.setAction(Action);
2783  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2784  return;
2785  }
2786  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
2787 }
2788 
2789 void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
2790  OpenMPDirectiveKind Kind, bool EmitChecks,
2791  bool ForceSimpleCall) {
2792  if (!CGF.HaveInsertPoint())
2793  return;
2794  // Build call __kmpc_cancel_barrier(loc, thread_id);
2795  // Build call __kmpc_barrier(loc, thread_id);
2796  unsigned Flags;
2797  if (Kind == OMPD_for)
2798  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2799  else if (Kind == OMPD_sections)
2800  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2801  else if (Kind == OMPD_single)
2802  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2803  else if (Kind == OMPD_barrier)
2804  Flags = OMP_IDENT_BARRIER_EXPL;
2805  else
2806  Flags = OMP_IDENT_BARRIER_IMPL;
2807  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
2808  // thread_id);
2809  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
2810  getThreadID(CGF, Loc)};
2811  if (auto *OMPRegionInfo =
2812  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
2813  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
2814  auto *Result = CGF.EmitRuntimeCall(
2815  createRuntimeFunction(OMPRTL__kmpc_cancel_barrier), Args);
2816  if (EmitChecks) {
2817  // if (__kmpc_cancel_barrier()) {
2818  // exit from construct;
2819  // }
2820  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
2821  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
2822  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
2823  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
2824  CGF.EmitBlock(ExitBB);
2825  // exit from construct;
2826  auto CancelDestination =
2827  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
2828  CGF.EmitBranchThroughCleanup(CancelDestination);
2829  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
2830  }
2831  return;
2832  }
2833  }
2834  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_barrier), Args);
2835 }
2836 
2837 /// \brief Map the OpenMP loop schedule to the runtime enumeration.
2838 static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2839  bool Chunked, bool Ordered) {
2840  switch (ScheduleKind) {
2841  case OMPC_SCHEDULE_static:
2842  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2843  : (Ordered ? OMP_ord_static : OMP_sch_static);
2844  case OMPC_SCHEDULE_dynamic:
2845  return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2846  case OMPC_SCHEDULE_guided:
2847  return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2848  case OMPC_SCHEDULE_runtime:
2849  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2850  case OMPC_SCHEDULE_auto:
2851  return Ordered ? OMP_ord_auto : OMP_sch_auto;
2852  case OMPC_SCHEDULE_unknown:
2853  assert(!Chunked && "chunk was specified but schedule kind not known");
2854  return Ordered ? OMP_ord_static : OMP_sch_static;
2855  }
2856  llvm_unreachable("Unexpected runtime schedule");
2857 }
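// For illustration, some mappings produced by the switch above (the clauses are
// hypothetical examples):
//
//   schedule(static)            -> OMP_sch_static
//   schedule(static, 4)         -> OMP_sch_static_chunked
//   schedule(dynamic) ordered   -> OMP_ord_dynamic_chunked
//   schedule(guided, 8)         -> OMP_sch_guided_chunked
//   schedule(runtime)           -> OMP_sch_runtime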
2858 
2859 /// \brief Map the OpenMP distribute schedule to the runtime enumeration.
2860 static OpenMPSchedType
2861  getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2862  // only static is allowed for dist_schedule
2863  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2864 }
2865 
2866 bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2867  bool Chunked) const {
2868  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2869  return Schedule == OMP_sch_static;
2870 }
2871 
2872 bool CGOpenMPRuntime::isStaticNonchunked(
2873  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2874  auto Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2875  return Schedule == OMP_dist_sch_static;
2876 }
2877 
2878 
2879 bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2880  auto Schedule =
2881  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2882  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2883  return Schedule != OMP_sch_static;
2884 }
2885 
2886 static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
2887  OpenMPScheduleClauseModifier M1,
2888  OpenMPScheduleClauseModifier M2) {
2889  int Modifier = 0;
2890  switch (M1) {
2891  case OMPC_SCHEDULE_MODIFIER_monotonic:
2892  Modifier = OMP_sch_modifier_monotonic;
2893  break;
2894  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2895  Modifier = OMP_sch_modifier_nonmonotonic;
2896  break;
2897  case OMPC_SCHEDULE_MODIFIER_simd:
2898  if (Schedule == OMP_sch_static_chunked)
2899  Schedule = OMP_sch_static_balanced_chunked;
2900  break;
2901  case OMPC_SCHEDULE_MODIFIER_last:
2902  case OMPC_SCHEDULE_MODIFIER_unknown:
2903  break;
2904  }
2905  switch (M2) {
2906  case OMPC_SCHEDULE_MODIFIER_monotonic:
2907  Modifier = OMP_sch_modifier_monotonic;
2908  break;
2909  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2910  Modifier = OMP_sch_modifier_nonmonotonic;
2911  break;
2912  case OMPC_SCHEDULE_MODIFIER_simd:
2913  if (Schedule == OMP_sch_static_chunked)
2914  Schedule = OMP_sch_static_balanced_chunked;
2915  break;
2916  case OMPC_SCHEDULE_MODIFIER_last:
2917  case OMPC_SCHEDULE_MODIFIER_unknown:
2918  break;
2919  }
2920  return Schedule | Modifier;
2921 }
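// For illustration: for a hypothetical '#pragma omp for schedule(nonmonotonic: dynamic)'
// the function above returns
//
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
//
// i.e. the schedule value with the modifier bit OR-ed in, which is the schedtype
// argument passed to the __kmpc_dispatch_init_* and __kmpc_for_static_init_*
// entries below.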
2922 
2923 void CGOpenMPRuntime::emitForDispatchInit(
2924  CodeGenFunction &CGF, SourceLocation Loc,
2925  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
2926  bool Ordered, const DispatchRTInput &DispatchValues) {
2927  if (!CGF.HaveInsertPoint())
2928  return;
2929  OpenMPSchedType Schedule = getRuntimeSchedule(
2930  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
2931  assert(Ordered ||
2932  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
2933  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
2934  Schedule != OMP_sch_static_balanced_chunked));
2935  // Call __kmpc_dispatch_init(
2936  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
2937  // kmp_int[32|64] lower, kmp_int[32|64] upper,
2938  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
2939 
2940  // If the Chunk was not specified in the clause, use default value 1.
2941  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
2942  : CGF.Builder.getIntN(IVSize, 1);
2943  llvm::Value *Args[] = {
2944  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2945  CGF.Builder.getInt32(addMonoNonMonoModifier(
2946  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
2947  DispatchValues.LB, // Lower
2948  DispatchValues.UB, // Upper
2949  CGF.Builder.getIntN(IVSize, 1), // Stride
2950  Chunk // Chunk
2951  };
2952  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
2953 }
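
// Illustrative example (a sketch, not the exact emitted IR): for
//   #pragma omp for schedule(dynamic, 4)
// with a 32-bit signed induction variable and bounds LB/UB, the call built
// above is conceptually
//   __kmpc_dispatch_init_4(&loc, gtid, OMP_sch_dynamic_chunked,
//                          LB, UB, /*stride=*/1, /*chunk=*/4);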
2954 
2955 static void emitForStaticInitCall(
2956  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2957  llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule,
2958  OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2959  unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB,
2960  Address ST, llvm::Value *Chunk) {
2961  if (!CGF.HaveInsertPoint())
2962  return;
2963 
2964  assert(!Ordered);
2965  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2966  Schedule == OMP_sch_static_balanced_chunked ||
2967  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2968  Schedule == OMP_dist_sch_static ||
2969  Schedule == OMP_dist_sch_static_chunked);
2970 
2971  // Call __kmpc_for_static_init(
2972  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2973  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2974  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2975  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2976  if (Chunk == nullptr) {
2977  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2978  Schedule == OMP_dist_sch_static) &&
2979  "expected static non-chunked schedule");
2980  // If the Chunk was not specified in the clause, use default value 1.
2981  Chunk = CGF.Builder.getIntN(IVSize, 1);
2982  } else {
2983  assert((Schedule == OMP_sch_static_chunked ||
2984  Schedule == OMP_sch_static_balanced_chunked ||
2985  Schedule == OMP_ord_static_chunked ||
2986  Schedule == OMP_dist_sch_static_chunked) &&
2987  "expected static chunked schedule");
2988  }
2989  llvm::Value *Args[] = {
2990  UpdateLocation, ThreadId, CGF.Builder.getInt32(addMonoNonMonoModifier(
2991  Schedule, M1, M2)), // Schedule type
2992  IL.getPointer(), // &isLastIter
2993  LB.getPointer(), // &LB
2994  UB.getPointer(), // &UB
2995  ST.getPointer(), // &Stride
2996  CGF.Builder.getIntN(IVSize, 1), // Incr
2997  Chunk // Chunk
2998  };
2999  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3000 }
3001 
3002 void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
3003  SourceLocation Loc,
3004  const OpenMPScheduleTy &ScheduleKind,
3005  unsigned IVSize, bool IVSigned,
3006  bool Ordered, Address IL, Address LB,
3007  Address UB, Address ST,
3008  llvm::Value *Chunk) {
3009  OpenMPSchedType ScheduleNum =
3010  getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
3011  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
3012  auto *ThreadId = getThreadID(CGF, Loc);
3013  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
3014  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3015  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, IVSize,
3016  Ordered, IL, LB, UB, ST, Chunk);
3017 }
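
// Illustrative example (a sketch, not the exact emitted IR): for
//   #pragma omp for schedule(static, 4)
// with a 32-bit signed induction variable, the helper above produces,
// conceptually,
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static_chunked,
//                            &isLast, &LB, &UB, &Stride,
//                            /*incr=*/1, /*chunk=*/4);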
3018 
3019 void CGOpenMPRuntime::emitDistributeStaticInit(
3020  CodeGenFunction &CGF, SourceLocation Loc,
3021  OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned,
3022  bool Ordered, Address IL, Address LB, Address UB, Address ST,
3023  llvm::Value *Chunk) {
3024  OpenMPSchedType ScheduleNum = getRuntimeSchedule(SchedKind, Chunk != nullptr);
3025  auto *UpdatedLocation = emitUpdateLocation(CGF, Loc);
3026  auto *ThreadId = getThreadID(CGF, Loc);
3027  auto *StaticInitFunction = createForStaticInitFunction(IVSize, IVSigned);
3028  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3029  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3030  OMPC_SCHEDULE_MODIFIER_unknown, IVSize, Ordered, IL, LB,
3031  UB, ST, Chunk);
3032 }
3033 
3034 void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
3035  SourceLocation Loc) {
3036  if (!CGF.HaveInsertPoint())
3037  return;
3038  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3039  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3040  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_for_static_fini),
3041  Args);
3042 }
3043 
3044 void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
3045  SourceLocation Loc,
3046  unsigned IVSize,
3047  bool IVSigned) {
3048  if (!CGF.HaveInsertPoint())
3049  return;
3050  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3051  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3052  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3053 }
3054 
3055 llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
3056  SourceLocation Loc, unsigned IVSize,
3057  bool IVSigned, Address IL,
3058  Address LB, Address UB,
3059  Address ST) {
3060  // Call __kmpc_dispatch_next(
3061  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3062  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3063  // kmp_int[32|64] *p_stride);
3064  llvm::Value *Args[] = {
3065  emitUpdateLocation(CGF, Loc),
3066  getThreadID(CGF, Loc),
3067  IL.getPointer(), // &isLastIter
3068  LB.getPointer(), // &Lower
3069  UB.getPointer(), // &Upper
3070  ST.getPointer() // &Stride
3071  };
3072  llvm::Value *Call =
3073  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3074  return CGF.EmitScalarConversion(
3075  Call, CGF.getContext().getIntTypeForBitwidth(32, /* Signed */ true),
3076  CGF.getContext().BoolTy, Loc);
3077 }
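
// Illustrative usage (a sketch): dynamically scheduled loops are driven by a
// dispatch loop of the form
//   while (__kmpc_dispatch_next_4(&loc, gtid, &isLast, &LB, &UB, &Stride)) {
//     // execute iterations LB..UB
//   }
// emitForNext returns that call's result converted to a boolean value.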
3078 
3079 void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
3080  llvm::Value *NumThreads,
3081  SourceLocation Loc) {
3082  if (!CGF.HaveInsertPoint())
3083  return;
3084  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3085  llvm::Value *Args[] = {
3086  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3087  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3088  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_threads),
3089  Args);
3090 }
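
// Illustrative example (a sketch): '#pragma omp parallel num_threads(n)'
// results in, conceptually,
//   __kmpc_push_num_threads(&loc, gtid, (kmp_int32)n);
// emitted just before the runtime call that starts the parallel region.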
3091 
3092 void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
3093  OpenMPProcBindClauseKind ProcBind,
3094  SourceLocation Loc) {
3095  if (!CGF.HaveInsertPoint())
3096  return;
3097  // Constants for proc bind value accepted by the runtime.
3098  enum ProcBindTy {
3099  ProcBindFalse = 0,
3100  ProcBindTrue,
3101  ProcBindMaster,
3102  ProcBindClose,
3103  ProcBindSpread,
3104  ProcBindIntel,
3105  ProcBindDefault
3106  } RuntimeProcBind;
3107  switch (ProcBind) {
3108  case OMPC_PROC_BIND_master:
3109  RuntimeProcBind = ProcBindMaster;
3110  break;
3111  case OMPC_PROC_BIND_close:
3112  RuntimeProcBind = ProcBindClose;
3113  break;
3114  case OMPC_PROC_BIND_spread:
3115  RuntimeProcBind = ProcBindSpread;
3116  break;
3117  case OMPC_PROC_BIND_unknown:
3118  llvm_unreachable("Unsupported proc_bind value.");
3119  }
3120  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3121  llvm::Value *Args[] = {
3122  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3123  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3124  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_proc_bind), Args);
3125 }
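
// Illustrative example: 'proc_bind(spread)' maps to ProcBindSpread (4) in the
// enumeration above, so the emitted call is conceptually
//   __kmpc_push_proc_bind(&loc, gtid, /*proc_bind=*/4);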
3126 
3127 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3128  SourceLocation Loc) {
3129  if (!CGF.HaveInsertPoint())
3130  return;
3131  // Build call void __kmpc_flush(ident_t *loc)
3132  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_flush),
3133  emitUpdateLocation(CGF, Loc));
3134 }
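
// Illustrative example: '#pragma omp flush(a, b)' lowers to a single call,
// conceptually __kmpc_flush(&loc); the variable list is not passed to the
// runtime (note the unnamed ArrayRef<const Expr *> parameter above).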
3135 
3136 namespace {
3137 /// \brief Indexes of fields for type kmp_task_t.
3138 enum KmpTaskTFields {
3139  /// \brief List of shared variables.
3140  KmpTaskTShareds,
3141  /// \brief Task routine.
3142  KmpTaskTRoutine,
3143  /// \brief Partition id for the untied tasks.
3144  KmpTaskTPartId,
3145  /// Function with call of destructors for private variables.
3146  Data1,
3147  /// Task priority.
3148  Data2,
3149  /// (Taskloops only) Lower bound.
3150  KmpTaskTLowerBound,
3151  /// (Taskloops only) Upper bound.
3152  KmpTaskTUpperBound,
3153  /// (Taskloops only) Stride.
3154  KmpTaskTStride,
3155  /// (Taskloops only) Is last iteration flag.
3156  KmpTaskTLastIter,
3157  /// (Taskloops only) Reduction data.
3158  KmpTaskTReductions,
3159 };
3160 } // anonymous namespace
3161 
3162 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3163  // FIXME: Add other entry types when they become supported.
3164  return OffloadEntriesTargetRegion.empty();
3165 }
3166 
3167 /// \brief Initialize target region entry.
3168 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3169  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3170  StringRef ParentName, unsigned LineNum,
3171  unsigned Order) {
3172  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3173  "only required for the device "
3174  "code generation.");
3175  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3176  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3177  /*Flags=*/0);
3178  ++OffloadingEntriesNum;
3179 }
3180 
3181 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3182  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3183  StringRef ParentName, unsigned LineNum,
3184  llvm::Constant *Addr, llvm::Constant *ID,
3185  int32_t Flags) {
3186  // If we are emitting code for a target, the entry is already initialized;
3187  // it only has to be registered.
3188  if (CGM.getLangOpts().OpenMPIsDevice) {
3189  assert(hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum) &&
3190  "Entry must exist.");
3191  auto &Entry =
3192  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3193  assert(Entry.isValid() && "Entry not initialized!");
3194  Entry.setAddress(Addr);
3195  Entry.setID(ID);
3196  Entry.setFlags(Flags);
3197  return;
3198  } else {
3199  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum++, Addr, ID, Flags);
3200  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3201  }
3202 }
3203 
3204 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3205  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3206  unsigned LineNum) const {
3207  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3208  if (PerDevice == OffloadEntriesTargetRegion.end())
3209  return false;
3210  auto PerFile = PerDevice->second.find(FileID);
3211  if (PerFile == PerDevice->second.end())
3212  return false;
3213  auto PerParentName = PerFile->second.find(ParentName);
3214  if (PerParentName == PerFile->second.end())
3215  return false;
3216  auto PerLine = PerParentName->second.find(LineNum);
3217  if (PerLine == PerParentName->second.end())
3218  return false;
3219  // Fail if this entry is already registered.
3220  if (PerLine->second.getAddress() || PerLine->second.getID())
3221  return false;
3222  return true;
3223 }
3224 
3225 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3226  const OffloadTargetRegionEntryInfoActTy &Action) {
3227  // Scan all target region entries and perform the provided action.
3228  for (auto &D : OffloadEntriesTargetRegion)
3229  for (auto &F : D.second)
3230  for (auto &P : F.second)
3231  for (auto &L : P.second)
3232  Action(D.first, F.first, P.first(), L.first, L.second);
3233 }
3234 
3235 /// \brief Create a Ctor/Dtor-like function whose body is emitted through
3236 /// \a Codegen. This is used to emit the two functions that register and
3237 /// unregister the descriptor of the current compilation unit.
3238 static llvm::Function *
3239 createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name,
3240  const RegionCodeGenTy &Codegen) {
3241  auto &C = CGM.getContext();
3242  FunctionArgList Args;
3243  ImplicitParamDecl DummyPtr(C, C.VoidPtrTy, ImplicitParamDecl::Other);
3244  Args.push_back(&DummyPtr);
3245 
3246  CodeGenFunction CGF(CGM);
3247  auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3248  auto FTy = CGM.getTypes().GetFunctionType(FI);
3249  auto *Fn =
3250  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, SourceLocation());
3251  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FI, Args, SourceLocation());
3252  Codegen(CGF);
3253  CGF.FinishFunction();
3254  return Fn;
3255 }
3256 
3257 llvm::Function *
3258 CGOpenMPRuntime::createOffloadingBinaryDescriptorRegistration() {
3259 
3260  // If we don't have entries or if we are emitting code for the device, we
3261  // don't need to do anything.
3262  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3263  return nullptr;
3264 
3265  auto &M = CGM.getModule();
3266  auto &C = CGM.getContext();
3267 
3268  // Get list of devices we care about
3269  auto &Devices = CGM.getLangOpts().OMPTargetTriples;
3270 
3271  // We should be creating an offloading descriptor only if there are devices
3272  // specified.
3273  assert(!Devices.empty() && "No OpenMP offloading devices??");
3274 
3275  // Create the external variables that will point to the begin and end of the
3276  // host entries section. These will be defined by the linker.
3277  auto *OffloadEntryTy =
3278  CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy());
3279  llvm::GlobalVariable *HostEntriesBegin = new llvm::GlobalVariable(
3280  M, OffloadEntryTy, /*isConstant=*/true,
3281  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3282  ".omp_offloading.entries_begin");
3283  llvm::GlobalVariable *HostEntriesEnd = new llvm::GlobalVariable(
3284  M, OffloadEntryTy, /*isConstant=*/true,
3285  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3286  ".omp_offloading.entries_end");
3287 
3288  // Create all device images
3289  auto *DeviceImageTy = cast<llvm::StructType>(
3290  CGM.getTypes().ConvertTypeForMem(getTgtDeviceImageQTy()));
3291  ConstantInitBuilder DeviceImagesBuilder(CGM);
3292  auto DeviceImagesEntries = DeviceImagesBuilder.beginArray(DeviceImageTy);
3293 
3294  for (unsigned i = 0; i < Devices.size(); ++i) {
3295  StringRef T = Devices[i].getTriple();
3296  auto *ImgBegin = new llvm::GlobalVariable(
3297  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3298  /*Initializer=*/nullptr,
3299  Twine(".omp_offloading.img_start.") + Twine(T));
3300  auto *ImgEnd = new llvm::GlobalVariable(
3301  M, CGM.Int8Ty, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
3302  /*Initializer=*/nullptr, Twine(".omp_offloading.img_end.") + Twine(T));
3303 
3304  auto Dev = DeviceImagesEntries.beginStruct(DeviceImageTy);
3305  Dev.add(ImgBegin);
3306  Dev.add(ImgEnd);
3307  Dev.add(HostEntriesBegin);
3308  Dev.add(HostEntriesEnd);
3309  Dev.finishAndAddTo(DeviceImagesEntries);
3310  }
3311 
3312  // Create device images global array.
3313  llvm::GlobalVariable *DeviceImages =
3314  DeviceImagesEntries.finishAndCreateGlobal(".omp_offloading.device_images",
3315  CGM.getPointerAlign(),
3316  /*isConstant=*/true);
3317  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3318 
3319  // This is a zero array to be used in the creation of the constant expressions.
3320  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3321  llvm::Constant::getNullValue(CGM.Int32Ty)};
3322 
3323  // Create the target region descriptor.
3324  auto *BinaryDescriptorTy = cast<llvm::StructType>(
3325  CGM.getTypes().ConvertTypeForMem(getTgtBinaryDescriptorQTy()));
3326  ConstantInitBuilder DescBuilder(CGM);
3327  auto DescInit = DescBuilder.beginStruct(BinaryDescriptorTy);
3328  DescInit.addInt(CGM.Int32Ty, Devices.size());
3329  DescInit.add(llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3330  DeviceImages,
3331  Index));
3332  DescInit.add(HostEntriesBegin);
3333  DescInit.add(HostEntriesEnd);
3334 
3335  auto *Desc = DescInit.finishAndCreateGlobal(".omp_offloading.descriptor",
3336  CGM.getPointerAlign(),
3337  /*isConstant=*/true);
3338 
3339  // Emit code to register or unregister the descriptor at execution
3340  // startup or closing, respectively.
3341 
3342  // Create a variable to drive the registration and unregistration of the
3343  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
3344  auto *IdentInfo = &C.Idents.get(".omp_offloading.reg_unreg_var");
3345  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(), SourceLocation(),
3346  IdentInfo, C.CharTy, ImplicitParamDecl::Other);
3347 
3348  auto *UnRegFn = createOffloadingBinaryDescriptorFunction(
3349  CGM, ".omp_offloading.descriptor_unreg",
3350  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3351  CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_unregister_lib),
3352  Desc);
3353  });
3354  auto *RegFn = createOffloadingBinaryDescriptorFunction(
3355  CGM, ".omp_offloading.descriptor_reg",
3356  [&](CodeGenFunction &CGF, PrePostActionTy &) {
3357  CGF.EmitCallOrInvoke(createRuntimeFunction(OMPRTL__tgt_register_lib),
3358  Desc);
3359  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
3360  });
3361  if (CGM.supportsCOMDAT()) {
3362  // It is sufficient to call registration function only once, so create a
3363  // COMDAT group for registration/unregistration functions and associated
3364  // data. That would reduce startup time and code size. Registration
3365  // function serves as a COMDAT group key.
3366  auto ComdatKey = M.getOrInsertComdat(RegFn->getName());
3367  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
3368  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
3369  RegFn->setComdat(ComdatKey);
3370  UnRegFn->setComdat(ComdatKey);
3371  DeviceImages->setComdat(ComdatKey);
3372  Desc->setComdat(ComdatKey);
3373  }
3374  return RegFn;
3375 }
3376 
3377 void CGOpenMPRuntime::createOffloadEntry(llvm::Constant *ID,
3378  llvm::Constant *Addr, uint64_t Size,
3379  int32_t Flags) {
3380  StringRef Name = Addr->getName();
3381  auto *TgtOffloadEntryType = cast<llvm::StructType>(
3382  CGM.getTypes().ConvertTypeForMem(getTgtOffloadEntryQTy()));
3383  llvm::LLVMContext &C = CGM.getModule().getContext();
3384  llvm::Module &M = CGM.getModule();
3385 
3386  // Make sure the address has the right type.
3387  llvm::Constant *AddrPtr = llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy);
3388 
3389  // Create constant string with the name.
3390  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
3391 
3392  llvm::GlobalVariable *Str =
3393  new llvm::GlobalVariable(M, StrPtrInit->getType(), /*isConstant=*/true,
3395  ".omp_offloading.entry_name");
3396  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3397  llvm::Constant *StrPtr = llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy);
3398 
3399  // We can't have any padding between symbols, so we need to have 1-byte
3400  // alignment.
3401  auto Align = CharUnits::fromQuantity(1);
3402 
3403  // Create the entry struct.
3404  ConstantInitBuilder EntryBuilder(CGM);
3405  auto EntryInit = EntryBuilder.beginStruct(TgtOffloadEntryType);
3406  EntryInit.add(AddrPtr);
3407  EntryInit.add(StrPtr);
3408  EntryInit.addInt(CGM.SizeTy, Size);
3409  EntryInit.addInt(CGM.Int32Ty, Flags);
3410  EntryInit.addInt(CGM.Int32Ty, 0);
3411  llvm::GlobalVariable *Entry =
3412  EntryInit.finishAndCreateGlobal(".omp_offloading.entry",
3413  Align,
3414  /*constant*/ true,
3416 
3417  // The entry has to be created in the section the linker expects it to be.
3418  Entry->setSection(".omp_offloading.entries");
3419 }
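
// Illustrative result (a sketch; details such as linkage and the exact struct
// name may differ): for a hypothetical entry named "foo" on a 64-bit target,
// the code above emits roughly
//   @.omp_offloading.entry_name = internal unnamed_addr constant [4 x i8] c"foo\00"
//   @.omp_offloading.entry = constant %struct.__tgt_offload_entry {
//       i8* <addr>, i8* @.omp_offloading.entry_name, i64 0, i32 0, i32 0 },
//     section ".omp_offloading.entries", align 1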
3420 
3421 void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
3422  // Emit the offloading entries and metadata so that the device codegen side
3423  // can easily figure out what to emit. The produced metadata looks like
3424  // this:
3425  //
3426  // !omp_offload.info = !{!1, ...}
3427  //
3428  // Right now we only generate metadata for functions that contain target
3429  // regions.
3430 
3431  // If we do not have entries, we don't need to do anything.
3432  if (OffloadEntriesInfoManager.empty())
3433  return;
3434 
3435  llvm::Module &M = CGM.getModule();
3436  llvm::LLVMContext &C = M.getContext();
3438  OrderedEntries(OffloadEntriesInfoManager.size());
3439 
3440  // Create the offloading info metadata node.
3441  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
3442 
3443  // Auxiliary methods to create metadata values and strings.
3444  auto getMDInt = [&](unsigned v) {
3445  return llvm::ConstantAsMetadata::get(
3446  llvm::ConstantInt::get(llvm::Type::getInt32Ty(C), v));
3447  };
3448 
3449  auto getMDString = [&](StringRef v) { return llvm::MDString::get(C, v); };
3450 
3451  // Create a function that emits metadata for each target region entry.
3452  auto &&TargetRegionMetadataEmitter = [&](
3453  unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned Line,
3456  // Generate metadata for target regions. Each entry of this metadata
3457  // contains:
3458  // - Entry 0 -> Kind of this type of metadata (0).
3459  // - Entry 1 -> Device ID of the file where the entry was identified.
3460  // - Entry 2 -> File ID of the file where the entry was identified.
3461  // - Entry 3 -> Mangled name of the function where the entry was identified.
3462  // - Entry 4 -> Line in the file where the entry was identified.
3463  // - Entry 5 -> Order the entry was created.
3464  // The first element of the metadata node is the kind.
3465  Ops.push_back(getMDInt(E.getKind()));
3466  Ops.push_back(getMDInt(DeviceID));
3467  Ops.push_back(getMDInt(FileID));
3468  Ops.push_back(getMDString(ParentName));
3469  Ops.push_back(getMDInt(Line));
3470  Ops.push_back(getMDInt(E.getOrder()));
3471 
3472  // Save this entry in the right position of the ordered entries array.
3473  OrderedEntries[E.getOrder()] = &E;
3474 
3475  // Add metadata to the named metadata node.
3476  MD->addOperand(llvm::MDNode::get(C, Ops));
3477  };
3478 
3480  TargetRegionMetadataEmitter);
3481 
3482  for (auto *E : OrderedEntries) {
3483  assert(E && "All ordered entries must exist!");
3484  if (auto *CE =
3485  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
3486  E)) {
3487  assert(CE->getID() && CE->getAddress() &&
3488  "Entry ID and Addr are invalid!");
3489  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0);
3490  } else
3491  llvm_unreachable("Unsupported entry kind.");
3492  }
3493 }
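
// Illustrative metadata (a sketch): for a single target region inside a
// hypothetical function "foo" at line 42, the named metadata produced above
// looks roughly like
//   !omp_offload.info = !{!0}
//   !0 = !{i32 0, i32 <DeviceID>, i32 <FileID>, !"foo", i32 42, i32 0}
// where the leading i32 0 is the entry kind and the trailing i32 0 its order.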
3494 
3495 /// \brief Loads all the offload entries information from the host IR
3496 /// metadata.
3497 void CGOpenMPRuntime::loadOffloadInfoMetadata() {
3498  // If we are in target mode, load the metadata from the host IR. This code has
3499  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
3500 
3501  if (!CGM.getLangOpts().OpenMPIsDevice)
3502  return;
3503 
3504  if (CGM.getLangOpts().OMPHostIRFile.empty())
3505  return;
3506 
3507  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
3508  if (Buf.getError())
3509  return;
3510 
3511  llvm::LLVMContext C;
3512  auto ME = expectedToErrorOrAndEmitErrors(
3513  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
3514 
3515  if (ME.getError())
3516  return;
3517 
3518  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
3519  if (!MD)
3520  return;
3521 
3522  for (auto I : MD->operands()) {
3523  llvm::MDNode *MN = cast<llvm::MDNode>(I);
3524 
3525  auto getMDInt = [&](unsigned Idx) {
3526  llvm::ConstantAsMetadata *V =
3527  cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
3528  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
3529  };
3530 
3531  auto getMDString = [&](unsigned Idx) {
3532  llvm::MDString *V = cast<llvm::MDString>(MN->getOperand(Idx));
3533  return V->getString();
3534  };
3535 
3536  switch (getMDInt(0)) {
3537  default:
3538  llvm_unreachable("Unexpected metadata!");
3539  break;
3540  case OffloadEntriesInfoManagerTy::OffloadEntryInfo::
3541  OFFLOAD_ENTRY_INFO_TARGET_REGION:
3543  /*DeviceID=*/getMDInt(1), /*FileID=*/getMDInt(2),
3544  /*ParentName=*/getMDString(3), /*Line=*/getMDInt(4),
3545  /*Order=*/getMDInt(5));
3546  break;
3547  }
3548  }
3549 }
3550 
3551 void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
3552  if (!KmpRoutineEntryPtrTy) {
3553  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
3554  auto &C = CGM.getContext();
3555  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
3557  KmpRoutineEntryPtrQTy = C.getPointerType(
3558  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
3559  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
3560  }
3561 }
3562 
3563 static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
3564  QualType FieldTy) {
3565  auto *Field = FieldDecl::Create(
3566  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
3568  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
3569  Field->setAccess(AS_public);
3570  DC->addDecl(Field);
3571  return Field;
3572 }
3573 
3574 QualType CGOpenMPRuntime::getTgtOffloadEntryQTy() {
3575 
3576  // Make sure the type of the entry is already created. This is the type we
3577  // have to create:
3578  // struct __tgt_offload_entry{
3579  // void *addr; // Pointer to the offload entry info.
3580  // // (function or global)
3581  // char *name; // Name of the function or global.
3582  // size_t size; // Size of the entry info (0 if it is a function).
3583  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
3584  // int32_t reserved; // Reserved, to use by the runtime library.
3585  // };
3586  if (TgtOffloadEntryQTy.isNull()) {
3587  ASTContext &C = CGM.getContext();
3588  auto *RD = C.buildImplicitRecord("__tgt_offload_entry");
3589  RD->startDefinition();
3590  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3592  addFieldToRecordDecl(C, RD, C.getSizeType());
3594  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3596  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3597  RD->completeDefinition();
3599  }
3600  return TgtOffloadEntryQTy;
3601 }
3602 
3603 QualType CGOpenMPRuntime::getTgtDeviceImageQTy() {
3604  // These are the types we need to build:
3605  // struct __tgt_device_image{
3606  // void *ImageStart; // Pointer to the target code start.
3607  // void *ImageEnd; // Pointer to the target code end.
3608  // // We also add the host entries to the device image, as it may be useful
3609  // // for the target runtime to have access to that information.
3610  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
3611  // // the entries.
3612  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3613  // // entries (non inclusive).
3614  // };
3615  if (TgtDeviceImageQTy.isNull()) {
3616  ASTContext &C = CGM.getContext();
3617  auto *RD = C.buildImplicitRecord("__tgt_device_image");
3618  RD->startDefinition();
3619  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3620  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3623  RD->completeDefinition();
3625  }
3626  return TgtDeviceImageQTy;
3627 }
3628 
3629 QualType CGOpenMPRuntime::getTgtBinaryDescriptorQTy() {
3630  // struct __tgt_bin_desc{
3631  // int32_t NumDevices; // Number of devices supported.
3632  // __tgt_device_image *DeviceImages; // Arrays of device images
3633  // // (one per device).
3634  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
3635  // // entries.
3636  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
3637  // // entries (non inclusive).
3638  // };
3640  ASTContext &C = CGM.getContext();
3641  auto *RD = C.buildImplicitRecord("__tgt_bin_desc");
3642  RD->startDefinition();
3644  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
3648  RD->completeDefinition();
3650  }
3651  return TgtBinaryDescriptorQTy;
3652 }
3653 
3654 namespace {
3655 struct PrivateHelpersTy {
3656  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
3657  const VarDecl *PrivateElemInit)
3658  : Original(Original), PrivateCopy(PrivateCopy),
3659  PrivateElemInit(PrivateElemInit) {}
3660  const VarDecl *Original;
3661  const VarDecl *PrivateCopy;
3662  const VarDecl *PrivateElemInit;
3663 };
3664 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
3665 } // anonymous namespace
3666 
3667 static RecordDecl *
3668 createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
3669  if (!Privates.empty()) {
3670  auto &C = CGM.getContext();
3671  // Build struct .kmp_privates_t. {
3672  // /* private vars */
3673  // };
3674  auto *RD = C.buildImplicitRecord(".kmp_privates.t");
3675  RD->startDefinition();
3676  for (auto &&Pair : Privates) {
3677  auto *VD = Pair.second.Original;
3678  auto Type = VD->getType();
3679  Type = Type.getNonReferenceType();
3680  auto *FD = addFieldToRecordDecl(C, RD, Type);
3681  if (VD->hasAttrs()) {
3682  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
3683  E(VD->getAttrs().end());
3684  I != E; ++I)
3685  FD->addAttr(*I);
3686  }
3687  }
3688  RD->completeDefinition();
3689  return RD;
3690  }
3691  return nullptr;
3692 }
3693 
3694 static RecordDecl *
3696  QualType KmpInt32Ty,
3697  QualType KmpRoutineEntryPointerQTy) {
3698  auto &C = CGM.getContext();
3699  // Build struct kmp_task_t {
3700  // void * shareds;
3701  // kmp_routine_entry_t routine;
3702  // kmp_int32 part_id;
3703  // kmp_cmplrdata_t data1;
3704  // kmp_cmplrdata_t data2;
3705  // For taskloops additional fields:
3706  // kmp_uint64 lb;
3707  // kmp_uint64 ub;
3708  // kmp_int64 st;
3709  // kmp_int32 liter;
3710  // void * reductions;
3711  // };
3712  auto *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
3713  UD->startDefinition();
3714  addFieldToRecordDecl(C, UD, KmpInt32Ty);
3715  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
3716  UD->completeDefinition();
3717  QualType KmpCmplrdataTy = C.getRecordType(UD);
3718  auto *RD = C.buildImplicitRecord("kmp_task_t");
3719  RD->startDefinition();
3720  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3721  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
3722  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3723  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3724  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
3725  if (isOpenMPTaskLoopDirective(Kind)) {
3726  QualType KmpUInt64Ty =
3727  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
3728  QualType KmpInt64Ty =
3729  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
3730  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3731  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
3732  addFieldToRecordDecl(C, RD, KmpInt64Ty);
3733  addFieldToRecordDecl(C, RD, KmpInt32Ty);
3734  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
3735  }
3736  RD->completeDefinition();
3737  return RD;
3738 }
3739 
3740 static RecordDecl *
3742  ArrayRef<PrivateDataTy> Privates) {
3743  auto &C = CGM.getContext();
3744  // Build struct kmp_task_t_with_privates {
3745  // kmp_task_t task_data;
3746  // .kmp_privates_t. privates;
3747  // };
3748  auto *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
3749  RD->startDefinition();
3750  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
3751  if (auto *PrivateRD = createPrivatesRecordDecl(CGM, Privates)) {
3752  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
3753  }
3754  RD->completeDefinition();
3755  return RD;
3756 }
3757 
3758 /// \brief Emit a proxy function which accepts kmp_task_t as the second
3759 /// argument.
3760 /// \code
3761 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
3762 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
3763 /// For taskloops:
3764 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3765 /// tt->reductions, tt->shareds);
3766 /// return 0;
3767 /// }
3768 /// \endcode
3769 static llvm::Value *
3771  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
3772  QualType KmpTaskTWithPrivatesPtrQTy,
3773  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
3774  QualType SharedsPtrTy, llvm::Value *TaskFunction,
3775  llvm::Value *TaskPrivatesMap) {
3776  auto &C = CGM.getContext();
3777  FunctionArgList Args;
3778  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3780  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3781  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3783  Args.push_back(&GtidArg);
3784  Args.push_back(&TaskTypeArg);
3785  auto &TaskEntryFnInfo =
3786  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3787  auto *TaskEntryTy = CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
3788  auto *TaskEntry =
3790  ".omp_task_entry.", &CGM.getModule());
3791  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskEntry, TaskEntryFnInfo);
3792  CodeGenFunction CGF(CGM);
3793  CGF.disableDebugInfo();
3794  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args);
3795 
3796  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
3797  // tt,
3798  // For taskloops:
3799  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3800  // tt->task_data.shareds);
3801  auto *GtidParam = CGF.EmitLoadOfScalar(
3802  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
3803  LValue TDBase = CGF.EmitLoadOfPointerLValue(
3804  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3805  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3806  auto *KmpTaskTWithPrivatesQTyRD =
3807  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3808  LValue Base =
3809  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
3810  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
3811  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
3812  auto PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
3813  auto *PartidParam = PartIdLVal.getPointer();
3814 
3815  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
3816  auto SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
3817  auto *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3818  CGF.EmitLoadOfLValue(SharedsLVal, Loc).getScalarVal(),
3819  CGF.ConvertTypeForMem(SharedsPtrTy));
3820 
3821  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
3822  llvm::Value *PrivatesParam;
3823  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3824  auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
3825  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3826  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
3827  } else
3828  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
3829 
3830  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
3831  TaskPrivatesMap,
3832  CGF.Builder
3834  TDBase.getAddress(), CGF.VoidPtrTy)
3835  .getPointer()};
3836  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
3837  std::end(CommonArgs));
3838  if (isOpenMPTaskLoopDirective(Kind)) {
3839  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
3840  auto LBLVal = CGF.EmitLValueForField(Base, *LBFI);
3841  auto *LBParam = CGF.EmitLoadOfLValue(LBLVal, Loc).getScalarVal();
3842  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
3843  auto UBLVal = CGF.EmitLValueForField(Base, *UBFI);
3844  auto *UBParam = CGF.EmitLoadOfLValue(UBLVal, Loc).getScalarVal();
3845  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
3846  auto StLVal = CGF.EmitLValueForField(Base, *StFI);
3847  auto *StParam = CGF.EmitLoadOfLValue(StLVal, Loc).getScalarVal();
3848  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
3849  auto LILVal = CGF.EmitLValueForField(Base, *LIFI);
3850  auto *LIParam = CGF.EmitLoadOfLValue(LILVal, Loc).getScalarVal();
3851  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
3852  auto RLVal = CGF.EmitLValueForField(Base, *RFI);
3853  auto *RParam = CGF.EmitLoadOfLValue(RLVal, Loc).getScalarVal();
3854  CallArgs.push_back(LBParam);
3855  CallArgs.push_back(UBParam);
3856  CallArgs.push_back(StParam);
3857  CallArgs.push_back(LIParam);
3858  CallArgs.push_back(RParam);
3859  }
3860  CallArgs.push_back(SharedsParam);
3861 
3862  CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
3864  RValue::get(CGF.Builder.getInt32(/*C=*/0)),
3865  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
3866  CGF.FinishFunction();
3867  return TaskEntry;
3868 }
3869 
3871  SourceLocation Loc,
3872  QualType KmpInt32Ty,
3873  QualType KmpTaskTWithPrivatesPtrQTy,
3874  QualType KmpTaskTWithPrivatesQTy) {
3875  auto &C = CGM.getContext();
3876  FunctionArgList Args;
3877  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
3879  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3880  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
3882  Args.push_back(&GtidArg);
3883  Args.push_back(&TaskTypeArg);
3884  FunctionType::ExtInfo Info;
3885  auto &DestructorFnInfo =
3886  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
3887  auto *DestructorFnTy = CGM.getTypes().GetFunctionType(DestructorFnInfo);
3888  auto *DestructorFn =
3890  ".omp_task_destructor.", &CGM.getModule());
3891  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, DestructorFn,
3892  DestructorFnInfo);
3893  CodeGenFunction CGF(CGM);
3894  CGF.disableDebugInfo();
3895  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
3896  Args);
3897 
3899  CGF.GetAddrOfLocalVar(&TaskTypeArg),
3900  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3901  auto *KmpTaskTWithPrivatesQTyRD =
3902  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
3903  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
3904  Base = CGF.EmitLValueForField(Base, *FI);
3905  for (auto *Field :
3906  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
3907  if (auto DtorKind = Field->getType().isDestructedType()) {
3908  auto FieldLValue = CGF.EmitLValueForField(Base, Field);
3909  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
3910  }
3911  }
3912  CGF.FinishFunction();
3913  return DestructorFn;
3914 }
3915 
3916 /// \brief Emit a privates mapping function for correct handling of private and
3917 /// firstprivate variables.
3918 /// \code
3919 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3920 /// **noalias priv1,..., <tyn> **noalias privn) {
3921 /// *priv1 = &.privates.priv1;
3922 /// ...;
3923 /// *privn = &.privates.privn;
3924 /// }
3925 /// \endcode
3926 static llvm::Value *
3928  ArrayRef<const Expr *> PrivateVars,
3929  ArrayRef<const Expr *> FirstprivateVars,
3930  ArrayRef<const Expr *> LastprivateVars,
3931  QualType PrivatesQTy,
3932  ArrayRef<PrivateDataTy> Privates) {
3933  auto &C = CGM.getContext();
3934  FunctionArgList Args;
3935  ImplicitParamDecl TaskPrivatesArg(
3936  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3937  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
3939  Args.push_back(&TaskPrivatesArg);
3940  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
3941  unsigned Counter = 1;
3942  for (auto *E: PrivateVars) {
3943  Args.push_back(ImplicitParamDecl::Create(
3944  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3945  C.getPointerType(C.getPointerType(E->getType()))
3946  .withConst()
3947  .withRestrict(),
3949  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3950  PrivateVarsPos[VD] = Counter;
3951  ++Counter;
3952  }
3953  for (auto *E : FirstprivateVars) {
3954  Args.push_back(ImplicitParamDecl::Create(
3955  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3956  C.getPointerType(C.getPointerType(E->getType()))
3957  .withConst()
3958  .withRestrict(),
3960  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3961  PrivateVarsPos[VD] = Counter;
3962  ++Counter;
3963  }
3964  for (auto *E: LastprivateVars) {
3965  Args.push_back(ImplicitParamDecl::Create(
3966  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
3967  C.getPointerType(C.getPointerType(E->getType()))
3968  .withConst()
3969  .withRestrict(),
3971  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
3972  PrivateVarsPos[VD] = Counter;
3973  ++Counter;
3974  }
3975  auto &TaskPrivatesMapFnInfo =
3976  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
3977  auto *TaskPrivatesMapTy =
3978  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
3979  auto *TaskPrivatesMap = llvm::Function::Create(
3980  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage,
3981  ".omp_task_privates_map.", &CGM.getModule());
3982  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskPrivatesMap,
3983  TaskPrivatesMapFnInfo);
3984  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
3985  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
3986  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
3987  CodeGenFunction CGF(CGM);
3988  CGF.disableDebugInfo();
3989  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
3990  TaskPrivatesMapFnInfo, Args);
3991 
3992  // *privi = &.privates.privi;
3994  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
3995  TaskPrivatesArg.getType()->castAs<PointerType>());
3996  auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
3997  Counter = 0;
3998  for (auto *Field : PrivatesQTyRD->fields()) {
3999  auto FieldLVal = CGF.EmitLValueForField(Base, Field);
4000  auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4001  auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4002  auto RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4003  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
4004  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4005  ++Counter;
4006  }
4007  CGF.FinishFunction();
4008  return TaskPrivatesMap;
4009 }
4010 
4011 static int array_pod_sort_comparator(const PrivateDataTy *P1,
4012  const PrivateDataTy *P2) {
4013  return P1->first < P2->first ? 1 : (P2->first < P1->first ? -1 : 0);
4014 }
4015 
4016 /// Emit initialization for private variables in task-based directives.
4018  const OMPExecutableDirective &D,
4019  Address KmpTaskSharedsPtr, LValue TDBase,
4020  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4021  QualType SharedsTy, QualType SharedsPtrTy,
4022  const OMPTaskDataTy &Data,
4023  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4024  auto &C = CGF.getContext();
4025  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4026  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4027  LValue SrcBase;
4028  if (!Data.FirstprivateVars.empty()) {
4029  SrcBase = CGF.MakeAddrLValue(
4031  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4032  SharedsTy);
4033  }
4035  cast<CapturedStmt>(*D.getAssociatedStmt()));
4036  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4037  for (auto &&Pair : Privates) {
4038  auto *VD = Pair.second.PrivateCopy;
4039  auto *Init = VD->getAnyInitializer();
4040  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4041  !CGF.isTrivialInitializer(Init)))) {
4042  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
4043  if (auto *Elem = Pair.second.PrivateElemInit) {
4044  auto *OriginalVD = Pair.second.Original;
4045  auto *SharedField = CapturesInfo.lookup(OriginalVD);
4046  auto SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4047  SharedRefLValue = CGF.MakeAddrLValue(
4048  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4049  SharedRefLValue.getType(),
4050  LValueBaseInfo(AlignmentSource::Decl,
4051  SharedRefLValue.getBaseInfo().getMayAlias()));
4052  QualType Type = OriginalVD->getType();
4053  if (Type->isArrayType()) {
4054  // Initialize firstprivate array.
4055  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4056  // Perform simple memcpy.
4057  CGF.EmitAggregateAssign(PrivateLValue.getAddress(),
4058  SharedRefLValue.getAddress(), Type);
4059  } else {
4060  // Initialize firstprivate array using element-by-element
4061  // initialization.
4063  PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4064  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4065  Address SrcElement) {
4066  // Clean up any temporaries needed by the initialization.
4067  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4068  InitScope.addPrivate(
4069  Elem, [SrcElement]() -> Address { return SrcElement; });
4070  (void)InitScope.Privatize();
4071  // Emit initialization for single element.
4073  CGF, &CapturesInfo);
4074  CGF.EmitAnyExprToMem(Init, DestElement,
4075  Init->getType().getQualifiers(),
4076  /*IsInitializer=*/false);
4077  });
4078  }
4079  } else {
4080  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4081  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4082  return SharedRefLValue.getAddress();
4083  });
4084  (void)InitScope.Privatize();
4085  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4086  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4087  /*capturedByInit=*/false);
4088  }
4089  } else
4090  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4091  }
4092  ++FI;
4093  }
4094 }
4095 
4096 /// Check if duplication function is required for taskloops.
4098  ArrayRef<PrivateDataTy> Privates) {
4099  bool InitRequired = false;
4100  for (auto &&Pair : Privates) {
4101  auto *VD = Pair.second.PrivateCopy;
4102  auto *Init = VD->getAnyInitializer();
4103  InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4104  !CGF.isTrivialInitializer(Init));
4105  }
4106  return InitRequired;
4107 }
4108 
4109 
4110 /// Emit task_dup function (for initialization of
4111 /// private/firstprivate/lastprivate vars and last_iter flag)
4112 /// \code
4113 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4114 /// lastpriv) {
4115 /// // setup lastprivate flag
4116 /// task_dst->last = lastpriv;
4117 /// // could be constructor calls here...
4118 /// }
4119 /// \endcode
4120 static llvm::Value *
4122  const OMPExecutableDirective &D,
4123  QualType KmpTaskTWithPrivatesPtrQTy,
4124  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4125  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4126  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4127  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4128  auto &C = CGM.getContext();
4129  FunctionArgList Args;
4130  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4131  KmpTaskTWithPrivatesPtrQTy,
4133  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4134  KmpTaskTWithPrivatesPtrQTy,
4136  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4138  Args.push_back(&DstArg);
4139  Args.push_back(&SrcArg);
4140  Args.push_back(&LastprivArg);
4141  auto &TaskDupFnInfo =
4142  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4143  auto *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4144  auto *TaskDup =
4146  ".omp_task_dup.", &CGM.getModule());
4147  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, TaskDup, TaskDupFnInfo);
4148  CodeGenFunction CGF(CGM);
4149  CGF.disableDebugInfo();
4150  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args);
4151 
4152  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4153  CGF.GetAddrOfLocalVar(&DstArg),
4154  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4155  // task_dst->liter = lastpriv;
4156  if (WithLastIter) {
4157  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4159  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4160  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4161  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4162  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4163  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4164  }
4165 
4166  // Emit initial values for private copies (if any).
4167  assert(!Privates.empty());
4168  Address KmpTaskSharedsPtr = Address::invalid();
4169  if (!Data.FirstprivateVars.empty()) {
4170  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4171  CGF.GetAddrOfLocalVar(&SrcArg),
4172  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4174  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4175  KmpTaskSharedsPtr = Address(
4177  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4178  KmpTaskTShareds)),
4179  Loc),
4180  CGF.getNaturalTypeAlignment(SharedsTy));
4181  }
4182  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4183  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4184  CGF.FinishFunction();
4185  return TaskDup;
4186 }
4187 
4188 /// Checks if destructor function is required to be generated.
4189 /// \return true if cleanups are required, false otherwise.
4190 static bool
4191 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4192  bool NeedsCleanup = false;
4193  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4194  auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4195  for (auto *FD : PrivateRD->fields()) {
4196  NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
4197  if (NeedsCleanup)
4198  break;
4199  }
4200  return NeedsCleanup;
4201 }
4202 
4203 CGOpenMPRuntime::TaskResultTy
4205  const OMPExecutableDirective &D,
4206  llvm::Value *TaskFunction, QualType SharedsTy,
4207  Address Shareds, const OMPTaskDataTy &Data) {
4208  auto &C = CGM.getContext();
4210  // Aggregate privates and sort them by the alignment.
4211  auto I = Data.PrivateCopies.begin();
4212  for (auto *E : Data.PrivateVars) {
4213  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4214  Privates.push_back(std::make_pair(
4215  C.getDeclAlign(VD),
4216  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4217  /*PrivateElemInit=*/nullptr)));
4218  ++I;
4219  }
4220  I = Data.FirstprivateCopies.begin();
4221  auto IElemInitRef = Data.FirstprivateInits.begin();
4222  for (auto *E : Data.FirstprivateVars) {
4223  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4224  Privates.push_back(std::make_pair(
4225  C.getDeclAlign(VD),
4226  PrivateHelpersTy(
4227  VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4228  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl()))));
4229  ++I;
4230  ++IElemInitRef;
4231  }
4232  I = Data.LastprivateCopies.begin();
4233  for (auto *E : Data.LastprivateVars) {
4234  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4235  Privates.push_back(std::make_pair(
4236  C.getDeclAlign(VD),
4237  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4238  /*PrivateElemInit=*/nullptr)));
4239  ++I;
4240  }
4241  llvm::array_pod_sort(Privates.begin(), Privates.end(),
4243  auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
4244  // Build type kmp_routine_entry_t (if not built yet).
4245  emitKmpRoutineEntryT(KmpInt32Ty);
4246  // Build type kmp_task_t (if not built yet).
4247  if (KmpTaskTQTy.isNull()) {
4248  KmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
4249  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
4250  }
4251  auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
4252  // Build particular struct kmp_task_t for the given task.
4253  auto *KmpTaskTWithPrivatesQTyRD =
4255  auto KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
4256  QualType KmpTaskTWithPrivatesPtrQTy =
4257  C.getPointerType(KmpTaskTWithPrivatesQTy);
4258  auto *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
4259  auto *KmpTaskTWithPrivatesPtrTy = KmpTaskTWithPrivatesTy->getPointerTo();
4260  auto *KmpTaskTWithPrivatesTySize = CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
4261  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
4262 
4263  // Emit initial values for private copies (if any).
4264  llvm::Value *TaskPrivatesMap = nullptr;
4265  auto *TaskPrivatesMapTy =
4266  std::next(cast<llvm::Function>(TaskFunction)->arg_begin(), 3)->getType();
4267  if (!Privates.empty()) {
4268  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4269  TaskPrivatesMap = emitTaskPrivateMappingFunction(
4270  CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
4271  FI->getType(), Privates);
4272  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4273  TaskPrivatesMap, TaskPrivatesMapTy);
4274  } else {
4275  TaskPrivatesMap = llvm::ConstantPointerNull::get(
4276  cast<llvm::PointerType>(TaskPrivatesMapTy));
4277  }
4278  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
4279  // kmp_task_t *tt);
4280  auto *TaskEntry = emitProxyTaskFunction(
4281  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4282  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
4283  TaskPrivatesMap);
4284 
4285  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
4286  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
4287  // kmp_routine_entry_t *task_entry);
4288  // Task flags. Format is taken from
4289  // http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp.h,
4290  // description of kmp_tasking_flags struct.
4291  enum {
4292  TiedFlag = 0x1,
4293  FinalFlag = 0x2,
4294  DestructorsFlag = 0x8,
4295  PriorityFlag = 0x20
4296  };
4297  unsigned Flags = Data.Tied ? TiedFlag : 0;
4298  bool NeedsCleanup = false;
4299  if (!Privates.empty()) {
4300  NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
4301  if (NeedsCleanup)
4302  Flags = Flags | DestructorsFlag;
4303  }
4304  if (Data.Priority.getInt())
4305  Flags = Flags | PriorityFlag;
4306  auto *TaskFlags =
4307  Data.Final.getPointer()
4308  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
4309  CGF.Builder.getInt32(FinalFlag),
4310  CGF.Builder.getInt32(/*C=*/0))
4311  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
4312  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
4313  auto *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
4314  llvm::Value *AllocArgs[] = {emitUpdateLocation(CGF, Loc),
4315  getThreadID(CGF, Loc), TaskFlags,
4316  KmpTaskTWithPrivatesTySize, SharedsSize,
4318  TaskEntry, KmpRoutineEntryPtrTy)};
4319  auto *NewTask = CGF.EmitRuntimeCall(
4321  auto *NewTaskNewTaskTTy = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4322  NewTask, KmpTaskTWithPrivatesPtrTy);
4323  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
4324  KmpTaskTWithPrivatesQTy);
4325  LValue TDBase =
4326  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
4327  // Fill the data in the resulting kmp_task_t record.
4328  // Copy shareds if there are any.
4329  Address KmpTaskSharedsPtr = Address::invalid();
4330  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
4331  KmpTaskSharedsPtr =
4333  CGF.EmitLValueForField(
4334  TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
4335  KmpTaskTShareds)),
4336  Loc),
4337  CGF.getNaturalTypeAlignment(SharedsTy));
4338  CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
4339  }
4340  // Emit initial values for private copies (if any).
4342  if (!Privates.empty()) {
4343  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
4344  SharedsTy, SharedsPtrTy, Data, Privates,
4345  /*ForDup=*/false);
4347  (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
4348  Result.TaskDupFn = emitTaskDupFunction(
4349  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
4350  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
4351  /*WithLastIter=*/!Data.LastprivateVars.empty());
4352  }
4353  }
4354  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
4355  enum { Priority = 0, Destructors = 1 };
4356  // Provide pointer to function with destructors for privates.
4357  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
4358  auto *KmpCmplrdataUD = (*FI)->getType()->getAsUnionType()->getDecl();
4359  if (NeedsCleanup) {
4360  llvm::Value *DestructorFn = emitDestructorsFunction(
4361  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
4362  KmpTaskTWithPrivatesQTy);
4363  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
4364  LValue DestructorsLV = CGF.EmitLValueForField(
4365  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
4367  DestructorFn, KmpRoutineEntryPtrTy),
4368  DestructorsLV);
4369  }
4370  // Set priority.
4371  if (Data.Priority.getInt()) {
4372  LValue Data2LV = CGF.EmitLValueForField(
4373  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
4374  LValue PriorityLV = CGF.EmitLValueForField(
4375  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
4376  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
4377  }
4378  Result.NewTask = NewTask;
4379  Result.TaskEntry = TaskEntry;
4380  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
4381  Result.TDBase = TDBase;
4382  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
4383  return Result;
4384 }
4385 
4387  const OMPExecutableDirective &D,
4388  llvm::Value *TaskFunction,
4389  QualType SharedsTy, Address Shareds,
4390  const Expr *IfCond,
4391  const OMPTaskDataTy &Data) {
4392  if (!CGF.HaveInsertPoint())
4393  return;
4394 
4396  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4397  llvm::Value *NewTask = Result.NewTask;
4398  llvm::Value *TaskEntry = Result.TaskEntry;
4399  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4400  LValue TDBase = Result.TDBase;
4401  RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4402  auto &C = CGM.getContext();
4403  // Process list of dependences.
4404  Address DependenciesArray = Address::invalid();
4405  unsigned NumDependencies = Data.Dependences.size();
4406  if (NumDependencies) {
4407  // Dependence kind for RTL.
4408  enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3 };
4409  enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
4410  RecordDecl *KmpDependInfoRD;
4411  QualType FlagsTy =
4412  C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
4413  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
4414  if (KmpDependInfoTy.isNull()) {
4415  KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
4416  KmpDependInfoRD->startDefinition();
4417  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4418  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4419  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4420  KmpDependInfoRD->completeDefinition();
4421  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
4422  } else
4423  KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4424  CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
4425  // Define type kmp_depend_info[<Dependences.size()>];
4426  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4427  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
4428  ArrayType::Normal, /*IndexTypeQuals=*/0);
4429  // kmp_depend_info[<Dependences.size()>] deps;
4430  DependenciesArray =
4431  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
4432  for (unsigned i = 0; i < NumDependencies; ++i) {
4433  const Expr *E = Data.Dependences[i].second;
4434  auto Addr = CGF.EmitLValue(E);
4435  llvm::Value *Size;
4436  QualType Ty = E->getType();
4437  if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
4438  LValue UpAddrLVal =
4439  CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
4440  llvm::Value *UpAddr =
4441  CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
4442  llvm::Value *LowIntPtr =
4443  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
4444  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
4445  Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
4446  } else
4447  Size = CGF.getTypeSize(Ty);
4448  auto Base = CGF.MakeAddrLValue(
4449  CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
4450  KmpDependInfoTy);
4451  // deps[i].base_addr = &<Dependences[i].second>;
4452  auto BaseAddrLVal = CGF.EmitLValueForField(
4453  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
4454  CGF.EmitStoreOfScalar(
4455  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
4456  BaseAddrLVal);
4457  // deps[i].len = sizeof(<Dependences[i].second>);
4458  auto LenLVal = CGF.EmitLValueForField(
4459  Base, *std::next(KmpDependInfoRD->field_begin(), Len));
4460  CGF.EmitStoreOfScalar(Size, LenLVal);
4461  // deps[i].flags = <Dependences[i].first>;
4462  RTLDependenceKindTy DepKind;
4463  switch (Data.Dependences[i].first) {
4464  case OMPC_DEPEND_in:
4465  DepKind = DepIn;
4466  break;
4467  // Out and InOut dependencies must use the same code.
4468  case OMPC_DEPEND_out:
4469  case OMPC_DEPEND_inout:
4470  DepKind = DepInOut;
4471  break;
4472  case OMPC_DEPEND_source:
4473  case OMPC_DEPEND_sink:
4474  case OMPC_DEPEND_unknown:
4475  llvm_unreachable("Unknown task dependence type");
4476  }
4477  auto FlagsLVal = CGF.EmitLValueForField(
4478  Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
4479  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
4480  FlagsLVal);
4481  }
4482  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4483  CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
4484  CGF.VoidPtrTy);
4485  }
4486 
4487  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4488  // libcall.
4489  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4490  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4491  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
4492  // list is not empty
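  // As an illustrative sketch only (hypothetical source, values not taken
  // from the code below): for
  //   #pragma omp task depend(in : x) depend(inout : y)
  // the dependence array and the task dispatch are emitted roughly as
  //   deps[0] = {(intptr_t)&x, sizeof(x), DepIn};
  //   deps[1] = {(intptr_t)&y, sizeof(y), DepInOut};
  //   __kmpc_omp_task_with_deps(loc, gtid, new_task, /*ndeps=*/2, deps,
  //                             /*ndeps_noalias=*/0, /*noalias_dep_list=*/NULL);
  // while a task without a depend clause falls back to plain __kmpc_omp_task().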
4493  auto *ThreadID = getThreadID(CGF, Loc);
4494  auto *UpLoc = emitUpdateLocation(CGF, Loc);
4495  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4496  llvm::Value *DepTaskArgs[7];
4497  if (NumDependencies) {
4498  DepTaskArgs[0] = UpLoc;
4499  DepTaskArgs[1] = ThreadID;
4500  DepTaskArgs[2] = NewTask;
4501  DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
4502  DepTaskArgs[4] = DependenciesArray.getPointer();
4503  DepTaskArgs[5] = CGF.Builder.getInt32(0);
4504  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4505  }
4506  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
4507  &TaskArgs,
4508  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4509  if (!Data.Tied) {
4510  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4511  auto PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
4512  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
4513  }
4514  if (NumDependencies) {
4515  CGF.EmitRuntimeCall(
4516  createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps), DepTaskArgs);
4517  } else {
4518  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
4519  TaskArgs);
4520  }
4521  // Check if the parent region is untied and build the return for an untied task.
4522  if (auto *Region =
4523  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
4524  Region->emitUntiedSwitch(CGF);
4525  };
4526 
4527  llvm::Value *DepWaitTaskArgs[6];
4528  if (NumDependencies) {
4529  DepWaitTaskArgs[0] = UpLoc;
4530  DepWaitTaskArgs[1] = ThreadID;
4531  DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
4532  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
4533  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
4534  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4535  }
4536  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
4537  NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF,
4538  PrePostActionTy &) {
4539  auto &RT = CGF.CGM.getOpenMPRuntime();
4540  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4541  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
4542  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
4543  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
4544  // is specified.
4545  if (NumDependencies)
4546  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
4547  DepWaitTaskArgs);
4548  // Call proxy_task_entry(gtid, new_task);
4549  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy](
4550  CodeGenFunction &CGF, PrePostActionTy &Action) {
4551  Action.Enter(CGF);
4552  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4553  CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
4554  };
4555 
4556  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4557  // kmp_task_t *new_task);
4558  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4559  // kmp_task_t *new_task);
4560  RegionCodeGenTy RCG(CodeGen);
4561  CommonActionTy Action(
4562  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
4563  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
4564  RCG.setAction(Action);
4565  RCG(CGF);
4566  };
4567 
4568  if (IfCond)
4569  emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4570  else {
4571  RegionCodeGenTy ThenRCG(ThenCodeGen);
4572  ThenRCG(CGF);
4573  }
4574 }
4575 
4576 void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4577  const OMPLoopDirective &D,
4578  llvm::Value *TaskFunction,
4579  QualType SharedsTy, Address Shareds,
4580  const Expr *IfCond,
4581  const OMPTaskDataTy &Data) {
4582  if (!CGF.HaveInsertPoint())
4583  return;
4584  TaskResultTy Result =
4585  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4586  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4587  // libcall.
4588  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4589  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4590  // sched, kmp_uint64 grainsize, void *task_dup);
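  // Illustrative sketch only (hypothetical clause values): for
  //   #pragma omp taskloop grainsize(4)
  // the call emitted below is roughly
  //   __kmpc_taskloop(loc, gtid, new_task, /*if_val=*/1, &lb, &ub, st,
  //                   /*nogroup=*/0, /*sched=*/1 /*Grainsize*/,
  //                   /*grainsize=*/4, task_dup);
  // where sched is NoSchedule, Grainsize or NumTasks depending on whether a
  // grainsize or num_tasks clause is present.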
4591  llvm::Value *ThreadID = getThreadID(CGF, Loc);
4592  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4593  llvm::Value *IfVal;
4594  if (IfCond) {
4595  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
4596  /*isSigned=*/true);
4597  } else
4598  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
4599 
4600  LValue LBLVal = CGF.EmitLValueForField(
4601  Result.TDBase,
4602  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
4603  auto *LBVar =
4604  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
4605  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
4606  /*IsInitializer=*/true);
4607  LValue UBLVal = CGF.EmitLValueForField(
4608  Result.TDBase,
4609  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
4610  auto *UBVar =
4611  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
4612  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
4613  /*IsInitializer=*/true);
4614  LValue StLVal = CGF.EmitLValueForField(
4615  Result.TDBase,
4616  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
4617  auto *StVar =
4618  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
4619  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
4620  /*IsInitializer=*/true);
4621  // Store reductions address.
4622  LValue RedLVal = CGF.EmitLValueForField(
4623  Result.TDBase,
4624  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
4625  if (Data.Reductions)
4626  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
4627  else {
4628  CGF.EmitNullInitialization(RedLVal.getAddress(),
4629  CGF.getContext().VoidPtrTy);
4630  }
4631  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4632  llvm::Value *TaskArgs[] = {
4633  UpLoc,
4634  ThreadID,
4635  Result.NewTask,
4636  IfVal,
4637  LBLVal.getPointer(),
4638  UBLVal.getPointer(),
4639  CGF.EmitLoadOfScalar(StLVal, SourceLocation()),
4640  llvm::ConstantInt::getNullValue(
4641  CGF.IntTy), // Always 0 because taskgroup emitted by the compiler
4642  llvm::ConstantInt::getSigned(
4643  CGF.IntTy, Data.Schedule.getPointer()
4644  ? Data.Schedule.getInt() ? NumTasks : Grainsize
4645  : NoSchedule),
4646  Data.Schedule.getPointer()
4647  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
4648  /*isSigned=*/false)
4649  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
4650  Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4651  Result.TaskDupFn, CGF.VoidPtrTy)
4652  : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
4653  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_taskloop), TaskArgs);
4654 }
4655 
4656 /// \brief Emit reduction operation for each element of array (required for
4657 /// array sections) LHS op = RHS.
4658 /// \param Type Type of array.
4659 /// \param LHSVar Variable on the left side of the reduction operation
4660 /// (references element of array in original variable).
4661 /// \param RHSVar Variable on the right side of the reduction operation
4662 /// (references element of array in original variable).
4663 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
4664 /// RHSVar.
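/// Conceptually (a sketch only, not emitted verbatim), the generated IR
/// corresponds to:
/// \code
/// for (T *lhs = LHSBegin, *rhs = RHSBegin; lhs != LHSEnd; ++lhs, ++rhs)
///   *lhs = RedOp(*lhs, *rhs);
/// \endcode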
4665 static void EmitOMPAggregateReduction(
4666  CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4667  const VarDecl *RHSVar,
4668  const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4669  const Expr *, const Expr *)> &RedOpGen,
4670  const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4671  const Expr *UpExpr = nullptr) {
4672  // Perform the reduction element by element.
4673  QualType ElementTy;
4674  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
4675  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
4676 
4677  // Drill down to the base element type on both arrays.
4678  auto ArrayTy = Type->getAsArrayTypeUnsafe();
4679  auto NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
4680 
4681  auto RHSBegin = RHSAddr.getPointer();
4682  auto LHSBegin = LHSAddr.getPointer();
4683  // Cast from pointer to array type to pointer to single element.
4684  auto LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
4685  // The basic structure here is a while-do loop.
4686  auto BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
4687  auto DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
4688  auto IsEmpty =
4689  CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
4690  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
4691 
4692  // Enter the loop body, making that address the current address.
4693  auto EntryBB = CGF.Builder.GetInsertBlock();
4694  CGF.EmitBlock(BodyBB);
4695 
4696  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
4697 
4698  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4699  RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
4700  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
4701  Address RHSElementCurrent =
4702  Address(RHSElementPHI,
4703  RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4704 
4705  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4706  LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
4707  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
4708  Address LHSElementCurrent =
4709  Address(LHSElementPHI,
4710  LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
4711 
4712  // Emit copy.
4713  CodeGenFunction::OMPPrivateScope Scope(CGF);
4714  Scope.addPrivate(LHSVar, [=]() -> Address { return LHSElementCurrent; });
4715  Scope.addPrivate(RHSVar, [=]() -> Address { return RHSElementCurrent; });
4716  Scope.Privatize();
4717  RedOpGen(CGF, XExpr, EExpr, UpExpr);
4718  Scope.ForceCleanup();
4719 
4720  // Shift the address forward by one element.
4721  auto LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4722  LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
4723  auto RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4724  RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
4725  // Check whether we've reached the end.
4726  auto Done =
4727  CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
4728  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
4729  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
4730  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
4731 
4732  // Done.
4733  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
4734 }
4735 
4736 /// Emit the reduction combiner. If the combiner is a simple expression, emit it
4737 /// as is; otherwise treat it as the combiner of a user-defined reduction (UDR)
4738 /// declaration and emit it as a call to the UDR combiner function.
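/// For example (a hypothetical user-defined reduction, shown only as a sketch):
/// \code
/// #pragma omp declare reduction(merge : T : omp_out = combine(omp_out, omp_in))
/// \endcode
/// Here the callee of the CallExpr resolves to the OMPDeclareReductionDecl and
/// the combiner is emitted as a call to the combiner function returned by
/// getUserDefinedReduction().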
4739 static void emitReductionCombiner(CodeGenFunction &CGF,
4740  const Expr *ReductionOp) {
4741  if (auto *CE = dyn_cast<CallExpr>(ReductionOp))
4742  if (auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
4743  if (auto *DRE =
4744  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
4745  if (auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
4746  std::pair<llvm::Function *, llvm::Function *> Reduction =
4747  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
4748  RValue Func = RValue::get(Reduction.first);
4749  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4750  CGF.EmitIgnoredExpr(ReductionOp);
4751  return;
4752  }
4753  CGF.EmitIgnoredExpr(ReductionOp);
4754 }
4755 
4756 static llvm::Value *emitReductionFunction(
4757  CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
4758  ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
4759  ArrayRef<const Expr *> ReductionOps) {
4760  auto &C = CGM.getContext();
4761 
4762  // void reduction_func(void *LHSArg, void *RHSArg);
4763  FunctionArgList Args;
4764  ImplicitParamDecl LHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4765  ImplicitParamDecl RHSArg(C, C.VoidPtrTy, ImplicitParamDecl::Other);
4766  Args.push_back(&LHSArg);
4767  Args.push_back(&RHSArg);
4768  auto &CGFI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4769  auto *Fn = llvm::Function::Create(
4770  CGM.getTypes().GetFunctionType(CGFI), llvm::GlobalValue::InternalLinkage,
4771  ".omp.reduction.reduction_func", &CGM.getModule());
4772  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, CGFI);
4773  CodeGenFunction CGF(CGM);
4774  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
4775 
4776  // Dst = (void*[n])(LHSArg);
4777  // Src = (void*[n])(RHSArg);
4778  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4779  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
4780  ArgsType), CGF.getPointerAlign());
4781  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4782  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
4783  ArgsType), CGF.getPointerAlign());
4784 
4785  // ...
4786  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4787  // ...
4788  CodeGenFunction::OMPPrivateScope Scope(CGF);
4789  auto IPriv = Privates.begin();
4790  unsigned Idx = 0;
4791  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4792  auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
4793  Scope.addPrivate(RHSVar, [&]() -> Address {
4794  return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
4795  });
4796  auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
4797  Scope.addPrivate(LHSVar, [&]() -> Address {
4798  return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
4799  });
4800  QualType PrivTy = (*IPriv)->getType();
4801  if (PrivTy->isVariablyModifiedType()) {
4802  // Get array size and emit VLA type.
4803  ++Idx;
4804  Address Elem =
4805  CGF.Builder.CreateConstArrayGEP(LHS, Idx, CGF.getPointerSize());
4806  llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
4807  auto *VLA = CGF.getContext().getAsVariableArrayType(PrivTy);
4808  auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
4809  CodeGenFunction::OpaqueValueMapping OpaqueMap(
4810  CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
4811  CGF.EmitVariablyModifiedType(PrivTy);
4812  }
4813  }
4814  Scope.Privatize();
4815  IPriv = Privates.begin();
4816  auto ILHS = LHSExprs.begin();
4817  auto IRHS = RHSExprs.begin();
4818  for (auto *E : ReductionOps) {
4819  if ((*IPriv)->getType()->isArrayType()) {
4820  // Emit reduction for array section.
4821  auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
4822  auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
4823  EmitOMPAggregateReduction(
4824  CGF, (*IPriv)->getType(), LHSVar, RHSVar,
4825  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4826  emitReductionCombiner(CGF, E);
4827  });
4828  } else
4829  // Emit reduction for array subscript or single variable.
4830  emitReductionCombiner(CGF, E);
4831  ++IPriv;
4832  ++ILHS;
4833  ++IRHS;
4834  }
4835  Scope.ForceCleanup();
4836  CGF.FinishFunction();
4837  return Fn;
4838 }
4839 
4840 void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4841  const Expr *ReductionOp,
4842  const Expr *PrivateRef,
4843  const DeclRefExpr *LHS,
4844  const DeclRefExpr *RHS) {
4845  if (PrivateRef->getType()->isArrayType()) {
4846  // Emit reduction for array section.
4847  auto *LHSVar = cast<VarDecl>(LHS->getDecl());
4848  auto *RHSVar = cast<VarDecl>(RHS->getDecl());
4849  EmitOMPAggregateReduction(
4850  CGF, PrivateRef->getType(), LHSVar, RHSVar,
4851  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4852  emitReductionCombiner(CGF, ReductionOp);
4853  });
4854  } else
4855  // Emit reduction for array subscript or single variable.
4856  emitReductionCombiner(CGF, ReductionOp);
4857 }
4858 
4859 void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
4860  ArrayRef<const Expr *> Privates,
4861  ArrayRef<const Expr *> LHSExprs,
4862  ArrayRef<const Expr *> RHSExprs,
4863  ArrayRef<const Expr *> ReductionOps,
4864  ReductionOptionsTy Options) {
4865  if (!CGF.HaveInsertPoint())
4866  return;
4867 
4868  bool WithNowait = Options.WithNowait;
4869  bool SimpleReduction = Options.SimpleReduction;
4870 
4871  // The following code should be emitted for the reduction:
4872  //
4873  // static kmp_critical_name lock = { 0 };
4874  //
4875  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
4876  // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
4877  // ...
4878  // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
4879  // *(Type<n>-1*)rhs[<n>-1]);
4880  // }
4881  //
4882  // ...
4883  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
4884  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4885  // RedList, reduce_func, &<lock>)) {
4886  // case 1:
4887  // ...
4888  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4889  // ...
4890  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4891  // break;
4892  // case 2:
4893  // ...
4894  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
4895  // ...
4896  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
4897  // break;
4898  // default:;
4899  // }
4900  //
4901  // If SimpleReduction is true, only the following code is generated:
4902  // ...
4903  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4904  // ...
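  // As a concrete (hypothetical) example of a directive that reaches this path:
  //   #pragma omp parallel for reduction(+ : sum)
  // Here <n> is 1, RedOp<0> is '+', and reduce_func adds the private partial
  // value referenced by rhs[0] into the value referenced by lhs[0].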
4905 
4906  auto &C = CGM.getContext();
4907 
4908  if (SimpleReduction) {
4910  auto IPriv = Privates.begin();
4911  auto ILHS = LHSExprs.begin();
4912  auto IRHS = RHSExprs.begin();
4913  for (auto *E : ReductionOps) {
4914  emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
4915  cast<DeclRefExpr>(*IRHS));
4916  ++IPriv;
4917  ++ILHS;
4918  ++IRHS;
4919  }
4920  return;
4921  }
4922 
4923  // 1. Build a list of reduction variables.
4924  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
4925  auto Size = RHSExprs.size();
4926  for (auto *E : Privates) {
4927  if (E->getType()->isVariablyModifiedType())
4928  // Reserve place for array size.
4929  ++Size;
4930  }
4931  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
4932  QualType ReductionArrayTy =
4933  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
4934  /*IndexTypeQuals=*/0);
4935  Address ReductionList =
4936  CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
4937  auto IPriv = Privates.begin();
4938  unsigned Idx = 0;
4939  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
4940  Address Elem =
4941  CGF.Builder.CreateConstArrayGEP(ReductionList, Idx, CGF.getPointerSize());
4942  CGF.Builder.CreateStore(
4943  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4944  CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
4945  Elem);
4946  if ((*IPriv)->getType()->isVariablyModifiedType()) {
4947  // Store array size.
4948  ++Idx;
4949  Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx,
4950  CGF.getPointerSize());
4951  llvm::Value *Size = CGF.Builder.CreateIntCast(
4952  CGF.getVLASize(
4953  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
4954  .first,
4955  CGF.SizeTy, /*isSigned=*/false);
4956  CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
4957  Elem);
4958  }
4959  }
4960 
4961  // 2. Emit reduce_func().
4962  auto *ReductionFn = emitReductionFunction(
4963  CGM, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
4964  LHSExprs, RHSExprs, ReductionOps);
4965 
4966  // 3. Create static kmp_critical_name lock = { 0 };
4967  auto *Lock = getCriticalRegionLock(".reduction");
4968 
4969  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
4970  // RedList, reduce_func, &<lock>);
4971  auto *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
4972  auto *ThreadId = getThreadID(CGF, Loc);
4973  auto *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
4974  auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4975  ReductionList.getPointer(), CGF.VoidPtrTy);
4976  llvm::Value *Args[] = {
4977  IdentTLoc, // ident_t *<loc>
4978  ThreadId, // i32 <gtid>
4979  CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
4980  ReductionArrayTySize, // size_type sizeof(RedList)
4981  RL, // void *RedList
4982  ReductionFn, // void (*) (void *, void *) <reduce_func>
4983  Lock // kmp_critical_name *&<lock>
4984  };
4985  auto Res = CGF.EmitRuntimeCall(
4986  createRuntimeFunction(WithNowait ? OMPRTL__kmpc_reduce_nowait
4987  : OMPRTL__kmpc_reduce),
4988  Args);
4989 
4990  // 5. Build switch(res)
4991  auto *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
4992  auto *SwInst = CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
4993 
4994  // 6. Build case 1:
4995  // ...
4996  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
4997  // ...
4998  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
4999  // break;
5000  auto *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5001  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5002  CGF.EmitBlock(Case1BB);
5003 
5004  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5005  llvm::Value *EndArgs[] = {
5006  IdentTLoc, // ident_t *<loc>
5007  ThreadId, // i32 <gtid>
5008  Lock // kmp_critical_name *&<lock>
5009  };
5010  auto &&CodeGen = [&Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5012  auto &RT = CGF.CGM.getOpenMPRuntime();
5013  auto IPriv = Privates.begin();
5014  auto ILHS = LHSExprs.begin();
5015  auto IRHS = RHSExprs.begin();
5016  for (auto *E : ReductionOps) {
5017  RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5018  cast<DeclRefExpr>(*IRHS));
5019  ++IPriv;
5020  ++ILHS;
5021  ++IRHS;
5022  }
5023  };
5024  RegionCodeGenTy RCG(CodeGen);
5025  CommonActionTy Action(
5026  nullptr, llvm::None,
5027  createRuntimeFunction(WithNowait ? OMPRTL__kmpc_end_reduce_nowait
5028  : OMPRTL__kmpc_end_reduce),
5029  EndArgs);
5030  RCG.setAction(Action);
5031  RCG(CGF);
5032 
5033  CGF.EmitBranch(DefaultBB);
5034 
5035  // 7. Build case 2:
5036  // ...
5037  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5038  // ...
5039  // break;
5040  auto *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5041  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5042  CGF.EmitBlock(Case2BB);
5043 
5044  auto &&AtomicCodeGen = [Loc, &Privates, &LHSExprs, &RHSExprs, &ReductionOps](
5046  auto ILHS = LHSExprs.begin();
5047  auto IRHS = RHSExprs.begin();
5048  auto IPriv = Privates.begin();
5049  for (auto *E : ReductionOps) {
5050  const Expr *XExpr = nullptr;
5051  const Expr *EExpr = nullptr;
5052  const Expr *UpExpr = nullptr;
5053  BinaryOperatorKind BO = BO_Comma;
5054  if (auto *BO = dyn_cast<BinaryOperator>(E)) {
5055  if (BO->getOpcode() == BO_Assign) {
5056  XExpr = BO->getLHS();
5057  UpExpr = BO->getRHS();
5058  }
5059  }
5060  // Try to emit update expression as a simple atomic.
5061  auto *RHSExpr = UpExpr;
5062  if (RHSExpr) {
5063  // Analyze RHS part of the whole expression.
5064  if (auto *ACO = dyn_cast<AbstractConditionalOperator>(
5065  RHSExpr->IgnoreParenImpCasts())) {
5066  // If this is a conditional operator, analyze its condition for
5067  // min/max reduction operator.
5068  RHSExpr = ACO->getCond();
5069  }
5070  if (auto *BORHS =
5071  dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5072  EExpr = BORHS->getRHS();
5073  BO = BORHS->getOpcode();
5074  }
5075  }
5076  if (XExpr) {
5077  auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5078  auto &&AtomicRedGen = [BO, VD,
5079  Loc](CodeGenFunction &CGF, const Expr *XExpr,
5080  const Expr *EExpr, const Expr *UpExpr) {
5081  LValue X = CGF.EmitLValue(XExpr);
5082  RValue E;
5083  if (EExpr)
5084  E = CGF.EmitAnyExpr(EExpr);
5085  CGF.EmitOMPAtomicSimpleUpdateExpr(
5086  X, E, BO, /*IsXLHSInRHSPart=*/true,
5087  llvm::AtomicOrdering::Monotonic, Loc,
5088  [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5089  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5090  PrivateScope.addPrivate(
5091  VD, [&CGF, VD, XRValue, Loc]() -> Address {
5092  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5093  CGF.emitOMPSimpleStore(
5094  CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5095  VD->getType().getNonReferenceType(), Loc);
5096  return LHSTemp;
5097  });
5098  (void)PrivateScope.Privatize();
5099  return CGF.EmitAnyExpr(UpExpr);
5100  });
5101  };
5102  if ((*IPriv)->getType()->isArrayType()) {
5103  // Emit atomic reduction for array section.
5104  auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5105  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5106  AtomicRedGen, XExpr, EExpr, UpExpr);
5107  } else
5108  // Emit atomic reduction for array subscript or single variable.
5109  AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5110  } else {
5111  // Emit as a critical region.
5112  auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5113  const Expr *, const Expr *) {
5114  auto &RT = CGF.CGM.getOpenMPRuntime();
5115  RT.emitCriticalRegion(
5116  CGF, ".atomic_reduction",
5117  [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5118  Action.Enter(CGF);
5119  emitReductionCombiner(CGF, E);
5120  },
5121  Loc);
5122  };
5123  if ((*IPriv)->getType()->isArrayType()) {
5124  auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5125  auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5126  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5127  CritRedGen);
5128  } else
5129  CritRedGen(CGF, nullptr, nullptr, nullptr);
5130  }
5131  ++ILHS;
5132  ++IRHS;
5133  ++IPriv;
5134  }
5135  };
5136  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5137  if (!WithNowait) {
5138  // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5139  llvm::Value *EndArgs[] = {
5140  IdentTLoc, // ident_t *<loc>
5141  ThreadId, // i32 <gtid>
5142  Lock // kmp_critical_name *&<lock>
5143  };
5144  CommonActionTy Action(nullptr, llvm::None,
5145  createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5146  EndArgs);
5147  AtomicRCG.setAction(Action);
5148  AtomicRCG(CGF);
5149  } else
5150  AtomicRCG(CGF);
5151 
5152  CGF.EmitBranch(DefaultBB);
5153  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5154 }
5155 
5156 /// Generates unique name for artificial threadprivate variables.
5157 /// Format is: <Prefix> "." <Loc_raw_encoding> "_" <N>
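/// For example, generateUniqueName("reduction_size", Loc, 0) might produce a
/// name such as "reduction_size.123456_0" (the raw location encoding shown
/// here is hypothetical).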
5158 static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc,
5159  unsigned N) {
5160  SmallString<256> Buffer;
5161  llvm::raw_svector_ostream Out(Buffer);
5162  Out << Prefix << "." << Loc.getRawEncoding() << "_" << N;
5163  return Out.str();
5164 }
5165 
5166 /// Emits reduction initializer function:
5167 /// \code
5168 /// void @.red_init(void* %arg) {
5169 /// %0 = bitcast void* %arg to <type>*
5170 /// store <type> <init>, <type>* %0
5171 /// ret void
5172 /// }
5173 /// \endcode
5174 static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
5175  SourceLocation Loc,
5176  ReductionCodeGen &RCG, unsigned N) {
5177  auto &C = CGM.getContext();
5178  FunctionArgList Args;
5179  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5180  Args.emplace_back(&Param);
5181  auto &FnInfo =
5182  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5183  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5184  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5185  ".red_init.", &CGM.getModule());
5186  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5187  CodeGenFunction CGF(CGM);
5188  CGF.disableDebugInfo();
5189  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5190  Address PrivateAddr = CGF.EmitLoadOfPointer(
5191  CGF.GetAddrOfLocalVar(&Param),
5192  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5193  llvm::Value *Size = nullptr;
5194  // If the size of the reduction item is non-constant, load it from global
5195  // threadprivate variable.
5196  if (RCG.getSizes(N).second) {
5197  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5198  CGF, CGM.getContext().getSizeType(),
5199  generateUniqueName("reduction_size", Loc, N));
5200  Size =
5201  CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5202  CGM.getContext().getSizeType(), Loc);
5203  }
5204  RCG.emitAggregateType(CGF, N, Size);
5205  LValue SharedLVal;
5206  // If the initializer comes from a declare reduction construct, emit a pointer
5207  // to the address of the original reduction item (required by the reduction
5208  // initializer).
5209  if (RCG.usesReductionInitializer(N)) {
5210  Address SharedAddr =
5211  CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5212  CGF, CGM.getContext().VoidPtrTy,
5213  generateUniqueName("reduction", Loc, N));
5214  SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
5215  } else {
5216  SharedLVal = CGF.MakeNaturalAlignAddrLValue(
5217  llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
5218  CGM.getContext().VoidPtrTy);
5219  }
5220  // Emit the initializer:
5221  // %0 = bitcast void* %arg to <type>*
5222  // store <type> <init>, <type>* %0
5223  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
5224  [](CodeGenFunction &) { return false; });
5225  CGF.FinishFunction();
5226  return Fn;
5227 }
5228 
5229 /// Emits reduction combiner function:
5230 /// \code
5231 /// void @.red_comb(void* %arg0, void* %arg1) {
5232 /// %lhs = bitcast void* %arg0 to <type>*
5233 /// %rhs = bitcast void* %arg1 to <type>*
5234 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
5235 /// store <type> %2, <type>* %lhs
5236 /// ret void
5237 /// }
5238 /// \endcode
5239 static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
5240  SourceLocation Loc,
5241  ReductionCodeGen &RCG, unsigned N,
5242  const Expr *ReductionOp,
5243  const Expr *LHS, const Expr *RHS,
5244  const Expr *PrivateRef) {
5245  auto &C = CGM.getContext();
5246  auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
5247  auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
5248  FunctionArgList Args;
5249  ImplicitParamDecl ParamInOut(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5250  ImplicitParamDecl ParamIn(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5251  Args.emplace_back(&ParamInOut);
5252  Args.emplace_back(&ParamIn);
5253  auto &FnInfo =
5254  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5255  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5256  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5257  ".red_comb.", &CGM.getModule());
5258  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5259  CodeGenFunction CGF(CGM);
5260  CGF.disableDebugInfo();
5261  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5262  llvm::Value *Size = nullptr;
5263  // If the size of the reduction item is non-constant, load it from global
5264  // threadprivate variable.
5265  if (RCG.getSizes(N).second) {
5266  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5267  CGF, CGM.getContext().getSizeType(),
5268  generateUniqueName("reduction_size", Loc, N));
5269  Size =
5270  CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5271  CGM.getContext().getSizeType(), Loc);
5272  }
5273  RCG.emitAggregateType(CGF, N, Size);
5274  // Remap lhs and rhs variables to the addresses of the function arguments.
5275  // %lhs = bitcast void* %arg0 to <type>*
5276  // %rhs = bitcast void* %arg1 to <type>*
5277  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5278  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() -> Address {
5279  // Pull out the pointer to the variable.
5280  Address PtrAddr = CGF.EmitLoadOfPointer(
5281  CGF.GetAddrOfLocalVar(&ParamInOut),
5282  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5283  return CGF.Builder.CreateElementBitCast(
5284  PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
5285  });
5286  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() -> Address {
5287  // Pull out the pointer to the variable.
5288  Address PtrAddr = CGF.EmitLoadOfPointer(
5289  CGF.GetAddrOfLocalVar(&ParamIn),
5290  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5291  return CGF.Builder.CreateElementBitCast(
5292  PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
5293  });
5294  PrivateScope.Privatize();
5295  // Emit the combiner body:
5296  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
5297  // store <type> %2, <type>* %lhs
5298  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
5299  CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
5300  cast<DeclRefExpr>(RHS));
5301  CGF.FinishFunction();
5302  return Fn;
5303 }
5304 
5305 /// Emits reduction finalizer function:
5306 /// \code
5307 /// void @.red_fini(void* %arg) {
5308 /// %0 = bitcast void* %arg to <type>*
5309 /// <destroy>(<type>* %0)
5310 /// ret void
5311 /// }
5312 /// \endcode
5313 static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
5314  SourceLocation Loc,
5315  ReductionCodeGen &RCG, unsigned N) {
5316  if (!RCG.needCleanups(N))
5317  return nullptr;
5318  auto &C = CGM.getContext();
5319  FunctionArgList Args;
5320  ImplicitParamDecl Param(C, C.VoidPtrTy, ImplicitParamDecl::Other);
5321  Args.emplace_back(&Param);
5322  auto &FnInfo =
5323  CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
5324  auto *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
5325  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
5326  ".red_fini.", &CGM.getModule());
5327  CGM.SetInternalFunctionAttributes(/*D=*/nullptr, Fn, FnInfo);
5328  CodeGenFunction CGF(CGM);
5329  CGF.disableDebugInfo();
5330  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args);
5331  Address PrivateAddr = CGF.EmitLoadOfPointer(
5332  CGF.GetAddrOfLocalVar(&Param),
5333  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
5334  llvm::Value *Size = nullptr;
5335  // If the size of the reduction item is non-constant, load it from global
5336  // threadprivate variable.
5337  if (RCG.getSizes(N).second) {
5338  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
5339  CGF, CGM.getContext().getSizeType(),
5340  generateUniqueName("reduction_size", Loc, N));
5341  Size =
5342  CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
5343  CGM.getContext().getSizeType(), Loc);
5344  }
5345  RCG.emitAggregateType(CGF, N, Size);
5346  // Emit the finalizer body:
5347  // <destroy>(<type>* %0)
5348  RCG.emitCleanups(CGF, N, PrivateAddr);
5349  CGF.FinishFunction();
5350  return Fn;
5351 }
5352 
5353 llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
5354  CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
5355  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
5356  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
5357  return nullptr;
5358 
5359  // Build typedef struct:
5360  // kmp_task_red_input {
5361  // void *reduce_shar; // shared reduction item
5362  // size_t reduce_size; // size of data item
5363  // void *reduce_init; // data initialization routine
5364  // void *reduce_fini; // data finalization routine
5365  // void *reduce_comb; // data combiner routine
5366  // kmp_task_red_flags_t flags; // flags for additional info from compiler
5367  // } kmp_task_red_input_t;
5368  ASTContext &C = CGM.getContext();
5369  auto *RD = C.buildImplicitRecord("kmp_task_red_input_t");
5370  RD->startDefinition();
5371  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5372  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
5373  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5374  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5375  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
5376  const FieldDecl *FlagsFD = addFieldToRecordDecl(
5377  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
5378  RD->completeDefinition();
5379  QualType RDType = C.getRecordType(RD);
5380  unsigned Size = Data.ReductionVars.size();
5381  llvm::APInt ArraySize(/*numBits=*/64, Size);
5382  QualType ArrayRDType = C.getConstantArrayType(
5383  RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
5384  // kmp_task_red_input_t .rd_input.[Size];
5385  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
5386  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionCopies,
5387  Data.ReductionOps);
5388  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
5389  // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
5390  llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
5391  llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
5392  llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
5393  TaskRedInput.getPointer(), Idxs,
5394  /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
5395  ".rd_input.gep.");
5396  LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
5397  // ElemLVal.reduce_shar = &Shareds[Cnt];
5398  LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
5399  RCG.emitSharedLValue(CGF, Cnt);
5400  llvm::Value *CastedShared =
5401  CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer());
5402  CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
5403  RCG.emitAggregateType(CGF, Cnt);
5404  llvm::Value *SizeValInChars;
5405  llvm::Value *SizeVal;
5406  std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
5407  // We use delayed creation/initialization for VLAs, array sections and
5408  // custom reduction initializations. It is required because the runtime does not
5409  // provide a way to pass the sizes of VLAs/array sections to the
5410  // initializer/combiner/finalizer functions and does not pass the pointer to the
5411  // original reduction item to the initializer. Instead, threadprivate global
5412  // variables are used to store these values and pass them to those functions.
5413  bool DelayedCreation = !!SizeVal;
5414  SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
5415  /*isSigned=*/false);
5416  LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
5417  CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
5418  // ElemLVal.reduce_init = init;
5419  LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
5420  llvm::Value *InitAddr =
5421  CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
5422  CGF.EmitStoreOfScalar(InitAddr, InitLVal);
5423  DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
5424  // ElemLVal.reduce_fini = fini;
5425  LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
5426  llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
5427  llvm::Value *FiniAddr = Fini
5428  ? CGF.EmitCastToVoidPtr(Fini)
5429  : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
5430  CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
5431  // ElemLVal.reduce_comb = comb;
5432  LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
5433  llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
5434  CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
5435  RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
5436  CGF.EmitStoreOfScalar(CombAddr, CombLVal);
5437  // ElemLVal.flags = 0;
5438  LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
5439  if (DelayedCreation) {
5440  CGF.EmitStoreOfScalar(
5441  llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*IsSigned=*/true),
5442  FlagsLVal);
5443  } else
5444  CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
5445  }
5446  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
5447  // *data);
5448  llvm::Value *Args[] = {
5449  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5450  /*isSigned=*/true),
5451  llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
5452  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
5453  CGM.VoidPtrTy)};
5454  return CGF.EmitRuntimeCall(
5455  createRuntimeFunction(OMPRTL__kmpc_task_reduction_init), Args);
5456 }
5457 
5458 void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
5459  SourceLocation Loc,
5460  ReductionCodeGen &RCG,
5461  unsigned N) {
5462  auto Sizes = RCG.getSizes(N);
5463  // Emit the threadprivate global variable if the size is non-constant
5464  // (i.e. Sizes.second != nullptr).
5465  if (Sizes.second) {
5466  llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
5467  /*isSigned=*/false);
5468  Address SizeAddr = getAddrOfArtificialThreadPrivate(
5469  CGF, CGM.getContext().getSizeType(),
5470  generateUniqueName("reduction_size", Loc, N));
5471  CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
5472  }
5473  // Store address of the original reduction item if custom initializer is used.
5474  if (RCG.usesReductionInitializer(N)) {
5475  Address SharedAddr = getAddrOfArtificialThreadPrivate(
5476  CGF, CGM.getContext().VoidPtrTy,
5477  generateUniqueName("reduction", Loc, N));
5478  CGF.Builder.CreateStore(
5479  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5480  RCG.getSharedLValue(N).getPointer(), CGM.VoidPtrTy),
5481  SharedAddr, /*IsVolatile=*/false);
5482  }
5483 }
5484 
5485 Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
5486  SourceLocation Loc,
5487  llvm::Value *ReductionsPtr,
5488  LValue SharedLVal) {
5489  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
5490  // *d);
5491  llvm::Value *Args[] = {
5492  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
5493  /*isSigned=*/true),
5494  ReductionsPtr,
5495  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(SharedLVal.getPointer(),
5496  CGM.VoidPtrTy)};
5497  return Address(
5498  CGF.EmitRuntimeCall(
5499  createRuntimeFunction(OMPRTL__kmpc_task_reduction_get_th_data), Args),
5500  SharedLVal.getAlignment());
5501 }
5502 
5503 void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
5504  SourceLocation Loc) {
5505  if (!CGF.HaveInsertPoint())
5506  return;
5507  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
5508  // global_tid);
5509  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
5510  // Ignore return result until untied tasks are supported.
5511  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_taskwait), Args);
5512  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5513  Region->emitUntiedSwitch(CGF);
5514 }
5515 
5516 void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
5517  OpenMPDirectiveKind InnerKind,
5518  const RegionCodeGenTy &CodeGen,
5519  bool HasCancel) {
5520  if (!CGF.HaveInsertPoint())
5521  return;
5522  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
5523  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
5524 }
5525 
5526 namespace {
5527 enum RTCancelKind {
5528  CancelNoreq = 0,
5529  CancelParallel = 1,
5530  CancelLoop = 2,
5531  CancelSections = 3,
5532  CancelTaskgroup = 4
5533 };
5534 } // anonymous namespace
5535 
5536 static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5537  RTCancelKind CancelKind = CancelNoreq;
5538  if (CancelRegion == OMPD_parallel)
5539  CancelKind = CancelParallel;
5540  else if (CancelRegion == OMPD_for)
5541  CancelKind = CancelLoop;
5542  else if (CancelRegion == OMPD_sections)
5543  CancelKind = CancelSections;
5544  else {
5545  assert(CancelRegion == OMPD_taskgroup);
5546  CancelKind = CancelTaskgroup;
5547  }
5548  return CancelKind;
5549 }
5550 
5551 void CGOpenMPRuntime::emitCancellationPointCall(
5552  CodeGenFunction &CGF, SourceLocation Loc,
5553  OpenMPDirectiveKind CancelRegion) {
5554  if (!CGF.HaveInsertPoint())
5555  return;
5556  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
5557  // global_tid, kmp_int32 cncl_kind);
5558  if (auto *OMPRegionInfo =
5559  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5560  // For 'cancellation point taskgroup', the task region info may not have a
5561  // cancel. This may instead happen in another adjacent task.
5562  if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
5563  llvm::Value *Args[] = {
5564  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
5565  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5566  // Ignore return result until untied tasks are supported.
5567  auto *Result = CGF.EmitRuntimeCall(
5568  createRuntimeFunction(OMPRTL__kmpc_cancellationpoint), Args);
5569  // if (__kmpc_cancellationpoint()) {
5570  // exit from construct;
5571  // }
5572  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
5573  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
5574  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
5575  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5576  CGF.EmitBlock(ExitBB);
5577  // exit from construct;
5578  auto CancelDest =
5579  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5580  CGF.EmitBranchThroughCleanup(CancelDest);
5581  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5582  }
5583  }
5584 }
5585 
5586 void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
5587  const Expr *IfCond,
5588  OpenMPDirectiveKind CancelRegion) {
5589  if (!CGF.HaveInsertPoint())
5590  return;
5591  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
5592  // kmp_int32 cncl_kind);
5593  if (auto *OMPRegionInfo =
5594  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
5595  auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
5596  PrePostActionTy &) {
5597  auto &RT = CGF.CGM.getOpenMPRuntime();
5598  llvm::Value *Args[] = {
5599  RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
5600  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
5601  // Ignore return result until untied tasks are supported.
5602  auto *Result = CGF.EmitRuntimeCall(
5603  RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
5604  // if (__kmpc_cancel()) {
5605  // exit from construct;
5606  // }
5607  auto *ExitBB = CGF.createBasicBlock(".cancel.exit");
5608  auto *ContBB = CGF.createBasicBlock(".cancel.continue");
5609  auto *Cmp = CGF.Builder.CreateIsNotNull(Result);
5610  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
5611  CGF.EmitBlock(ExitBB);
5612  // exit from construct;
5613  auto CancelDest =
5614  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
5615  CGF.EmitBranchThroughCleanup(CancelDest);
5616  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
5617  };
5618  if (IfCond)
5619  emitOMPIfClause(CGF, IfCond, ThenGen,
5620  [](CodeGenFunction &, PrePostActionTy &) {});
5621  else {
5622  RegionCodeGenTy ThenRCG(ThenGen);
5623  ThenRCG(CGF);
5624  }
5625  }
5626 }
5627 
5628 /// \brief Obtain information that uniquely identifies a target entry. This
5629 /// consists of the file and device IDs as well as line number associated with
5630 /// the relevant entry source location.
5631 static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
5632  unsigned &DeviceID, unsigned &FileID,
5633  unsigned &LineNum) {
5634 
5635  auto &SM = C.getSourceManager();
5636 
5637  // The loc should always be valid and have a file ID (the user cannot use
5638  // #pragma directives in macros).
5639 
5640  assert(Loc.isValid() && "Source location is expected to be always valid.");
5641  assert(Loc.isFileID() && "Source location is expected to refer to a file.");
5642 
5643  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
5644  assert(PLoc.isValid() && "Source location is expected to be always valid.");
5645 
5646  llvm::sys::fs::UniqueID ID;
5647  if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
5648  llvm_unreachable("Source file with target region no longer exists!");
5649 
5650  DeviceID = ID.getDevice();
5651  FileID = ID.getFile();
5652  LineNum = PLoc.getLine();
5653 }
5654 
5655 void CGOpenMPRuntime::emitTargetOutlinedFunction(
5656  const OMPExecutableDirective &D, StringRef ParentName,
5657  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5658  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5659  assert(!ParentName.empty() && "Invalid target region parent name!");
5660 
5661  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
5662  IsOffloadEntry, CodeGen);
5663 }
5664 
5665 void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
5666  const OMPExecutableDirective &D, StringRef ParentName,
5667  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
5668  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
5669  // Create a unique name for the entry function using the source location
5670  // information of the current target region. The name will be something like:
5671  //
5672  // __omp_offloading_DD_FFFF_PP_lBB
5673  //
5674  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
5675  // mangled name of the function that encloses the target region and BB is the
5676  // line number of the target region.
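  //
  // For example (hypothetical device and file IDs), a target region on line 42
  // of a file whose enclosing function mangles to "_Z3foov" would be named
  //
  //   __omp_offloading_801_2ab3cd__Z3foov_l42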
5677 
5678  unsigned DeviceID;
5679  unsigned FileID;
5680  unsigned Line;
5681  getTargetEntryUniqueInfo(CGM.getContext(), D.getLocStart(), DeviceID, FileID,
5682  Line);
5683  SmallString<64> EntryFnName;
5684  {
5685  llvm::raw_svector_ostream OS(EntryFnName);
5686  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
5687  << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
5688  }
5689 
5690  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5691 
5692  CodeGenFunction CGF(CGM, true);
5693  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
5694  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5695 
5696  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
5697 
5698  // If this target outlined function is not an offload entry, we don't need to
5699  // register it.
5700  if (!IsOffloadEntry)
5701  return;
5702 
5703  // The target region ID is used by the runtime library to identify the current
5704  // target region, so it only has to be unique and not necessarily point to
5705  // anything. It could be the pointer to the outlined function that implements
5706  // the target region, but we aren't using that so that the compiler doesn't
5707  // need to keep that, and could therefore inline the host function if proven
5708  // worthwhile during optimization. On the other hand, if emitting code for the
5709  // device, the ID has to be the function address so that it can be retrieved from
5710  // the offloading entry and launched by the runtime library. We also mark the
5711  // outlined function to have external linkage in case we are emitting code for
5712  // the device, because these functions will be entry points to the device.
5713 
5714  if (CGM.getLangOpts().OpenMPIsDevice) {
5715  OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
5716  OutlinedFn->setLinkage(llvm::GlobalValue::ExternalLinkage);
5717  } else
5718  OutlinedFnID = new llvm::GlobalVariable(
5719  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
5720  llvm::GlobalValue::PrivateLinkage,
5721  llvm::Constant::getNullValue(CGM.Int8Ty), ".omp_offload.region_id");
5722 
5723  // Register the information for the entry associated with this target region.
5724  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
5725  DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
5726  /*Flags=*/0);
5727 }
5728 
5729 /// discard all CompoundStmts intervening between two constructs
5730 static const Stmt *ignoreCompoundStmts(const Stmt *Body) {
5731  while (auto *CS = dyn_cast_or_null<CompoundStmt>(Body))
5732  Body = CS->body_front();
5733 
5734  return Body;
5735 }
5736 
5737 /// Emit the number of teams for a target directive. Inspect the num_teams
5738 /// clause associated with a teams construct combined or closely nested
5739 /// with the target directive.
5740 ///
5741 /// Emit a team of size one for directives such as 'target parallel' that
5742 /// have no associated teams construct.
5743 ///
5744 /// Otherwise, return nullptr.
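/// For illustration (hypothetical directives): 'target teams num_teams(8)'
/// yields the constant 8, 'target parallel' yields 1, 'target teams' without a
/// num_teams clause yields 0 (the runtime default), and a plain 'target' with
/// no enclosed teams region yields nullptr.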
5745 static llvm::Value *
5746 emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
5747  CodeGenFunction &CGF,
5748  const OMPExecutableDirective &D) {
5749 
5750  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
5751  "teams directive expected to be "
5752  "emitted only for the host!");
5753 
5754  auto &Bld = CGF.Builder;
5755 
5756  // If the target directive is combined with a teams directive:
5757  // Return the value in the num_teams clause, if any.
5758  // Otherwise, return 0 to denote the runtime default.
5759  if (isOpenMPTeamsDirective(D.getDirectiveKind())) {
5760  if (const auto *NumTeamsClause = D.getSingleClause<OMPNumTeamsClause>()) {
5761  CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
5762  auto NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(),
5763  /*IgnoreResultAssign*/ true);
5764  return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
5765  /*IsSigned=*/true);
5766  }
5767 
5768  // The default value is 0.
5769  return Bld.getInt32(0);
5770  }
5771 
5772  // If the target directive is combined with a parallel directive but not a
5773  // teams directive, start one team.
5774  if (isOpenMPParallelDirective(D.getDirectiveKind()))
5775  return Bld.getInt32(1);
5776 
5777  // If the current target region has a teams region enclosed, we need to get
5778  // the number of teams to pass to the runtime function call. This is done
5779  // by generating the expression in an inlined region. This is required because
5780  // the expression is captured in the enclosing target environment when the
5781  // teams directive is not combined with target.
5782 
5783  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5784 
5785  // FIXME: Accommodate other combined directives with teams when they become
5786  // available.
5787  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
5788  ignoreCompoundStmts(CS.getCapturedStmt()))) {
5789  if (auto *NTE = TeamsDir->getSingleClause<OMPNumTeamsClause>()) {
5790  CGOpenMPInnerExprInfo CGInfo(CGF, CS);
5791  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5792  llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams());
5793  return Bld.CreateIntCast(NumTeams, CGF.Int32Ty,
5794  /*IsSigned=*/true);
5795  }
5796 
5797  // If we have an enclosed teams directive but no num_teams clause we use
5798  // the default value 0.
5799  return Bld.getInt32(0);
5800  }
5801 
5802  // No teams associated with the directive.
5803  return nullptr;
5804 }
5805 
5806 /// Emit the number of threads for a target directive. Inspect the
5807 /// thread_limit clause associated with a teams construct combined or closely
5808 /// nested with the target directive.
5809 ///
5810 /// Emit the num_threads clause for directives such as 'target parallel' that
5811 /// have no associated teams construct.
5812 ///
5813 /// Otherwise, return nullptr.
5814 static llvm::Value *
5815 emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime,
5816  CodeGenFunction &CGF,
5817  const OMPExecutableDirective &D) {
5818 
5819  assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the "
5820  "teams directive expected to be "
5821  "emitted only for the host!");
5822 
5823  auto &Bld = CGF.Builder;
5824 
5825  //
5826  // If the target directive is combined with a teams directive:
5827  // Return the value in the thread_limit clause, if any.
5828  //
5829  // If the target directive is combined with a parallel directive:
5830  // Return the value in the num_threads clause, if any.
5831  //
5832  // If both clauses are set, select the minimum of the two.
5833  //
5834  // If neither the teams nor the parallel combined directive sets the number of
5835  // threads in a team, return 0 to denote the runtime default.
5836  //
5837  // If this is not a teams directive return nullptr.
5838 
5841  llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0);
5842  llvm::Value *NumThreadsVal = nullptr;
5843  llvm::Value *ThreadLimitVal = nullptr;
5844 
5845  if (const auto *ThreadLimitClause =
5846  D.getSingleClause<OMPThreadLimitClause>()) {
5847  CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
5848  auto ThreadLimit = CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(),
5849  /*IgnoreResultAssign*/ true);
5850  ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty,
5851  /*IsSigned=*/true);
5852  }
5853 
5854  if (const auto *NumThreadsClause =
5855  D.getSingleClause<OMPNumThreadsClause>()) {
5856  CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
5857  llvm::Value *NumThreads =
5858  CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(),
5859  /*IgnoreResultAssign*/ true);
5860  NumThreadsVal =
5861  Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true);
5862  }
5863 
5864  // Select the lesser of thread_limit and num_threads.
5865  if (NumThreadsVal)
5866  ThreadLimitVal = ThreadLimitVal
5867  ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal,
5868  ThreadLimitVal),
5869  NumThreadsVal, ThreadLimitVal)
5870  : NumThreadsVal;
5871 
5872  // Set default value passed to the runtime if either teams or a target
5873  // parallel type directive is found but no clause is specified.
5874  if (!ThreadLimitVal)
5875  ThreadLimitVal = DefaultThreadLimitVal;
5876 
5877  return ThreadLimitVal;
5878  }
5879 
5880  // If the current target region has a teams region enclosed, we need to get
5881  // the thread limit to pass to the runtime function call. This is done
5882  // by generating the expression in an inlined region. This is required because
5883  // the expression is captured in the enclosing target environment when the
5884  // teams directive is not combined with target.
5885 
5886  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
5887 
5888  // FIXME: Accommodate other combined directives with teams when they become
5889  // available.
5890  if (auto *TeamsDir = dyn_cast_or_null<OMPTeamsDirective>(
5892  if (auto *TLE = TeamsDir->getSingleClause<OMPThreadLimitClause>()) {
5893  CGOpenMPInnerExprInfo CGInfo(CGF, CS);
5894  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
5895  llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit());
5896  return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty,
5897  /*IsSigned=*/true);
5898  }
5899 
5900  // If we have an enclosed teams directive but no thread_limit clause we use
5901  // the default value 0.
5902  return CGF.Builder.getInt32(0);
5903  }
5904 
5905  // No teams associated with the directive.
5906  return nullptr;
5907 }
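
// For illustration only, the thread counts produced above:
//
//   #pragma omp target teams thread_limit(16)   -> i32 16
//   #pragma omp target parallel num_threads(4)  -> i32 4
//   both clauses on one combined construct      -> min(num_threads, thread_limit)
//   #pragma omp target teams  (no clauses)      -> i32 0 (runtime default)
//   #pragma omp target        (no teams)        -> nullptr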
5908 
5909 namespace {
5910 // \brief Utility to handle information from clauses associated with a given
5911 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
5912 // It provides a convenient interface to obtain the information and generate
5913 // code for that information.
5914 class MappableExprsHandler {
5915 public:
5916  /// \brief Values for bit flags used to specify the mapping type for
5917  /// offloading.
5918  enum OpenMPOffloadMappingFlags {
5919  /// \brief Allocate memory on the device and move data from host to device.
5920  OMP_MAP_TO = 0x01,
5921  /// \brief Allocate memory on the device and move data from device to host.
5922  OMP_MAP_FROM = 0x02,
5923  /// \brief Always perform the requested mapping action on the element, even
5924  /// if it was already mapped before.
5925  OMP_MAP_ALWAYS = 0x04,
5926  /// \brief Delete the element from the device environment, ignoring the
5927  /// current reference count associated with the element.
5928  OMP_MAP_DELETE = 0x08,
5929  /// \brief The element being mapped is a pointer, therefore the pointee
5930  /// should be mapped as well.
5931  OMP_MAP_IS_PTR = 0x10,
5932  /// \brief This flag signals that an argument is the first one relating to
5933  /// a map/private clause expression. In some cases a single
5934  /// map/privatization results in multiple arguments passed to the runtime
5935  /// library.
5936  OMP_MAP_FIRST_REF = 0x20,
5937  /// \brief Signal that the runtime library has to return the device pointer
5938  /// in the current position for the data being mapped.
5939  OMP_MAP_RETURN_PTR = 0x40,
5940  /// \brief This flag signals that the reference being passed is a pointer to
5941  /// private data.
5942  OMP_MAP_PRIVATE_PTR = 0x80,
5943  /// \brief Pass the element to the device by value.
5944  OMP_MAP_PRIVATE_VAL = 0x100,
5945  };
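
  // For illustration, a clause such as 'map(always, tofrom: x)' combines to
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS == 0x01 | 0x02 | 0x04 == 0x07
  // and the first entry emitted for a capture additionally carries
  // OMP_MAP_FIRST_REF (0x20); see getMapTypeBits() below.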
5946 
5947  /// Class that associates information with a base pointer to be passed to the
5948  /// runtime library.
5949  class BasePointerInfo {
5950  /// The base pointer.
5951  llvm::Value *Ptr = nullptr;
5952  /// The base declaration that refers to this device pointer, or null if
5953  /// there is none.
5954  const ValueDecl *DevPtrDecl = nullptr;
5955 
5956  public:
5957  BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
5958  : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
5959  llvm::Value *operator*() const { return Ptr; }
5960  const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
5961  void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
5962  };
5963 
5964  typedef SmallVector<BasePointerInfo, 16> MapBaseValuesArrayTy;
5965  typedef SmallVector<llvm::Value *, 16> MapValuesArrayTy;
5966  typedef SmallVector<unsigned, 16> MapFlagsArrayTy;
5967 
5968 private:
5969  /// \brief Directive from where the map clauses were extracted.
5970  const OMPExecutableDirective &CurDir;
5971 
5972  /// \brief Function the directive is being generated for.
5973  CodeGenFunction &CGF;
5974 
5975  /// \brief Set of all first private variables in the current directive.
5976  llvm::SmallPtrSet<const VarDecl *, 8> FirstPrivateDecls;
5977 
5978  /// Map between device pointer declarations and their expression components.
5979  /// The key value for declarations in 'this' is null.
5980  llvm::DenseMap<
5981  const ValueDecl *,
5983  DevPointersMap;
5984 
5985  llvm::Value *getExprTypeSize(const Expr *E) const {
5986  auto ExprTy = E->getType().getCanonicalType();
5987 
5988  // Reference types are ignored for mapping purposes.
5989  if (auto *RefTy = ExprTy->getAs<ReferenceType>())
5990  ExprTy = RefTy->getPointeeType().getCanonicalType();
5991 
5992  // Given that an array section is considered a built-in type, we need to
5993  // do the calculation based on the length of the section instead of relying
5994  // on CGF.getTypeSize(E->getType()).
5995  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
5997  OAE->getBase()->IgnoreParenImpCasts())
5998  .getCanonicalType();
5999 
6000  // If there is no length associated with the expression, that means we
6001  // are using the whole length of the base.
6002  if (!OAE->getLength() && OAE->getColonLoc().isValid())
6003  return CGF.getTypeSize(BaseTy);
6004 
6005  llvm::Value *ElemSize;
6006  if (auto *PTy = BaseTy->getAs<PointerType>())
6007  ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
6008  else {
6009  auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
6010  assert(ATy && "Expecting array type if not a pointer type.");
6011  ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
6012  }
6013 
6014  // If we don't have a length at this point, that is because we have an
6015  // array section with a single element.
6016  if (!OAE->getLength())
6017  return ElemSize;
6018 
6019  auto *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
6020  LengthVal =
6021  CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
6022  return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
6023  }
6024  return CGF.getTypeSize(ExprTy);
6025  }
6026 
6027  /// \brief Return the corresponding bits for a given map clause modifier. Add
6028  /// a flag marking the map as a pointer if requested. Add a flag marking the
6029  /// map as the first one of a series of maps that relate to the same map
6030  /// expression.
6031  unsigned getMapTypeBits(OpenMPMapClauseKind MapType,
6032  OpenMPMapClauseKind MapTypeModifier, bool AddPtrFlag,
6033  bool AddIsFirstFlag) const {
6034  unsigned Bits = 0u;
6035  switch (MapType) {
6036  case OMPC_MAP_alloc:
6037  case OMPC_MAP_release:
6038  // alloc and release are the default behavior in the runtime library, i.e.
6039  // if we don't pass any bits, alloc/release is what the runtime is
6040  // going to do. Therefore, we don't need to signal anything for these two
6041  // type modifiers.
6042  break;
6043  case OMPC_MAP_to:
6044  Bits = OMP_MAP_TO;
6045  break;
6046  case OMPC_MAP_from:
6047  Bits = OMP_MAP_FROM;
6048  break;
6049  case OMPC_MAP_tofrom:
6050  Bits = OMP_MAP_TO | OMP_MAP_FROM;
6051  break;
6052  case OMPC_MAP_delete:
6053  Bits = OMP_MAP_DELETE;
6054  break;
6055  default:
6056  llvm_unreachable("Unexpected map type!");
6057  break;
6058  }
6059  if (AddPtrFlag)
6060  Bits |= OMP_MAP_IS_PTR;
6061  if (AddIsFirstFlag)
6062  Bits |= OMP_MAP_FIRST_REF;
6063  if (MapTypeModifier == OMPC_MAP_always)
6064  Bits |= OMP_MAP_ALWAYS;
6065  return Bits;
6066  }
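
  // For illustration, two sample results of the helper above:
  //   getMapTypeBits(OMPC_MAP_tofrom, OMPC_MAP_always,
  //                  /*AddPtrFlag=*/false, /*AddIsFirstFlag=*/true)
  //     -> OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_FIRST_REF
  //   getMapTypeBits(OMPC_MAP_to, OMPC_MAP_unknown,
  //                  /*AddPtrFlag=*/true, /*AddIsFirstFlag=*/false)
  //     -> OMP_MAP_TO | OMP_MAP_IS_PTR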
6067 
6068  /// \brief Return true if the provided expression is a final array section. A
6069  /// final array section is one whose length can't be proved to be one.
6070  bool isFinalArraySectionExpression(const Expr *E) const {
6071  auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
6072 
6073  // It is not an array section and therefore not a unity-size one.
6074  if (!OASE)
6075  return false;
6076 
6077  // An array section with no colon always refers to a single element.
6078  if (OASE->getColonLoc().isInvalid())
6079  return false;
6080 
6081  auto *Length = OASE->getLength();
6082 
6083  // If we don't have a length we have to check if the array has size 1
6084  // for this dimension. Also, we should always expect a length if the
6085  // base type is pointer.
6086  if (!Length) {
6088  OASE->getBase()->IgnoreParenImpCasts())
6089  .getCanonicalType();
6090  if (auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
6091  return ATy->getSize().getSExtValue() != 1;
6092  // If we don't have a constant dimension length, we have to consider
6093  // the current section as having any size, so it is not necessarily
6094  // unitary. If it happens to be unity size, that's the user's fault.
6095  return true;
6096  }
6097 
6098  // Check if the length evaluates to 1.
6099  llvm::APSInt ConstLength;
6100  if (!Length->EvaluateAsInt(ConstLength, CGF.getContext()))
6101  return true; // Can have a size greater than 1.
6102 
6103  return ConstLength.getSExtValue() != 1;
6104  }
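
  // For illustration, given 'int a[10]; int *p; int n;':
  //   a[2:5] -> true  (length 5, cannot be proved to be one)
  //   a[2:1] -> false (constant length 1)
  //   a[2:]  -> true  (no length; constant dimension 10 != 1)
  //   p[0:n] -> true  (length is not a compile-time constant)
  //   a[2]   -> false (an ArraySubscriptExpr, not an array section)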
6105 
6106  /// \brief Generate the base pointers, section pointers, sizes and map type
6107  /// bits for the provided map type, map modifier, and expression components.
6108  /// \a IsFirstComponent should be set to true if the provided set of
6109  /// components is the first associated with a capture.
6110  void generateInfoForComponentList(
6111  OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6113  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
6114  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
6115  bool IsFirstComponentList) const {
6116 
6117  // The following summarizes what has to be generated for each map and the
6118  // types below. The generated information is expressed in this order:
6119  // base pointer, section pointer, size, flags
6120  // (to add to the ones that come from the map type and modifier).
6121  //
6122  // double d;
6123  // int i[100];
6124  // float *p;
6125  //
6126  // struct S1 {
6127  // int i;
6128  // float f[50];
6129  // }
6130  // struct S2 {
6131  // int i;
6132  // float f[50];
6133  // S1 s;
6134  // double *p;
6135  // struct S2 *ps;
6136  // }
6137  // S2 s;
6138  // S2 *ps;
6139  //
6140  // map(d)
6141  // &d, &d, sizeof(double), noflags
6142  //
6143  // map(i)
6144  // &i, &i, 100*sizeof(int), noflags
6145  //
6146  // map(i[1:23])
6147  // &i(=&i[0]), &i[1], 23*sizeof(int), noflags
6148  //
6149  // map(p)
6150  // &p, &p, sizeof(float*), noflags
6151  //
6152  // map(p[1:24])
6153  // p, &p[1], 24*sizeof(float), noflags
6154  //
6155  // map(s)
6156  // &s, &s, sizeof(S2), noflags
6157  //
6158  // map(s.i)
6159  // &s, &(s.i), sizeof(int), noflags
6160  //
6161  // map(s.s.f)
6162  // &s, &(s.s.f), 50*sizeof(float), noflags
6163  //
6164  // map(s.p)
6165  // &s, &(s.p), sizeof(double*), noflags
6166  //
6167  // map(s.p[:22], s.a, s.b)
6168  // &s, &(s.p), sizeof(double*), noflags
6169  // &(s.p), &(s.p[0]), 22*sizeof(double), ptr_flag + extra_flag
6170  //
6171  // map(s.ps)
6172  // &s, &(s.ps), sizeof(S2*), noflags
6173  //
6174  // map(s.ps->s.i)
6175  // &s, &(s.ps), sizeof(S2*), noflags
6176  // &(s.ps), &(s.ps->s.i), sizeof(int), ptr_flag + extra_flag
6177  //
6178  // map(s.ps->ps)
6179  // &s, &(s.ps), sizeof(S2*), noflags
6180  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
6181  //
6182  // map(s.ps->ps->ps)
6183  // &s, &(s.ps), sizeof(S2*), noflags
6184  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
6185  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
6186  //
6187  // map(s.ps->ps->s.f[:22])
6188  // &s, &(s.ps), sizeof(S2*), noflags
6189  // &(s.ps), &(s.ps->ps), sizeof(S2*), ptr_flag + extra_flag
6190  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), ptr_flag + extra_flag
6191  //
6192  // map(ps)
6193  // &ps, &ps, sizeof(S2*), noflags
6194  //
6195  // map(ps->i)
6196  // ps, &(ps->i), sizeof(int), noflags
6197  //
6198  // map(ps->s.f)
6199  // ps, &(ps->s.f[0]), 50*sizeof(float), noflags
6200  //
6201  // map(ps->p)
6202  // ps, &(ps->p), sizeof(double*), noflags
6203  //
6204  // map(ps->p[:22])
6205  // ps, &(ps->p), sizeof(double*), noflags
6206  // &(ps->p), &(ps->p[0]), 22*sizeof(double), ptr_flag + extra_flag
6207  //
6208  // map(ps->ps)
6209  // ps, &(ps->ps), sizeof(S2*), noflags
6210  //
6211  // map(ps->ps->s.i)
6212  // ps, &(ps->ps), sizeof(S2*), noflags
6213  // &(ps->ps), &(ps->ps->s.i), sizeof(int), ptr_flag + extra_flag
6214  //
6215  // map(ps->ps->ps)
6216  // ps, &(ps->ps), sizeof(S2*), noflags
6217  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
6218  //
6219  // map(ps->ps->ps->ps)
6220  // ps, &(ps->ps), sizeof(S2*), noflags
6221  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
6222  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
6223  //
6224  // map(ps->ps->ps->s.f[:22])
6225  // ps, &(ps->ps), sizeof(S2*), noflags
6226  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), ptr_flag + extra_flag
6227  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), ptr_flag +
6228  // extra_flag
6229 
6230  // Track if the map information being generated is the first for a capture.
6231  bool IsCaptureFirstInfo = IsFirstComponentList;
6232 
6233  // Scan the components from the base to the complete expression.
6234  auto CI = Components.rbegin();
6235  auto CE = Components.rend();
6236  auto I = CI;
6237 
6238  // Track if the map information being generated is the first for a list of
6239  // components.
6240  bool IsExpressionFirstInfo = true;
6241  llvm::Value *BP = nullptr;
6242 
6243  if (auto *ME = dyn_cast<MemberExpr>(I->getAssociatedExpression())) {
6244  // The base is the 'this' pointer. The content of the pointer is going
6245  // to be the base of the field being mapped.
6246  BP = CGF.EmitScalarExpr(ME->getBase());
6247  } else {
6248  // The base is the reference to the variable.
6249  // BP = &Var.
6250  BP = CGF.EmitLValue(cast<DeclRefExpr>(I->getAssociatedExpression()))
6251  .getPointer();
6252 
6253  // If the variable is a pointer and is being dereferenced (i.e. is not
6254  // the last component), the base has to be the pointer itself, not its
6255  // reference. References are ignored for mapping purposes.
6256  QualType Ty =
6257  I->getAssociatedDeclaration()->getType().getNonReferenceType();
6258  if (Ty->isAnyPointerType() && std::next(I) != CE) {
6259  auto PtrAddr = CGF.MakeNaturalAlignAddrLValue(BP, Ty);
6260  BP = CGF.EmitLoadOfPointerLValue(PtrAddr.getAddress(),
6261  Ty->castAs<PointerType>())
6262  .getPointer();
6263 
6264  // We do not need to generate individual map information for the
6265  // pointer, it can be associated with the combined storage.
6266  ++I;
6267  }
6268  }
6269 
6270  for (; I != CE; ++I) {
6271  auto Next = std::next(I);
6272 
6273  // We need to generate the addresses and sizes if this is the last
6274  // component, if the component is a pointer or if it is an array section
6275  // whose length can't be proved to be one. If this is a pointer, it
6276  // becomes the base address for the following components.
6277 
6278  // A final array section is one whose length can't be proved to be one.
6279  bool IsFinalArraySection =
6280  isFinalArraySectionExpression(I->getAssociatedExpression());
6281 
6282  // Get information on whether the element is a pointer. Have to do a
6283  // special treatment for array sections given that they are built-in
6284  // types.
6285  const auto *OASE =
6286  dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
6287  bool IsPointer =
6288  (OASE &&
6290  .getCanonicalType()
6291  ->isAnyPointerType()) ||
6292  I->getAssociatedExpression()->getType()->isAnyPointerType();
6293 
6294  if (Next == CE || IsPointer || IsFinalArraySection) {
6295 
6296  // If this is not the last component, we expect the pointer to be
6297  // associated with an array expression or member expression.
6298  assert((Next == CE ||
6299  isa<MemberExpr>(Next->getAssociatedExpression()) ||
6300  isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
6301  isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
6302  "Unexpected expression");
6303 
6304  auto *LB = CGF.EmitLValue(I->getAssociatedExpression()).getPointer();
6305  auto *Size = getExprTypeSize(I->getAssociatedExpression());
6306 
6307  // If we have a member expression and the current component is a
6308  // reference, we have to map the reference too. Whenever we have a
6309  // reference, the section that the reference refers to is going to be a
6310  // load instruction from the storage assigned to the reference.
6311  if (isa<MemberExpr>(I->getAssociatedExpression()) &&
6312  I->getAssociatedDeclaration()->getType()->isReferenceType()) {
6313  auto *LI = cast<llvm::LoadInst>(LB);
6314  auto *RefAddr = LI->getPointerOperand();
6315 
6316  BasePointers.push_back(BP);
6317  Pointers.push_back(RefAddr);
6318  Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
6319  Types.push_back(getMapTypeBits(
6320  /*MapType*/ OMPC_MAP_alloc, /*MapTypeModifier=*/OMPC_MAP_unknown,
6321  !IsExpressionFirstInfo, IsCaptureFirstInfo));
6322  IsExpressionFirstInfo = false;
6323  IsCaptureFirstInfo = false;
6324  // The reference will be the next base address.
6325  BP = RefAddr;
6326  }
6327 
6328  BasePointers.push_back(BP);
6329  Pointers.push_back(LB);
6330  Sizes.push_back(Size);
6331 
6332  // We need to add a pointer flag for each map that comes from the
6333  // same expression except for the first one. We also need to signal
6334  // this map is the first one that relates with the current capture
6335  // (there is a set of entries for each capture).
6336  Types.push_back(getMapTypeBits(MapType, MapTypeModifier,
6337  !IsExpressionFirstInfo,
6338  IsCaptureFirstInfo));
6339 
6340  // If we have a final array section, we are done with this expression.
6341  if (IsFinalArraySection)
6342  break;
6343 
6344  // The pointer becomes the base for the next element.
6345  if (Next != CE)
6346  BP = LB;
6347 
6348  IsExpressionFirstInfo = false;
6349  IsCaptureFirstInfo = false;
6350  continue;
6351  }
6352  }
6353  }
6354 
6355  /// \brief Return the adjusted map modifiers if the declaration a capture
6356  /// refers to appears in a first-private clause. This is expected to be used
6357  /// only with directives that start with 'target'.
6358  unsigned adjustMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap,
6359  unsigned CurrentModifiers) {
6360  assert(Cap.capturesVariable() && "Expected capture by reference only!");
6361 
6362  // A first private variable captured by reference will use only the
6363  // 'private ptr' and 'map to' flag. Return the right flags if the captured
6364  // declaration is known as first-private in this handler.
6365  if (FirstPrivateDecls.count(Cap.getCapturedVar()))
6366  return MappableExprsHandler::OMP_MAP_PRIVATE_PTR |
6367  MappableExprsHandler::OMP_MAP_TO;
6368 
6369  // We didn't modify anything.
6370  return CurrentModifiers;
6371  }
6372 
6373 public:
6374  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
6375  : CurDir(Dir), CGF(CGF) {
6376  // Extract firstprivate clause information.
6377  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
6378  for (const auto *D : C->varlists())
6379  FirstPrivateDecls.insert(
6380  cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl());
6381  // Extract device pointer clause information.
6382  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
6383  for (auto L : C->component_lists())
6384  DevPointersMap[L.first].push_back(L.second);
6385  }
6386 
6387  /// \brief Generate all the base pointers, section pointers, sizes and map
6388  /// types for the extracted mappable expressions. Also, for each item that
6389  /// relates with a device pointer, a pair of the relevant declaration and
6390  /// index where it occurs is appended to the device pointers info array.
6391  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
6392  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
6393  MapFlagsArrayTy &Types) const {
6394  BasePointers.clear();
6395  Pointers.clear();
6396  Sizes.clear();
6397  Types.clear();
6398 
6399  struct MapInfo {
6400  /// Kind that defines how a device pointer has to be returned.
6401  enum ReturnPointerKind {
6402  // Don't have to return any pointer.
6403  RPK_None,
6404  // Pointer is the base of the declaration.
6405  RPK_Base,
6406  // Pointer is a member of the base declaration - 'this'
6407  RPK_Member,
6408  // Pointer is a reference and a member of the base declaration - 'this'
6409  RPK_MemberReference,
6410  };
6412  OpenMPMapClauseKind MapType;
6413  OpenMPMapClauseKind MapTypeModifier;
6414  ReturnPointerKind ReturnDevicePointer;
6415 
6416  MapInfo()
6417  : MapType(OMPC_MAP_unknown), MapTypeModifier(OMPC_MAP_unknown),
6418  ReturnDevicePointer(RPK_None) {}
6419  MapInfo(
6421  OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapTypeModifier,
6422  ReturnPointerKind ReturnDevicePointer)
6423  : Components(Components), MapType(MapType),
6424  MapTypeModifier(MapTypeModifier),
6425  ReturnDevicePointer(ReturnDevicePointer) {}
6426  };
6427 
6428  // We have to process the component lists that relate with the same
6429  // declaration in a single chunk so that we can generate the map flags
6430  // correctly. Therefore, we organize all lists in a map.
6431  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
6432 
6433  // Helper function to fill the information map for the different supported
6434  // clauses.
6435  auto &&InfoGen = [&Info](
6436  const ValueDecl *D,
6438  OpenMPMapClauseKind MapType, OpenMPMapClauseKind MapModifier,
6439  MapInfo::ReturnPointerKind ReturnDevicePointer) {
6440  const ValueDecl *VD =
6441  D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
6442  Info[VD].push_back({L, MapType, MapModifier, ReturnDevicePointer});
6443  };
6444 
6445  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6446  for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
6447  for (auto L : C->component_lists())
6448  InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifier(),
6449  MapInfo::RPK_None);
6450  for (auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
6451  for (auto L : C->component_lists())
6452  InfoGen(L.first, L.second, OMPC_MAP_to, OMPC_MAP_unknown,
6453  MapInfo::RPK_None);
6454  for (auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
6455  for (auto L : C->component_lists())
6456  InfoGen(L.first, L.second, OMPC_MAP_from, OMPC_MAP_unknown,
6457  MapInfo::RPK_None);
6458 
6459  // Look at the use_device_ptr clause information and mark the existing map
6460  // entries as such. If there is no map information for an entry in the
6461  // use_device_ptr list, we create one with map type 'alloc' and zero size
6462  // section. It is the user's fault if that was not mapped before.
6463  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6464  for (auto *C : this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>())
6465  for (auto L : C->component_lists()) {
6466  assert(!L.second.empty() && "Not expecting empty list of components!");
6467  const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
6468  VD = cast<ValueDecl>(VD->getCanonicalDecl());
6469  auto *IE = L.second.back().getAssociatedExpression();
6470  // If the first component is a member expression, we have to look into
6471  // 'this', which maps to null in the map of map information. Otherwise
6472  // look directly for the information.
6473  auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
6474 
6475  // We potentially have map information for this declaration already.
6476  // Look for the first set of components that refer to it.
6477  if (It != Info.end()) {
6478  auto CI = std::find_if(
6479  It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
6480  return MI.Components.back().getAssociatedDeclaration() == VD;
6481  });
6482  // If we found a map entry, signal that the pointer has to be returned
6483  // and move on to the next declaration.
6484  if (CI != It->second.end()) {
6485  CI->ReturnDevicePointer = isa<MemberExpr>(IE)
6486  ? (VD->getType()->isReferenceType()
6487  ? MapInfo::RPK_MemberReference
6488  : MapInfo::RPK_Member)
6489  : MapInfo::RPK_Base;
6490  continue;
6491  }
6492  }
6493 
6494  // We didn't find any match in our map information - generate a zero
6495  // size array section.
6496  // FIXME: MSVC 2013 seems to require this-> to find member CGF.
6497  llvm::Value *Ptr =
6498  this->CGF
6499  .EmitLoadOfLValue(this->CGF.EmitLValue(IE), SourceLocation())
6500  .getScalarVal();
6501  BasePointers.push_back({Ptr, VD});
6502  Pointers.push_back(Ptr);
6503  Sizes.push_back(llvm::Constant::getNullValue(this->CGF.SizeTy));
6504  Types.push_back(OMP_MAP_RETURN_PTR | OMP_MAP_FIRST_REF);
6505  }
6506 
6507  for (auto &M : Info) {
6508  // We need to know when we generate information for the first component
6509  // associated with a capture, because the mapping flags depend on it.
6510  bool IsFirstComponentList = true;
6511  for (MapInfo &L : M.second) {
6512  assert(!L.Components.empty() &&
6513  "Not expecting declaration with no component lists.");
6514 
6515  // Remember the current base pointer index.
6516  unsigned CurrentBasePointersIdx = BasePointers.size();
6517  // FIXME: MSVC 2013 seems to require this-> to find the member method.
6518  this->generateInfoForComponentList(L.MapType, L.MapTypeModifier,
6519  L.Components, BasePointers, Pointers,
6520  Sizes, Types, IsFirstComponentList);
6521 
6522  // If this entry relates with a device pointer, set the relevant
6523  // declaration and add the 'return pointer' flag.
6524  if (IsFirstComponentList &&
6525  L.ReturnDevicePointer != MapInfo::RPK_None) {
6526  // If the pointer is not the base of the map, we need to skip the
6527  // base. If it is a reference in a member field, we also need to skip
6528  // the map of the reference.
6529  if (L.ReturnDevicePointer != MapInfo::RPK_Base) {
6530  ++CurrentBasePointersIdx;
6531  if (L.ReturnDevicePointer == MapInfo::RPK_MemberReference)
6532  ++CurrentBasePointersIdx;
6533  }
6534  assert(BasePointers.size() > CurrentBasePointersIdx &&
6535  "Unexpected number of mapped base pointers.");
6536 
6537  auto *RelevantVD = L.Components.back().getAssociatedDeclaration();
6538  assert(RelevantVD &&
6539  "No relevant declaration related with device pointer??");
6540 
6541  BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
6542  Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PTR;
6543  }
6544  IsFirstComponentList = false;
6545  }
6546  }
6547  }
6548 
6549  /// \brief Generate the base pointers, section pointers, sizes and map types
6550  /// associated to a given capture.
6551  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
6552  llvm::Value *Arg,
6553  MapBaseValuesArrayTy &BasePointers,
6554  MapValuesArrayTy &Pointers,
6555  MapValuesArrayTy &Sizes,
6556  MapFlagsArrayTy &Types) const {
6557  assert(!Cap->capturesVariableArrayType() &&
6558  "Not expecting to generate map info for a variable array type!");
6559 
6560  BasePointers.clear();
6561  Pointers.clear();
6562  Sizes.clear();
6563  Types.clear();
6564 
6565  // We need to know when we are generating information for the first component
6566  // associated with a capture, because the mapping flags depend on it.
6567  bool IsFirstComponentList = true;
6568 
6569  const ValueDecl *VD =
6570  Cap->capturesThis()
6571  ? nullptr
6572  : cast<ValueDecl>(Cap->getCapturedVar()->getCanonicalDecl());
6573 
6574  // If this declaration appears in an is_device_ptr clause we just have to
6575  // pass the pointer by value. If it is a reference to a declaration, we just
6576  // pass its value, otherwise, if it is a member expression, we need to map
6577  // 'to' the field.
6578  if (!VD) {
6579  auto It = DevPointersMap.find(VD);
6580  if (It != DevPointersMap.end()) {
6581  for (auto L : It->second) {
6582  generateInfoForComponentList(
6583  /*MapType=*/OMPC_MAP_to, /*MapTypeModifier=*/OMPC_MAP_unknown, L,
6584  BasePointers, Pointers, Sizes, Types, IsFirstComponentList);
6585  IsFirstComponentList = false;
6586  }
6587  return;
6588  }
6589  } else if (DevPointersMap.count(VD)) {
6590  BasePointers.push_back({Arg, VD});
6591  Pointers.push_back(Arg);
6592  Sizes.push_back(CGF.getTypeSize(CGF.getContext().VoidPtrTy));
6593  Types.push_back(OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF);
6594  return;
6595  }
6596 
6597  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
6598  for (auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
6599  for (auto L : C->decl_component_lists(VD)) {
6600  assert(L.first == VD &&
6601  "We got information for the wrong declaration??");
6602  assert(!L.second.empty() &&
6603  "Not expecting declaration with no component lists.");
6604  generateInfoForComponentList(C->getMapType(), C->getMapTypeModifier(),
6605  L.second, BasePointers, Pointers, Sizes,
6606  Types, IsFirstComponentList);
6607  IsFirstComponentList = false;
6608  }
6609 
6610  return;
6611  }
6612 
6613  /// \brief Generate the default map information for a given capture \a CI,
6614  /// record field declaration \a RI and captured value \a CV.
6615  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
6616  const FieldDecl &RI, llvm::Value *CV,
6617  MapBaseValuesArrayTy &CurBasePointers,
6618  MapValuesArrayTy &CurPointers,
6619  MapValuesArrayTy &CurSizes,
6620  MapFlagsArrayTy &CurMapTypes) {
6621 
6622  // Do the default mapping.
6623  if (CI.capturesThis()) {
6624  CurBasePointers.push_back(CV);
6625  CurPointers.push_back(CV);
6626  const PointerType *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
6627  CurSizes.push_back(CGF.getTypeSize(PtrTy->getPointeeType()));
6628  // Default map type.
6629  CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
6630  } else if (CI.capturesVariableByCopy()) {
6631  CurBasePointers.push_back(CV);
6632  CurPointers.push_back(CV);
6633  if (!RI.getType()->isAnyPointerType()) {
6634  // We have to signal to the runtime the captures passed by value that are
6635  // not pointers.
6636  CurMapTypes.push_back(OMP_MAP_PRIVATE_VAL);
6637  CurSizes.push_back(CGF.getTypeSize(RI.getType()));
6638  } else {
6639  // Pointers are implicitly mapped with a zero size and no flags
6640  // (other than first map that is added for all implicit maps).
6641  CurMapTypes.push_back(0u);
6642  CurSizes.push_back(llvm::Constant::getNullValue(CGF.SizeTy));
6643  }
6644  } else {
6645  assert(CI.capturesVariable() && "Expected captured reference.");
6646  CurBasePointers.push_back(CV);
6647  CurPointers.push_back(CV);
6648 
6649  const ReferenceType *PtrTy =
6650  cast<ReferenceType>(RI.getType().getTypePtr());
6651  QualType ElementType = PtrTy->getPointeeType();
6652  CurSizes.push_back(CGF.getTypeSize(ElementType));
6653  // The default map type for a scalar/complex type is 'to' because by
6654  // default the value doesn't have to be retrieved. For an aggregate
6655  // type, the default is 'tofrom'.
6656  CurMapTypes.push_back(ElementType->isAggregateType()
6657  ? (OMP_MAP_TO | OMP_MAP_FROM)
6658  : OMP_MAP_TO);
6659 
6660  // If we have a capture by reference we may need to add the private
6661  // pointer flag if the base declaration shows up in some first-private
6662  // clause.
6663  CurMapTypes.back() =
6664  adjustMapModifiersForPrivateClauses(CI, CurMapTypes.back());
6665  }
6666  // Every default map produces a single argument, so it is always the
6667  // first one.
6668  CurMapTypes.back() |= OMP_MAP_FIRST_REF;
6669  }
6670 };
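
// For illustration of the default mapping above: in a region such as
//   int x; int a[100];
//   #pragma omp target
//   { a[0] = x; }
// a scalar 'x' captured by copy (the usual case for scalars on 'target') is
// passed with OMP_MAP_PRIVATE_VAL | OMP_MAP_FIRST_REF and sizeof(int), while
// the array 'a', captured by reference and of aggregate type, is mapped with
// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_FIRST_REF and 100*sizeof(int); a scalar
// captured by reference would default to OMP_MAP_TO only.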
6671 
6672 enum OpenMPOffloadingReservedDeviceIDs {
6673  /// \brief Device ID to use if the device was not defined; the runtime
6674  /// should get it from environment variables, as stated in the spec.
6675  OMP_DEVICEID_UNDEF = -1,
6676 };
6677 } // anonymous namespace
6678 
6679 /// \brief Emit the arrays used to pass the captures and map information to the
6680 /// offloading runtime library. If there is no map or capture information,
6681 /// return nullptr by reference.
6682 static void
6683 emitOffloadingArrays(CodeGenFunction &CGF,
6684  MappableExprsHandler::MapBaseValuesArrayTy &BasePointers,
6685  MappableExprsHandler::MapValuesArrayTy &Pointers,
6686  MappableExprsHandler::MapValuesArrayTy &Sizes,
6687  MappableExprsHandler::MapFlagsArrayTy &MapTypes,
6688  CGOpenMPRuntime::TargetDataInfo &Info) {
6689  auto &CGM = CGF.CGM;
6690  auto &Ctx = CGF.getContext();
6691 
6692  // Reset the array information.
6693  Info.clearArrayInfo();
6694  Info.NumberOfPtrs = BasePointers.size();
6695 
6696  if (Info.NumberOfPtrs) {
6697  // Detect if we have any capture size requiring runtime evaluation of the
6698  // size so that a constant array could be eventually used.
6699  bool hasRuntimeEvaluationCaptureSize = false;
6700  for (auto *S : Sizes)
6701  if (!isa<llvm::Constant>(S)) {
6702  hasRuntimeEvaluationCaptureSize = true;
6703  break;
6704  }
6705 
6706  llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
6707  QualType PointerArrayType =
6708  Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
6709  /*IndexTypeQuals=*/0);
6710 
6711  Info.BasePointersArray =
6712  CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
6713  Info.PointersArray =
6714  CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
6715 
6716  // If we don't have any VLA types or other types that require runtime
6717  // evaluation, we can use a constant array for the map sizes, otherwise we
6718  // need to fill up the arrays as we do for the pointers.
6719  if (hasRuntimeEvaluationCaptureSize) {
6720  QualType SizeArrayType = Ctx.getConstantArrayType(
6721  Ctx.getSizeType(), PointerNumAP, ArrayType::Normal,
6722  /*IndexTypeQuals=*/0);
6723  Info.SizesArray =
6724  CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
6725  } else {
6726  // We expect all the sizes to be constant, so we collect them to create
6727  // a constant array.
6728  SmallVector<llvm::Constant *, 16> ConstSizes;
6729  for (auto S : Sizes)
6730  ConstSizes.push_back(cast<llvm::Constant>(S));
6731 
6732  auto *SizesArrayInit = llvm::ConstantArray::get(
6733  llvm::ArrayType::get(CGM.SizeTy, ConstSizes.size()), ConstSizes);
6734  auto *SizesArrayGbl = new llvm::GlobalVariable(
6735  CGM.getModule(), SizesArrayInit->getType(),
6736  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
6737  SizesArrayInit, ".offload_sizes");
6738  SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
6739  Info.SizesArray = SizesArrayGbl;
6740  }
6741 
6742  // The map types are always constant so we don't need to generate code to
6743  // fill arrays. Instead, we create an array constant.
6744  llvm::Constant *MapTypesArrayInit =
6745  llvm::ConstantDataArray::get(CGF.Builder.getContext(), MapTypes);
6746  auto *MapTypesArrayGbl = new llvm::GlobalVariable(
6747  CGM.getModule(), MapTypesArrayInit->getType(),
6748  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
6749  MapTypesArrayInit, ".offload_maptypes");
6750  MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
6751  Info.MapTypesArray = MapTypesArrayGbl;
6752 
6753  for (unsigned i = 0; i < Info.NumberOfPtrs; ++i) {
6754  llvm::Value *BPVal = *BasePointers[i];
6755  llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
6756  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6757  Info.BasePointersArray, 0, i);
6759  BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
6760  Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
6761  CGF.Builder.CreateStore(BPVal, BPAddr);
6762 
6763  if (Info.requiresDevicePointerInfo())
6764  if (auto *DevVD = BasePointers[i].getDevicePtrDecl())
6765  Info.CaptureDeviceAddrMap.insert(std::make_pair(DevVD, BPAddr));
6766 
6767  llvm::Value *PVal = Pointers[i];
6768  llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
6769  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6770  Info.PointersArray, 0, i);
6772  P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
6773  Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
6774  CGF.Builder.CreateStore(PVal, PAddr);
6775 
6776  if (hasRuntimeEvaluationCaptureSize) {
6777  llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
6778  llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs),
6779  Info.SizesArray,
6780  /*Idx0=*/0,
6781  /*Idx1=*/i);
6782  Address SAddr(S, Ctx.getTypeAlignInChars(Ctx.getSizeType()));
6783  CGF.Builder.CreateStore(
6784  CGF.Builder.CreateIntCast(Sizes[i], CGM.SizeTy, /*isSigned=*/true),
6785  SAddr);
6786  }
6787  }
6788  }
6789 }
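
// For illustration, on a 64-bit host with two constant-size captures the code
// above produces roughly:
//   %.offload_baseptrs = alloca [2 x i8*]
//   %.offload_ptrs     = alloca [2 x i8*]
//   @.offload_sizes    = private unnamed_addr constant [2 x i64] [...]
//   @.offload_maptypes = private unnamed_addr constant [2 x i32] [...]
// plus a store of each base pointer and section pointer into the two allocas.
// A capture whose size is only known at run time (e.g. a VLA) turns
// .offload_sizes into an alloca that is filled with stores instead.
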
6790 /// \brief Emit the arguments to be passed to the runtime library based on the
6791 /// arrays of pointers, sizes and map types.
6792 static void emitOffloadingArraysArgument(
6793  CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
6794  llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
6795  llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
6796  auto &CGM = CGF.CGM;
6797  if (Info.NumberOfPtrs) {
6798  BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6799  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6800  Info.BasePointersArray,
6801  /*Idx0=*/0, /*Idx1=*/0);
6802  PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6803  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
6804  Info.PointersArray,
6805  /*Idx0=*/0,
6806  /*Idx1=*/0);
6807  SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6808  llvm::ArrayType::get(CGM.SizeTy, Info.NumberOfPtrs), Info.SizesArray,
6809  /*Idx0=*/0, /*Idx1=*/0);
6810  MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
6811  llvm::ArrayType::get(CGM.Int32Ty, Info.NumberOfPtrs),
6812  Info.MapTypesArray,
6813  /*Idx0=*/0,
6814  /*Idx1=*/0);
6815  } else {
6816  BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
6817  PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
6818  SizesArrayArg = llvm::ConstantPointerNull::get(CGM.SizeTy->getPointerTo());
6819  MapTypesArrayArg =
6820  llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo());
6821  }
6822 }
6823 
6824 void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
6825  const OMPExecutableDirective &D,
6826  llvm::Value *OutlinedFn,
6827  llvm::Value *OutlinedFnID,
6828  const Expr *IfCond, const Expr *Device,
6829  ArrayRef<llvm::Value *> CapturedVars) {
6830  if (!CGF.HaveInsertPoint())
6831  return;
6832 
6833  assert(OutlinedFn && "Invalid outlined function!");
6834 
6835  auto &Ctx = CGF.getContext();
6836 
6837  // Fill up the arrays with all the captured variables.
6838  MappableExprsHandler::MapValuesArrayTy KernelArgs;
6839  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
6840  MappableExprsHandler::MapValuesArrayTy Pointers;
6841  MappableExprsHandler::MapValuesArrayTy Sizes;
6842  MappableExprsHandler::MapFlagsArrayTy MapTypes;
6843 
6844  MappableExprsHandler::MapBaseValuesArrayTy CurBasePointers;
6845  MappableExprsHandler::MapValuesArrayTy CurPointers;
6846  MappableExprsHandler::MapValuesArrayTy CurSizes;
6847  MappableExprsHandler::MapFlagsArrayTy CurMapTypes;
6848 
6849  // Get mappable expression information.
6850  MappableExprsHandler MEHandler(D, CGF);
6851 
6852  const CapturedStmt &CS = *cast<CapturedStmt>(D.getAssociatedStmt());
6853  auto RI = CS.getCapturedRecordDecl()->field_begin();
6854  auto CV = CapturedVars.begin();
6856  CE = CS.capture_end();
6857  CI != CE; ++CI, ++RI, ++CV) {
6858  StringRef Name;
6859  QualType Ty;
6860 
6861  CurBasePointers.clear();
6862  CurPointers.clear();
6863  CurSizes.clear();
6864  CurMapTypes.clear();
6865 
6866  // VLA sizes are passed to the outlined region by copy and do not have map
6867  // information associated.
6868  if (CI->capturesVariableArrayType()) {
6869  CurBasePointers.push_back(*CV);
6870  CurPointers.push_back(*CV);
6871  CurSizes.push_back(CGF.getTypeSize(RI->getType()));
6872  // Copy to the device as an argument. No need to retrieve it.
6873  CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_PRIVATE_VAL |
6874  MappableExprsHandler::OMP_MAP_FIRST_REF);
6875  } else {
6876  // If we have any information in the map clause, we use it, otherwise we
6877  // just do a default mapping.
6878  MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
6879  CurSizes, CurMapTypes);
6880  if (CurBasePointers.empty())
6881  MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
6882  CurPointers, CurSizes, CurMapTypes);
6883  }
6884  // We expect to have at least an element of information for this capture.
6885  assert(!CurBasePointers.empty() && "Non-existing map pointer for capture!");
6886  assert(CurBasePointers.size() == CurPointers.size() &&
6887  CurBasePointers.size() == CurSizes.size() &&
6888  CurBasePointers.size() == CurMapTypes.size() &&
6889  "Inconsistent map information sizes!");
6890 
6891  // The kernel args are always the first elements of the base pointers
6892  // associated with a capture.
6893  KernelArgs.push_back(*CurBasePointers.front());
6894  // We need to append the results of this capture to what we already have.
6895  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
6896  Pointers.append(CurPointers.begin(), CurPointers.end());
6897  Sizes.append(CurSizes.begin(), CurSizes.end());
6898  MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
6899  }
6900 
6901  // Keep track of whether the host function has to be executed.
6902  auto OffloadErrorQType =
6903  Ctx.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
6904  auto OffloadError = CGF.MakeAddrLValue(
6905  CGF.CreateMemTemp(OffloadErrorQType, ".run_host_version"),
6906  OffloadErrorQType);
6907  CGF.EmitStoreOfScalar(llvm::Constant::getNullValue(CGM.Int32Ty),
6908  OffloadError);
6909 
6910  // Fill up the pointer arrays and transfer execution to the device.
6911  auto &&ThenGen = [&BasePointers, &Pointers, &Sizes, &MapTypes, Device,
6912  OutlinedFnID, OffloadError,
6913  &D](CodeGenFunction &CGF, PrePostActionTy &) {
6914  auto &RT = CGF.CGM.getOpenMPRuntime();
6915  // Emit the offloading arrays.
6916  TargetDataInfo Info;
6917  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
6918  emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
6919  Info.PointersArray, Info.SizesArray,
6920  Info.MapTypesArray, Info);
6921 
6922  // On top of the arrays that were filled up, the target offloading call
6923  // takes as arguments the device id as well as the host pointer. The host
6924  // pointer is used by the runtime library to identify the current target
6925  // region, so it only has to be unique and not necessarily point to
6926  // anything. It could be the pointer to the outlined function that
6927  // implements the target region, but we aren't using it so that the
6928  // compiler doesn't need to keep it around and can therefore inline the host
6929  // function if proven worthwhile during optimization.
6930 
6931  // From this point on, we need to have an ID of the target region defined.
6932  assert(OutlinedFnID && "Invalid outlined function ID!");
6933 
6934  // Emit device ID if any.
6935  llvm::Value *DeviceID;
6936  if (Device)
6937  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
6938  CGF.Int32Ty, /*isSigned=*/true);
6939  else
6940  DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
6941 
6942  // Emit the number of elements in the offloading arrays.
6943  llvm::Value *PointerNum = CGF.Builder.getInt32(BasePointers.size());
6944 
6945  // Return value of the runtime offloading call.
6946  llvm::Value *Return;
6947 
6948  auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
6949  auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
6950 
6951  // The target region is an outlined function launched by the runtime
6952  // via calls __tgt_target() or __tgt_target_teams().
6953  //
6954  // __tgt_target() launches a target region with one team and one thread,
6955  // executing a serial region. This master thread may in turn launch
6956  // more threads within its team upon encountering a parallel region,
6957  // however, no additional teams can be launched on the device.
6958  //
6959  // __tgt_target_teams() launches a target region with one or more teams,
6960  // each with one or more threads. This call is required for target
6961  // constructs such as:
6962  // 'target teams'
6963  // 'target' / 'teams'
6964  // 'target teams distribute parallel for'
6965  // 'target parallel'
6966  // and so on.
6967  //
6968  // Note that on the host and CPU targets, the runtime implementation of
6969  // these calls simply calls the outlined function without forking threads.
6970  // The outlined functions themselves have runtime calls to
6971  // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
6972  // the compiler in emitTeamsCall() and emitParallelCall().
6973  //
6974  // In contrast, on the NVPTX target, the implementation of
6975  // __tgt_target_teams() launches a GPU kernel with the requested number
6976  // of teams and threads so no additional calls to the runtime are required.
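    // For illustration, on a 64-bit host the two call shapes are roughly:
    //   %r = call i32 @__tgt_target(i32 %dev, i8* %region_id, i32 %nargs,
    //                               i8** %baseptrs, i8** %ptrs,
    //                               i64* %sizes, i32* %maptypes)
    //   %r = call i32 @__tgt_target_teams(i32 %dev, i8* %region_id, i32 %nargs,
    //                                     i8** %baseptrs, i8** %ptrs,
    //                                     i64* %sizes, i32* %maptypes,
    //                                     i32 %num_teams, i32 %num_threads)
    // A non-zero return value means the region did not run on the device and
    // the host fallback emitted below is executed instead.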
6977  if (NumTeams) {
6978  // If we have NumTeams defined this means that we have an enclosed teams
6979  // region. Therefore we also expect to have NumThreads defined. These two
6980  // values should be defined in the presence of a teams directive,
6981  // regardless of having any clauses associated. If the user is using teams
6982  // but no clauses, these two values will be the default that should be
6983  // passed to the runtime library - a 32-bit integer with the value zero.
6984  assert(NumThreads && "Thread limit expression should be available along "
6985  "with number of teams.");
6986  llvm::Value *OffloadingArgs[] = {
6987  DeviceID, OutlinedFnID,
6988  PointerNum, Info.BasePointersArray,
6989  Info.PointersArray, Info.SizesArray,
6990  Info.MapTypesArray, NumTeams,
6991  NumThreads};
6992  Return = CGF.EmitRuntimeCall(
6993  RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
6994  } else {
6995  llvm::Value *OffloadingArgs[] = {
6996  DeviceID, OutlinedFnID,
6997  PointerNum, Info.BasePointersArray,
6998  Info.PointersArray, Info.SizesArray,
6999  Info.MapTypesArray};
7000  Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
7001  OffloadingArgs);
7002  }
7003 
7004  CGF.EmitStoreOfScalar(Return, OffloadError);
7005  };
7006 
7007  // Notify that the host version must be executed.
7008  auto &&ElseGen = [OffloadError](CodeGenFunction &CGF, PrePostActionTy &) {
7009  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.Int32Ty, /*V=*/-1u),
7010  OffloadError);
7011  };
7012 
7013  // If we have a target function ID it means that we need to support
7014  // offloading; otherwise, just execute on the host. We need to execute on the
7015  // host regardless of the conditional in the if clause if, e.g., the user does
7016  // not specify target triples.
7017  if (OutlinedFnID) {
7018  if (IfCond)
7019  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
7020  else {
7021  RegionCodeGenTy ThenRCG(ThenGen);
7022  ThenRCG(CGF);
7023  }
7024  } else {
7025  RegionCodeGenTy ElseRCG(ElseGen);
7026  ElseRCG(CGF);
7027  }
7028 
7029  // Check the error code and execute the host version if required.
7030  auto OffloadFailedBlock = CGF.createBasicBlock("omp_offload.failed");
7031  auto OffloadContBlock = CGF.createBasicBlock("omp_offload.cont");
7032  auto OffloadErrorVal = CGF.EmitLoadOfScalar(OffloadError, SourceLocation());
7033  auto Failed = CGF.Builder.CreateIsNotNull(OffloadErrorVal);
7034  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
7035 
7036  CGF.EmitBlock(OffloadFailedBlock);
7037  CGF.Builder.CreateCall(OutlinedFn, KernelArgs);
7038  CGF.EmitBranch(OffloadContBlock);
7039 
7040  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
7041 }
7042 
7043 void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
7044  StringRef ParentName) {
7045  if (!S)
7046  return;
7047 
7048  // Codegen OMP target directives that offload compute to the device.
7049  bool requiresDeviceCodegen =
7050  isa<OMPExecutableDirective>(S) &&
7051  isOpenMPTargetExecutionDirective(
7052  cast<OMPExecutableDirective>(S)->getDirectiveKind());
7053 
7054  if (requiresDeviceCodegen) {
7055  auto &E = *cast<OMPExecutableDirective>(S);
7056  unsigned DeviceID;
7057  unsigned FileID;
7058  unsigned Line;
7059  getTargetEntryUniqueInfo(CGM.getContext(), E.getLocStart(), DeviceID,
7060  FileID, Line);
7061 
7062  // Is this a target region that should not be emitted as an entry point? If
7063  // so, just signal that we are done with this target region.
7065  ParentName, Line))
7066  return;
7067 
7068  switch (S->getStmtClass()) {
7069  case Stmt::OMPTargetDirectiveClass:
7070  CodeGenFunction::EmitOMPTargetDeviceFunction(
7071  CGM, ParentName, cast<OMPTargetDirective>(*S));
7072  break;
7073  case Stmt::OMPTargetParallelDirectiveClass:
7074  CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7075  CGM, ParentName, cast<OMPTargetParallelDirective>(*S));
7076  break;
7077  case Stmt::OMPTargetTeamsDirectiveClass:
7078  CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
7079  CGM, ParentName, cast<OMPTargetTeamsDirective>(*S));
7080  break;
7081  default:
7082  llvm_unreachable("Unknown target directive for OpenMP device codegen.");
7083  }
7084  return;
7085  }
7086 
7087  if (const OMPExecutableDirective *E = dyn_cast<OMPExecutableDirective>(S)) {
7088  if (!E->hasAssociatedStmt())
7089  return;
7090 
7091  scanForTargetRegionsFunctions(
7092  cast<CapturedStmt>(E->getAssociatedStmt())->getCapturedStmt(),
7093  ParentName);
7094  return;
7095  }
7096 
7097  // If this is a lambda function, look into its body.
7098  if (auto *L = dyn_cast<LambdaExpr>(S))
7099  S = L->getBody();
7100 
7101  // Keep looking for target regions recursively.
7102  for (auto *II : S->children())
7103  scanForTargetRegionsFunctions(II, ParentName);
7104 }
7105 
7106 bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
7107  auto &FD = *cast<FunctionDecl>(GD.getDecl());
7108 
7109  // If emitting code for the host, we do not process FD here. Instead we do
7110  // the normal code generation.
7111  if (!CGM.getLangOpts().OpenMPIsDevice)
7112  return false;
7113 
7114  // Try to detect target regions in the function.
7115  scanForTargetRegionsFunctions(FD.getBody(), CGM.getMangledName(GD));
7116 
7117  // We should not emit any function other than the ones created during the
7118  // scanning. Therefore, we signal that this function is completely dealt
7119  // with.
7120  return true;
7121 }
7122 
7123 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
7124  if (!CGM.getLangOpts().OpenMPIsDevice)
7125  return false;
7126 
7127  // Check if there are Ctors/Dtors in this declaration and look for target
7128  // regions in it. We use the complete variant to produce the kernel name
7129  // mangling.
7130  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
7131  if (auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
7132  for (auto *Ctor : RD->ctors()) {
7133  StringRef ParentName =
7135  scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
7136  }
7137  auto *Dtor = RD->getDestructor();
7138  if (Dtor) {
7139  StringRef ParentName =
7140  CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
7141  scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
7142  }
7143  }
7144 
7145  // If we are in target mode, we do not emit any global (declare target is not
7146  // implemented yet). Therefore we signal that GD was processed in this case.
7147  return true;
7148 }
7149 
7150 bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
7151  auto *VD = GD.getDecl();
7152  if (isa<FunctionDecl>(VD))
7153  return emitTargetFunctions(GD);
7154 
7155  return emitTargetGlobalVariable(GD);
7156 }
7157 
7159  // If we have offloading in the current module, we need to emit the entries
7160  // now and register the offloading descriptor.
7162 
7163  // Create and register the offloading binary descriptors. This is the main
7164  // entity that captures all the information about offloading in the current
7165  // compilation unit.
7167 }
7168 
7169 void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
7170  const OMPExecutableDirective &D,
7171  SourceLocation Loc,
7172  llvm::Value *OutlinedFn,
7173  ArrayRef<llvm::Value *> CapturedVars) {
7174  if (!CGF.HaveInsertPoint())
7175  return;
7176 
7177  auto *RTLoc = emitUpdateLocation(CGF, Loc);
7179 
7180  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
7181  llvm::Value *Args[] = {
7182  RTLoc,
7183  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
7184  CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
7185  llvm::SmallVector<llvm::Value *, 16> RealArgs;
7186  RealArgs.append(std::begin(Args), std::end(Args));
7187  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
7188 
7189  auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
7190  CGF.EmitRuntimeCall(RTLFn, RealArgs);
7191 }
7192 
7193 void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
7194  const Expr *NumTeams,
7195  const Expr *ThreadLimit,
7196  SourceLocation Loc) {
7197  if (!CGF.HaveInsertPoint())
7198  return;
7199 
7200  auto *RTLoc = emitUpdateLocation(CGF, Loc);
7201 
7202  llvm::Value *NumTeamsVal =
7203  (NumTeams)
7204  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
7205  CGF.CGM.Int32Ty, /* isSigned = */ true)
7206  : CGF.Builder.getInt32(0);
7207 
7208  llvm::Value *ThreadLimitVal =
7209  (ThreadLimit)
7210  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
7211  CGF.CGM.Int32Ty, /* isSigned = */ true)
7212  : CGF.Builder.getInt32(0);
7213 
7214  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
7215  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
7216  ThreadLimitVal};
7217  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_push_num_teams),
7218  PushNumTeamsArgs);
7219 }
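
// For illustration, '#pragma omp teams num_teams(4) thread_limit(16)' makes
// the code above emit roughly
//   call void @__kmpc_push_num_teams(%ident_t* %loc, i32 %gtid, i32 4, i32 16)
// ahead of the __kmpc_fork_teams call; an absent clause passes 0 so the
// runtime picks its own default.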
7220 
7221 void CGOpenMPRuntime::emitTargetDataCalls(
7222  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7223  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
7224  if (!CGF.HaveInsertPoint())
7225  return;
7226 
7227  // Action used to replace the default codegen action and turn privatization
7228  // off.
7229  PrePostActionTy NoPrivAction;
7230 
7231  // Generate the code for the opening of the data environment. Capture all the
7232  // arguments of the runtime call by reference because they are used in the
7233  // closing of the region.
7234  auto &&BeginThenGen = [&D, Device, &Info, &CodeGen](CodeGenFunction &CGF,
7235  PrePostActionTy &) {
7236  // Fill up the arrays with all the mapped variables.
7237  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
7238  MappableExprsHandler::MapValuesArrayTy Pointers;
7239  MappableExprsHandler::MapValuesArrayTy Sizes;
7240  MappableExprsHandler::MapFlagsArrayTy MapTypes;
7241 
7242  // Get map clause information.
7243  MappableExprsHandler MCHandler(D, CGF);
7244  MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7245 
7246  // Fill up the arrays and create the arguments.
7247  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7248 
7249  llvm::Value *BasePointersArrayArg = nullptr;
7250  llvm::Value *PointersArrayArg = nullptr;
7251  llvm::Value *SizesArrayArg = nullptr;
7252  llvm::Value *MapTypesArrayArg = nullptr;
7253  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7254  SizesArrayArg, MapTypesArrayArg, Info);
7255 
7256  // Emit device ID if any.
7257  llvm::Value *DeviceID = nullptr;
7258  if (Device)
7259  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7260  CGF.Int32Ty, /*isSigned=*/true);
7261  else
7262  DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
7263 
7264  // Emit the number of elements in the offloading arrays.
7265  auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7266 
7267  llvm::Value *OffloadingArgs[] = {
7268  DeviceID, PointerNum, BasePointersArrayArg,
7269  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7270  auto &RT = CGF.CGM.getOpenMPRuntime();
7271  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_begin),
7272  OffloadingArgs);
7273 
7274  // If device pointer privatization is required, emit the body of the region
7275  // here. It will have to be duplicated: with and without privatization.
7276  if (!Info.CaptureDeviceAddrMap.empty())
7277  CodeGen(CGF);
7278  };
7279 
7280  // Generate code for the closing of the data region.
7281  auto &&EndThenGen = [Device, &Info](CodeGenFunction &CGF, PrePostActionTy &) {
7282  assert(Info.isValid() && "Invalid data environment closing arguments.");
7283 
7284  llvm::Value *BasePointersArrayArg = nullptr;
7285  llvm::Value *PointersArrayArg = nullptr;
7286  llvm::Value *SizesArrayArg = nullptr;
7287  llvm::Value *MapTypesArrayArg = nullptr;
7288  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
7289  SizesArrayArg, MapTypesArrayArg, Info);
7290 
7291  // Emit device ID if any.
7292  llvm::Value *DeviceID = nullptr;
7293  if (Device)
7294  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7295  CGF.Int32Ty, /*isSigned=*/true);
7296  else
7297  DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
7298 
7299  // Emit the number of elements in the offloading arrays.
7300  auto *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
7301 
7302  llvm::Value *OffloadingArgs[] = {
7303  DeviceID, PointerNum, BasePointersArrayArg,
7304  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
7305  auto &RT = CGF.CGM.getOpenMPRuntime();
7306  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target_data_end),
7307  OffloadingArgs);
7308  };
7309 
7310  // If we need device pointer privatization, we need to emit the body of the
7311  // region with no privatization in the 'else' branch of the conditional.
7312  // Otherwise, we don't have to do anything.
7313  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
7314  PrePostActionTy &) {
7315  if (!Info.CaptureDeviceAddrMap.empty()) {
7316  CodeGen.setAction(NoPrivAction);
7317  CodeGen(CGF);
7318  }
7319  };
7320 
7321  // We don't have to do anything to close the region if the if clause evaluates
7322  // to false.
7323  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
7324 
7325  if (IfCond) {
7326  emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
7327  } else {
7328  RegionCodeGenTy RCG(BeginThenGen);
7329  RCG(CGF);
7330  }
7331 
7332  // If we don't require privatization of device pointers, we emit the body in
7333  // between the runtime calls. This avoids duplicating the body code.
7334  if (Info.CaptureDeviceAddrMap.empty()) {
7335  CodeGen.setAction(NoPrivAction);
7336  CodeGen(CGF);
7337  }
7338 
7339  if (IfCond) {
7340  emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
7341  } else {
7342  RegionCodeGenTy RCG(EndThenGen);
7343  RCG(CGF);
7344  }
7345 }
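A minimal sketch, with hypothetical identifiers, of the source shape this routine lowers: the region body is emitted between the __tgt_target_data_begin and __tgt_target_data_end calls generated above, and a use_device_ptr clause is what typically populates Info.CaptureDeviceAddrMap and forces the duplicated privatized/non-privatized body:

  void scale(int n, float *v, int dev, bool offload) {
  #pragma omp target data map(tofrom : v[0 : n]) device(dev) if (offload) \
      use_device_ptr(v)
    {
      // Emitted between __tgt_target_data_begin and __tgt_target_data_end;
      // with use_device_ptr(v), 'v' is remapped to its device address here.
  #pragma omp target is_device_ptr(v)
      for (int i = 0; i < n; ++i)
        v[i] *= 2.0f;
    }
  }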
7346 
7347 void CGOpenMPRuntime::emitTargetDataStandAloneCall(
7348  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
7349  const Expr *Device) {
7350  if (!CGF.HaveInsertPoint())
7351  return;
7352 
7353  assert((isa<OMPTargetEnterDataDirective>(D) ||
7354  isa<OMPTargetExitDataDirective>(D) ||
7355  isa<OMPTargetUpdateDirective>(D)) &&
7356  "Expecting either target enter, exit data, or update directives.");
7357 
7358  // Generate the code for the opening of the data environment.
7359  auto &&ThenGen = [&D, Device](CodeGenFunction &CGF, PrePostActionTy &) {
7360  // Fill up the arrays with all the mapped variables.
7361  MappableExprsHandler::MapBaseValuesArrayTy BasePointers;
7362  MappableExprsHandler::MapValuesArrayTy Pointers;
7363  MappableExprsHandler::MapValuesArrayTy Sizes;
7364  MappableExprsHandler::MapFlagsArrayTy MapTypes;
7365 
7366  // Get map clause information.
7367  MappableExprsHandler MEHandler(D, CGF);
7368  MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
7369 
7370  // Fill up the arrays and create the arguments.
7371  TargetDataInfo Info;
7372  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
7373  emitOffloadingArraysArgument(CGF, Info.BasePointersArray,
7374  Info.PointersArray, Info.SizesArray,
7375  Info.MapTypesArray, Info);
7376 
7377  // Emit device ID if any.
7378  llvm::Value *DeviceID = nullptr;
7379  if (Device)
7380  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
7381  CGF.Int32Ty, /*isSigned=*/true);
7382  else
7383  DeviceID = CGF.Builder.getInt32(OMP_DEVICEID_UNDEF);
7384 
7385  // Emit the number of elements in the offloading arrays.
7386  auto *PointerNum = CGF.Builder.getInt32(BasePointers.size());
7387 
7388  llvm::Value *OffloadingArgs[] = {
7389  DeviceID, PointerNum, Info.BasePointersArray,
7390  Info.PointersArray, Info.SizesArray, Info.MapTypesArray};
7391 
7392  auto &RT = CGF.CGM.getOpenMPRuntime();
7393  // Select the right runtime function call for each expected standalone
7394  // directive.
7395  OpenMPRTLFunction RTLFn;
7396  switch (D.getDirectiveKind()) {
7397  default:
7398  llvm_unreachable("Unexpected standalone target data directive.");
7399  break;
7400  case OMPD_target_enter_data:
7401  RTLFn = OMPRTL__tgt_target_data_begin;
7402  break;
7403  case OMPD_target_exit_data:
7404  RTLFn = OMPRTL__tgt_target_data_end;
7405  break;
7406  case OMPD_target_update:
7407  RTLFn = OMPRTL__tgt_target_data_update;
7408  break;
7409  }
7410  CGF.EmitRuntimeCall(RT.createRuntimeFunction(RTLFn), OffloadingArgs);
7411  };
7412 
7413  // In the event we get an if clause, we don't have to take any action on the
7414  // else side.
7415  auto &&ElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
7416 
7417  if (IfCond) {
7418  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
7419  } else {
7420  RegionCodeGenTy ThenGenRCG(ThenGen);
7421  ThenGenRCG(CGF);
7422  }
7423 }
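For reference, a short sketch (assumed, not taken from this file) of the three standalone directives handled here and the runtime entry each one selects in the switch above:

  void stage(int n, double *buf) {
  #pragma omp target enter data map(to : buf[0 : n])   // __tgt_target_data_begin
    // ... host work while buf stays resident on the device ...
  #pragma omp target update from(buf[0 : n])            // __tgt_target_data_update
  #pragma omp target exit data map(from : buf[0 : n])   // __tgt_target_data_end
  }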
7424 
7425 namespace {
7426  /// Kind of parameter in a function with 'declare simd' directive.
7427  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
7428  /// Attribute set of the parameter.
7429  struct ParamAttrTy {
7430  ParamKindTy Kind = Vector;
7431  llvm::APSInt StrideOrArg;
7432  llvm::APSInt Alignment;
7433  };
7434 } // namespace
7435 
7436 static unsigned evaluateCDTSize(const FunctionDecl *FD,
7437  ArrayRef<ParamAttrTy> ParamAttrs) {
7438  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
7439  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
7440  // of that clause. The VLEN value must be a power of 2.
7441  // Otherwise, the notion of the function's "characteristic data type" (CDT)
7442  // is used to compute the vector length.
7443  // CDT is defined in the following order:
7444  // a) For a non-void function, the CDT is the return type.
7445  // b) If the function has any non-uniform, non-linear parameters, then the
7446  // CDT is the type of the first such parameter.
7447  // c) If the CDT determined by a) or b) above is a struct, union, or class
7448  // type which is passed by value (except for the type that maps to the
7449  // built-in complex data type), the characteristic data type is int.
7450  // d) If none of the above three cases is applicable, the CDT is int.
7451  // The VLEN is then determined based on the CDT and the size of the vector
7452  // register of the ISA for which the current vector version is generated. The
7453  // VLEN is computed using the formula below:
7454  // VLEN = sizeof(vector_register) / sizeof(CDT),
7455  // where the vector register size is specified in section 3.2.1 "Registers and
7456  // the Stack Frame" of the original AMD64 ABI document.
7457  QualType RetType = FD->getReturnType();
7458  if (RetType.isNull())
7459  return 0;
7460  ASTContext &C = FD->getASTContext();
7461  QualType CDT;
7462  if (!RetType.isNull() && !RetType->isVoidType())
7463  CDT = RetType;
7464  else {
7465  unsigned Offset = 0;
7466  if (auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
7467  if (ParamAttrs[Offset].Kind == Vector)
7468  CDT = C.getPointerType(C.getRecordType(MD->getParent()));
7469  ++Offset;
7470  }
7471  if (CDT.isNull()) {
7472  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
7473  if (ParamAttrs[I + Offset].Kind == Vector) {
7474  CDT = FD->getParamDecl(I)->getType();
7475  break;
7476  }
7477  }
7478  }
7479  }
7480  if (CDT.isNull())
7481  CDT = C.IntTy;
7482  CDT = CDT->getCanonicalTypeUnqualified();
7483  if (CDT->isRecordType() || CDT->isUnionType())
7484  CDT = C.IntTy;
7485  return C.getTypeSize(CDT);
7486 }
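A small worked example of the rules above; the functions are hypothetical and the register widths come from the ISA table in emitX86DeclareSimdFunction below (SSE 128, AVX2 256 bits):

  #pragma omp declare simd notinbranch
  float foo(float x, float y); // rule a): CDT = float; VLEN(AVX2) = 256 / 32 = 8

  #pragma omp declare simd uniform(p)
  void bar(int *p);            // rules a)-c) do not apply: CDT = int; VLEN(SSE) = 128 / 32 = 4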
7487 
7488 static void
7489 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
7490  const llvm::APSInt &VLENVal,
7491  ArrayRef<ParamAttrTy> ParamAttrs,
7492  OMPDeclareSimdDeclAttr::BranchStateTy State) {
7493  struct ISADataTy {
7494  char ISA;
7495  unsigned VecRegSize;
7496  };
7497  ISADataTy ISAData[] = {
7498  {
7499  'b', 128
7500  }, // SSE
7501  {
7502  'c', 256
7503  }, // AVX
7504  {
7505  'd', 256
7506  }, // AVX2
7507  {
7508  'e', 512
7509  }, // AVX512
7510  };
7511  llvm::SmallVector<char, 2> Masked;
7512  switch (State) {
7513  case OMPDeclareSimdDeclAttr::BS_Undefined:
7514  Masked.push_back('N');
7515  Masked.push_back('M');
7516  break;
7517  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
7518  Masked.push_back('N');
7519  break;
7520  case OMPDeclareSimdDeclAttr::BS_Inbranch:
7521  Masked.push_back('M');
7522  break;
7523  }
7524  for (auto Mask : Masked) {
7525  for (auto &Data : ISAData) {
7526  SmallString<256> Buffer;
7527  llvm::raw_svector_ostream Out(Buffer);
7528  Out << "_ZGV" << Data.ISA << Mask;
7529  if (!VLENVal) {
7530  Out << llvm::APSInt::getUnsigned(Data.VecRegSize /
7531  evaluateCDTSize(FD, ParamAttrs));
7532  } else
7533  Out << VLENVal;
7534  for (auto &ParamAttr : ParamAttrs) {
7535  switch (ParamAttr.Kind){
7536  case LinearWithVarStride:
7537  Out << 's' << ParamAttr.StrideOrArg;
7538  break;
7539  case Linear:
7540  Out << 'l';
7541  if (!!ParamAttr.StrideOrArg)
7542  Out << ParamAttr.StrideOrArg;
7543  break;
7544  case Uniform:
7545  Out << 'u';
7546  break;
7547  case Vector:
7548  Out << 'v';
7549  break;
7550  }
7551  if (!!ParamAttr.Alignment)
7552  Out << 'a' << ParamAttr.Alignment;
7553  }
7554  Out << '_' << Fn->getName();
7555  Fn->addFnAttr(Out.str());
7556  }
7557  }
7558 }
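A hedged illustration of the attribute strings produced by the loops above, assuming a function with C linkage so that Fn->getName() is simply "add":

  extern "C" {
  #pragma omp declare simd simdlen(4) notinbranch
  int add(int a, int b);
  }
  // Attributes attached to the IR function would include, among others:
  //   "_ZGVbN4vv_add"  (SSE,    unmasked 'N', VLEN 4, two vector parameters)
  //   "_ZGVdN4vv_add"  (AVX2,   unmasked 'N', VLEN 4, two vector parameters)
  //   "_ZGVeN4vv_add"  (AVX512, unmasked 'N', VLEN 4, two vector parameters)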
7559 
7560 void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
7561  llvm::Function *Fn) {
7562  ASTContext &C = CGM.getContext();
7563  FD = FD->getCanonicalDecl();
7564  // Map params to their positions in function decl.
7565  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
7566  if (isa<CXXMethodDecl>(FD))
7567  ParamPositions.insert({FD, 0});
7568  unsigned ParamPos = ParamPositions.size();
7569  for (auto *P : FD->parameters()) {
7570  ParamPositions.insert({P->getCanonicalDecl(), ParamPos});
7571  ++ParamPos;
7572  }
7573  for (auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
7574  llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
7575  // Mark uniform parameters.
7576  for (auto *E : Attr->uniforms()) {
7577  E = E->IgnoreParenImpCasts();
7578  unsigned Pos;
7579  if (isa<CXXThisExpr>(E))
7580  Pos = ParamPositions[FD];
7581  else {
7582  auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7583  ->getCanonicalDecl();
7584  Pos = ParamPositions[PVD];
7585  }
7586  ParamAttrs[Pos].Kind = Uniform;
7587  }
7588  // Get alignment info.
7589  auto NI = Attr->alignments_begin();
7590  for (auto *E : Attr->aligneds()) {
7591  E = E->IgnoreParenImpCasts();
7592  unsigned Pos;
7593  QualType ParmTy;
7594  if (isa<CXXThisExpr>(E)) {
7595  Pos = ParamPositions[FD];
7596  ParmTy = E->getType();
7597  } else {
7598  auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7599  ->getCanonicalDecl();
7600  Pos = ParamPositions[PVD];
7601  ParmTy = PVD->getType();
7602  }
7603  ParamAttrs[Pos].Alignment =
7604  (*NI) ? (*NI)->EvaluateKnownConstInt(C)
7605  : llvm::APSInt::getUnsigned(
7606  C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
7607  .getQuantity());
7608  ++NI;
7609  }
7610  // Mark linear parameters.
7611  auto SI = Attr->steps_begin();
7612  auto MI = Attr->modifiers_begin();
7613  for (auto *E : Attr->linears()) {
7614  E = E->IgnoreParenImpCasts();
7615  unsigned Pos;
7616  if (isa<CXXThisExpr>(E))
7617  Pos = ParamPositions[FD];
7618  else {
7619  auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
7620  ->getCanonicalDecl();
7621  Pos = ParamPositions[PVD];
7622  }
7623  auto &ParamAttr = ParamAttrs[Pos];
7624  ParamAttr.Kind = Linear;
7625  if (*SI) {
7626  if (!(*SI)->EvaluateAsInt(ParamAttr.StrideOrArg, C,
7627  Expr::SE_AllowSideEffects)) {
7628  if (auto *DRE = cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
7629  if (auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
7630  ParamAttr.Kind = LinearWithVarStride;
7631  ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
7632  ParamPositions[StridePVD->getCanonicalDecl()]);
7633  }
7634  }
7635  }
7636  }
7637  ++SI;
7638  ++MI;
7639  }
7640  llvm::APSInt VLENVal;
7641  if (const Expr *VLEN = Attr->getSimdlen())
7642  VLENVal = VLEN->EvaluateKnownConstInt(C);
7643  OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
7644  if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
7645  CGM.getTriple().getArch() == llvm::Triple::x86_64)
7646  emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
7647  }
7648 }
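A hypothetical 'declare simd' declaration showing how the clause walk above classifies parameters; the names and clause values are illustrative:

  #pragma omp declare simd uniform(p) linear(i : 1) aligned(p : 32)
  float gather(float *p, int i, float scale);
  // Resulting ParamAttrs (conceptually):
  //   p     -> Uniform, Alignment = 32   (uniform() and aligned() clauses)
  //   i     -> Linear,  StrideOrArg = 1  (linear() with a constant step)
  //   scale -> Vector                    (default kind)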
7649 
7650 namespace {
7651 /// Cleanup action for doacross support.
7652 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
7653 public:
7654  static const int DoacrossFinArgs = 2;
7655 
7656 private:
7657  llvm::Value *RTLFn;
7658  llvm::Value *Args[DoacrossFinArgs];
7659 
7660 public:
7661  DoacrossCleanupTy(llvm::Value *RTLFn, ArrayRef<llvm::Value *> CallArgs)
7662  : RTLFn(RTLFn) {
7663  assert(CallArgs.size() == DoacrossFinArgs);
7664  std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
7665  }
7666  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
7667  if (!CGF.HaveInsertPoint())
7668  return;
7669  CGF.EmitRuntimeCall(RTLFn, Args);
7670  }
7671 };
7672 } // namespace
7673 
7674 void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
7675  const OMPLoopDirective &D) {
7676  if (!CGF.HaveInsertPoint())
7677  return;
7678 
7679  ASTContext &C = CGM.getContext();
7680  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
7681  RecordDecl *RD;
7682  if (KmpDimTy.isNull()) {
7683  // Build struct kmp_dim { // loop bounds info casted to kmp_int64
7684  // kmp_int64 lo; // lower
7685  // kmp_int64 up; // upper
7686  // kmp_int64 st; // stride
7687  // };
7688  RD = C.buildImplicitRecord("kmp_dim");
7689  RD->startDefinition();
7690  addFieldToRecordDecl(C, RD, Int64Ty);
7691  addFieldToRecordDecl(C, RD, Int64Ty);
7692  addFieldToRecordDecl(C, RD, Int64Ty);
7693  RD->completeDefinition();
7694  KmpDimTy = C.getRecordType(RD);
7695  } else
7696  RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
7697 
7698  Address DimsAddr = CGF.CreateMemTemp(KmpDimTy, "dims");
7699  CGF.EmitNullInitialization(DimsAddr, KmpDimTy);
7700  enum { LowerFD = 0, UpperFD, StrideFD };
7701  // Fill dims with data.
7702  LValue DimsLVal = CGF.MakeAddrLValue(DimsAddr, KmpDimTy);
7703  // dims.upper = num_iterations;
7704  LValue UpperLVal =
7705  CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), UpperFD));
7706  llvm::Value *NumIterVal = CGF.EmitScalarConversion(
7707  CGF.EmitScalarExpr(D.getNumIterations()), D.getNumIterations()->getType(),
7708  Int64Ty, D.getNumIterations()->getExprLoc());
7709  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
7710  // dims.stride = 1;
7711  LValue StrideLVal =
7712  CGF.EmitLValueForField(DimsLVal, *std::next(RD->field_begin(), StrideFD));
7713  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
7714  StrideLVal);
7715 
7716  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
7717  // kmp_int32 num_dims, struct kmp_dim * dims);
7718  llvm::Value *Args[] = {emitUpdateLocation(CGF, D.getLocStart()),
7719  getThreadID(CGF, D.getLocStart()),
7720  llvm::ConstantInt::getSigned(CGM.Int32Ty, 1),
7721  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7722  DimsAddr.getPointer(), CGM.VoidPtrTy)};
7723 
7724  llvm::Value *RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_init);
7725  CGF.EmitRuntimeCall(RTLFn, Args);
7726  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
7727  emitUpdateLocation(CGF, D.getLocEnd()), getThreadID(CGF, D.getLocEnd())};
7728  llvm::Value *FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_fini);
7729  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
7730  llvm::makeArrayRef(FiniArgs));
7731 }
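Conceptually (a sketch, not code emitted verbatim), for a one-dimensional doacross loop the IR built above amounts to:

  // struct kmp_dim { kmp_int64 lo, up, st; };
  // kmp_dim dims = {};          // zero-initialized, so lo stays 0
  // dims.up = num_iterations;   // upper bound
  // dims.st = 1;                // stride
  // __kmpc_doacross_init(&loc, global_tid, /*num_dims=*/1, &dims);
  // ... loop body ...
  // __kmpc_doacross_fini(&loc, global_tid);  // registered as a cleanup above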
7732 
7733 void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
7734  const OMPDependClause *C) {
7735  QualType Int64Ty =
7736  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
7737  const Expr *CounterVal = C->getCounterValue();
7738  assert(CounterVal);
7739  llvm::Value *CntVal = CGF.EmitScalarConversion(CGF.EmitScalarExpr(CounterVal),
7740  CounterVal->getType(), Int64Ty,
7741  CounterVal->getExprLoc());
7742  Address CntAddr = CGF.CreateMemTemp(Int64Ty, ".cnt.addr");
7743  CGF.EmitStoreOfScalar(CntVal, CntAddr, /*Volatile=*/false, Int64Ty);
7744  llvm::Value *Args[] = {emitUpdateLocation(CGF, C->getLocStart()),
7745  getThreadID(CGF, C->getLocStart()),
7746  CntAddr.getPointer()};
7747  llvm::Value *RTLFn;
7748  if (C->getDependencyKind() == OMPC_DEPEND_source)
7749  RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_post);
7750  else {
7751  assert(C->getDependencyKind() == OMPC_DEPEND_sink);
7752  RTLFn = createRuntimeFunction(OMPRTL__kmpc_doacross_wait);
7753  }
7754  CGF.EmitRuntimeCall(RTLFn, Args);
7755 }
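For context, a hypothetical doacross loop whose 'ordered depend' constructs this routine lowers; depend(source) maps to __kmpc_doacross_post and depend(sink : ...) to __kmpc_doacross_wait, each receiving the 64-bit iteration value stored above:

  void scan(int n, float *a) {
  #pragma omp parallel
  #pragma omp for ordered(1)
    for (int i = 1; i < n; ++i) {
  #pragma omp ordered depend(sink : i - 1) // __kmpc_doacross_wait
      a[i] += a[i - 1];
  #pragma omp ordered depend(source)       // __kmpc_doacross_post
    }
  }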
7756 
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:636
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:259
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, llvm::Type *BaseLVType, CharUnits BaseLVAlignment, llvm::Value *Addr)
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
StmtClass getStmtClass() const
Definition: Stmt.h:361
CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
Definition: StmtOpenMP.h:205
static llvm::Value * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
This represents '#pragma omp task' directive.
Definition: StmtOpenMP.h:1704
static const Decl * getCanonicalDecl(const Decl *D)
FunctionDecl - An instance of this class is created to represent a function declaration or definition...
Definition: Decl.h:1618
llvm::IntegerType * IntTy
int
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
External linkage, which indicates that the entity can be referred to from other translation units...
Definition: Linkage.h:61
Parameter for captured context.
Definition: Decl.h:1395
QualType TgtDeviceImageQTy
struct __tgt_device_image{ void *ImageStart; // Pointer to the target code start. ...
Complete object ctor.
Definition: ABI.h:26
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2224
CanQualType VoidPtrTy
Definition: ASTContext.h:978
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:124
unsigned Length
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition: CGDecl.cpp:1451
A (possibly-)qualified type.
Definition: Type.h:616
llvm::Value * getPointer() const
Definition: CGValue.h:342
unsigned getColumn() const
Return the presumed column number of this location.
llvm::Type * ConvertTypeForMem(QualType T)
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
Expr * getNumIterations() const
Definition: StmtOpenMP.h:826
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, unsigned IVSize, bool IVSigned, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk=nullptr)
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after...
Definition: Type.h:1054
llvm::Module & getModule() const
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
llvm::SmallPtrSet< const VarDecl *, 4 > ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> &CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
The standard implementation of ConstantInitBuilder used in Clang.
Stmt - This represents one statement.
Definition: Stmt.h:60
llvm::ConstantInt * getSize(CharUnits N)
Definition: CGBuilder.h:61
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
QualType getTgtBinaryDescriptorQTy()
Returns __tgt_bin_desc type.
SmallVector< std::pair< OpenMPDependClauseKind, const Expr * >, 4 > Dependences
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
bool isRecordType() const
Definition: Type.h:5769
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
SmallVector< const Expr *, 4 > LastprivateCopies
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:47
Address getAddress() const
Definition: CGValue.h:346
llvm::Constant * getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name)
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2069
StringRef P
static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, IdentFieldIndex Field, const llvm::Twine &Name="")
Call to void __kmpc_threadprivate_register( ident_t *, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);.
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
The base class of the type hierarchy.
Definition: Type.h:1303
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition: CGExpr.cpp:1749
QualType getRecordType(const RecordDecl *Decl) const
std::unique_ptr< llvm::MemoryBuffer > Buffer
llvm::Value * PointersArray
The array of section pointers passed to the runtime library.
virtual void clear()
virtual void completeDefinition()
completeDefinition - Notes that the definition of this type is now complete.
Definition: Decl.cpp:3921
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
struct with the values to be passed to the dispatch runtime function
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, bool forPointeeType=false)
const LangOptions & getLangOpts() const
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and and emit all target regions found along the way.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
static CharUnits getOffsetOfIdentField(IdentFieldIndex Field)
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS...
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant, or if it does but contains a label, return false.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
OpenMPSchedType
Schedule types for 'omp for' loops (these enumerators are taken from the enum sched_type in kmp...
SmallVector< const Expr *, 4 > ReductionCopies
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
VarDecl - An instance of this class is created to represent a variable declaration or definition...
Definition: Decl.h:758
Objects with "hidden" visibility are not seen by the dynamic linker.
Definition: Visibility.h:35
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:52
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:349
The "union" keyword.
Definition: Type.h:4494
Extra information about a function prototype.
Definition: Type.h:3234
field_iterator field_begin() const
Definition: Decl.cpp:3912
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:997
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:1924
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
capture_iterator capture_begin()
Retrieve an iterator pointing to the first capture.
Definition: Stmt.h:2174
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata...
bool isUnionType() const
Definition: Type.cpp:390
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
bool isVoidType() const
Definition: Type.h:5906
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:221
Struct that keeps all the relevant information that should be kept throughout a 'target data' region...
QualType getTgtOffloadEntryQTy()
Returns __tgt_offload_entry type.
SmallVector< const Expr *, 4 > PrivateVars
RecordDecl - Represents a struct/union/class.
Definition: Decl.h:3354
llvm::DenseMap< const VarDecl *, FieldDecl * > LambdaCaptureFields
Source[4] in Fortran, do not use for C++.
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition: CGDecl.cpp:1527
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1552
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
Expr * getSizeExpr() const
Definition: Type.h:2664
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:1813
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:128
LineState State
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition: CGDecl.cpp:1316
Call to void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, void *data...
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr * > Vars, SourceLocation Loc)
Emit flush of the variables specified in 'omp flush' directive.
bool isReferenceType() const
Definition: Type.h:5721
SmallVector< const Expr *, 4 > LastprivateVars
QualType getReturnType() const
Definition: Decl.h:2106
FieldDecl - An instance of this class is created by Sema::ActOnField to represent a member of a struc...
Definition: Decl.h:2366
bool isAnyPointerType() const
Definition: Type.h:5715
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr)
Definition: CGExpr.cpp:2138
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
bool isFileID() const
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
might be used in Fortran
LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound=true)
Definition: CGExpr.cpp:3310
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arrays used to pass the captures and map information to the offloading runtime library...
llvm::CallInst * EmitRuntimeCall(llvm::Value *callee, const Twine &name="")
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:3675
This represents clause 'map' in the '#pragma omp ...' directives.
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2103
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable. ...
This represents clause 'to' in the '#pragma omp ...' directives.
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
Definition: CharUnits.h:53
llvm::CallSite EmitCallOrInvoke(llvm::Value *Callee, ArrayRef< llvm::Value * > Args, const Twine &Name="")
Emits a call or invoke instruction to the given function, depending on the current state of the EH st...
Definition: CGCall.cpp:3655
FrontendAction * Action
Definition: Tooling.cpp:205
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:208
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
Expr * getCounterValue()
Get the loop counter value.
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
Definition: StmtOpenMP.h:313
const Decl * getDecl() const
Definition: GlobalDecl.h:62
unsigned size() const
Return number of entries defined so far.
An r-value expression (a pr-value in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:106
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:100
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, LValueBaseInfo BaseInfo=LValueBaseInfo(AlignmentSource::Type), llvm::MDNode *TBAAInfo=nullptr, QualType TBAABaseTy=QualType(), uint64_t TBAAOffset=0, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
Definition: CGExpr.cpp:1437
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk=nullptr)
Call the appropriate runtime routine to initialize it before start of loop.
BinaryOperatorKind
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:143
QualType TgtOffloadEntryQTy
Type struct __tgt_offload_entry{ void *addr; // Pointer to the offload entry info.
CharUnits getAlignment() const
Definition: CGValue.h:335
static CharUnits getIdentAlign(CodeGenModule &CGM)
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:125
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
Definition: CGBuilder.h:150
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:38
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
const Qualifiers & getQuals() const
Definition: CGValue.h:330
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1043
uint32_t Offset
Definition: CacheTokens.cpp:43
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
RAII for correct setting/restoring of CapturedStmtInfo.
child_range children()
Definition: Stmt.cpp:208
String describing the source location.
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned getOpenMPDefaultSimdAlign(QualType T) const
Get default simd alignment of the specified complete type in bits.
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:127
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind...
SmallVector< const Expr *, 4 > PrivateCopies
RecordDecl * getDecl() const
Definition: Type.h:3793
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:252
void EmitAggregateAssign(Address DestPtr, Address SrcPtr, QualType EltTy)
EmitAggregateCopy - Emit an aggregate assignment.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivatedAddr for using instead of the original variable address in normal operations...
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:39
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition: CGExpr.cpp:157
SmallVector< const Expr *, 4 > FirstprivateCopies
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
static void emitOffloadingArraysArgument(CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arguments to be passed to the runtime library based on the arrays of pointers, sizes and map types.
Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
virtual llvm::Value * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
unsigned getLine() const
Return the presumed line number of this location.
const RecordDecl * getCapturedRecordDecl() const
Retrieve the record declaration for captured variables.
Definition: Stmt.h:2149
SmallVector< const Expr *, 4 > ReductionOps
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
llvm::AllocaInst * CreateTempAlloca(llvm::Type *Ty, const Twine &Name="tmp", llvm::Value *ArraySize=nullptr)
CreateTempAlloca - This creates an alloca and inserts it into the entry block if ArraySize is nullptr...
Definition: CGExpr.cpp:90
bool isValid() const
Definition: Address.h:36
detail::InMemoryDirectory::const_iterator I
llvm::Value * EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef< llvm::Value * > IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
SmallVector< const Expr *, 4 > ReductionVars
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef< const Expr * > PrivateVars, ArrayRef< const Expr * > FirstprivateVars, ArrayRef< const Expr * > LastprivateVars, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables...
QualType getType() const
Definition: Decl.h:589
bool isInvalid() const
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:841
llvm::Value * emitReductionFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps)
Emits reduction function.
This represents clause 'from' in the '#pragma omp ...' directives.
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
Definition: CGExpr.cpp:3615
virtual llvm::Value * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type...
Definition: Type.h:6091
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:4161
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
OpenMPDependClauseKind getDependencyKind() const
Get dependency type.
OpenMP 4.0 [2.4, Array Sections].
Definition: ExprOpenMP.h:45
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
CanQualType getCanonicalTypeUnqualified() const
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:2045
llvm::Constant * createForStaticInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned...
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition: CGExpr.cpp:198
OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
RValue - This trivial value class is used to represent the result of an expression that is evaluated...
Definition: CGValue.h:38
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
bool addPrivate(const VarDecl *LocalVD, llvm::function_ref< Address()> PrivateGen)
Registers LocalVD variable as a private and apply PrivateGen function for it to generate correspondin...
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
Class intended to support codegen of all kind of the reduction clauses.
llvm::Constant * createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned...
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:136
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:414
QualType getTgtDeviceImageQTy()
Returns __tgt_device_image type.
This represents implicit clause 'depend' for the '#pragma omp task' directive.
KmpTaskTFields
Indexes of fields for type kmp_task_t.
static TypeEvaluationKind getEvaluationKind(QualType T)
hasAggregateLLVMType - Return true if the specified AST type will map into an aggregate LLVM type or ...
llvm::Value * getPointer() const
Definition: Address.h:38
ValueDecl - Represent the declaration of a variable (in which case it is an lvalue) a function (in wh...
Definition: Decl.h:580
Expr - This represents one expression.
Definition: Expr.h:105
Allow any unmodeled side effect.
Definition: Expr.h:595
virtual llvm::Value * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
static Address invalid()
Definition: Address.h:35
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
void loadOffloadInfoMetadata()
Loads all the offload entries information from the host IR metadata.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
static llvm::Value * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Value *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info)
Emit the target data mapping code associated with D.
static llvm::Function * createOffloadingBinaryDescriptorFunction(CodeGenModule &CGM, StringRef Name, const RegionCodeGenTy &Codegen)
Create a Ctor/Dtor-like function whose body is emitted through Codegen.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::Constant *ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, unsigned IVSize, bool Ordered, Address IL, Address LB, Address UB, Address ST, llvm::Value *Chunk)
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
void SetInternalFunctionAttributes(const Decl *D, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD)
Checks if destructor function is required to be generated.
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2088
ASTContext & getContext() const
llvm::BasicBlock * getBlock() const
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, int32_t Flags)
Register target region entry.
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:63
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, LValueBaseInfo BaseInfo=LValueBaseInfo(AlignmentSource::Type), llvm::MDNode *TBAAInfo=nullptr, bool isInit=false, QualType TBAABaseTy=QualType(), uint64_t TBAAOffset=0, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
Definition: CGExpr.cpp:1527
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:2081
static OMPLinearClause * CreateEmpty(const ASTContext &C, unsigned NumVars)
Creates an empty clause with the place for NumVars variables.
SmallVector< const Expr *, 4 > FirstprivateVars
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
Represents an unpacked "presumed" location which can be presented to the user.
void Emit(CodeGenFunction &CGF, Flags) override
Emit the cleanup.
unsigned Map[FirstTargetAddressSpace]
The type of a lookup table which maps from language-specific address spaces to target-specific ones...
Definition: AddressSpaces.h:53
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
Definition: CGExpr.cpp:49
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
CGOpenMPRuntime(CodeGenModule &CGM)
ValueDecl * getDecl()
Definition: Expr.h:1038
*QualType KmpTaskTQTy
OpenMPProcBindClauseKind
OpenMP attributes for 'proc_bind' clause.
Definition: OpenMPKinds.h:51
virtual llvm::Function * emitRegistrationFunction()
Creates the offloading descriptor in the event any target region was emitted in the current module an...
do v
Definition: arm_acle.h:78
llvm::Constant * createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned...
const SourceManager & SM
Definition: Format.cpp:1293
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc)
Emit code for 'taskwait' directive.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:1966
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:29
const Expr * getAnyInitializer() const
getAnyInitializer - Get the initializer for this variable, no matter which declaration it is attached...
Definition: Decl.h:1136
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum) const
Return true if a target region entry with the provided information exists.
AttrVec & getAttrs()
Definition: DeclBase.h:466
SourceLocation getLocStart() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:168
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
static llvm::Value * emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of threads for a target directive.
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition: CGExpr.cpp:2147
QualType TgtBinaryDescriptorQTy
struct __tgt_bin_desc{ int32_t NumDevices; // Number of devices supported.
The l-value was considered opaque, so the alignment was determined from a type.
const MatchFinder::MatchFinderOptions & Options
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
This represents 'num_teams' clause in the '#pragma omp ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class...
Definition: Expr.h:865
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:142
static llvm::Value * emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of teams for a target directive.
#define false
Definition: stdbool.h:33
Kind
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
This captures a statement into a function.
Definition: Stmt.h:2032
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
IdentFieldIndex
SmallVectorImpl< AnnotatedLine * >::const_iterator Next
const char * getFilename() const
Return the presumed filename of this location.
static with chunk adjustment (e.g., simd)
ASTContext & getContext() const
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup...
Definition: CGDecl.cpp:1476
void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
Encodes a location in the source.
unsigned getNumParams() const
getNumParams - Return the number of parameters this function must have based on its FunctionType...
Definition: Decl.cpp:2878
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:5489
llvm::Value * MapTypesArray
The array of map types passed to the runtime library.
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:102
AnnotatedLine & Line
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition: CGExpr.cpp:139
llvm::PointerIntPair< llvm::Value *, 1, bool > Final
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:33
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param IfCond Not a nullptr if if clause was nullptr *otherwise *param Data Additional data for task generation like final list of privates etc *virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
bool isValid() const
Return true if this is a valid SourceLocation object.
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:778
const std::string ID
reference front() const
Definition: DeclBase.h:1188
Lower bound for 'ordered' versions.
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:346
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr * > VL, ArrayRef< Expr * > PL, ArrayRef< Expr * > IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:2075
OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:23
Set if the nonmonotonic schedule modifier was present.
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
OpenMPLinearClauseKind Modifier
Modifier of 'linear' clause.
Definition: OpenMPClause.h:86
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2066
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:2072
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition: CGDecl.cpp:1187
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:197
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variable-modified type, if required.
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:6000
virtual void Enter(CodeGenFunction &CGF)
An aligned address.
Definition: Address.h:25
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
const LangOptions & getLangOpts() const
capture_iterator capture_end() const
Retrieve an iterator pointing past the end of the sequence of captures.
Definition: Stmt.h:2179
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:2132
void setAction(PrePostActionTy &Action) const
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:6105
lookup_result lookup(DeclarationName Name) const
lookup - Find the declarations (if any) with the given Name in this context.
Definition: DeclBase.cpp:1507
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:786
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
Complete object dtor.
Definition: ABI.h:36
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
const SpecificClause * getSingleClause() const
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:148
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:338
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:216
QualType getPointeeType() const
Definition: Type.h:2238
TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
Task initialization helper. SharedsTy is a type which contains references to the shared variables; Shareds is the context with the list of shared variables from the TaskFunction; Data carries additional data for task generation, like the final list of privates, etc.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:276
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item)...
QualType getType() const
Definition: Expr.h:127
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
CanQualType CharTy
Definition: ASTContext.h:965
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:67
This class organizes the cross-function state that is used while generating LLVM code.
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:67
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1215
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:92
StringRef Name
Definition: USRFinder.cpp:123
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc)
Call the appropriate runtime routine to notify that we finished all the work with current loop...
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:58
OpenMPLocationFlags
Values for bit flags used in the ident_t to describe the fields.
Address CreateStructGEP(Address Addr, unsigned Index, CharUnits Offset, const llvm::Twine &Name="")
Definition: CGBuilder.h:165
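Going by the signature above, a typical (hypothetical) use computes the address of one field of an aggregate; 'CGF' and 'Base' are assumed to be a CodeGenFunction and the Address of some struct, and the byte offset must match the chosen field:
  // Hypothetical usage: address of field #1, assumed to live 8 bytes into the struct.
  Address FieldAddr =
      CGF.Builder.CreateStructGEP(Base, /*Index=*/1, CharUnits::fromQuantity(8), "field1");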
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:126
llvm::Value * LB
Loop lower bound.
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:2069
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, unsigned Order)
Initialize target region entry.
std::unique_ptr< DiagnosticConsumer > create(StringRef OutputFile, DiagnosticOptions *Diags, bool MergeChildRecords=false)
Returns a DiagnosticConsumer that serializes diagnostics to a bitcode file.
llvm::Function * createOffloadingBinaryDescriptorRegistration()
Creates and registers offloading binary descriptor for the current compilation unit.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:70
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Value *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device, ArrayRef< llvm::Value * > CapturedVars)
Emit the target offloading code associated with D.
llvm::PointerIntPair< llvm::Value *, 1, bool > Priority
ReductionCodeGen(ArrayRef< const Expr * > Shareds, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > ReductionOps)
RTCancelKind
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
bool empty() const
Return true if there are no entries defined.
bool hasAttrs() const
Definition: DeclBase.h:462
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
llvm::Value * UB
Loop upper bound.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified) ...
detail::InMemoryDirectory::const_iterator E
void EmitAggregateCopy(Address DestPtr, Address SrcPtr, QualType EltTy, bool isVolatile=false, bool isAssignment=false)
EmitAggregateCopy - Emit an aggregate copy.
Definition: CGExprAgg.cpp:1561
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:108
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for the initialization of a threadprivate variable.
Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...
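As a hedged, simplified picture of what this fork entry point receives: the body of a '#pragma omp parallel' is outlined into a microtask that takes the global and bound thread ids plus the captured variables, and the fork call forwards those captures. Names below are illustrative, not the symbols clang actually produces:
  // Outlined body of the parallel region (illustrative signature).
  static void omp_outlined(int *global_tid, int *bound_tid, int *captured_n) {
    // ... parallel region body, reading/writing *captured_n ...
  }
  // At the directive site: one variadic runtime call carries the captures.
  __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)&omp_outlined, &n);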
bool field_empty() const
Definition: Decl.h:3492
Not really used in Fortran any more.
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
const RecordType * getAsStructureType() const
Definition: Type.cpp:430
Expr * IgnoreParenImpCasts() LLVM_READONLY
IgnoreParenImpCasts - Ignore parentheses and implicit casts.
Definition: Expr.cpp:2486
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2238
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:5662
static CharUnits getIdentSize(CodeGenModule &CGM)
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager
API for captured statement code generation.
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:44
static bool classof(const OMPClause *T)
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:6042
QualType getCanonicalType() const
Definition: Type.h:5528
This file defines OpenMP AST classes for executable directives and clauses.
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
StructBuilder beginStruct(llvm::StructType *structTy=nullptr)
Address CreateConstArrayGEP(Address Addr, uint64_t Index, CharUnits EltSize, const llvm::Twine &Name="")
Given addr = [n x T]* ...
Definition: CGBuilder.h:188
CleanupTy(PrePostActionTy *Action)
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, unsigned &LineNum)
Obtain information that uniquely identifies a target entry.
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
LValue EmitLoadOfReferenceLValue(Address Ref, const ReferenceType *RefTy)
Definition: CGExpr.cpp:2131
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:59
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:2360
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1548
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > Privates, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, ArrayRef< const Expr * > ReductionOps, ReductionOptionsTy Options)
Emit code for the reduction clause.
StringRef getMangledName(GlobalDecl GD)
QualType withRestrict() const
Definition: Type.h:798
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
Definition: Linkage.h:33
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:436
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1396
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
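A hedged illustration of the query, assuming 'RT' is a CGOpenMPRuntime reference and using the usual OpenMPScheduleClauseKind enumerator for 'static':
  // schedule(static)    -> no chunk expression: static non-chunked.
  bool A = RT.isStaticNonchunked(OMPC_SCHEDULE_static, /*Chunked=*/false); // expected true
  // schedule(static, 4) -> chunk given: not the non-chunked form.
  bool B = RT.isStaticNonchunked(OMPC_SCHEDULE_static, /*Chunked=*/true);  // expected false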
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr * > LHSExprs, ArrayRef< const Expr * > RHSExprs, const OMPTaskDataTy &Data)
Emit code for the initialization of the task reduction clause.
llvm::Constant * createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned...
QualType getPointeeType() const
Definition: Type.h:2381
SourceManager & getSourceManager()
Definition: ASTContext.h:616
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:13074
Lower bound for default (unordered) versions.
static std::string generateUniqueName(StringRef Prefix, SourceLocation Loc, unsigned N)
Generates unique name for artificial threadprivate variables.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars)
Emits code for a teams call of the OutlinedFn with variables captured in a record whose address is stor...
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause * > Clauses)
Definition: StmtOpenMP.h:130
llvm::PointerIntPair< llvm::Value *, 1, bool > Schedule
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Definition: CGStmt.cpp:456
llvm::Function * CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false)
Definition: CGDeclCXX.cpp:262
llvm::Value * BasePointersArray
The array of base pointers passed to the runtime library.
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:3984
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
OpenMPOffloadingReservedDeviceIDs
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:505
llvm::Type * ConvertType(QualType T)
Privates[]
Gets the list of initial values for linear variables.
Definition: OpenMPClause.h:136
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:92
LValue MakeAddrLValue(Address Addr, QualType T, LValueBaseInfo BaseInfo=LValueBaseInfo(AlignmentSource::Type))
static const Stmt * ignoreCompoundStmts(const Stmt *Body)
Discard all CompoundStmts intervening between two constructs.
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
Definition: CGExpr.cpp:1082
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:583
OpenMPRTLFunction
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:245
bool isArrayType() const
Definition: Type.h:5751
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition: CGExpr.cpp:1595
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
std::pair< llvm::Value *, QualType > getVLASize(const VariableArrayType *vla)
getVLASize - Returns an LLVM value that corresponds to the size, in non-variably-sized elements...
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
QualType getType() const
Definition: CGValue.h:277
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
CanQualType IntTy
Definition: ASTContext.h:971
capture_range captures()
Definition: Stmt.h:2166
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:953
static RValue get(llvm::Value *V)
Definition: CGValue.h:85
const llvm::Triple & getTriple() const
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
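Based on the signature above, a hypothetical use that builds the canonical type for 'int[10]', assuming 'Ctx' is an ASTContext reference:
  // Illustrative only: 32-bit size value 10, normal size modifier, no index qualifiers.
  llvm::APInt Size(/*numBits=*/32, /*val=*/10);
  QualType IntArray10 =
      Ctx.getConstantArrayType(Ctx.IntTy, Size, ArrayType::Normal, /*IndexTypeQuals=*/0);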
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
Definition: CGCleanup.cpp:1034
ParamKindTy
Kind of parameter in a function with 'declare simd' directive.
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D)
Emit initialization for doacross loop nesting support.
QualType KmpDimTy
struct kmp_dim {  // loop bounds info casted to kmp_int64
  kmp_int64 lo;   // lower
  kmp_int64 up;   // uppe...
static int array_pod_sort_comparator(const PrivateDataTy *P1, const PrivateDataTy *P2)
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:110
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition: CGValue.h:106
SourceLocation getLocation() const
Definition: DeclBase.h:407
LValue - This represents an lvalue reference.
Definition: CGValue.h:171
FunctionDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2812
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:147
CanQualType BoolTy
Definition: ASTContext.h:964
llvm::Constant * createRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function.
SourceLocation getLocEnd() const
Returns ending location of directive.
Definition: StmtOpenMP.h:170
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition: Type.h:683
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:3574
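Several entries on this page fit together: a helper like the addFieldToRecordDecl listed above can be assembled from FieldDecl::Create, getTrivialTypeSourceInfo, and DeclContext::addDecl. A minimal sketch under that assumption, not necessarily the exact body in this file:
  static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                          QualType FieldTy) {
    // Build an unnamed public field of the given type and attach it to DC.
    auto *Field = FieldDecl::Create(
        C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
        C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
        /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
    Field->setAccess(AS_public);
    DC->addDecl(Field);
    return Field;
  }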
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:157
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
Stmt * getAssociatedStmt() const
Returns statement associated with the directive.
Definition: StmtOpenMP.h:196
void operator()(CodeGenFunction &CGF) const
SourceLocation getLocStart() const LLVM_READONLY
Definition: Stmt.cpp:257
Address CreateMemTemp(QualType T, const Twine &Name="tmp", bool CastToDefaultAddrSpace=true)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment...
Definition: CGExpr.cpp:123
No in-class initializer.
Definition: Specifiers.h:226
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr * > CopyprivateVars, ArrayRef< const Expr * > DestExprs, ArrayRef< const Expr * > SrcExprs, ArrayRef< const Expr * > AssignmentOps)
Emits a single region.
A class which abstracts out some details necessary for making a call.
Definition: Type.h:2948
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
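Parallel to the num_teams case earlier, a hedged sketch of what gets emitted ahead of the fork for '#pragma omp parallel num_threads(4)'; 'loc' and 'gtid' are again stand-ins:
  // Illustrative: the clause value is pushed before the region is forked.
  __kmpc_push_num_threads(&loc, gtid, /*num_threads=*/4);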
const NamedDecl * Result
Definition: USRFinder.cpp:70
Attr - This represents one attribute.
Definition: Attr.h:43
SmallVector< const Expr *, 4 > FirstprivateInits
This represents clause 'use_device_ptr' in the '#pragma omp ...' directives.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null. If the type contains...
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object...
llvm::Value * SizesArray
The array of sizes passed to the runtime library.
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:794
bool Privatize()
Privatizes local variables previously registered as private.
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:5516
bool isPointerType() const
Definition: Type.h:5712
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *OutlinedFn, ArrayRef< llvm::Value * > CapturedVars, const Expr *IfCond)
Emits code for a parallel or serial call of the OutlinedFn with variables captured in a record whose ad...
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1519
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location of a SourceLocation specifies.