clang  9.0.0
CGOpenMPRuntime.cpp
Go to the documentation of this file.
1 //===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This provides a class for OpenMP runtime code generation.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGCleanup.h"
15 #include "CGOpenMPRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
19 #include "clang/AST/Decl.h"
20 #include "clang/AST/StmtOpenMP.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/Bitcode/BitcodeReader.h"
24 #include "llvm/IR/DerivedTypes.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/Value.h"
27 #include "llvm/Support/Format.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <cassert>
30 
31 using namespace clang;
32 using namespace CodeGen;
33 
34 namespace {
35 /// Base class for handling code generation inside OpenMP regions.
36 class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
37 public:
38  /// Kinds of OpenMP regions used in codegen.
39  enum CGOpenMPRegionKind {
40  /// Region with outlined function for standalone 'parallel'
41  /// directive.
42  ParallelOutlinedRegion,
43  /// Region with outlined function for standalone 'task' directive.
44  TaskOutlinedRegion,
45  /// Region for constructs that do not require function outlining,
46  /// like 'for', 'sections', 'atomic' etc. directives.
47  InlinedRegion,
48  /// Region with outlined function for standalone 'target' directive.
49  TargetRegion,
50  };
51 
52  CGOpenMPRegionInfo(const CapturedStmt &CS,
53  const CGOpenMPRegionKind RegionKind,
55  bool HasCancel)
56  : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
57  CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}
58 
59  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
61  bool HasCancel)
62  : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
63  Kind(Kind), HasCancel(HasCancel) {}
64 
65  /// Get a variable or parameter for storing global thread id
66  /// inside OpenMP construct.
67  virtual const VarDecl *getThreadIDVariable() const = 0;
68 
69  /// Emit the captured statement body.
70  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;
71 
72  /// Get an LValue for the current ThreadID variable.
73  /// \return LValue for thread id variable. This LValue always has type int32*.
74  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
75 
76  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}
77 
78  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }
79 
80  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }
81 
82  bool hasCancel() const { return HasCancel; }
83 
84  static bool classof(const CGCapturedStmtInfo *Info) {
85  return Info->getKind() == CR_OpenMP;
86  }
87 
88  ~CGOpenMPRegionInfo() override = default;
89 
90 protected:
91  CGOpenMPRegionKind RegionKind;
92  RegionCodeGenTy CodeGen;
94  bool HasCancel;
95 };
96 
97 /// API for captured statement code generation in OpenMP constructs.
98 class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
99 public:
100  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
101  const RegionCodeGenTy &CodeGen,
102  OpenMPDirectiveKind Kind, bool HasCancel,
103  StringRef HelperName)
104  : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
105  HasCancel),
106  ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
107  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
108  }
109 
110  /// Get a variable or parameter for storing global thread id
111  /// inside OpenMP construct.
112  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
113 
114  /// Get the name of the capture helper.
115  StringRef getHelperName() const override { return HelperName; }
116 
117  static bool classof(const CGCapturedStmtInfo *Info) {
118  return CGOpenMPRegionInfo::classof(Info) &&
119  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
120  ParallelOutlinedRegion;
121  }
122 
123 private:
124  /// A variable or parameter storing global thread id for OpenMP
125  /// constructs.
126  const VarDecl *ThreadIDVar;
127  StringRef HelperName;
128 };
129 
130 /// API for captured statement code generation in OpenMP constructs.
131 class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
132 public:
133  class UntiedTaskActionTy final : public PrePostActionTy {
134  bool Untied;
135  const VarDecl *PartIDVar;
136  const RegionCodeGenTy UntiedCodeGen;
137  llvm::SwitchInst *UntiedSwitch = nullptr;
138 
139  public:
140  UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
141  const RegionCodeGenTy &UntiedCodeGen)
142  : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
143  void Enter(CodeGenFunction &CGF) override {
144  if (Untied) {
145  // Emit task switching point.
146  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
147  CGF.GetAddrOfLocalVar(PartIDVar),
148  PartIDVar->getType()->castAs<PointerType>());
149  llvm::Value *Res =
150  CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
151  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
152  UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
153  CGF.EmitBlock(DoneBB);
155  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
156  UntiedSwitch->addCase(CGF.Builder.getInt32(0),
157  CGF.Builder.GetInsertBlock());
158  emitUntiedSwitch(CGF);
159  }
160  }
161  void emitUntiedSwitch(CodeGenFunction &CGF) const {
162  if (Untied) {
163  LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
164  CGF.GetAddrOfLocalVar(PartIDVar),
165  PartIDVar->getType()->castAs<PointerType>());
166  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
167  PartIdLVal);
168  UntiedCodeGen(CGF);
169  CodeGenFunction::JumpDest CurPoint =
170  CGF.getJumpDestInCurrentScope(".untied.next.");
172  CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
173  UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
174  CGF.Builder.GetInsertBlock());
175  CGF.EmitBranchThroughCleanup(CurPoint);
176  CGF.EmitBlock(CurPoint.getBlock());
177  }
178  }
179  unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
180  };
181  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
182  const VarDecl *ThreadIDVar,
183  const RegionCodeGenTy &CodeGen,
184  OpenMPDirectiveKind Kind, bool HasCancel,
185  const UntiedTaskActionTy &Action)
186  : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
187  ThreadIDVar(ThreadIDVar), Action(Action) {
188  assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
189  }
190 
191  /// Get a variable or parameter for storing global thread id
192  /// inside OpenMP construct.
193  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }
194 
195  /// Get an LValue for the current ThreadID variable.
196  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;
197 
198  /// Get the name of the capture helper.
199  StringRef getHelperName() const override { return ".omp_outlined."; }
200 
201  void emitUntiedSwitch(CodeGenFunction &CGF) override {
202  Action.emitUntiedSwitch(CGF);
203  }
204 
205  static bool classof(const CGCapturedStmtInfo *Info) {
206  return CGOpenMPRegionInfo::classof(Info) &&
207  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
208  TaskOutlinedRegion;
209  }
210 
211 private:
212  /// A variable or parameter storing global thread id for OpenMP
213  /// constructs.
214  const VarDecl *ThreadIDVar;
215  /// Action for emitting code for untied tasks.
216  const UntiedTaskActionTy &Action;
217 };
218 
219 /// API for inlined captured statement code generation in OpenMP
220 /// constructs.
221 class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
222 public:
223  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
224  const RegionCodeGenTy &CodeGen,
225  OpenMPDirectiveKind Kind, bool HasCancel)
226  : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
227  OldCSI(OldCSI),
228  OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}
229 
230  // Retrieve the value of the context parameter.
231  llvm::Value *getContextValue() const override {
232  if (OuterRegionInfo)
233  return OuterRegionInfo->getContextValue();
234  llvm_unreachable("No context value for inlined OpenMP region");
235  }
236 
237  void setContextValue(llvm::Value *V) override {
238  if (OuterRegionInfo) {
239  OuterRegionInfo->setContextValue(V);
240  return;
241  }
242  llvm_unreachable("No context value for inlined OpenMP region");
243  }
244 
245  /// Lookup the captured field decl for a variable.
246  const FieldDecl *lookup(const VarDecl *VD) const override {
247  if (OuterRegionInfo)
248  return OuterRegionInfo->lookup(VD);
249  // If there is no outer outlined region,no need to lookup in a list of
250  // captured variables, we can use the original one.
251  return nullptr;
252  }
253 
254  FieldDecl *getThisFieldDecl() const override {
255  if (OuterRegionInfo)
256  return OuterRegionInfo->getThisFieldDecl();
257  return nullptr;
258  }
259 
260  /// Get a variable or parameter for storing global thread id
261  /// inside OpenMP construct.
262  const VarDecl *getThreadIDVariable() const override {
263  if (OuterRegionInfo)
264  return OuterRegionInfo->getThreadIDVariable();
265  return nullptr;
266  }
267 
268  /// Get an LValue for the current ThreadID variable.
269  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
270  if (OuterRegionInfo)
271  return OuterRegionInfo->getThreadIDVariableLValue(CGF);
272  llvm_unreachable("No LValue for inlined OpenMP construct");
273  }
274 
275  /// Get the name of the capture helper.
276  StringRef getHelperName() const override {
277  if (auto *OuterRegionInfo = getOldCSI())
278  return OuterRegionInfo->getHelperName();
279  llvm_unreachable("No helper name for inlined OpenMP construct");
280  }
281 
282  void emitUntiedSwitch(CodeGenFunction &CGF) override {
283  if (OuterRegionInfo)
284  OuterRegionInfo->emitUntiedSwitch(CGF);
285  }
286 
287  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }
288 
289  static bool classof(const CGCapturedStmtInfo *Info) {
290  return CGOpenMPRegionInfo::classof(Info) &&
291  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
292  }
293 
294  ~CGOpenMPInlinedRegionInfo() override = default;
295 
296 private:
297  /// CodeGen info about outer OpenMP region.
299  CGOpenMPRegionInfo *OuterRegionInfo;
300 };
301 
302 /// API for captured statement code generation in OpenMP target
303 /// constructs. For this captures, implicit parameters are used instead of the
304 /// captured fields. The name of the target region has to be unique in a given
305 /// application so it is provided by the client, because only the client has
306 /// the information to generate that.
307 class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
308 public:
309  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
310  const RegionCodeGenTy &CodeGen, StringRef HelperName)
311  : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
312  /*HasCancel=*/false),
313  HelperName(HelperName) {}
314 
315  /// This is unused for target regions because each starts executing
316  /// with a single thread.
317  const VarDecl *getThreadIDVariable() const override { return nullptr; }
318 
319  /// Get the name of the capture helper.
320  StringRef getHelperName() const override { return HelperName; }
321 
322  static bool classof(const CGCapturedStmtInfo *Info) {
323  return CGOpenMPRegionInfo::classof(Info) &&
324  cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
325  }
326 
327 private:
328  StringRef HelperName;
329 };
330 
331 static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
332  llvm_unreachable("No codegen for expressions");
333 }
334 /// API for generation of expressions captured in a innermost OpenMP
335 /// region.
336 class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
337 public:
338  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
339  : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
340  OMPD_unknown,
341  /*HasCancel=*/false),
342  PrivScope(CGF) {
343  // Make sure the globals captured in the provided statement are local by
344  // using the privatization logic. We assume the same variable is not
345  // captured more than once.
346  for (const auto &C : CS.captures()) {
347  if (!C.capturesVariable() && !C.capturesVariableByCopy())
348  continue;
349 
350  const VarDecl *VD = C.getCapturedVar();
351  if (VD->isLocalVarDeclOrParm())
352  continue;
353 
354  DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
355  /*RefersToEnclosingVariableOrCapture=*/false,
357  C.getLocation());
358  PrivScope.addPrivate(
359  VD, [&CGF, &DRE]() { return CGF.EmitLValue(&DRE).getAddress(); });
360  }
361  (void)PrivScope.Privatize();
362  }
363 
364  /// Lookup the captured field decl for a variable.
365  const FieldDecl *lookup(const VarDecl *VD) const override {
366  if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
367  return FD;
368  return nullptr;
369  }
370 
371  /// Emit the captured statement body.
372  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
373  llvm_unreachable("No body for expressions");
374  }
375 
376  /// Get a variable or parameter for storing global thread id
377  /// inside OpenMP construct.
378  const VarDecl *getThreadIDVariable() const override {
379  llvm_unreachable("No thread id for expressions");
380  }
381 
382  /// Get the name of the capture helper.
383  StringRef getHelperName() const override {
384  llvm_unreachable("No helper name for expressions");
385  }
386 
387  static bool classof(const CGCapturedStmtInfo *Info) { return false; }
388 
389 private:
390  /// Private scope to capture global variables.
392 };
393 
394 /// RAII for emitting code of OpenMP constructs.
395 class InlinedOpenMPRegionRAII {
396  CodeGenFunction &CGF;
397  llvm::DenseMap<const VarDecl *, FieldDecl *> LambdaCaptureFields;
398  FieldDecl *LambdaThisCaptureField = nullptr;
399  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
400 
401 public:
402  /// Constructs region for combined constructs.
403  /// \param CodeGen Code generation sequence for combined directives. Includes
404  /// a list of functions used for code generation of implicitly inlined
405  /// regions.
406  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
407  OpenMPDirectiveKind Kind, bool HasCancel)
408  : CGF(CGF) {
409  // Start emission for the construct.
410  CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
411  CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
412  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
413  LambdaThisCaptureField = CGF.LambdaThisCaptureField;
414  CGF.LambdaThisCaptureField = nullptr;
415  BlockInfo = CGF.BlockInfo;
416  CGF.BlockInfo = nullptr;
417  }
418 
419  ~InlinedOpenMPRegionRAII() {
420  // Restore original CapturedStmtInfo only if we're done with code emission.
421  auto *OldCSI =
422  cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
423  delete CGF.CapturedStmtInfo;
424  CGF.CapturedStmtInfo = OldCSI;
425  std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
426  CGF.LambdaThisCaptureField = LambdaThisCaptureField;
427  CGF.BlockInfo = BlockInfo;
428  }
429 };
430 
431 /// Values for bit flags used in the ident_t to describe the fields.
432 /// All enumeric elements are named and described in accordance with the code
433 /// from https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
434 enum OpenMPLocationFlags : unsigned {
435  /// Use trampoline for internal microtask.
436  OMP_IDENT_IMD = 0x01,
437  /// Use c-style ident structure.
438  OMP_IDENT_KMPC = 0x02,
439  /// Atomic reduction option for kmpc_reduce.
440  OMP_ATOMIC_REDUCE = 0x10,
441  /// Explicit 'barrier' directive.
442  OMP_IDENT_BARRIER_EXPL = 0x20,
443  /// Implicit barrier in code.
444  OMP_IDENT_BARRIER_IMPL = 0x40,
445  /// Implicit barrier in 'for' directive.
446  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
447  /// Implicit barrier in 'sections' directive.
448  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
449  /// Implicit barrier in 'single' directive.
450  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
451  /// Call of __kmp_for_static_init for static loop.
452  OMP_IDENT_WORK_LOOP = 0x200,
453  /// Call of __kmp_for_static_init for sections.
454  OMP_IDENT_WORK_SECTIONS = 0x400,
455  /// Call of __kmp_for_static_init for distribute.
456  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
457  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
458 };
459 
460 namespace {
462 /// Values for bit flags for marking which requires clauses have been used.
464  /// flag undefined.
465  OMP_REQ_UNDEFINED = 0x000,
466  /// no requires clause present.
467  OMP_REQ_NONE = 0x001,
468  /// reverse_offload clause.
469  OMP_REQ_REVERSE_OFFLOAD = 0x002,
470  /// unified_address clause.
471  OMP_REQ_UNIFIED_ADDRESS = 0x004,
472  /// unified_shared_memory clause.
473  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
474  /// dynamic_allocators clause.
475  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
476  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
477 };
478 
480  /// Device ID if the device was not defined, runtime should get it
481  /// from environment variables in the spec.
482  OMP_DEVICEID_UNDEF = -1,
483 };
484 } // anonymous namespace
485 
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
///
/// The enumerators below are the field indices of that structure, in
/// declaration order.
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
526 
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
558 
/// Identifiers of the OpenMP runtime library entry points used by codegen.
/// Each enumerator is named OMPRTL_ followed by the runtime function it
/// stands for; the comment on each gives that function's C prototype.
enum OpenMPRTLFunction {
  /// Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc,
  /// kmpc_micro microtask, ...);
  OMPRTL__kmpc_fork_call,
  /// Call to void *__kmpc_threadprivate_cached(ident_t *loc,
  /// kmp_int32 global_tid, void *data, size_t size, void ***cache);
  OMPRTL__kmpc_threadprivate_cached,
  /// Call to void __kmpc_threadprivate_register( ident_t *,
  /// void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
  OMPRTL__kmpc_threadprivate_register,
  /// Call to kmp_int32 __kmpc_global_thread_num(ident_t *loc);
  OMPRTL__kmpc_global_thread_num,
  /// Call to void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_critical,
  /// Call to void __kmpc_critical_with_hint(ident_t *loc, kmp_int32
  /// global_tid, kmp_critical_name *crit, uintptr_t hint);
  OMPRTL__kmpc_critical_with_hint,
  /// Call to void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *crit);
  OMPRTL__kmpc_end_critical,
  /// Call to kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_cancel_barrier,
  /// Call to void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_barrier,
  /// Call to void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_for_static_fini,
  /// Call to void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_serialized_parallel,
  /// Call to void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_end_serialized_parallel,
  /// Call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_threads);
  OMPRTL__kmpc_push_num_threads,
  /// Call to void __kmpc_flush(ident_t *loc);
  OMPRTL__kmpc_flush,
  /// Call to kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_master,
  /// Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_master,
  /// Call to kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
  /// int end_part);
  OMPRTL__kmpc_omp_taskyield,
  /// Call to kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_single,
  /// Call to void __kmpc_end_single(ident_t *, kmp_int32 global_tid);
  OMPRTL__kmpc_end_single,
  /// Call to kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  /// kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  /// kmp_routine_entry_t *task_entry);
  OMPRTL__kmpc_omp_task_alloc,
  /// Call to kmp_task_t * __kmpc_omp_target_task_alloc(ident_t *,
  /// kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,
  /// size_t sizeof_shareds, kmp_routine_entry_t *task_entry,
  /// kmp_int64 device_id);
  OMPRTL__kmpc_omp_target_task_alloc,
  /// Call to kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t *
  /// new_task);
  OMPRTL__kmpc_omp_task,
  /// Call to void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
  /// size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
  /// kmp_int32 didit);
  OMPRTL__kmpc_copyprivate,
  /// Call to kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
  /// (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
  OMPRTL__kmpc_reduce,
  /// Call to kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
  /// void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
  /// *lck);
  OMPRTL__kmpc_reduce_nowait,
  /// Call to void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce,
  /// Call to void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
  /// kmp_critical_name *lck);
  OMPRTL__kmpc_end_reduce_nowait,
  /// Call to void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_begin_if0,
  /// Call to void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
  /// kmp_task_t * new_task);
  OMPRTL__kmpc_omp_task_complete_if0,
  /// Call to void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_ordered,
  /// Call to void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_ordered,
  /// Call to kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  /// global_tid);
  OMPRTL__kmpc_omp_taskwait,
  /// Call to void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_taskgroup,
  /// Call to void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
  OMPRTL__kmpc_end_taskgroup,
  /// Call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
  /// int proc_bind);
  OMPRTL__kmpc_push_proc_bind,
  /// Call to kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_task_t * new_task, kmp_int32 ndeps, kmp_depend_info_t
  /// *dep_list, kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_task_with_deps,
  /// Call to void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32
  /// gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
  /// ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
  OMPRTL__kmpc_omp_wait_deps,
  /// Call to kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  /// global_tid, kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancellationpoint,
  /// Call to kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 cncl_kind);
  OMPRTL__kmpc_cancel,
  /// Call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
  /// kmp_int32 num_teams, kmp_int32 thread_limit);
  OMPRTL__kmpc_push_num_teams,
  /// Call to void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
  /// microtask, ...);
  OMPRTL__kmpc_fork_teams,
  /// Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  /// if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  /// sched, kmp_uint64 grainsize, void *task_dup);
  OMPRTL__kmpc_taskloop,
  /// Call to void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
  /// num_dims, struct kmp_dim *dims);
  OMPRTL__kmpc_doacross_init,
  /// Call to void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
  OMPRTL__kmpc_doacross_fini,
  /// Call to void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_post,
  /// Call to void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
  /// *vec);
  OMPRTL__kmpc_doacross_wait,
  /// Call to void *__kmpc_task_reduction_init(int gtid, int num_data, void
  /// *data);
  OMPRTL__kmpc_task_reduction_init,
  /// Call to void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  /// *d);
  OMPRTL__kmpc_task_reduction_get_th_data,
  /// Call to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);
  OMPRTL__kmpc_alloc,
  /// Call to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);
  OMPRTL__kmpc_free,

  //
  // Offloading related calls
  //
  /// Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
  /// size);
  OMPRTL__kmpc_push_target_tripcount,
  /// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target,
  /// Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_nowait,
  /// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
  /// int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams,
  /// Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
  /// *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
  /// *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
  OMPRTL__tgt_target_teams_nowait,
  /// Call to void __tgt_register_requires(int64_t flags);
  OMPRTL__tgt_register_requires,
  /// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_register_lib,
  /// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
  OMPRTL__tgt_unregister_lib,
  /// Call to void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_begin,
  /// Call to void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_begin_nowait,
  /// Call to void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, size_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_end,
  /// Call to void __tgt_target_data_end_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_end_nowait,
  /// Call to void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
  /// void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
  OMPRTL__tgt_target_data_update,
  /// Call to void __tgt_target_data_update_nowait(int64_t device_id, int32_t
  /// arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
  /// *arg_types);
  OMPRTL__tgt_target_data_update_nowait,
};
756 
757 /// A basic class for pre|post-action for advanced codegen sequence for OpenMP
758 /// region.
759 class CleanupTy final : public EHScopeStack::Cleanup {
760  PrePostActionTy *Action;
761 
762 public:
763  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
764  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
765  if (!CGF.HaveInsertPoint())
766  return;
767  Action->Exit(CGF);
768  }
769 };
770 
771 } // anonymous namespace
772 
775  if (PrePostAction) {
776  CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
777  Callback(CodeGen, CGF, *PrePostAction);
778  } else {
779  PrePostActionTy Action;
780  Callback(CodeGen, CGF, Action);
781  }
782 }
783 
784 /// Check if the combiner is a call to UDR combiner and if it is so return the
785 /// UDR decl used for reduction.
786 static const OMPDeclareReductionDecl *
787 getReductionInit(const Expr *ReductionOp) {
788  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
789  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
790  if (const auto *DRE =
791  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
792  if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
793  return DRD;
794  return nullptr;
795 }
796 
798  const OMPDeclareReductionDecl *DRD,
799  const Expr *InitOp,
800  Address Private, Address Original,
801  QualType Ty) {
802  if (DRD->getInitializer()) {
803  std::pair<llvm::Function *, llvm::Function *> Reduction =
804  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
805  const auto *CE = cast<CallExpr>(InitOp);
806  const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
807  const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
808  const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
809  const auto *LHSDRE =
810  cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
811  const auto *RHSDRE =
812  cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
813  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
814  PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()),
815  [=]() { return Private; });
816  PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()),
817  [=]() { return Original; });
818  (void)PrivateScope.Privatize();
819  RValue Func = RValue::get(Reduction.second);
820  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
821  CGF.EmitIgnoredExpr(InitOp);
822  } else {
823  llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
824  std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
825  auto *GV = new llvm::GlobalVariable(
826  CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
827  llvm::GlobalValue::PrivateLinkage, Init, Name);
828  LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
829  RValue InitRVal;
830  switch (CGF.getEvaluationKind(Ty)) {
831  case TEK_Scalar:
832  InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
833  break;
834  case TEK_Complex:
835  InitRVal =
837  break;
838  case TEK_Aggregate:
839  InitRVal = RValue::getAggregate(LV.getAddress());
840  break;
841  }
842  OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_RValue);
843  CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
844  CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
845  /*IsInitializer=*/false);
846  }
847 }
848 
849 /// Emit initialization of arrays of complex types.
850 /// \param DestAddr Address of the array.
851 /// \param Type Type of array.
852 /// \param Init Initial expression of array.
853 /// \param SrcAddr Address of the original array.
/// \param EmitDeclareReductionInit If true, every element is initialized via
/// emitInitWithReductionInitializer; otherwise \p Init is emitted directly
/// into the current destination element.
/// \param DRD Declare-reduction declaration. When non-null, \p SrcAddr is
/// walked element by element in lock-step with \p DestAddr.
854 static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
855  QualType Type, bool EmitDeclareReductionInit,
856  const Expr *Init,
857  const OMPDeclareReductionDecl *DRD,
858  Address SrcAddr = Address::invalid()) {
859  // Perform element-by-element initialization.
860  QualType ElementTy;
861 
862  // Drill down to the base element type on both arrays.
863  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
864  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
865  DestAddr =
866  CGF.Builder.CreateElementBitCast(DestAddr, DestAddr.getElementType());
  // The source array is only touched when a declare-reduction is in play.
867  if (DRD)
868  SrcAddr =
869  CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());
870 
871  llvm::Value *SrcBegin = nullptr;
872  if (DRD)
873  SrcBegin = SrcAddr.getPointer();
874  llvm::Value *DestBegin = DestAddr.getPointer();
875  // Cast from pointer to array type to pointer to single element.
876  llvm::Value *DestEnd = CGF.Builder.CreateGEP(DestBegin, NumElements);
877  // The basic structure here is a while-do loop.
878  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
879  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  // Skip the loop entirely for a zero-length array (begin == end).
880  llvm::Value *IsEmpty =
881  CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
882  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
883 
884  // Enter the loop body, making that address the current address.
  // EntryBB must be captured before BodyBB is emitted: it is the incoming
  // block for the initial PHI values below.
885  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
886  CGF.EmitBlock(BodyBB);
887 
888  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
889 
890  llvm::PHINode *SrcElementPHI = nullptr;
891  Address SrcElementCurrent = Address::invalid();
892  if (DRD) {
893  SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
894  "omp.arraycpy.srcElementPast");
895  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
896  SrcElementCurrent =
897  Address(SrcElementPHI,
898  SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
899  }
900  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
901  DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
902  DestElementPHI->addIncoming(DestBegin, EntryBB);
903  Address DestElementCurrent =
904  Address(DestElementPHI,
905  DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));
906 
907  // Emit copy.
908  {
  // Cleanups produced while initializing one element are run at the end of
  // this scope, i.e. before the loop advances.
909  CodeGenFunction::RunCleanupsScope InitScope(CGF);
910  if (EmitDeclareReductionInit) {
911  emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
912  SrcElementCurrent, ElementTy);
913  } else
914  CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
915  /*IsInitializer=*/false);
916  }
917 
918  if (DRD) {
919  // Shift the address forward by one element.
  // NOTE(review): the source GEP reuses the IR name
  // "omp.arraycpy.dest.element"; this only affects the value name.
920  llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
921  SrcElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
  // The back-edge comes from the current insert block, which may differ
  // from BodyBB if the element initializer emitted additional blocks.
922  SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
923  }
924 
925  // Shift the address forward by one element.
926  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
927  DestElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
928  // Check whether we've reached the end.
929  llvm::Value *Done =
930  CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
931  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
932  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());
933 
934  // Done.
935  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
936 }
937 
938 LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
939  return CGF.EmitOMPSharedLValue(E);
940 }
941 
942 LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
943  const Expr *E) {
944  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
945  return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
946  return LValue();
947 }
948 
949 void ReductionCodeGen::emitAggregateInitialization(
950  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
951  const OMPDeclareReductionDecl *DRD) {
952  // Emit VarDecl with copy init for arrays.
953  // Get the address of the original variable captured in current
954  // captured region.
955  const auto *PrivateVD =
956  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
957  bool EmitDeclareReductionInit =
958  DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
959  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
960  EmitDeclareReductionInit,
961  EmitDeclareReductionInit ? ClausesData[N].ReductionOp
962  : PrivateVD->getInit(),
963  DRD, SharedLVal.getAddress());
964 }
965 
// NOTE(review): the opening line(s) of this definition (listing lines
// 966-967) are absent from this listing; judging by the members it fills it
// is presumably the ReductionCodeGen constructor - confirm against the full
// source. Shareds and Privates are used below and are assumed to be its
// other parameters.
//
// Pre-sizes the per-item containers and records one ClausesData entry
// (shared ref, private copy, reduction op) for every shared expression.
968  ArrayRef<const Expr *> ReductionOps) {
969  ClausesData.reserve(Shareds.size());
970  SharedAddresses.reserve(Shareds.size());
971  Sizes.reserve(Shareds.size());
972  BaseDecls.reserve(Shareds.size());
  // Privates and ReductionOps are advanced in lock-step with Shareds without
  // a bounds check - assumes all three ranges have the same length; TODO
  // confirm at call sites.
973  auto IPriv = Privates.begin();
974  auto IRed = ReductionOps.begin();
975  for (const Expr *Ref : Shareds) {
976  ClausesData.emplace_back(Ref, *IPriv, *IRed);
977  std::advance(IPriv, 1);
978  std::advance(IRed, 1);
979  }
980 }
981 
982 void ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, unsigned N) {
983  assert(SharedAddresses.size() == N &&
984  "Number of generated lvalues must be exactly N.");
985  LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
986  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
987  SharedAddresses.emplace_back(First, Second);
988 }
989 
// NOTE(review): the signature line of this definition (listing line 990) is
// absent from this listing; the body uses CGF and N, which are assumed to be
// its parameters.
//
// Computes the size of reduction item N and appends it to Sizes as a
// (size in chars, element count) pair. For types that are not variably
// modified the element count is nullptr.
991  const auto *PrivateVD =
992  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
993  QualType PrivateType = PrivateVD->getType();
994  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  // Fixed-size item: only the byte size is recorded.
995  if (!PrivateType->isVariablyModifiedType()) {
996  Sizes.emplace_back(
997  CGF.getTypeSize(
998  SharedAddresses[N].first.getType().getNonReferenceType()),
999  nullptr);
1000  return;
1001  }
1002  llvm::Value *Size;
1003  llvm::Value *SizeInChars;
1004  auto *ElemType =
1005  cast<llvm::PointerType>(SharedAddresses[N].first.getPointer()->getType())
1006  ->getElementType();
1007  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
1008  if (AsArraySection) {
  // Element count = (upper bound - lower bound) + 1, i.e. the upper-bound
  // lvalue is treated as inclusive.
1009  Size = CGF.Builder.CreatePtrDiff(SharedAddresses[N].second.getPointer(),
1010  SharedAddresses[N].first.getPointer());
1011  Size = CGF.Builder.CreateNUWAdd(
1012  Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
1013  SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
1014  } else {
  // Whole variable: derive the element count from the total byte size.
1015  SizeInChars = CGF.getTypeSize(
1016  SharedAddresses[N].first.getType().getNonReferenceType());
1017  Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
1018  }
1019  Sizes.emplace_back(SizeInChars, Size);
  // NOTE(review): listing line 1020 is absent; the arguments below belong to
  // a statement whose opening is not visible here. They pair the VLA size
  // expression of PrivateType with the computed element count.
1021  CGF,
1022  cast<OpaqueValueExpr>(
1023  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1024  RValue::get(Size));
1025  CGF.EmitVariablyModifiedType(PrivateType);
1026 }
1027 
// NOTE(review): the first signature line of this definition (listing line
// 1028) is absent from this listing; the body uses CGF and N in addition to
// the visible Size parameter.
//
// Variant that takes an already computed element count (Size) for reduction
// item N. For types that are not variably modified, Size (and the cached
// Sizes[N].second) must be null and nothing is emitted.
1029  llvm::Value *Size) {
1030  const auto *PrivateVD =
1031  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1032  QualType PrivateType = PrivateVD->getType();
1033  if (!PrivateType->isVariablyModifiedType()) {
1034  assert(!Size && !Sizes[N].second &&
1035  "Size should be nullptr for non-variably modified reduction "
1036  "items.");
1037  return;
1038  }
  // NOTE(review): listing line 1039 is absent; the arguments below belong to
  // a statement whose opening is not visible here. They pair the VLA size
  // expression of PrivateType with Size.
1040  CGF,
1041  cast<OpaqueValueExpr>(
1042  CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
1043  RValue::get(Size));
1044  CGF.EmitVariablyModifiedType(PrivateType);
1045 }
1046 
// NOTE(review): the first signature line of this definition (listing line
// 1047) is absent from this listing.
//
// Emits initialization of the private copy of reduction item N.
// PrivateAddr and SharedLVal are first re-typed to the memory types of the
// private and shared items. Then, in order of preference:
//   1. array types go through emitAggregateInitialization;
//   2. a declare-reduction with an initializer (or a private variable
//      without its own initializer) goes through
//      emitInitWithReductionInitializer;
//   3. otherwise DefaultInit is invoked, and only if it returns false and
//      the private variable has a non-trivial initializer is that
//      initializer emitted.
1048  CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal,
1049  llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
1050  assert(SharedAddresses.size() > N && "No variable was generated");
1051  const auto *PrivateVD =
1052  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1053  const OMPDeclareReductionDecl *DRD =
1054  getReductionInit(ClausesData[N].ReductionOp);
1055  QualType PrivateType = PrivateVD->getType();
1056  PrivateAddr = CGF.Builder.CreateElementBitCast(
1057  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1058  QualType SharedType = SharedAddresses[N].first.getType();
  // Rebuild the shared lvalue with the recorded shared type, keeping the
  // base info of the originally emitted shared lvalue.
1059  SharedLVal = CGF.MakeAddrLValue(
1060  CGF.Builder.CreateElementBitCast(SharedLVal.getAddress(),
1061  CGF.ConvertTypeForMem(SharedType)),
1062  SharedType, SharedAddresses[N].first.getBaseInfo(),
1063  CGF.CGM.getTBAAInfoForSubobject(SharedAddresses[N].first, SharedType));
1064  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
1065  emitAggregateInitialization(CGF, N, PrivateAddr, SharedLVal, DRD);
1066  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
1067  emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
1068  PrivateAddr, SharedLVal.getAddress(),
1069  SharedLVal.getType());
  // DefaultInit is always called when this branch is reached; its return
  // value decides whether the variable's own initializer is still needed.
1070  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
1071  !CGF.isTrivialInitializer(PrivateVD->getInit())) {
1072  CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
1073  PrivateVD->getType().getQualifiers(),
1074  /*IsInitializer=*/false);
1075  }
1076 }
1077 
// NOTE(review): the signature line of this definition (listing line 1078) is
// absent from this listing; the body uses N, assumed to be its parameter.
//
// Returns true when the type of the private copy of reduction item N has a
// destruction kind other than DK_none, i.e. it needs a destroy cleanup.
1079  const auto *PrivateVD =
1080  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1081  QualType PrivateType = PrivateVD->getType();
1082  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1083  return DTorKind != QualType::DK_none;
1084 }
1085 
// NOTE(review): the first signature line of this definition (listing line
// 1086) is absent from this listing; the body uses CGF and N in addition to
// the visible PrivateAddr parameter.
//
// Pushes a destroy cleanup for the private copy of reduction item N when
// needCleanups(N) reports that its type requires destruction.
1087  Address PrivateAddr) {
1088  const auto *PrivateVD =
1089  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
1090  QualType PrivateType = PrivateVD->getType();
1091  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
1092  if (needCleanups(N)) {
  // Re-type the address to the private type before registering the destroy.
1093  PrivateAddr = CGF.Builder.CreateElementBitCast(
1094  PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
1095  CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
1096  }
1097 }
1098 
// NOTE(review): the first signature line of this definition (listing line
// 1099) is absent from this listing; the body uses CGF, BaseTy and ElTy in
// addition to the visible BaseLV parameter.
//
// Starting at BaseLV (of type BaseTy), loads through every pointer or
// reference level until the type matches ElTy or is no longer a
// pointer/reference, then returns an lvalue for the resulting address.
1100  LValue BaseLV) {
1101  BaseTy = BaseTy.getNonReferenceType();
1102  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1103  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1104  if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
1105  BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
1106  } else {
1107  LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
1108  BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
1109  }
1110  BaseTy = BaseTy->getPointeeType();
1111  }
1112  return CGF.MakeAddrLValue(
  // NOTE(review): listing line 1113 is absent; the first part of the address
  // argument is not visible. The visible remainder converts to the memory
  // type of ElTy.
1114  CGF.ConvertTypeForMem(ElTy)),
1115  BaseLV.getType(), BaseLV.getBaseInfo(),
1116  CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
1117 }
1118 
// NOTE(review): the first signature line of this definition (listing line
// 1119) is absent from this listing; the body uses CGF, BaseTy and ElTy in
// addition to the visible parameters.
//
// Wraps Addr in as many memory temporaries as there are pointer/reference
// levels between BaseTy and ElTy. Each outer temporary stores the pointer to
// the next inner one and the innermost stores Addr (cast to its element
// type). Returns the outermost temporary, or - when no level had to be
// wrapped - Addr cast to BaseLVType with alignment BaseLVAlignment.
1120  llvm::Type *BaseLVType, CharUnits BaseLVAlignment,
1121  llvm::Value *Addr) {
1122  Address Tmp = Address::invalid();
1123  Address TopTmp = Address::invalid();
1124  Address MostTopTmp = Address::invalid();
1125  BaseTy = BaseTy.getNonReferenceType();
1126  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
1127  !CGF.getContext().hasSameType(BaseTy, ElTy)) {
1128  Tmp = CGF.CreateMemTemp(BaseTy);
  // Chain the new temporary into the previous level; the very first one is
  // remembered as the result.
1129  if (TopTmp.isValid())
1130  CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
1131  else
1132  MostTopTmp = Tmp;
1133  TopTmp = Tmp;
1134  BaseTy = BaseTy->getPointeeType();
1135  }
1136  llvm::Type *Ty = BaseLVType;
1137  if (Tmp.isValid())
1138  Ty = Tmp.getElementType();
1139  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Addr, Ty);
1140  if (Tmp.isValid()) {
1141  CGF.Builder.CreateStore(Addr, Tmp);
1142  return MostTopTmp;
1143  }
1144  return Address(Addr, BaseLVAlignment);
1145 }
1146 
1147 static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
1148  const VarDecl *OrigVD = nullptr;
1149  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
1150  const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
1151  while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
1152  Base = TempOASE->getBase()->IgnoreParenImpCasts();
1153  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1154  Base = TempASE->getBase()->IgnoreParenImpCasts();
1155  DE = cast<DeclRefExpr>(Base);
1156  OrigVD = cast<VarDecl>(DE->getDecl());
1157  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
1158  const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
1159  while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
1160  Base = TempASE->getBase()->IgnoreParenImpCasts();
1161  DE = cast<DeclRefExpr>(Base);
1162  OrigVD = cast<VarDecl>(DE->getDecl());
1163  }
1164  return OrigVD;
1165 }
1166 
// NOTE(review): the first signature line of this definition (listing line
// 1167) is absent from this listing; the body uses CGF and N in addition to
// the visible PrivateAddr parameter.
//
// Records the base declaration of reduction item N in BaseDecls and returns
// the address to use for the private copy. For array sections/subscripts
// (getBaseDecl returns a variable) the private pointer is shifted by the
// distance between the base variable's start and the shared item's start,
// then wrapped via castToBase. Otherwise PrivateAddr is returned unchanged.
1168  Address PrivateAddr) {
1169  const DeclRefExpr *DE;
1170  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
1171  BaseDecls.emplace_back(OrigVD);
1172  LValue OriginalBaseLValue = CGF.EmitLValue(DE);
1173  LValue BaseLValue =
1174  loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
1175  OriginalBaseLValue);
  // Adjustment = base begin - shared item begin (in elements).
1176  llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
1177  BaseLValue.getPointer(), SharedAddresses[N].first.getPointer());
1178  llvm::Value *PrivatePointer =
  // NOTE(review): listing line 1179 is absent; the call that takes the two
  // arguments below (private pointer, shared address type) is not visible.
1180  PrivateAddr.getPointer(),
1181  SharedAddresses[N].first.getAddress().getType());
1182  llvm::Value *Ptr = CGF.Builder.CreateGEP(PrivatePointer, Adjustment);
1183  return castToBase(CGF, OrigVD->getType(),
1184  SharedAddresses[N].first.getType(),
1185  OriginalBaseLValue.getAddress().getType(),
1186  OriginalBaseLValue.getAlignment(), Ptr);
1187  }
  // Plain variable reference: the item itself is the base declaration.
1188  BaseDecls.emplace_back(
1189  cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
1190  return PrivateAddr;
1191 }
1192 
// NOTE(review): the signature line of this definition (listing line 1193) is
// absent from this listing; the body uses N, assumed to be its parameter.
//
// Returns true when reduction item N's reduction op resolves (via
// getReductionInit) to a declare-reduction that has an initializer.
1194  const OMPDeclareReductionDecl *DRD =
1195  getReductionInit(ClausesData[N].ReductionOp);
1196  return DRD && DRD->getInitializer();
1197 }
1198 
1199 LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
1200  return CGF.EmitLoadOfPointerLValue(
1201  CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1202  getThreadIDVariable()->getType()->castAs<PointerType>());
1203 }
1204 
1205 void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
1206  if (!CGF.HaveInsertPoint())
1207  return;
1208  // 1.2.2 OpenMP Language Terminology
1209  // Structured block - An executable statement with a single entry at the
1210  // top and a single exit at the bottom.
1211  // The point of exit cannot be a branch out of the structured block.
1212  // longjmp() and throw() must not violate the entry/exit criteria.
1213  CGF.EHStack.pushTerminate();
1214  CodeGen(CGF);
1215  CGF.EHStack.popTerminate();
1216 }
1217 
// Returns the lvalue of the thread id variable for a task region. Unlike the
// CGOpenMPRegionInfo version, no load through a pointer is emitted: the
// lvalue is built directly on the variable's local address.
1218 LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1219  CodeGenFunction &CGF) {
1220  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
1221  getThreadIDVariable()->getType(),
  // NOTE(review): listing line 1222 (the final argument and the closing of
  // this call) is absent from this listing.
1223 }
1224 
// NOTE(review): the first signature line of this definition (listing line
// 1225) is absent from this listing; the body uses C and DC in addition to
// the visible FieldTy parameter.
//
// Creates an unnamed, public, non-mutable, non-bit-field field of type
// FieldTy with no in-class initializer, adds it to DC and returns it.
1226  QualType FieldTy) {
1227  auto *Field = FieldDecl::Create(
1228  C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
  // NOTE(review): listing line 1229 (one argument of this call) is absent.
1230  /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1231  Field->setAccess(AS_public);
1232  DC->addDecl(Field);
1233  return Field;
1234 }
1235 
// Constructs the OpenMP runtime helper. Builds the implicit record
// "ident_t" (four 32-bit signed integer fields followed by a void pointer)
// and caches both its QualType and its LLVM type, plus the array type used
// for critical-section names (8 x i32).
1236 CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
1237  StringRef Separator)
1238  : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
  // NOTE(review): listing line 1239 (the rest of the member-initializer list
  // and the opening brace) is absent from this listing.
1240  ASTContext &C = CGM.getContext();
1241  RecordDecl *RD = C.buildImplicitRecord("ident_t");
1242  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
1243  RD->startDefinition();
  // Field order matters: emitUpdateLocation indexes the fields positionally
  // (IdentField_PSource).
1244  // reserved_1
1245  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1246  // flags
1247  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1248  // reserved_2
1249  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1250  // reserved_3
1251  addFieldToRecordDecl(C, RD, KmpInt32Ty);
1252  // psource
1253  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
1254  RD->completeDefinition();
1255  IdentQTy = C.getRecordType(RD);
1256  IdentTy = CGM.getTypes().ConvertRecordDeclType(RD);
1257  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
1258 
  // NOTE(review): listing line 1259 (a final statement) is absent from this
  // listing.
1260 }
1261 
1262 void CGOpenMPRuntime::clear() {
1263  InternalVars.clear();
1264  // Clean non-target variable declarations possibly used only in debug info.
1265  for (const auto &Data : EmittedNonTargetVariables) {
1266  if (!Data.getValue().pointsToAliveValue())
1267  continue;
1268  auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
1269  if (!GV)
1270  continue;
1271  if (!GV->isDeclaration() || GV->getNumUses() > 0)
1272  continue;
1273  GV->eraseFromParent();
1274  }
1275 }
1276 
1277 std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1278  SmallString<128> Buffer;
1279  llvm::raw_svector_ostream OS(Buffer);
1280  StringRef Sep = FirstSeparator;
1281  for (StringRef Part : Parts) {
1282  OS << Sep << Part;
1283  Sep = Separator;
1284  }
1285  return OS.str();
1286 }
1287 
// Emits an internal helper function for a user-defined reduction: either the
// combiner (IsCombiner == true) or the initializer. The function takes two
// restrict-qualified pointers to Ty; the In and Out variables of the
// reduction are privatized to the pointees of those parameters while
// CombinerInitializer is emitted.
// NOTE(review): several lines of this definition (listing lines 1289, 1303,
// 1307, 1320 - including the line with the function name and its first
// parameters) are absent from this listing.
1288 static llvm::Function *
1290  const Expr *CombinerInitializer, const VarDecl *In,
1291  const VarDecl *Out, bool IsCombiner) {
1292  // void .omp_combiner.(Ty *in, Ty *out);
1293  ASTContext &C = CGM.getContext();
1294  QualType PtrTy = C.getPointerType(Ty).withRestrict();
1295  FunctionArgList Args;
1296  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
1297  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
1298  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
1299  /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  // Parameter order of the emitted function is (out, in).
1300  Args.push_back(&OmpOutParm);
1301  Args.push_back(&OmpInParm);
1302  const CGFunctionInfo &FnInfo =
  // NOTE(review): listing line 1303 (the initializer of FnInfo) is absent.
1304  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
1305  std::string Name = CGM.getOpenMPRuntime().getName(
1306  {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  // NOTE(review): listing line 1307 (the start of the statement creating Fn)
  // is absent.
1308  Name, &CGM.getModule());
1309  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  // With optimization enabled the helper is forced to be inlined.
1310  if (CGM.getLangOpts().Optimize) {
1311  Fn->removeFnAttr(llvm::Attribute::NoInline);
1312  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
1313  Fn->addFnAttr(llvm::Attribute::AlwaysInline);
1314  }
1315  CodeGenFunction CGF(CGM);
1316  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
1317  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
1318  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
1319  Out->getLocation());
  // NOTE(review): listing line 1320 (the declaration of Scope) is absent.
1321  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
1322  Scope.addPrivate(In, [&CGF, AddrIn, PtrTy]() {
1323  return CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
1324  .getAddress();
1325  });
1326  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
1327  Scope.addPrivate(Out, [&CGF, AddrOut, PtrTy]() {
1328  return CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
1329  .getAddress();
1330  });
1331  (void)Scope.Privatize();
  // For initializers only: emit the non-trivial initializer attached to the
  // Out variable itself before the initializer expression.
1332  if (!IsCombiner && Out->hasInit() &&
1333  !CGF.isTrivialInitializer(Out->getInit())) {
1334  CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
1335  Out->getType().getQualifiers(),
1336  /*IsInitializer=*/true);
1337  }
1338  if (CombinerInitializer)
1339  CGF.EmitIgnoredExpr(CombinerInitializer);
1340  Scope.ForceCleanup();
1341  CGF.FinishFunction();
1342  return Fn;
1343 }
1344 
// NOTE(review): the first signature line of this definition (listing line
// 1345) is absent from this listing.
//
// Emits the combiner and, when the declaration has an initializer, the
// initializer helper for declare-reduction D, and caches the pair in UDRMap.
// Does nothing if D is already cached. When CGF is non-null, D is also
// recorded against CGF->CurFn in FunctionUDRMap.
1346  CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
1347  if (UDRMap.count(D) > 0)
1348  return;
1349  llvm::Function *Combiner = emitCombinerOrInitializer(
1350  CGM, D->getType(), D->getCombiner(),
1351  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
1352  cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
1353  /*IsCombiner=*/true);
1354  llvm::Function *Initializer = nullptr;
1355  if (const Expr *Init = D->getInitializer()) {
1356  Initializer = emitCombinerOrInitializer(
1357  CGM, D->getType(),
  // NOTE(review): listing line 1358 (the condition of the conditional
  // expression whose ": nullptr" arm follows) is absent.
1359  : nullptr,
1360  cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
1361  cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
1362  /*IsCombiner=*/false);
1363  }
1364  UDRMap.try_emplace(D, Combiner, Initializer);
1365  if (CGF) {
1366  auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
1367  Decls.second.push_back(D);
1368  }
1369 }
1370 
// Returns the cached (combiner, initializer) pair for declare-reduction D,
// emitting it first (with no owning function) if it is not cached yet.
// NOTE(review): the line with the function name and parameter list (listing
// line 1372) is absent from this listing; the body uses D.
1371 std::pair<llvm::Function *, llvm::Function *>
1373  auto I = UDRMap.find(D);
1374  if (I != UDRMap.end())
1375  return I->second;
1376  emitUserDefinedReduction(/*CGF=*/nullptr, D);
1377  return UDRMap.lookup(D);
1378 }
1379 
// NOTE(review): the first signature line of this definition (listing line
// 1380) is absent from this listing.
//
// Generates the outlined function for captured statement CS of directive D.
// ThreadIDVar must have pointer type. HasCancel is taken from the directive
// when it is one of the parallel-based directive kinds checked below and is
// false otherwise.
1381  CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
1382  const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1383  const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
1384  assert(ThreadIDVar->getType()->isPointerType() &&
1385  "thread id variable must be of type kmp_int32 *");
1386  CodeGenFunction CGF(CGM, true);
1387  bool HasCancel = false;
1388  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
1389  HasCancel = OPD->hasCancel();
1390  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
1391  HasCancel = OPSD->hasCancel();
1392  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
1393  HasCancel = OPFD->hasCancel();
1394  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
1395  HasCancel = OPFD->hasCancel();
1396  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
1397  HasCancel = OPFD->hasCancel();
1398  else if (const auto *OPFD =
1399  dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
1400  HasCancel = OPFD->hasCancel();
1401  else if (const auto *OPFD =
1402  dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
1403  HasCancel = OPFD->hasCancel();
1404  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
1405  HasCancel, OutlinedHelperName);
  // Install CGInfo as the captured-statement info for the lifetime of
  // CapInfoRAII (i.e. while the outlined function is generated).
1406  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1407  return CGF.GenerateOpenMPCapturedStmtFunction(*CS);
1408 }
1409 
// NOTE(review): listing lines 1410 (first signature line) and 1414 (the
// start of the return statement) are absent from this listing.
//
// Uses the captured statement of the 'parallel' region of D; the visible
// arguments are forwarded together with getOutlinedHelperName().
1411  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1412  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1413  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
1415  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1416 }
1417 
// NOTE(review): listing lines 1418 (first signature line) and 1422 (the
// start of the return statement) are absent from this listing.
//
// Uses the captured statement of the 'teams' region of D; the visible
// arguments are forwarded together with getOutlinedHelperName().
1419  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1420  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
1421  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
1423  CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
1424 }
1425 
// NOTE(review): the first signature line of this definition (listing line
// 1426) is absent from this listing.
//
// Generates the outlined function for a task-like directive D ('taskloop'
// region for taskloop directives, 'task' region otherwise). ThreadIDVar must
// NOT have pointer type here. For untied tasks (Tied == false)
// NumberOfParts is set from the untied-task action after generation; for
// tied tasks it is left untouched.
1427  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1428  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
1429  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1430  bool Tied, unsigned &NumberOfParts) {
  // Callback handed to the untied-task action: emits a runtime call to
  // OMPRTL__kmpc_omp_task with (location, thread id, task pointer).
1431  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
1432  PrePostActionTy &) {
1433  llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
1434  llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
1435  llvm::Value *TaskArgs[] = {
1436  UpLoc, ThreadID,
1437  CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
1438  TaskTVar->getType()->castAs<PointerType>())
1439  .getPointer()};
1440  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task), TaskArgs);
1441  };
1442  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
1443  UntiedCodeGen);
1444  CodeGen.setAction(Action);
1445  assert(!ThreadIDVar->getType()->isPointerType() &&
1446  "thread id variable must be of type kmp_int32 for tasks");
1447  const OpenMPDirectiveKind Region =
1448  isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
1449  : OMPD_task;
1450  const CapturedStmt *CS = D.getCapturedStmt(Region);
  // Only OMPTaskDirective contributes a cancel flag; other directive kinds
  // use false.
1451  const auto *TD = dyn_cast<OMPTaskDirective>(&D);
1452  CodeGenFunction CGF(CGM, true);
1453  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
1454  InnermostKind,
1455  TD ? TD->hasCancel() : false, Action);
1456  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
1457  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
1458  if (!Tied)
1459  NumberOfParts = Action.getNumberOfParts();
1460  return Res;
1461 }
1462 
// NOTE(review): the first signature line of this definition (listing line
// 1463) is absent from this listing; the body uses Fields and CGM in
// addition to the visible parameters.
//
// Adds one constant from Data per field of RD to Fields, in field order.
// LLVM struct elements that lie between two consecutive fields (padding
// introduced by the record layout) are filled with null values.
// Data is consumed without a bounds check - assumes it holds at least one
// entry per field of RD; TODO confirm at call sites.
1464  const RecordDecl *RD, const CGRecordLayout &RL,
1465  ArrayRef<llvm::Constant *> Data) {
1466  llvm::StructType *StructTy = RL.getLLVMType();
1467  unsigned PrevIdx = 0;
  // NOTE(review): CIBuilder is constructed but not referenced in the visible
  // body.
1468  ConstantInitBuilder CIBuilder(CGM);
1469  auto DI = Data.begin();
1470  for (const FieldDecl *FD : RD->fields()) {
1471  unsigned Idx = RL.getLLVMFieldNo(FD);
1472  // Fill the alignment.
1473  for (unsigned I = PrevIdx; I < Idx; ++I)
1474  Fields.add(llvm::Constant::getNullValue(StructTy->getElementType(I)));
1475  PrevIdx = Idx + 1;
1476  Fields.add(*DI);
1477  ++DI;
1478  }
1479 }
1480 
// Creates a global variable named Name holding a constant struct of record
// type Ty whose fields are initialized from Data (see buildStructValue).
// The global uses the alignment reported for Ty as a global variable;
// IsConstant and the extra Args are forwarded to finishAndCreateGlobal.
// NOTE(review): the line with the function name and its first parameters
// (listing line 1483) is absent from this listing; the body uses CGM, Ty and
// IsConstant.
1481 template <class... As>
1482 static llvm::GlobalVariable *
1484  ArrayRef<llvm::Constant *> Data, const Twine &Name,
1485  As &&... Args) {
1486  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1487  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1488  ConstantInitBuilder CIBuilder(CGM);
1489  ConstantStructBuilder Fields = CIBuilder.beginStruct(RL.getLLVMType());
1490  buildStructValue(Fields, CGM, RD, RL, Data);
1491  return Fields.finishAndCreateGlobal(
1492  Name, CGM.getContext().getAlignOfGlobalVarInChars(Ty), IsConstant,
1493  std::forward<As>(Args)...);
1494 }
1495 
// Builds a constant struct of record type Ty from Data (see
// buildStructValue) as a nested aggregate of Parent and appends it to
// Parent.
// NOTE(review): the line with the function name and its first parameters
// (listing line 1498) is absent from this listing; the body uses CGM and Ty.
1496 template <typename T>
1497 static void
1499  ArrayRef<llvm::Constant *> Data,
1500  T &Parent) {
1501  const auto *RD = cast<RecordDecl>(Ty->getAsTagDecl());
1502  const CGRecordLayout &RL = CGM.getTypes().getCGRecordLayout(RD);
1503  ConstantStructBuilder Fields = Parent.beginStruct(RL.getLLVMType());
1504  buildStructValue(Fields, CGM, RD, RL, Data);
1505  Fields.finishAndAddTo(Parent);
1506 }
1507 
1508 Address CGOpenMPRuntime::getOrCreateDefaultLocation(unsigned Flags) {
1509  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1510  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
1511  FlagsTy FlagsKey(Flags, Reserved2Flags);
1512  llvm::Value *Entry = OpenMPDefaultLocMap.lookup(FlagsKey);
1513  if (!Entry) {
1514  if (!DefaultOpenMPPSource) {
1515  // Initialize default location for psource field of ident_t structure of
1516  // all ident_t objects. Format is ";file;function;line;column;;".
1517  // Taken from
1518  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp_str.cpp
1519  DefaultOpenMPPSource =
1520  CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
1521  DefaultOpenMPPSource =
1522  llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
1523  }
1524 
1525  llvm::Constant *Data[] = {
1526  llvm::ConstantInt::getNullValue(CGM.Int32Ty),
1527  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
1528  llvm::ConstantInt::get(CGM.Int32Ty, Reserved2Flags),
1529  llvm::ConstantInt::getNullValue(CGM.Int32Ty), DefaultOpenMPPSource};
1530  llvm::GlobalValue *DefaultOpenMPLocation =
1531  createGlobalStruct(CGM, IdentQTy, isDefaultLocationConstant(), Data, "",
1532  llvm::GlobalValue::PrivateLinkage);
1533  DefaultOpenMPLocation->setUnnamedAddr(
1534  llvm::GlobalValue::UnnamedAddr::Global);
1535 
1536  OpenMPDefaultLocMap[FlagsKey] = Entry = DefaultOpenMPLocation;
1537  }
1538  return Address(Entry, Align);
1539 }
1540 
// NOTE(review): the first signature line of this definition (listing line
// 1541) is absent from this listing; the body uses CGF in addition to the
// visible AtCurrentPoint parameter.
//
// Creates the per-function "service insert point": a placeholder bitcast of
// an undef i32 named "svcpt". It is appended to the current insert block
// when AtCurrentPoint is true, otherwise inserted right after
// CGF.AllocaInsertPt. The insert point must not already be set.
1542  bool AtCurrentPoint) {
1543  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1544  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");
1545 
1546  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
1547  if (AtCurrentPoint) {
1548  Elem.second.ServiceInsertPt = new llvm::BitCastInst(
1549  Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
1550  } else {
1551  Elem.second.ServiceInsertPt =
1552  new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
1553  Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
1554  }
1555 }
1556 
// NOTE(review): the signature line of this definition (listing line 1557) is
// absent from this listing; the body uses CGF, assumed to be its parameter.
//
// Removes the service insert point of the current function, if any: the
// cached pointer is reset first and the placeholder instruction is then
// erased from its parent.
1558  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1559  if (Elem.second.ServiceInsertPt) {
1560  llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
1561  Elem.second.ServiceInsertPt = nullptr;
1562  Ptr->eraseFromParent();
1563  }
1564 }
1565 
// NOTE(review): the first signature line of this definition (listing line
// 1566) is absent from this listing; the body uses CGF in addition to the
// visible Loc and Flags parameters.
//
// Returns a pointer to an ident_t describing source location Loc.
// Without debug info, or for an invalid Loc, the shared default location
// for Flags (with OMP_IDENT_KMPC added) is returned. Otherwise a
// per-function ident_t temporary is used: it is created on first use and
// initialized from the default location at the service insert point, and
// its psource field is then overwritten with a string describing Loc.
1567  SourceLocation Loc,
1568  unsigned Flags) {
1569  Flags |= OMP_IDENT_KMPC;
1570  // If no debug info is generated - return global default location.
1571  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
1572  Loc.isInvalid())
1573  return getOrCreateDefaultLocation(Flags).getPointer();
1574 
1575  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1576 
1577  CharUnits Align = CGM.getContext().getTypeAlignInChars(IdentQTy);
1578  Address LocValue = Address::invalid();
1579  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1580  if (I != OpenMPLocThreadIDMap.end())
1581  LocValue = Address(I->second.DebugLoc, Align);
1582 
1583  // OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
1584  // GetOpenMPThreadID was called before this routine.
1585  if (!LocValue.isValid()) {
1586  // Generate "ident_t .kmpc_loc.addr;"
1587  Address AI = CGF.CreateMemTemp(IdentQTy, ".kmpc_loc.addr");
1588  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1589  Elem.second.DebugLoc = AI.getPointer();
1590  LocValue = AI;
1591 
1592  if (!Elem.second.ServiceInsertPt)
  // NOTE(review): listing line 1593 (the statement controlled by the 'if'
  // above) is absent from this listing.
1594  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  // The memcpy from the default location is emitted at the service insert
  // point; IPG restores the previous insert point when this scope ends.
1595  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1596  CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
1597  CGF.getTypeSize(IdentQTy));
1598  }
1599 
1600  // char **psource = &.kmpc_loc_<flags>.addr.psource;
1601  LValue Base = CGF.MakeAddrLValue(LocValue, IdentQTy);
1602  auto Fields = cast<RecordDecl>(IdentQTy->getAsTagDecl())->field_begin();
1603  LValue PSource =
1604  CGF.EmitLValueForField(Base, *std::next(Fields, IdentField_PSource));
1605 
  // Location strings are cached per raw source-location encoding.
1606  llvm::Value *OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
1607  if (OMPDebugLoc == nullptr) {
1608  SmallString<128> Buffer2;
1609  llvm::raw_svector_ostream OS2(Buffer2);
1610  // Build debug location
  // NOTE(review): listing line 1611 (the declaration of PLoc) is absent from
  // this listing.
1612  OS2 << ";" << PLoc.getFilename() << ";";
1613  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
1614  OS2 << FD->getQualifiedNameAsString();
1615  OS2 << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1616  OMPDebugLoc = CGF.Builder.CreateGlobalStringPtr(OS2.str());
1617  OpenMPDebugLocMap[Loc.getRawEncoding()] = OMPDebugLoc;
1618  }
1619  // *psource = ";<File>;<Function>;<Line>;<Column>;;";
1620  CGF.EmitStoreOfScalar(OMPDebugLoc, PSource);
1621 
1622  // Our callers always pass this to a runtime function, so for
1623  // convenience, go ahead and return a naked pointer.
1624  return LocValue.getPointer();
1625 }
1626 
// NOTE(review): the first signature line of this definition (listing line
// 1627) is absent from this listing; the body uses CGF in addition to the
// visible Loc parameter.
//
// Returns the OpenMP thread id for the current function, in order of
// preference:
//   1. the value cached in OpenMPLocThreadIDMap for CGF.CurFn;
//   2. a load of the region's thread id variable, when inside an OpenMP
//      region that has one and the exception-related condition below allows
//      it (the load is cached only if emitted in the entry block);
//   3. a runtime call emitted at the service insert point, which is cached.
1628  SourceLocation Loc) {
1629  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1630 
1631  llvm::Value *ThreadID = nullptr;
1632  // Check whether we've already cached a load of the thread id in this
1633  // function.
1634  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
1635  if (I != OpenMPLocThreadIDMap.end()) {
1636  ThreadID = I->second.ThreadID;
1637  if (ThreadID != nullptr)
1638  return ThreadID;
1639  }
1640  // If exceptions are enabled, do not use parameter to avoid possible crash.
1641  if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1642  !CGF.getLangOpts().CXXExceptions ||
1643  CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1644  if (auto *OMPRegionInfo =
1645  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
1646  if (OMPRegionInfo->getThreadIDVariable()) {
1647  // Check if this is an outlined function with thread id passed as argument.
1648  LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1649  ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
1650  // If value loaded in entry block, cache it and use it everywhere in
1651  // function.
1652  if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
1653  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1654  Elem.second.ThreadID = ThreadID;
1655  }
1656  return ThreadID;
1657  }
1658  }
1659  }
1660 
1661  // This is not an outlined function region - need to call __kmpc_int32
1662  // kmpc_global_thread_num(ident_t *loc).
1663  // Generate thread id value and cache this value for use across the
1664  // function.
1665  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
1666  if (!Elem.second.ServiceInsertPt)
  // NOTE(review): listing line 1667 (the statement controlled by the 'if'
  // above) is absent from this listing.
1668  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
1669  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
1670  llvm::CallInst *Call = CGF.Builder.CreateCall(
  // NOTE(review): listing line 1671 (the callee argument of this call) is
  // absent from this listing.
1672  emitUpdateLocation(CGF, Loc));
1673  Call->setCallingConv(CGF.getRuntimeCC());
1674  Elem.second.ThreadID = Call;
1675  return Call;
1676 }
1677 
1679  assert(CGF.CurFn && "No function in current CodeGenFunction.");
1680  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
1682  OpenMPLocThreadIDMap.erase(CGF.CurFn);
1683  }
1684  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
1685  for(auto *D : FunctionUDRMap[CGF.CurFn])
1686  UDRMap.erase(D);
1687  FunctionUDRMap.erase(CGF.CurFn);
1688  }
1689 }
1690 
1692  return IdentTy->getPointerTo();
1693 }
1694 
1696  if (!Kmpc_MicroTy) {
1697  // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
1698  llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
1699  llvm::PointerType::getUnqual(CGM.Int32Ty)};
1700  Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
1701  }
1702  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
1703 }
1704 
1705 llvm::FunctionCallee CGOpenMPRuntime::createRuntimeFunction(unsigned Function) {
1706  llvm::FunctionCallee RTLFn = nullptr;
1707  switch (static_cast<OpenMPRTLFunction>(Function)) {
1708  case OMPRTL__kmpc_fork_call: {
1709  // Build void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro
1710  // microtask, ...);
1711  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1713  auto *FnTy =
1714  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
1715  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_call");
1716  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
1717  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
1718  llvm::LLVMContext &Ctx = F->getContext();
1719  llvm::MDBuilder MDB(Ctx);
1720  // Annotate the callback behavior of the __kmpc_fork_call:
1721  // - The callback callee is argument number 2 (microtask).
1722  // - The first two arguments of the callback callee are unknown (-1).
1723  // - All variadic arguments to the __kmpc_fork_call are passed to the
1724  // callback callee.
1725  F->addMetadata(
1726  llvm::LLVMContext::MD_callback,
1727  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
1728  2, {-1, -1},
1729  /* VarArgsArePassed */ true)}));
1730  }
1731  }
1732  break;
1733  }
1735  // Build kmp_int32 __kmpc_global_thread_num(ident_t *loc);
1736  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1737  auto *FnTy =
1738  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1739  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_global_thread_num");
1740  break;
1741  }
1743  // Build void *__kmpc_threadprivate_cached(ident_t *loc,
1744  // kmp_int32 global_tid, void *data, size_t size, void ***cache);
1745  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1747  CGM.VoidPtrTy->getPointerTo()->getPointerTo()};
1748  auto *FnTy =
1749  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg*/ false);
1750  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_cached");
1751  break;
1752  }
1753  case OMPRTL__kmpc_critical: {
1754  // Build void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1755  // kmp_critical_name *crit);
1756  llvm::Type *TypeParams[] = {
1758  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1759  auto *FnTy =
1760  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1761  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical");
1762  break;
1763  }
1765  // Build void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1766  // kmp_critical_name *crit, uintptr_t hint);
1767  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1768  llvm::PointerType::getUnqual(KmpCriticalNameTy),
1769  CGM.IntPtrTy};
1770  auto *FnTy =
1771  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1772  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_critical_with_hint");
1773  break;
1774  }
1776  // Build void __kmpc_threadprivate_register(ident_t *, void *data,
1777  // kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);
1778  // typedef void *(*kmpc_ctor)(void *);
1779  auto *KmpcCtorTy =
1780  llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
1781  /*isVarArg*/ false)->getPointerTo();
1782  // typedef void *(*kmpc_cctor)(void *, void *);
1783  llvm::Type *KmpcCopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1784  auto *KmpcCopyCtorTy =
1785  llvm::FunctionType::get(CGM.VoidPtrTy, KmpcCopyCtorTyArgs,
1786  /*isVarArg*/ false)
1787  ->getPointerTo();
1788  // typedef void (*kmpc_dtor)(void *);
1789  auto *KmpcDtorTy =
1790  llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy, /*isVarArg*/ false)
1791  ->getPointerTo();
1792  llvm::Type *FnTyArgs[] = {getIdentTyPointerTy(), CGM.VoidPtrTy, KmpcCtorTy,
1793  KmpcCopyCtorTy, KmpcDtorTy};
1794  auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, FnTyArgs,
1795  /*isVarArg*/ false);
1796  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_threadprivate_register");
1797  break;
1798  }
1800  // Build void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1801  // kmp_critical_name *crit);
1802  llvm::Type *TypeParams[] = {
1804  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1805  auto *FnTy =
1806  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1807  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_critical");
1808  break;
1809  }
1811  // Build kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32
1812  // global_tid);
1813  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1814  auto *FnTy =
1815  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
1816  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_cancel_barrier");
1817  break;
1818  }
1819  case OMPRTL__kmpc_barrier: {
1820  // Build void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid);
1821  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1822  auto *FnTy =
1823  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1824  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name*/ "__kmpc_barrier");
1825  break;
1826  }
1828  // Build void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);
1829  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1830  auto *FnTy =
1831  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1832  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_for_static_fini");
1833  break;
1834  }
1836  // Build void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
1837  // kmp_int32 num_threads)
1838  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1839  CGM.Int32Ty};
1840  auto *FnTy =
1841  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1842  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_threads");
1843  break;
1844  }
1846  // Build void __kmpc_serialized_parallel(ident_t *loc, kmp_int32
1847  // global_tid);
1848  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1849  auto *FnTy =
1850  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1851  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_serialized_parallel");
1852  break;
1853  }
1855  // Build void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32
1856  // global_tid);
1857  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1858  auto *FnTy =
1859  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1860  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_serialized_parallel");
1861  break;
1862  }
1863  case OMPRTL__kmpc_flush: {
1864  // Build void __kmpc_flush(ident_t *loc);
1865  llvm::Type *TypeParams[] = {getIdentTyPointerTy()};
1866  auto *FnTy =
1867  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
1868  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
1869  break;
1870  }
1871  case OMPRTL__kmpc_master: {
1872  // Build kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid);
1873  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1874  auto *FnTy =
1875  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1876  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_master");
1877  break;
1878  }
1879  case OMPRTL__kmpc_end_master: {
1880  // Build void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid);
1881  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1882  auto *FnTy =
1883  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1884  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
1885  break;
1886  }
1888  // Build kmp_int32 __kmpc_omp_taskyield(ident_t *, kmp_int32 global_tid,
1889  // int end_part);
1890  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
1891  auto *FnTy =
1892  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1893  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_taskyield");
1894  break;
1895  }
1896  case OMPRTL__kmpc_single: {
1897  // Build kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid);
1898  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1899  auto *FnTy =
1900  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1901  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_single");
1902  break;
1903  }
1904  case OMPRTL__kmpc_end_single: {
1905  // Build void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid);
1906  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
1907  auto *FnTy =
1908  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1909  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_single");
1910  break;
1911  }
1913  // Build kmp_task_t *__kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
1914  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1915  // kmp_routine_entry_t *task_entry);
1916  assert(KmpRoutineEntryPtrTy != nullptr &&
1917  "Type kmp_routine_entry_t must be created.");
1918  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1919  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy};
1920  // Return void * and then cast to particular kmp_task_t type.
1921  auto *FnTy =
1922  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1923  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_alloc");
1924  break;
1925  }
1927  // Build kmp_task_t *__kmpc_omp_target_task_alloc(ident_t *, kmp_int32 gtid,
1928  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
1929  // kmp_routine_entry_t *task_entry, kmp_int64 device_id);
1930  assert(KmpRoutineEntryPtrTy != nullptr &&
1931  "Type kmp_routine_entry_t must be created.");
1932  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
1933  CGM.SizeTy, CGM.SizeTy, KmpRoutineEntryPtrTy,
1934  CGM.Int64Ty};
1935  // Return void * and then cast to particular kmp_task_t type.
1936  auto *FnTy =
1937  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
1938  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_target_task_alloc");
1939  break;
1940  }
1941  case OMPRTL__kmpc_omp_task: {
1942  // Build kmp_int32 __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
1943  // *new_task);
1944  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
1945  CGM.VoidPtrTy};
1946  auto *FnTy =
1947  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1948  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task");
1949  break;
1950  }
1951  case OMPRTL__kmpc_copyprivate: {
1952  // Build void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,
1953  // size_t cpy_size, void *cpy_data, void(*cpy_func)(void *, void *),
1954  // kmp_int32 didit);
1955  llvm::Type *CpyTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1956  auto *CpyFnTy =
1957  llvm::FunctionType::get(CGM.VoidTy, CpyTypeParams, /*isVarArg=*/false);
1958  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.SizeTy,
1959  CGM.VoidPtrTy, CpyFnTy->getPointerTo(),
1960  CGM.Int32Ty};
1961  auto *FnTy =
1962  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
1963  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_copyprivate");
1964  break;
1965  }
1966  case OMPRTL__kmpc_reduce: {
1967  // Build kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid,
1968  // kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void
1969  // (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name *lck);
1970  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1971  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1972  /*isVarArg=*/false);
1973  llvm::Type *TypeParams[] = {
1975  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1976  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1977  auto *FnTy =
1978  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1979  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce");
1980  break;
1981  }
1983  // Build kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32
1984  // global_tid, kmp_int32 num_vars, size_t reduce_size, void *reduce_data,
1985  // void (*reduce_func)(void *lhs_data, void *rhs_data), kmp_critical_name
1986  // *lck);
1987  llvm::Type *ReduceTypeParams[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
1988  auto *ReduceFnTy = llvm::FunctionType::get(CGM.VoidTy, ReduceTypeParams,
1989  /*isVarArg=*/false);
1990  llvm::Type *TypeParams[] = {
1992  CGM.VoidPtrTy, ReduceFnTy->getPointerTo(),
1993  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
1994  auto *FnTy =
1995  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
1996  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_reduce_nowait");
1997  break;
1998  }
1999  case OMPRTL__kmpc_end_reduce: {
2000  // Build void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
2001  // kmp_critical_name *lck);
2002  llvm::Type *TypeParams[] = {
2004  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2005  auto *FnTy =
2006  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2007  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce");
2008  break;
2009  }
2011  // Build __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
2012  // kmp_critical_name *lck);
2013  llvm::Type *TypeParams[] = {
2015  llvm::PointerType::getUnqual(KmpCriticalNameTy)};
2016  auto *FnTy =
2017  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2018  RTLFn =
2019  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_reduce_nowait");
2020  break;
2021  }
2023  // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2024  // *new_task);
2025  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2026  CGM.VoidPtrTy};
2027  auto *FnTy =
2028  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2029  RTLFn =
2030  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_begin_if0");
2031  break;
2032  }
2034  // Build void __kmpc_omp_task(ident_t *, kmp_int32 gtid, kmp_task_t
2035  // *new_task);
2036  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2037  CGM.VoidPtrTy};
2038  auto *FnTy =
2039  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2040  RTLFn = CGM.CreateRuntimeFunction(FnTy,
2041  /*Name=*/"__kmpc_omp_task_complete_if0");
2042  break;
2043  }
2044  case OMPRTL__kmpc_ordered: {
2045  // Build void __kmpc_ordered(ident_t *loc, kmp_int32 global_tid);
2046  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2047  auto *FnTy =
2048  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2049  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_ordered");
2050  break;
2051  }
2052  case OMPRTL__kmpc_end_ordered: {
2053  // Build void __kmpc_end_ordered(ident_t *loc, kmp_int32 global_tid);
2054  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2055  auto *FnTy =
2056  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2057  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_ordered");
2058  break;
2059  }
2061  // Build kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32 global_tid);
2062  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2063  auto *FnTy =
2064  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2065  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_omp_taskwait");
2066  break;
2067  }
2068  case OMPRTL__kmpc_taskgroup: {
2069  // Build void __kmpc_taskgroup(ident_t *loc, kmp_int32 global_tid);
2070  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2071  auto *FnTy =
2072  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2073  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_taskgroup");
2074  break;
2075  }
2077  // Build void __kmpc_end_taskgroup(ident_t *loc, kmp_int32 global_tid);
2078  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2079  auto *FnTy =
2080  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2081  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_end_taskgroup");
2082  break;
2083  }
2085  // Build void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
2086  // int proc_bind)
2087  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2088  auto *FnTy =
2089  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2090  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_proc_bind");
2091  break;
2092  }
2094  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
2095  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
2096  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list);
2097  llvm::Type *TypeParams[] = {
2100  auto *FnTy =
2101  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg=*/false);
2102  RTLFn =
2103  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_task_with_deps");
2104  break;
2105  }
2107  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
2108  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
2109  // kmp_depend_info_t *noalias_dep_list);
2110  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2113  auto *FnTy =
2114  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2115  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_omp_wait_deps");
2116  break;
2117  }
2119  // Build kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
2120  // global_tid, kmp_int32 cncl_kind)
2121  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2122  auto *FnTy =
2123  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2124  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancellationpoint");
2125  break;
2126  }
2127  case OMPRTL__kmpc_cancel: {
2128  // Build kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
2129  // kmp_int32 cncl_kind)
2130  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.IntTy};
2131  auto *FnTy =
2132  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2133  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_cancel");
2134  break;
2135  }
2137  // Build void kmpc_push_num_teams (ident_t loc, kmp_int32 global_tid,
2138  // kmp_int32 num_teams, kmp_int32 num_threads)
2139  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty, CGM.Int32Ty,
2140  CGM.Int32Ty};
2141  auto *FnTy =
2142  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2143  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_num_teams");
2144  break;
2145  }
2146  case OMPRTL__kmpc_fork_teams: {
2147  // Build void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro
2148  // microtask, ...);
2149  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2151  auto *FnTy =
2152  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ true);
2153  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_fork_teams");
2154  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
2155  if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
2156  llvm::LLVMContext &Ctx = F->getContext();
2157  llvm::MDBuilder MDB(Ctx);
2158  // Annotate the callback behavior of the __kmpc_fork_teams:
2159  // - The callback callee is argument number 2 (microtask).
2160  // - The first two arguments of the callback callee are unknown (-1).
2161  // - All variadic arguments to the __kmpc_fork_teams are passed to the
2162  // callback callee.
2163  F->addMetadata(
2164  llvm::LLVMContext::MD_callback,
2165  *llvm::MDNode::get(Ctx, {MDB.createCallbackEncoding(
2166  2, {-1, -1},
2167  /* VarArgsArePassed */ true)}));
2168  }
2169  }
2170  break;
2171  }
2172  case OMPRTL__kmpc_taskloop: {
2173  // Build void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
2174  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
2175  // sched, kmp_uint64 grainsize, void *task_dup);
2176  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2177  CGM.IntTy,
2178  CGM.VoidPtrTy,
2179  CGM.IntTy,
2180  CGM.Int64Ty->getPointerTo(),
2181  CGM.Int64Ty->getPointerTo(),
2182  CGM.Int64Ty,
2183  CGM.IntTy,
2184  CGM.IntTy,
2185  CGM.Int64Ty,
2186  CGM.VoidPtrTy};
2187  auto *FnTy =
2188  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2189  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_taskloop");
2190  break;
2191  }
2193  // Build void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32
2194  // num_dims, struct kmp_dim *dims);
2195  llvm::Type *TypeParams[] = {getIdentTyPointerTy(),
2196  CGM.Int32Ty,
2197  CGM.Int32Ty,
2198  CGM.VoidPtrTy};
2199  auto *FnTy =
2200  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2201  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_init");
2202  break;
2203  }
2205  // Build void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);
2206  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty};
2207  auto *FnTy =
2208  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2209  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_fini");
2210  break;
2211  }
2213  // Build void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64
2214  // *vec);
2215  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2216  CGM.Int64Ty->getPointerTo()};
2217  auto *FnTy =
2218  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2219  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_post");
2220  break;
2221  }
2223  // Build void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64
2224  // *vec);
2225  llvm::Type *TypeParams[] = {getIdentTyPointerTy(), CGM.Int32Ty,
2226  CGM.Int64Ty->getPointerTo()};
2227  auto *FnTy =
2228  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2229  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_doacross_wait");
2230  break;
2231  }
2233  // Build void *__kmpc_task_reduction_init(int gtid, int num_data, void
2234  // *data);
2235  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.IntTy, CGM.VoidPtrTy};
2236  auto *FnTy =
2237  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2238  RTLFn =
2239  CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_task_reduction_init");
2240  break;
2241  }
2243  // Build void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
2244  // *d);
2245  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2246  auto *FnTy =
2247  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2248  RTLFn = CGM.CreateRuntimeFunction(
2249  FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
2250  break;
2251  }
2252  case OMPRTL__kmpc_alloc: {
2253  // Build to void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t
2254  // al); omp_allocator_handle_t type is void *.
2255  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.SizeTy, CGM.VoidPtrTy};
2256  auto *FnTy =
2257  llvm::FunctionType::get(CGM.VoidPtrTy, TypeParams, /*isVarArg=*/false);
2258  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_alloc");
2259  break;
2260  }
2261  case OMPRTL__kmpc_free: {
2262  // Build to void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t
2263  // al); omp_allocator_handle_t type is void *.
2264  llvm::Type *TypeParams[] = {CGM.IntTy, CGM.VoidPtrTy, CGM.VoidPtrTy};
2265  auto *FnTy =
2266  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2267  RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_free");
2268  break;
2269  }
2271  // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
2272  // size);
2273  llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
2274  llvm::FunctionType *FnTy =
2275  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2276  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
2277  break;
2278  }
2279  case OMPRTL__tgt_target: {
2280  // Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
2281  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2282  // *arg_types);
2283  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2284  CGM.VoidPtrTy,
2285  CGM.Int32Ty,
2286  CGM.VoidPtrPtrTy,
2287  CGM.VoidPtrPtrTy,
2288  CGM.Int64Ty->getPointerTo(),
2289  CGM.Int64Ty->getPointerTo()};
2290  auto *FnTy =
2291  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2292  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
2293  break;
2294  }
2296  // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
2297  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2298  // int64_t *arg_types);
2299  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2300  CGM.VoidPtrTy,
2301  CGM.Int32Ty,
2302  CGM.VoidPtrPtrTy,
2303  CGM.VoidPtrPtrTy,
2304  CGM.Int64Ty->getPointerTo(),
2305  CGM.Int64Ty->getPointerTo()};
2306  auto *FnTy =
2307  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2308  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
2309  break;
2310  }
2311  case OMPRTL__tgt_target_teams: {
2312  // Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
2313  // int32_t arg_num, void** args_base, void **args, int64_t *arg_sizes,
2314  // int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2315  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2316  CGM.VoidPtrTy,
2317  CGM.Int32Ty,
2318  CGM.VoidPtrPtrTy,
2319  CGM.VoidPtrPtrTy,
2320  CGM.Int64Ty->getPointerTo(),
2321  CGM.Int64Ty->getPointerTo(),
2322  CGM.Int32Ty,
2323  CGM.Int32Ty};
2324  auto *FnTy =
2325  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2326  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
2327  break;
2328  }
2330  // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
2331  // *host_ptr, int32_t arg_num, void** args_base, void **args, int64_t
2332  // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
2333  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2334  CGM.VoidPtrTy,
2335  CGM.Int32Ty,
2336  CGM.VoidPtrPtrTy,
2337  CGM.VoidPtrPtrTy,
2338  CGM.Int64Ty->getPointerTo(),
2339  CGM.Int64Ty->getPointerTo(),
2340  CGM.Int32Ty,
2341  CGM.Int32Ty};
2342  auto *FnTy =
2343  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2344  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
2345  break;
2346  }
2348  // Build void __tgt_register_requires(int64_t flags);
2349  llvm::Type *TypeParams[] = {CGM.Int64Ty};
2350  auto *FnTy =
2351  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2352  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_requires");
2353  break;
2354  }
2355  case OMPRTL__tgt_register_lib: {
2356  // Build void __tgt_register_lib(__tgt_bin_desc *desc);
2357  QualType ParamTy =
2359  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2360  auto *FnTy =
2361  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2362  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_register_lib");
2363  break;
2364  }
2366  // Build void __tgt_unregister_lib(__tgt_bin_desc *desc);
2367  QualType ParamTy =
2369  llvm::Type *TypeParams[] = {CGM.getTypes().ConvertTypeForMem(ParamTy)};
2370  auto *FnTy =
2371  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2372  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_unregister_lib");
2373  break;
2374  }
2376  // Build void __tgt_target_data_begin(int64_t device_id, int32_t arg_num,
2377  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2378  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2379  CGM.Int32Ty,
2380  CGM.VoidPtrPtrTy,
2381  CGM.VoidPtrPtrTy,
2382  CGM.Int64Ty->getPointerTo(),
2383  CGM.Int64Ty->getPointerTo()};
2384  auto *FnTy =
2385  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2386  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin");
2387  break;
2388  }
2390  // Build void __tgt_target_data_begin_nowait(int64_t device_id, int32_t
2391  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2392  // *arg_types);
2393  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2394  CGM.Int32Ty,
2395  CGM.VoidPtrPtrTy,
2396  CGM.VoidPtrPtrTy,
2397  CGM.Int64Ty->getPointerTo(),
2398  CGM.Int64Ty->getPointerTo()};
2399  auto *FnTy =
2400  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2401  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_begin_nowait");
2402  break;
2403  }
2405  // Build void __tgt_target_data_end(int64_t device_id, int32_t arg_num,
2406  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2407  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2408  CGM.Int32Ty,
2409  CGM.VoidPtrPtrTy,
2410  CGM.VoidPtrPtrTy,
2411  CGM.Int64Ty->getPointerTo(),
2412  CGM.Int64Ty->getPointerTo()};
2413  auto *FnTy =
2414  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2415  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end");
2416  break;
2417  }
2419  // Build void __tgt_target_data_end_nowait(int64_t device_id, int32_t
2420  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2421  // *arg_types);
2422  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2423  CGM.Int32Ty,
2424  CGM.VoidPtrPtrTy,
2425  CGM.VoidPtrPtrTy,
2426  CGM.Int64Ty->getPointerTo(),
2427  CGM.Int64Ty->getPointerTo()};
2428  auto *FnTy =
2429  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2430  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_end_nowait");
2431  break;
2432  }
2434  // Build void __tgt_target_data_update(int64_t device_id, int32_t arg_num,
2435  // void** args_base, void **args, int64_t *arg_sizes, int64_t *arg_types);
2436  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2437  CGM.Int32Ty,
2438  CGM.VoidPtrPtrTy,
2439  CGM.VoidPtrPtrTy,
2440  CGM.Int64Ty->getPointerTo(),
2441  CGM.Int64Ty->getPointerTo()};
2442  auto *FnTy =
2443  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2444  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update");
2445  break;
2446  }
2448  // Build void __tgt_target_data_update_nowait(int64_t device_id, int32_t
2449  // arg_num, void** args_base, void **args, int64_t *arg_sizes, int64_t
2450  // *arg_types);
2451  llvm::Type *TypeParams[] = {CGM.Int64Ty,
2452  CGM.Int32Ty,
2453  CGM.VoidPtrPtrTy,
2454  CGM.VoidPtrPtrTy,
2455  CGM.Int64Ty->getPointerTo(),
2456  CGM.Int64Ty->getPointerTo()};
2457  auto *FnTy =
2458  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2459  RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_data_update_nowait");
2460  break;
2461  }
2462  }
2463  assert(RTLFn && "Unable to find OpenMP runtime function");
2464  return RTLFn;
2465 }
2466 
2467 llvm::FunctionCallee
2468 CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
2469  assert((IVSize == 32 || IVSize == 64) &&
2470  "IV size is not compatible with the omp runtime");
2471  StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
2472  : "__kmpc_for_static_init_4u")
2473  : (IVSigned ? "__kmpc_for_static_init_8"
2474  : "__kmpc_for_static_init_8u");
2475  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2476  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2477  llvm::Type *TypeParams[] = {
2478  getIdentTyPointerTy(), // loc
2479  CGM.Int32Ty, // tid
2480  CGM.Int32Ty, // schedtype
2481  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2482  PtrTy, // p_lower
2483  PtrTy, // p_upper
2484  PtrTy, // p_stride
2485  ITy, // incr
2486  ITy // chunk
2487  };
2488  auto *FnTy =
2489  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2490  return CGM.CreateRuntimeFunction(FnTy, Name);
2491 }
2492 
2493 llvm::FunctionCallee
2494 CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
2495  assert((IVSize == 32 || IVSize == 64) &&
2496  "IV size is not compatible with the omp runtime");
2497  StringRef Name =
2498  IVSize == 32
2499  ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
2500  : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
2501  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2502  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
2503  CGM.Int32Ty, // tid
2504  CGM.Int32Ty, // schedtype
2505  ITy, // lower
2506  ITy, // upper
2507  ITy, // stride
2508  ITy // chunk
2509  };
2510  auto *FnTy =
2511  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
2512  return CGM.CreateRuntimeFunction(FnTy, Name);
2513 }
2514 
2515 llvm::FunctionCallee
2516 CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
2517  assert((IVSize == 32 || IVSize == 64) &&
2518  "IV size is not compatible with the omp runtime");
2519  StringRef Name =
2520  IVSize == 32
2521  ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
2522  : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
2523  llvm::Type *TypeParams[] = {
2524  getIdentTyPointerTy(), // loc
2525  CGM.Int32Ty, // tid
2526  };
2527  auto *FnTy =
2528  llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
2529  return CGM.CreateRuntimeFunction(FnTy, Name);
2530 }
2531 
2532 llvm::FunctionCallee
2533 CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
2534  assert((IVSize == 32 || IVSize == 64) &&
2535  "IV size is not compatible with the omp runtime");
2536  StringRef Name =
2537  IVSize == 32
2538  ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
2539  : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
2540  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
2541  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
2542  llvm::Type *TypeParams[] = {
2543  getIdentTyPointerTy(), // loc
2544  CGM.Int32Ty, // tid
2545  llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
2546  PtrTy, // p_lower
2547  PtrTy, // p_upper
2548  PtrTy // p_stride
2549  };
2550  auto *FnTy =
2551  llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
2552  return CGM.CreateRuntimeFunction(FnTy, Name);
2553 }
2554 
2556  if (CGM.getLangOpts().OpenMPSimd)
2557  return Address::invalid();
2559  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2560  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
2561  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2563  SmallString<64> PtrName;
2564  {
2565  llvm::raw_svector_ostream OS(PtrName);
2566  OS << CGM.getMangledName(GlobalDecl(VD)) << "_decl_tgt_ref_ptr";
2567  }
2568  llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
2569  if (!Ptr) {
2570  QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
2572  PtrName);
2573  if (!CGM.getLangOpts().OpenMPIsDevice) {
2574  auto *GV = cast<llvm::GlobalVariable>(Ptr);
2575  GV->setLinkage(llvm::GlobalValue::ExternalLinkage);
2576  GV->setInitializer(CGM.GetAddrOfGlobal(VD));
2577  }
2578  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ptr));
2579  registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
2580  }
2581  return Address(Ptr, CGM.getContext().getDeclAlign(VD));
2582  }
2583  return Address::invalid();
2584 }
2585 
2586 llvm::Constant *
2588  assert(!CGM.getLangOpts().OpenMPUseTLS ||
2590  // Lookup the entry, lazily creating it if necessary.
2591  std::string Suffix = getName({"cache", ""});
2593  CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
2594 }
2595 
2597  const VarDecl *VD,
2598  Address VDAddr,
2599  SourceLocation Loc) {
2600  if (CGM.getLangOpts().OpenMPUseTLS &&
2602  return VDAddr;
2603 
2604  llvm::Type *VarTy = VDAddr.getElementType();
2605  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2606  CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
2607  CGM.Int8PtrTy),
2610  return Address(CGF.EmitRuntimeCall(
2612  VDAddr.getAlignment());
2613 }
2614 
2616  CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
2617  llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
2618  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
2619  // library.
2620  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
2622  OMPLoc);
2623  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
2624  // to register constructor/destructor for variable.
2625  llvm::Value *Args[] = {
2626  OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
2627  Ctor, CopyCtor, Dtor};
2628  CGF.EmitRuntimeCall(
2630 }
2631 
2633  const VarDecl *VD, Address VDAddr, SourceLocation Loc,
2634  bool PerformInit, CodeGenFunction *CGF) {
2635  if (CGM.getLangOpts().OpenMPUseTLS &&
2637  return nullptr;
2638 
2639  VD = VD->getDefinition(CGM.getContext());
2640  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
2641  QualType ASTTy = VD->getType();
2642 
2643  llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
2644  const Expr *Init = VD->getAnyInitializer();
2645  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2646  // Generate function that re-emits the declaration's initializer into the
2647  // threadprivate copy of the variable VD
2648  CodeGenFunction CtorCGF(CGM);
2649  FunctionArgList Args;
2650  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2651  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2653  Args.push_back(&Dst);
2654 
2655  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2656  CGM.getContext().VoidPtrTy, Args);
2657  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2658  std::string Name = getName({"__kmpc_global_ctor_", ""});
2659  llvm::Function *Fn =
2660  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2661  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
2662  Args, Loc, Loc);
2663  llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
2664  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2665  CGM.getContext().VoidPtrTy, Dst.getLocation());
2666  Address Arg = Address(ArgVal, VDAddr.getAlignment());
2667  Arg = CtorCGF.Builder.CreateElementBitCast(
2668  Arg, CtorCGF.ConvertTypeForMem(ASTTy));
2669  CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
2670  /*IsInitializer=*/true);
2671  ArgVal = CtorCGF.EmitLoadOfScalar(
2672  CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
2673  CGM.getContext().VoidPtrTy, Dst.getLocation());
2674  CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
2675  CtorCGF.FinishFunction();
2676  Ctor = Fn;
2677  }
2678  if (VD->getType().isDestructedType() != QualType::DK_none) {
2679  // Generate function that emits destructor call for the threadprivate copy
2680  // of the variable VD
2681  CodeGenFunction DtorCGF(CGM);
2682  FunctionArgList Args;
2683  ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
2684  /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
2686  Args.push_back(&Dst);
2687 
2688  const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
2689  CGM.getContext().VoidTy, Args);
2690  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2691  std::string Name = getName({"__kmpc_global_dtor_", ""});
2692  llvm::Function *Fn =
2693  CGM.CreateGlobalInitOrDestructFunction(FTy, Name, FI, Loc);
2694  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2695  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
2696  Loc, Loc);
2697  // Create a scope with an artificial location for the body of this function.
2698  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2699  llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
2700  DtorCGF.GetAddrOfLocalVar(&Dst),
2701  /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
2702  DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
2703  DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2704  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2705  DtorCGF.FinishFunction();
2706  Dtor = Fn;
2707  }
2708  // Do not emit init function if it is not required.
2709  if (!Ctor && !Dtor)
2710  return nullptr;
2711 
2712  llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
2713  auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
2714  /*isVarArg=*/false)
2715  ->getPointerTo();
2716  // Copying constructor for the threadprivate variable.
2717  // Must be NULL - reserved by runtime, but currently it requires that this
2718  // parameter is always NULL. Otherwise it fires assertion.
2719  CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
2720  if (Ctor == nullptr) {
2721  auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
2722  /*isVarArg=*/false)
2723  ->getPointerTo();
2724  Ctor = llvm::Constant::getNullValue(CtorTy);
2725  }
2726  if (Dtor == nullptr) {
2727  auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
2728  /*isVarArg=*/false)
2729  ->getPointerTo();
2730  Dtor = llvm::Constant::getNullValue(DtorTy);
2731  }
2732  if (!CGF) {
2733  auto *InitFunctionTy =
2734  llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
2735  std::string Name = getName({"__omp_threadprivate_init_", ""});
2736  llvm::Function *InitFunction = CGM.CreateGlobalInitOrDestructFunction(
2737  InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
2738  CodeGenFunction InitCGF(CGM);
2739  FunctionArgList ArgList;
2740  InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
2741  CGM.getTypes().arrangeNullaryFunction(), ArgList,
2742  Loc, Loc);
2743  emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2744  InitCGF.FinishFunction();
2745  return InitFunction;
2746  }
2747  emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
2748  }
2749  return nullptr;
2750 }
2751 
2752 /// Obtain information that uniquely identifies a target entry. This
2753 /// consists of the file and device IDs as well as the line number associated
2754 /// with the relevant entry source location.
2756  unsigned &DeviceID, unsigned &FileID,
2757  unsigned &LineNum) {
2759 
2760  // The loc should be always valid and have a file ID (the user cannot use
2761  // #pragma directives in macros)
2762 
2763  assert(Loc.isValid() && "Source location is expected to be always valid.");
2764 
2765  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
2766  assert(PLoc.isValid() && "Source location is expected to be always valid.");
2767 
2768  llvm::sys::fs::UniqueID ID;
2769  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
2770  SM.getDiagnostics().Report(diag::err_cannot_open_file)
2771  << PLoc.getFilename() << EC.message();
2772 
2773  DeviceID = ID.getDevice();
2774  FileID = ID.getFile();
2775  LineNum = PLoc.getLine();
2776 }
2777 
2779  llvm::GlobalVariable *Addr,
2780  bool PerformInit) {
2782  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
2783  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
2784  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
2786  return CGM.getLangOpts().OpenMPIsDevice;
2787  VD = VD->getDefinition(CGM.getContext());
2788  if (VD && !DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
2789  return CGM.getLangOpts().OpenMPIsDevice;
2790 
2791  QualType ASTTy = VD->getType();
2792 
2794  // Produce the unique prefix to identify the new target regions. We use
2795  // the source location of the variable declaration which we know to not
2796  // conflict with any target region.
2797  unsigned DeviceID;
2798  unsigned FileID;
2799  unsigned Line;
2800  getTargetEntryUniqueInfo(CGM.getContext(), Loc, DeviceID, FileID, Line);
2801  SmallString<128> Buffer, Out;
2802  {
2803  llvm::raw_svector_ostream OS(Buffer);
2804  OS << "__omp_offloading_" << llvm::format("_%x", DeviceID)
2805  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
2806  }
2807 
2808  const Expr *Init = VD->getAnyInitializer();
2809  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
2810  llvm::Constant *Ctor;
2811  llvm::Constant *ID;
2812  if (CGM.getLangOpts().OpenMPIsDevice) {
2813  // Generate function that re-emits the declaration's initializer into
2814  // the threadprivate copy of the variable VD
2815  CodeGenFunction CtorCGF(CGM);
2816 
2818  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2819  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2820  FTy, Twine(Buffer, "_ctor"), FI, Loc);
2821  auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
2822  CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2823  FunctionArgList(), Loc, Loc);
2824  auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
2825  CtorCGF.EmitAnyExprToMem(Init,
2826  Address(Addr, CGM.getContext().getDeclAlign(VD)),
2827  Init->getType().getQualifiers(),
2828  /*IsInitializer=*/true);
2829  CtorCGF.FinishFunction();
2830  Ctor = Fn;
2831  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2832  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Ctor));
2833  } else {
2834  Ctor = new llvm::GlobalVariable(
2835  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2836  llvm::GlobalValue::PrivateLinkage,
2837  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
2838  ID = Ctor;
2839  }
2840 
2841  // Register the information for the entry associated with the constructor.
2842  Out.clear();
2844  DeviceID, FileID, Twine(Buffer, "_ctor").toStringRef(Out), Line, Ctor,
2846  }
2847  if (VD->getType().isDestructedType() != QualType::DK_none) {
2848  llvm::Constant *Dtor;
2849  llvm::Constant *ID;
2850  if (CGM.getLangOpts().OpenMPIsDevice) {
2851  // Generate function that emits destructor call for the threadprivate
2852  // copy of the variable VD
2853  CodeGenFunction DtorCGF(CGM);
2854 
2856  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
2857  llvm::Function *Fn = CGM.CreateGlobalInitOrDestructFunction(
2858  FTy, Twine(Buffer, "_dtor"), FI, Loc);
2859  auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
2860  DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
2861  FunctionArgList(), Loc, Loc);
2862  // Create a scope with an artificial location for the body of this
2863  // function.
2864  auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
2865  DtorCGF.emitDestroy(Address(Addr, CGM.getContext().getDeclAlign(VD)),
2866  ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
2867  DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
2868  DtorCGF.FinishFunction();
2869  Dtor = Fn;
2870  ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
2871  CGM.addUsedGlobal(cast<llvm::GlobalValue>(Dtor));
2872  } else {
2873  Dtor = new llvm::GlobalVariable(
2874  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
2875  llvm::GlobalValue::PrivateLinkage,
2876  llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
2877  ID = Dtor;
2878  }
2879  // Register the information for the entry associated with the destructor.
2880  Out.clear();
2882  DeviceID, FileID, Twine(Buffer, "_dtor").toStringRef(Out), Line, Dtor,
2884  }
2885  return CGM.getLangOpts().OpenMPIsDevice;
2886 }
2887 
2889  QualType VarType,
2890  StringRef Name) {
2891  std::string Suffix = getName({"artificial", ""});
2892  std::string CacheSuffix = getName({"cache", ""});
2893  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
2894  llvm::Value *GAddr =
2895  getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
2896  llvm::Value *Args[] = {
2898  getThreadID(CGF, SourceLocation()),
2900  CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
2901  /*isSigned=*/false),
2903  CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
2904  return Address(
2906  CGF.EmitRuntimeCall(
2908  VarLVType->getPointerTo(/*AddrSpace=*/0)),
2909  CGM.getPointerAlign());
2910 }
2911 
2913  const RegionCodeGenTy &ThenGen,
2914  const RegionCodeGenTy &ElseGen) {
2915  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
2916 
2917  // If the condition constant folds and can be elided, try to avoid emitting
2918  // the condition and the dead arm of the if/else.
2919  bool CondConstant;
2920  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
2921  if (CondConstant)
2922  ThenGen(CGF);
2923  else
2924  ElseGen(CGF);
2925  return;
2926  }
2927 
2928  // Otherwise, the condition did not fold, or we couldn't elide it. Just
2929  // emit the conditional branch.
2930  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
2931  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
2932  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
2933  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);
2934 
2935  // Emit the 'then' code.
2936  CGF.EmitBlock(ThenBlock);
2937  ThenGen(CGF);
2938  CGF.EmitBranch(ContBlock);
2939  // Emit the 'else' code if present.
2940  // There is no need to emit line number for unconditional branch.
2942  CGF.EmitBlock(ElseBlock);
2943  ElseGen(CGF);
2944  // There is no need to emit line number for unconditional branch.
2946  CGF.EmitBranch(ContBlock);
2947  // Emit the continuation block for code after the if.
2948  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
2949 }
2950 
2952  llvm::Function *OutlinedFn,
2953  ArrayRef<llvm::Value *> CapturedVars,
2954  const Expr *IfCond) {
2955  if (!CGF.HaveInsertPoint())
2956  return;
2957  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
2958  auto &&ThenGen = [OutlinedFn, CapturedVars, RTLoc](CodeGenFunction &CGF,
2959  PrePostActionTy &) {
2960  // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
2961  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2962  llvm::Value *Args[] = {
2963  RTLoc,
2964  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
2965  CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
2967  RealArgs.append(std::begin(Args), std::end(Args));
2968  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
2969 
2970  llvm::FunctionCallee RTLFn =
2971  RT.createRuntimeFunction(OMPRTL__kmpc_fork_call);
2972  CGF.EmitRuntimeCall(RTLFn, RealArgs);
2973  };
2974  auto &&ElseGen = [OutlinedFn, CapturedVars, RTLoc, Loc](CodeGenFunction &CGF,
2975  PrePostActionTy &) {
2976  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
2977  llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
2978  // Build calls:
2979  // __kmpc_serialized_parallel(&Loc, GTid);
2980  llvm::Value *Args[] = {RTLoc, ThreadID};
2981  CGF.EmitRuntimeCall(
2982  RT.createRuntimeFunction(OMPRTL__kmpc_serialized_parallel), Args);
2983 
2984  // OutlinedFn(&GTid, &zero, CapturedStruct);
2985  Address ZeroAddr = CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
2986  /*Name*/ ".zero.addr");
2987  CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
2988  llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
2989  // ThreadId for serialized parallels is 0.
2990  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2991  OutlinedFnArgs.push_back(ZeroAddr.getPointer());
2992  OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
2993  RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
2994 
2995  // __kmpc_end_serialized_parallel(&Loc, GTid);
2996  llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
2997  CGF.EmitRuntimeCall(
2998  RT.createRuntimeFunction(OMPRTL__kmpc_end_serialized_parallel),
2999  EndArgs);
3000  };
3001  if (IfCond) {
3002  emitOMPIfClause(CGF, IfCond, ThenGen, ElseGen);
3003  } else {
3004  RegionCodeGenTy ThenRCG(ThenGen);
3005  ThenRCG(CGF);
3006  }
3007 }
3008 
3009 // If we're inside an (outlined) parallel region, use the region info's
3010 // thread-ID variable (it is passed as the first argument of the outlined
3011 // function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
3012 // region but in a regular serial code region, get the thread ID by calling
3013 // kmp_int32 __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
3014 // temporary and return the address of that temp.
3016  SourceLocation Loc) {
3017  if (auto *OMPRegionInfo =
3018  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3019  if (OMPRegionInfo->getThreadIDVariable())
3020  return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
3021 
3022  llvm::Value *ThreadID = getThreadID(CGF, Loc);
3023  QualType Int32Ty =
3024  CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
3025  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
3026  CGF.EmitStoreOfScalar(ThreadID,
3027  CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
3028 
3029  return ThreadIDTemp;
3030 }
3031 
3033  llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
3034  SmallString<256> Buffer;
3035  llvm::raw_svector_ostream Out(Buffer);
3036  Out << Name;
3037  StringRef RuntimeName = Out.str();
3038  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
3039  if (Elem.second) {
3040  assert(Elem.second->getType()->getPointerElementType() == Ty &&
3041  "OMP internal variable has different type than requested");
3042  return &*Elem.second;
3043  }
3044 
3045  return Elem.second = new llvm::GlobalVariable(
3046  CGM.getModule(), Ty, /*IsConstant*/ false,
3047  llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
3048  Elem.first(), /*InsertBefore=*/nullptr,
3049  llvm::GlobalValue::NotThreadLocal, AddressSpace);
3050 }
3051 
3053  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
3054  std::string Name = getName({Prefix, "var"});
3055  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
3056 }
3057 
3058 namespace {
3059 /// Common pre(post)-action for different OpenMP constructs.
3060 class CommonActionTy final : public PrePostActionTy {
3061  llvm::FunctionCallee EnterCallee;
3062  ArrayRef<llvm::Value *> EnterArgs;
3063  llvm::FunctionCallee ExitCallee;
3064  ArrayRef<llvm::Value *> ExitArgs;
3065  bool Conditional;
3066  llvm::BasicBlock *ContBlock = nullptr;
3067 
3068 public:
3069  CommonActionTy(llvm::FunctionCallee EnterCallee,
3070  ArrayRef<llvm::Value *> EnterArgs,
3071  llvm::FunctionCallee ExitCallee,
3072  ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
3073  : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
3074  ExitArgs(ExitArgs), Conditional(Conditional) {}
3075  void Enter(CodeGenFunction &CGF) override {
3076  llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
3077  if (Conditional) {
3078  llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
3079  auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
3080  ContBlock = CGF.createBasicBlock("omp_if.end");
3081  // Generate the branch (If-stmt)
3082  CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
3083  CGF.EmitBlock(ThenBlock);
3084  }
3085  }
3086  void Done(CodeGenFunction &CGF) {
3087  // Emit the rest of blocks/branches
3088  CGF.EmitBranch(ContBlock);
3089  CGF.EmitBlock(ContBlock, true);
3090  }
3091  void Exit(CodeGenFunction &CGF) override {
3092  CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
3093  }
3094 };
3095 } // anonymous namespace
3096 
3098  StringRef CriticalName,
3099  const RegionCodeGenTy &CriticalOpGen,
3100  SourceLocation Loc, const Expr *Hint) {
3101  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
3102  // CriticalOpGen();
3103  // __kmpc_end_critical(ident_t *, gtid, Lock);
3104  // Prepare arguments and build a call to __kmpc_critical
3105  if (!CGF.HaveInsertPoint())
3106  return;
3107  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3108  getCriticalRegionLock(CriticalName)};
3109  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
3110  std::end(Args));
3111  if (Hint) {
3112  EnterArgs.push_back(CGF.Builder.CreateIntCast(
3113  CGF.EmitScalarExpr(Hint), CGM.IntPtrTy, /*isSigned=*/false));
3114  }
3115  CommonActionTy Action(
3119  CriticalOpGen.setAction(Action);
3120  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
3121 }
3122 
3124  const RegionCodeGenTy &MasterOpGen,
3125  SourceLocation Loc) {
3126  if (!CGF.HaveInsertPoint())
3127  return;
3128  // if(__kmpc_master(ident_t *, gtid)) {
3129  // MasterOpGen();
3130  // __kmpc_end_master(ident_t *, gtid);
3131  // }
3132  // Prepare arguments and build a call to __kmpc_master
3133  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3134  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_master), Args,
3136  /*Conditional=*/true);
3137  MasterOpGen.setAction(Action);
3138  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
3139  Action.Done(CGF);
3140 }
3141 
3143  SourceLocation Loc) {
3144  if (!CGF.HaveInsertPoint())
3145  return;
3146  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
3147  llvm::Value *Args[] = {
3148  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3149  llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
3151  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
3152  Region->emitUntiedSwitch(CGF);
3153 }
3154 
3156  const RegionCodeGenTy &TaskgroupOpGen,
3157  SourceLocation Loc) {
3158  if (!CGF.HaveInsertPoint())
3159  return;
3160  // __kmpc_taskgroup(ident_t *, gtid);
3161  // TaskgroupOpGen();
3162  // __kmpc_end_taskgroup(ident_t *, gtid);
3163  // Prepare arguments and build a call to __kmpc_taskgroup
3164  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3165  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_taskgroup), Args,
3167  Args);
3168  TaskgroupOpGen.setAction(Action);
3169  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
3170 }
3171 
3172 /// Given an array of pointers to variables, project the address of a
3173 /// given variable.
3175  unsigned Index, const VarDecl *Var) {
3176  // Pull out the pointer to the variable.
3177  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
3178  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
3179 
3180  Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var));
3181  Addr = CGF.Builder.CreateElementBitCast(
3182  Addr, CGF.ConvertTypeForMem(Var->getType()));
3183  return Addr;
3184 }
3185 
3187  CodeGenModule &CGM, llvm::Type *ArgsType,
3188  ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
3189  ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
3190  SourceLocation Loc) {
3191  ASTContext &C = CGM.getContext();
3192  // void copy_func(void *LHSArg, void *RHSArg);
3193  FunctionArgList Args;
3194  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3196  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
3198  Args.push_back(&LHSArg);
3199  Args.push_back(&RHSArg);
3200  const auto &CGFI =
3202  std::string Name =
3203  CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
3204  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
3206  &CGM.getModule());
3207  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
3208  Fn->setDoesNotRecurse();
3209  CodeGenFunction CGF(CGM);
3210  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
3211  // Dest = (void*[n])(LHSArg);
3212  // Src = (void*[n])(RHSArg);
3214  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
3215  ArgsType), CGF.getPointerAlign());
3217  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
3218  ArgsType), CGF.getPointerAlign());
3219  // *(Type0*)Dst[0] = *(Type0*)Src[0];
3220  // *(Type1*)Dst[1] = *(Type1*)Src[1];
3221  // ...
3222  // *(Typen*)Dst[n] = *(Typen*)Src[n];
3223  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
3224  const auto *DestVar =
3225  cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
3226  Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
3227 
3228  const auto *SrcVar =
3229  cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
3230  Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
3231 
3232  const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
3233  QualType Type = VD->getType();
3234  CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
3235  }
3236  CGF.FinishFunction();
3237  return Fn;
3238 }
3239 
3241  const RegionCodeGenTy &SingleOpGen,
3242  SourceLocation Loc,
3243  ArrayRef<const Expr *> CopyprivateVars,
3244  ArrayRef<const Expr *> SrcExprs,
3245  ArrayRef<const Expr *> DstExprs,
3246  ArrayRef<const Expr *> AssignmentOps) {
3247  if (!CGF.HaveInsertPoint())
3248  return;
3249  assert(CopyprivateVars.size() == SrcExprs.size() &&
3250  CopyprivateVars.size() == DstExprs.size() &&
3251  CopyprivateVars.size() == AssignmentOps.size());
3252  ASTContext &C = CGM.getContext();
3253  // int32 did_it = 0;
3254  // if(__kmpc_single(ident_t *, gtid)) {
3255  // SingleOpGen();
3256  // __kmpc_end_single(ident_t *, gtid);
3257  // did_it = 1;
3258  // }
3259  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3260  // <copy_func>, did_it);
3261 
3262  Address DidIt = Address::invalid();
3263  if (!CopyprivateVars.empty()) {
3264  // int32 did_it = 0;
3265  QualType KmpInt32Ty =
3266  C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3267  DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
3268  CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
3269  }
3270  // Prepare arguments and build a call to __kmpc_single
3271  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3272  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_single), Args,
3274  /*Conditional=*/true);
3275  SingleOpGen.setAction(Action);
3276  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
3277  if (DidIt.isValid()) {
3278  // did_it = 1;
3279  CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
3280  }
3281  Action.Done(CGF);
3282  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
3283  // <copy_func>, did_it);
3284  if (DidIt.isValid()) {
3285  llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
3286  QualType CopyprivateArrayTy =
3287  C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
3288  /*IndexTypeQuals=*/0);
3289  // Create a list of all private variables for copyprivate.
3290  Address CopyprivateList =
3291  CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
3292  for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
3293  Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
3294  CGF.Builder.CreateStore(
3296  CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
3297  Elem);
3298  }
3299  // Build function that copies private values from single region to all other
3300  // threads in the corresponding parallel region.
3302  CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy)->getPointerTo(),
3303  CopyprivateVars, SrcExprs, DstExprs, AssignmentOps, Loc);
3304  llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
3305  Address CL =
3306  CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
3307  CGF.VoidPtrTy);
3308  llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
3309  llvm::Value *Args[] = {
3310  emitUpdateLocation(CGF, Loc), // ident_t *<loc>
3311  getThreadID(CGF, Loc), // i32 <gtid>
3312  BufSize, // size_t <buf_size>
3313  CL.getPointer(), // void *<copyprivate list>
3314  CpyFn, // void (*) (void *, void *) <copy_func>
3315  DidItVal // i32 did_it
3316  };
3318  }
3319 }
3320 
3322  const RegionCodeGenTy &OrderedOpGen,
3323  SourceLocation Loc, bool IsThreads) {
3324  if (!CGF.HaveInsertPoint())
3325  return;
3326  // __kmpc_ordered(ident_t *, gtid);
3327  // OrderedOpGen();
3328  // __kmpc_end_ordered(ident_t *, gtid);
3329  // Prepare arguments and build a call to __kmpc_ordered
3330  if (IsThreads) {
3331  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3332  CommonActionTy Action(createRuntimeFunction(OMPRTL__kmpc_ordered), Args,
3334  Args);
3335  OrderedOpGen.setAction(Action);
3336  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3337  return;
3338  }
3339  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
3340 }
3341 
3343  unsigned Flags;
3344  if (Kind == OMPD_for)
3345  Flags = OMP_IDENT_BARRIER_IMPL_FOR;
3346  else if (Kind == OMPD_sections)
3347  Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
3348  else if (Kind == OMPD_single)
3349  Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
3350  else if (Kind == OMPD_barrier)
3351  Flags = OMP_IDENT_BARRIER_EXPL;
3352  else
3353  Flags = OMP_IDENT_BARRIER_IMPL;
3354  return Flags;
3355 }
3356 
3358  CodeGenFunction &CGF, const OMPLoopDirective &S,
3359  OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
3360  // Check if the loop directive is actually a doacross loop directive. In this
3361  // case choose static, 1 schedule.
3362  if (llvm::any_of(
3364  [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
3365  ScheduleKind = OMPC_SCHEDULE_static;
3366  // Chunk size is 1 in this case.
3367  llvm::APInt ChunkSize(32, 1);
3368  ChunkExpr = IntegerLiteral::Create(
3369  CGF.getContext(), ChunkSize,
3370  CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
3371  SourceLocation());
3372  }
3373 }
3374 
3376  OpenMPDirectiveKind Kind, bool EmitChecks,
3377  bool ForceSimpleCall) {
3378  if (!CGF.HaveInsertPoint())
3379  return;
3380  // Build call __kmpc_cancel_barrier(loc, thread_id);
3381  // Build call __kmpc_barrier(loc, thread_id);
3382  unsigned Flags = getDefaultFlagsForBarriers(Kind);
3383  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
3384  // thread_id);
3385  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
3386  getThreadID(CGF, Loc)};
3387  if (auto *OMPRegionInfo =
3388  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
3389  if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
3390  llvm::Value *Result = CGF.EmitRuntimeCall(
3392  if (EmitChecks) {
3393  // if (__kmpc_cancel_barrier()) {
3394  // exit from construct;
3395  // }
3396  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
3397  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
3398  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
3399  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
3400  CGF.EmitBlock(ExitBB);
3401  // exit from construct;
3402  CodeGenFunction::JumpDest CancelDestination =
3403  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
3404  CGF.EmitBranchThroughCleanup(CancelDestination);
3405  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
3406  }
3407  return;
3408  }
3409  }
3411 }
3412 
3413 /// Map the OpenMP loop schedule to the runtime enumeration.
3415  bool Chunked, bool Ordered) {
3416  switch (ScheduleKind) {
3417  case OMPC_SCHEDULE_static:
3418  return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
3419  : (Ordered ? OMP_ord_static : OMP_sch_static);
3420  case OMPC_SCHEDULE_dynamic:
3422  case OMPC_SCHEDULE_guided:
3424  case OMPC_SCHEDULE_runtime:
3425  return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
3426  case OMPC_SCHEDULE_auto:
3427  return Ordered ? OMP_ord_auto : OMP_sch_auto;
3428  case OMPC_SCHEDULE_unknown:
3429  assert(!Chunked && "chunk was specified but schedule kind not known");
3430  return Ordered ? OMP_ord_static : OMP_sch_static;
3431  }
3432  llvm_unreachable("Unexpected runtime schedule");
3433 }
3434 
3435 /// Map the OpenMP distribute schedule to the runtime enumeration.
3436 static OpenMPSchedType
3438  // only static is allowed for dist_schedule
3440 }
3441 
3443  bool Chunked) const {
3444  OpenMPSchedType Schedule =
3445  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3446  return Schedule == OMP_sch_static;
3447 }
3448 
3450  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3451  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3452  return Schedule == OMP_dist_sch_static;
3453 }
3454 
3456  bool Chunked) const {
3457  OpenMPSchedType Schedule =
3458  getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
3459  return Schedule == OMP_sch_static_chunked;
3460 }
3461 
3463  OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
3464  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
3465  return Schedule == OMP_dist_sch_static_chunked;
3466 }
3467 
3469  OpenMPSchedType Schedule =
3470  getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
3471  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
3472  return Schedule != OMP_sch_static;
3473 }
3474 
3478  int Modifier = 0;
3479  switch (M1) {
3480  case OMPC_SCHEDULE_MODIFIER_monotonic:
3481  Modifier = OMP_sch_modifier_monotonic;
3482  break;
3483  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3484  Modifier = OMP_sch_modifier_nonmonotonic;
3485  break;
3486  case OMPC_SCHEDULE_MODIFIER_simd:
3487  if (Schedule == OMP_sch_static_chunked)
3489  break;
3492  break;
3493  }
3494  switch (M2) {
3495  case OMPC_SCHEDULE_MODIFIER_monotonic:
3496  Modifier = OMP_sch_modifier_monotonic;
3497  break;
3498  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
3499  Modifier = OMP_sch_modifier_nonmonotonic;
3500  break;
3501  case OMPC_SCHEDULE_MODIFIER_simd:
3502  if (Schedule == OMP_sch_static_chunked)
3504  break;
3507  break;
3508  }
3509  return Schedule | Modifier;
3510 }
3511 
3513  CodeGenFunction &CGF, SourceLocation Loc,
3514  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
3515  bool Ordered, const DispatchRTInput &DispatchValues) {
3516  if (!CGF.HaveInsertPoint())
3517  return;
3519  ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
3520  assert(Ordered ||
3521  (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
3522  Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
3523  Schedule != OMP_sch_static_balanced_chunked));
3524  // Call __kmpc_dispatch_init(
3525  // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
3526  // kmp_int[32|64] lower, kmp_int[32|64] upper,
3527  // kmp_int[32|64] stride, kmp_int[32|64] chunk);
3528 
3529  // If the Chunk was not specified in the clause - use default value 1.
3530  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
3531  : CGF.Builder.getIntN(IVSize, 1);
3532  llvm::Value *Args[] = {
3533  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3534  CGF.Builder.getInt32(addMonoNonMonoModifier(
3535  Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
3536  DispatchValues.LB, // Lower
3537  DispatchValues.UB, // Upper
3538  CGF.Builder.getIntN(IVSize, 1), // Stride
3539  Chunk // Chunk
3540  };
3541  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
3542 }
3543 
3545  CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
3546  llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
3548  const CGOpenMPRuntime::StaticRTInput &Values) {
3549  if (!CGF.HaveInsertPoint())
3550  return;
3551 
3552  assert(!Values.Ordered);
3553  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
3554  Schedule == OMP_sch_static_balanced_chunked ||
3555  Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
3556  Schedule == OMP_dist_sch_static ||
3557  Schedule == OMP_dist_sch_static_chunked);
3558 
3559  // Call __kmpc_for_static_init(
3560  // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
3561  // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
3562  // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
3563  // kmp_int[32|64] incr, kmp_int[32|64] chunk);
3564  llvm::Value *Chunk = Values.Chunk;
3565  if (Chunk == nullptr) {
3566  assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
3567  Schedule == OMP_dist_sch_static) &&
3568  "expected static non-chunked schedule");
3569  // If the Chunk was not specified in the clause - use default value 1.
3570  Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
3571  } else {
3572  assert((Schedule == OMP_sch_static_chunked ||
3573  Schedule == OMP_sch_static_balanced_chunked ||
3574  Schedule == OMP_ord_static_chunked ||
3575  Schedule == OMP_dist_sch_static_chunked) &&
3576  "expected static chunked schedule");
3577  }
3578  llvm::Value *Args[] = {
3579  UpdateLocation,
3580  ThreadId,
3581  CGF.Builder.getInt32(addMonoNonMonoModifier(Schedule, M1,
3582  M2)), // Schedule type
3583  Values.IL.getPointer(), // &isLastIter
3584  Values.LB.getPointer(), // &LB
3585  Values.UB.getPointer(), // &UB
3586  Values.ST.getPointer(), // &Stride
3587  CGF.Builder.getIntN(Values.IVSize, 1), // Incr
3588  Chunk // Chunk
3589  };
3590  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
3591 }
3592 
3594  SourceLocation Loc,
3595  OpenMPDirectiveKind DKind,
3596  const OpenMPScheduleTy &ScheduleKind,
3597  const StaticRTInput &Values) {
3598  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
3599  ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
3600  assert(isOpenMPWorksharingDirective(DKind) &&
3601  "Expected loop-based or sections-based directive.");
3602  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
3603  isOpenMPLoopDirective(DKind)
3604  ? OMP_IDENT_WORK_LOOP
3605  : OMP_IDENT_WORK_SECTIONS);
3606  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3607  llvm::FunctionCallee StaticInitFunction =
3609  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3610  ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
3611 }
3612 
3614  CodeGenFunction &CGF, SourceLocation Loc,
3615  OpenMPDistScheduleClauseKind SchedKind,
3616  const CGOpenMPRuntime::StaticRTInput &Values) {
3617  OpenMPSchedType ScheduleNum =
3618  getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
3619  llvm::Value *UpdatedLocation =
3620  emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
3621  llvm::Value *ThreadId = getThreadID(CGF, Loc);
3622  llvm::FunctionCallee StaticInitFunction =
3623  createForStaticInitFunction(Values.IVSize, Values.IVSigned);
3624  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
3625  ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
3627 }
3628 
3630  SourceLocation Loc,
3631  OpenMPDirectiveKind DKind) {
3632  if (!CGF.HaveInsertPoint())
3633  return;
3634  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
3635  llvm::Value *Args[] = {
3636  emitUpdateLocation(CGF, Loc,
3638  ? OMP_IDENT_WORK_DISTRIBUTE
3639  : isOpenMPLoopDirective(DKind)
3640  ? OMP_IDENT_WORK_LOOP
3641  : OMP_IDENT_WORK_SECTIONS),
3642  getThreadID(CGF, Loc)};
3644  Args);
3645 }
3646 
3648  SourceLocation Loc,
3649  unsigned IVSize,
3650  bool IVSigned) {
3651  if (!CGF.HaveInsertPoint())
3652  return;
3653  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
3654  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
3655  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
3656 }
3657 
3659  SourceLocation Loc, unsigned IVSize,
3660  bool IVSigned, Address IL,
3661  Address LB, Address UB,
3662  Address ST) {
3663  // Call __kmpc_dispatch_next(
3664  // ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
3665  // kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
3666  // kmp_int[32|64] *p_stride);
3667  llvm::Value *Args[] = {
3668  emitUpdateLocation(CGF, Loc),
3669  getThreadID(CGF, Loc),
3670  IL.getPointer(), // &isLastIter
3671  LB.getPointer(), // &Lower
3672  UB.getPointer(), // &Upper
3673  ST.getPointer() // &Stride
3674  };
3675  llvm::Value *Call =
3676  CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
3677  return CGF.EmitScalarConversion(
3678  Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
3679  CGF.getContext().BoolTy, Loc);
3680 }
3681 
3683  llvm::Value *NumThreads,
3684  SourceLocation Loc) {
3685  if (!CGF.HaveInsertPoint())
3686  return;
3687  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
3688  llvm::Value *Args[] = {
3689  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3690  CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
3692  Args);
3693 }
3694 
3696  OpenMPProcBindClauseKind ProcBind,
3697  SourceLocation Loc) {
3698  if (!CGF.HaveInsertPoint())
3699  return;
3700  // Constants for proc bind value accepted by the runtime.
3701  enum ProcBindTy {
3702  ProcBindFalse = 0,
3703  ProcBindTrue,
3704  ProcBindMaster,
3705  ProcBindClose,
3706  ProcBindSpread,
3707  ProcBindIntel,
3708  ProcBindDefault
3709  } RuntimeProcBind;
3710  switch (ProcBind) {
3711  case OMPC_PROC_BIND_master:
3712  RuntimeProcBind = ProcBindMaster;
3713  break;
3714  case OMPC_PROC_BIND_close:
3715  RuntimeProcBind = ProcBindClose;
3716  break;
3717  case OMPC_PROC_BIND_spread:
3718  RuntimeProcBind = ProcBindSpread;
3719  break;
3721  llvm_unreachable("Unsupported proc_bind value.");
3722  }
3723  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
3724  llvm::Value *Args[] = {
3725  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
3726  llvm::ConstantInt::get(CGM.IntTy, RuntimeProcBind, /*isSigned=*/true)};
3728 }
3729 
/// Emits the runtime call for a flush at \p Loc; per the inline comment below
/// this is `void __kmpc_flush(ident_t *loc)`.
/// The unnamed ArrayRef<const Expr *> parameter is not referenced in the body.
/// Nothing is emitted when \p CGF has no insert point.
3730 void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
3731  SourceLocation Loc) {
3732  if (!CGF.HaveInsertPoint())
3733  return;
3734  // Build call void __kmpc_flush(ident_t *loc)
// NOTE(review): listing line 3735 is absent from this copy; presumably it
// opened the runtime call whose final argument is the updated location on the
// next line - confirm against the upstream file.
3736  emitUpdateLocation(CGF, Loc));
3737 }
3738 
3739 namespace {
3740 /// Indexes of fields for type kmp_task_t.
// NOTE(review): listing line 3741 is absent from this copy; presumably it
// opened the enumeration whose members follow - confirm against the upstream
// file. No enumerator below has an explicit value, so each one's value is its
// position in this list.
3742  /// List of shared variables.
3743  KmpTaskTShareds,
3744  /// Task routine.
3745  KmpTaskTRoutine,
3746  /// Partition id for the untied tasks.
3747  KmpTaskTPartId,
3748  /// Function with call of destructors for private variables.
3749  Data1,
3750  /// Task priority.
3751  Data2,
3752  /// (Taskloops only) Lower bound.
3753  KmpTaskTLowerBound,
3754  /// (Taskloops only) Upper bound.
3755  KmpTaskTUpperBound,
3756  /// (Taskloops only) Stride.
3757  KmpTaskTStride,
3758  /// (Taskloops only) Is last iteration flag.
3759  KmpTaskTLastIter,
3760  /// (Taskloops only) Reduction data.
3761  KmpTaskTReductions,
3762 };
3763 } // anonymous namespace
3764 
/// Returns true when neither a target region entry nor a device global
/// variable entry is currently stored in this manager.
3765 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::empty() const {
3766  return OffloadEntriesTargetRegion.empty() &&
3767  OffloadEntriesDeviceGlobalVar.empty();
3768 }
3769 
3770 /// Initialize target region entry.
/// Stores a placeholder entry under the key
/// (\p DeviceID, \p FileID, \p ParentName, \p LineNum): it carries \p Order,
/// a null address, a null ID and the kind OMPTargetRegionEntryTargetRegion.
/// OffloadingEntriesNum is incremented afterwards. Asserts that the
/// OpenMPIsDevice language option is set.
3771 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3772  initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3773  StringRef ParentName, unsigned LineNum,
3774  unsigned Order) {
3775  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3776  "only required for the device "
3777  "code generation.");
// Address and ID stay null here; registerTargetRegionEntryInfo sets them later.
3778  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] =
3779  OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
3780  OMPTargetRegionEntryTargetRegion);
3781  ++OffloadingEntriesNum;
3782 }
3783 
/// Records \p Addr, \p ID and \p Flags for the target region keyed by
/// (\p DeviceID, \p FileID, \p ParentName, \p LineNum).
/// With OpenMPIsDevice set, the entry must already exist and still be
/// unregistered (see hasTargetRegionEntryInfo); otherwise a diagnostic is
/// reported and nothing is stored. Without it, a new entry is created whose
/// order is the current OffloadingEntriesNum, which is then incremented.
3784 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3785  registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID,
3786  StringRef ParentName, unsigned LineNum,
3787  llvm::Constant *Addr, llvm::Constant *ID,
3788  OMPTargetRegionEntryKind Flags) {
3789  // If we are emitting code for a target, the entry is already initialized,
3790  // only has to be registered.
3791  if (CGM.getLangOpts().OpenMPIsDevice) {
3792  if (!hasTargetRegionEntryInfo(DeviceID, FileID, ParentName, LineNum)) {
3793  unsigned DiagID = CGM.getDiags().getCustomDiagID(
// NOTE(review): listing line 3794 is absent from this copy; presumably it
// supplied the diagnostic level argument that precedes the message string -
// confirm against the upstream file.
3795  "Unable to find target region on line '%0' in the device code.");
3796  CGM.getDiags().Report(DiagID) << LineNum;
3797  return;
3798  }
// The placeholder exists: fill in the address, ID and flags in place.
3799  auto &Entry =
3800  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum];
3801  assert(Entry.isValid() && "Entry not initialized!");
3802  Entry.setAddress(Addr);
3803  Entry.setID(ID);
3804  Entry.setFlags(Flags);
3805  } else {
// Not a device compilation: create the entry now, ordered by the running counter.
3806  OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
3807  OffloadEntriesTargetRegion[DeviceID][FileID][ParentName][LineNum] = Entry;
3808  ++OffloadingEntriesNum;
3809  }
3810 }
3811 
/// Returns true only when an entry exists for
/// (\p DeviceID, \p FileID, \p ParentName, \p LineNum) and that entry has
/// neither an address nor an ID yet. A missing key at any of the four lookup
/// levels, or an entry that already has an address or ID, yields false.
3812 bool CGOpenMPRuntime::OffloadEntriesInfoManagerTy::hasTargetRegionEntryInfo(
3813  unsigned DeviceID, unsigned FileID, StringRef ParentName,
3814  unsigned LineNum) const {
// Walk the nested maps one key at a time, bailing out at the first miss.
3815  auto PerDevice = OffloadEntriesTargetRegion.find(DeviceID);
3816  if (PerDevice == OffloadEntriesTargetRegion.end())
3817  return false;
3818  auto PerFile = PerDevice->second.find(FileID);
3819  if (PerFile == PerDevice->second.end())
3820  return false;
3821  auto PerParentName = PerFile->second.find(ParentName);
3822  if (PerParentName == PerFile->second.end())
3823  return false;
3824  auto PerLine = PerParentName->second.find(LineNum);
3825  if (PerLine == PerParentName->second.end())
3826  return false;
3827  // Fail if this entry is already registered.
3828  if (PerLine->second.getAddress() || PerLine->second.getID())
3829  return false;
3830  return true;
3831 }
3832 
/// Invokes \p Action once for every stored target region entry, passing the
/// four keys the entry is stored under followed by the entry itself.
3833 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::actOnTargetRegionEntriesInfo(
3834  const OffloadTargetRegionEntryInfoActTy &Action) {
3835  // Scan all target region entries and perform the provided action.
// Nesting mirrors the storage keys used elsewhere in this file:
// D = device ID level, F = file ID level, P = parent name level, L = line level.
3836  for (const auto &D : OffloadEntriesTargetRegion)
3837  for (const auto &F : D.second)
3838  for (const auto &P : F.second)
3839  for (const auto &L : P.second)
3840  Action(D.first, F.first, P.first(), L.first, L.second);
3841 }
3842 
/// Adds an entry for the device global variable \p Name, constructed from
/// \p Order and \p Flags, and increments OffloadingEntriesNum.
/// Asserts that the OpenMPIsDevice language option is set.
3843 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3844  initializeDeviceGlobalVarEntryInfo(StringRef Name,
3845  OMPTargetGlobalVarEntryKind Flags,
3846  unsigned Order) {
3847  assert(CGM.getLangOpts().OpenMPIsDevice && "Initialization of entries is "
3848  "only required for the device "
3849  "code generation.");
// The result of try_emplace is ignored, so the counter below is incremented
// whether or not a new entry was actually inserted.
3850  OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
3851  ++OffloadingEntriesNum;
3852 }
3853 
/// Records \p Addr, \p VarSize and \p Linkage for the device global variable
/// \p VarName.
/// With OpenMPIsDevice set, the existing entry for \p VarName is updated.
/// Without it, an existing entry only has its size and linkage backfilled,
/// and an unknown name gets a new entry ordered by OffloadingEntriesNum.
/// In both modes an entry that already has an address keeps it; the asserts
/// require \p Flags to match the stored flags and \p Addr to match any
/// stored address.
3854 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3855  registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr,
3856  CharUnits VarSize,
3857  OMPTargetGlobalVarEntryKind Flags,
3858  llvm::GlobalValue::LinkageTypes Linkage) {
3859  if (CGM.getLangOpts().OpenMPIsDevice) {
// NOTE(review): the entry is fetched with operator[] before any existence
// check; presumably that creates a default entry when VarName is unknown, in
// which case the assert below is the only guard - confirm.
3860  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3861  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3862  "Entry not initialized!");
3863  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3864  "Resetting with the new address.");
// Address already recorded: only backfill size/linkage while the stored size
// is still zero, then stop without touching the address.
3865  if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
3866  if (Entry.getVarSize().isZero()) {
3867  Entry.setVarSize(VarSize);
3868  Entry.setLinkage(Linkage);
3869  }
3870  return;
3871  }
// No address yet: record size, linkage and address.
3872  Entry.setVarSize(VarSize);
3873  Entry.setLinkage(Linkage);
3874  Entry.setAddress(Addr);
3875  } else {
3876  if (hasDeviceGlobalVarEntryInfo(VarName)) {
// Name already known: backfill size/linkage while the stored size is zero.
3877  auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
3878  assert(Entry.isValid() && Entry.getFlags() == Flags &&
3879  "Entry not initialized!");
3880  assert((!Entry.getAddress() || Entry.getAddress() == Addr) &&
3881  "Resetting with the new address.");
3882  if (Entry.getVarSize().isZero()) {
3883  Entry.setVarSize(VarSize);
3884  Entry.setLinkage(Linkage);
3885  }
3886  return;
3887  }
// Unknown name: create the entry, using the running counter as its order.
3888  OffloadEntriesDeviceGlobalVar.try_emplace(
3889  VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage);
3890  ++OffloadingEntriesNum;
3891  }
3892 }
3893 
/// Invokes \p Action once for every stored device global variable entry,
/// passing the entry's key (the name it is stored under) and the entry itself.
3894 void CGOpenMPRuntime::OffloadEntriesInfoManagerTy::
3895  actOnDeviceGlobalVarEntriesInfo(
3896  const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
3897  // Scan all device global variable entries and perform the provided action.
3898  for (const auto &E : OffloadEntriesDeviceGlobalVar)
3899  Action(E.getKey(), E.getValue());
3900 }
3901 
3902 llvm::Function *
3904  // If we don't have entries or if we are emitting code for the device, we
3905  // don't need to do anything.
3906  if (CGM.getLangOpts().OpenMPIsDevice || OffloadEntriesInfoManager.empty())
3907  return nullptr;
3908 
3909  llvm::Module &M = CGM.getModule();
3910  ASTContext &C = CGM.getContext();
3911 
3912  // Get list of devices we care about
3913  const std::vector<llvm::Triple> &Devices = CGM.getLangOpts().OMPTargetTriples;
3914 
3915  // We should be creating an offloading descriptor only if there are devices
3916  // specified.
3917  assert(!Devices.empty() && "No OpenMP offloading devices??");
3918 
3919  // Create the external variables that will point to the begin and end of the
3920  // host entries section. These will be defined by the linker.
3921  llvm::Type *OffloadEntryTy =
3923  std::string EntriesBeginName = getName({"omp_offloading", "entries_begin"});
3924  auto *HostEntriesBegin = new llvm::GlobalVariable(
3925  M, OffloadEntryTy, /*isConstant=*/true,
3926  llvm::GlobalValue::ExternalLinkage, /*Initializer=*/nullptr,
3927  EntriesBeginName);
3928  std::string EntriesEndName = getName({"omp_offloading", "entries_end"});
3929  auto *HostEntriesEnd =
3930  new llvm::GlobalVariable(M, OffloadEntryTy, /*isConstant=*/true,
3932  /*Initializer=*/nullptr, EntriesEndName);
3933 
3934  // Create all device images
3935  auto *DeviceImageTy = cast<llvm::StructType>(
3937  ConstantInitBuilder DeviceImagesBuilder(CGM);
3938  ConstantArrayBuilder DeviceImagesEntries =
3939  DeviceImagesBuilder.beginArray(DeviceImageTy);
3940 
3941  for (const llvm::Triple &Device : Devices) {
3942  StringRef T = Device.getTriple();
3943  std::string BeginName = getName({"omp_offloading", "img_start", ""});
3944  auto *ImgBegin = new llvm::GlobalVariable(
3945  M, CGM.Int8Ty, /*isConstant=*/true,
3946  llvm::GlobalValue::ExternalWeakLinkage,
3947  /*Initializer=*/nullptr, Twine(BeginName).concat(T));
3948  std::string EndName = getName({"omp_offloading", "img_end", ""});
3949  auto *ImgEnd = new llvm::GlobalVariable(
3950  M, CGM.Int8Ty, /*isConstant=*/true,
3951  llvm::GlobalValue::ExternalWeakLinkage,
3952  /*Initializer=*/nullptr, Twine(EndName).concat(T));
3953 
3954  llvm::Constant *Data[] = {ImgBegin, ImgEnd, HostEntriesBegin,
3955  HostEntriesEnd};
3957  DeviceImagesEntries);
3958  }
3959 
3960  // Create device images global array.
3961  std::string ImagesName = getName({"omp_offloading", "device_images"});
3962  llvm::GlobalVariable *DeviceImages =
3963  DeviceImagesEntries.finishAndCreateGlobal(ImagesName,
3964  CGM.getPointerAlign(),
3965  /*isConstant=*/true);
3966  DeviceImages->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
3967 
3968  // This is a Zero array to be used in the creation of the constant expressions
3969  llvm::Constant *Index[] = {llvm::Constant::getNullValue(CGM.Int32Ty),
3970  llvm::Constant::getNullValue(CGM.Int32Ty)};
3971 
3972  // Create the target region descriptor.
3973  llvm::Constant *Data[] = {
3974  llvm::ConstantInt::get(CGM.Int32Ty, Devices.size()),
3975  llvm::ConstantExpr::getGetElementPtr(DeviceImages->getValueType(),
3976  DeviceImages, Index),
3977  HostEntriesBegin, HostEntriesEnd};
3978  std::string Descriptor = getName({"omp_offloading", "descriptor"});
3979  llvm::GlobalVariable *Desc = createGlobalStruct(
3980  CGM, getTgtBinaryDescriptorQTy(), /*IsConstant=*/true, Data, Descriptor);
3981 
3982  // Emit code to register or unregister the descriptor at execution
3983  // startup or closing, respectively.
3984 
3985  llvm::Function *UnRegFn;
3986  {
3987  FunctionArgList Args;
3989  Args.push_back(&DummyPtr);
3990 
3991  CodeGenFunction CGF(CGM);
3992  // Disable debug info for global (de-)initializer because they are not part
3993  // of some particular construct.
3994  CGF.disableDebugInfo();
3995  const auto &FI =
3997  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
3998  std::string UnregName = getName({"omp_offloading", "descriptor_unreg"});
3999  UnRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, UnregName, FI);
4000  CGF.StartFunction(GlobalDecl(), C.VoidTy, UnRegFn, FI, Args);
4002  Desc);
4003  CGF.FinishFunction();
4004  }
4005  llvm::Function *RegFn;
4006  {
4007  CodeGenFunction CGF(CGM);
4008  // Disable debug info for global (de-)initializer because they are not part
4009  // of some particular construct.
4010  CGF.disableDebugInfo();
4011  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
4012  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
4013 
4014  // Encode offload target triples into the registration function name. It
4015  // will serve as a comdat key for the registration/unregistration code for
4016  // this particular combination of offloading targets.
4017  SmallVector<StringRef, 4U> RegFnNameParts(Devices.size() + 2U);
4018  RegFnNameParts[0] = "omp_offloading";
4019  RegFnNameParts[1] = "descriptor_reg";
4020  llvm::transform(Devices, std::next(RegFnNameParts.begin(), 2),
4021  [](const llvm::Triple &T) -> const std::string& {
4022  return T.getTriple();
4023  });
4024  llvm::sort(std::next(RegFnNameParts.begin(), 2), RegFnNameParts.end());
4025  std::string Descriptor = getName(RegFnNameParts);
4026  RegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, Descriptor, FI);
4027  CGF.StartFunction(GlobalDecl(), C.VoidTy, RegFn, FI, FunctionArgList());
4029  // Create a variable to drive the registration and unregistration of the
4030  // descriptor, so we can reuse the logic that emits Ctors and Dtors.
4031  ImplicitParamDecl RegUnregVar(C, C.getTranslationUnitDecl(),
4032  SourceLocation(), nullptr, C.CharTy,
4034  CGM.getCXXABI().registerGlobalDtor(CGF, RegUnregVar, UnRegFn, Desc);
4035  CGF.FinishFunction();
4036  }
4037  if (CGM.supportsCOMDAT()) {
4038  // It is sufficient to call registration function only once, so create a
4039  // COMDAT group for registration/unregistration functions and associated
4040  // data. That would reduce startup time and code size. Registration
4041  // function serves as a COMDAT group key.
4042  llvm::Comdat *ComdatKey = M.getOrInsertComdat(RegFn->getName());
4043  RegFn->setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
4044  RegFn->setVisibility(llvm::GlobalValue::HiddenVisibility);
4045  RegFn->setComdat(ComdatKey);
4046  UnRegFn->setComdat(ComdatKey);
4047  DeviceImages->setComdat(ComdatKey);
4048  Desc->setComdat(ComdatKey);
4049  }
4050  return RegFn;
4051 }
4052 
4054  llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags,
4055  llvm::GlobalValue::LinkageTypes Linkage) {
4056  StringRef Name = Addr->getName();
4057  llvm::Module &M = CGM.getModule();
4058  llvm::LLVMContext &C = M.getContext();
4059 
4060  // Create constant string with the name.
4061  llvm::Constant *StrPtrInit = llvm::ConstantDataArray::getString(C, Name);
4062 
4063  std::string StringName = getName({"omp_offloading", "entry_name"});
4064  auto *Str = new llvm::GlobalVariable(
4065  M, StrPtrInit->getType(), /*isConstant=*/true,
4066  llvm::GlobalValue::InternalLinkage, StrPtrInit, StringName);
4067  Str->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
4068 
4069  llvm::Constant *Data[] = {llvm::ConstantExpr::getBitCast(ID, CGM.VoidPtrTy),
4070  llvm::ConstantExpr::getBitCast(Str, CGM.Int8PtrTy),
4071  llvm::ConstantInt::get(CGM.SizeTy, Size),
4072  llvm::ConstantInt::get(CGM.Int32Ty, Flags),
4073  llvm::ConstantInt::get(CGM.Int32Ty, 0)};
4074  std::string EntryName = getName({"omp_offloading", "entry", ""});
4075  llvm::GlobalVariable *Entry = createGlobalStruct(
4076  CGM, getTgtOffloadEntryQTy(), /*IsConstant=*/true, Data,
4077  Twine(EntryName).concat(Name), llvm::GlobalValue::WeakAnyLinkage);
4078 
4079  // The entry has to be created in the section the linker expects it to be.
4080  std::string Section = getName({"omp_offloading", "entries"});
4081  Entry->setSection(Section);
4082 }
4083 
4085  // Emit the offloading entries and metadata so that the device codegen side
4086  // can easily figure out what to emit. The produced metadata looks like
4087  // this:
4088  //
4089  // !omp_offload.info = !{!1, ...}
4090  //
4091  // Right now we only generate metadata for function that contain target
4092  // regions.
4093 
4094  // If we do not have entries, we don't need to do anything.
4096  return;
4097 
4098  llvm::Module &M = CGM.getModule();
4099  llvm::LLVMContext &C = M.getContext();
4101  OrderedEntries(OffloadEntriesInfoManager.size());
4102  llvm::SmallVector<StringRef, 16> ParentFunctions(
4104 
4105  // Auxiliary methods to create metadata values and strings.
4106  auto &&GetMDInt = [this](unsigned V) {
4107  return llvm::ConstantAsMetadata::get(
4108  llvm::ConstantInt::get(CGM.Int32Ty, V));
4109  };
4110 
4111  auto &&GetMDString = [&C](StringRef V) { return llvm::MDString::get(C, V); };
4112 
4113  // Create the offloading info metadata node.
4114  llvm::NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
4115 
4116  // Create function that emits metadata for each target region entry;
4117  auto &&TargetRegionMetadataEmitter =
4118  [&C, MD, &OrderedEntries, &ParentFunctions, &GetMDInt, &GetMDString](
4119  unsigned DeviceID, unsigned FileID, StringRef ParentName,
4120  unsigned Line,
4122  // Generate metadata for target regions. Each entry of this metadata
4123  // contains:
4124  // - Entry 0 -> Kind of this type of metadata (0).
4125  // - Entry 1 -> Device ID of the file where the entry was identified.
4126  // - Entry 2 -> File ID of the file where the entry was identified.
4127  // - Entry 3 -> Mangled name of the function where the entry was
4128  // identified.
4129  // - Entry 4 -> Line in the file where the entry was identified.
4130  // - Entry 5 -> Order the entry was created.
4131  // The first element of the metadata node is the kind.
4132  llvm::Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDInt(DeviceID),
4133  GetMDInt(FileID), GetMDString(ParentName),
4134  GetMDInt(Line), GetMDInt(E.getOrder())};
4135 
4136  // Save this entry in the right position of the ordered entries array.
4137  OrderedEntries[E.getOrder()] = &E;
4138  ParentFunctions[E.getOrder()] = ParentName;
4139 
4140  // Add metadata to the named metadata node.
4141  MD->addOperand(llvm::MDNode::get(C, Ops));
4142  };
4143 
4145  TargetRegionMetadataEmitter);
4146 
4147  // Create function that emits metadata for each device global variable entry;
4148  auto &&DeviceGlobalVarMetadataEmitter =
4149  [&C, &OrderedEntries, &GetMDInt, &GetMDString,
4150  MD](StringRef MangledName,
4152  &E) {
4153  // Generate metadata for global variables. Each entry of this metadata
4154  // contains:
4155  // - Entry 0 -> Kind of this type of metadata (1).
4156  // - Entry 1 -> Mangled name of the variable.
4157  // - Entry 2 -> Declare target kind.
4158  // - Entry 3 -> Order the entry was created.
4159  // The first element of the metadata node is the kind.
4160  llvm::Metadata *Ops[] = {
4161  GetMDInt(E.getKind()), GetMDString(MangledName),
4162  GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
4163 
4164  // Save this entry in the right position of the ordered entries array.
4165  OrderedEntries[E.getOrder()] = &E;
4166 
4167  // Add metadata to the named metadata node.
4168  MD->addOperand(llvm::MDNode::get(C, Ops));
4169  };
4170 
4172  DeviceGlobalVarMetadataEmitter);
4173 
4174  for (const auto *E : OrderedEntries) {
4175  assert(E && "All ordered entries must exist!");
4176  if (const auto *CE =
4177  dyn_cast<OffloadEntriesInfoManagerTy::OffloadEntryInfoTargetRegion>(
4178  E)) {
4179  if (!CE->getID() || !CE->getAddress()) {
4180  // Do not blame the entry if the parent function is not emitted.
4181  StringRef FnName = ParentFunctions[CE->getOrder()];
4182  if (!CGM.GetGlobalValue(FnName))
4183  continue;
4184  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4186  "Offloading entry for target region is incorrect: either the "
4187  "address or the ID is invalid.");
4188  CGM.getDiags().Report(DiagID);
4189  continue;
4190  }
4191  createOffloadEntry(CE->getID(), CE->getAddress(), /*Size=*/0,
4192  CE->getFlags(), llvm::GlobalValue::WeakAnyLinkage);
4193  } else if (const auto *CE =
4194  dyn_cast<OffloadEntriesInfoManagerTy::
4195  OffloadEntryInfoDeviceGlobalVar>(E)) {
4198  CE->getFlags());
4199  switch (Flags) {
4201  if (CGM.getLangOpts().OpenMPIsDevice &&
4202  CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())
4203  continue;
4204  if (!CE->getAddress()) {
4205  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4207  "Offloading entry for declare target variable is incorrect: the "
4208  "address is invalid.");
4209  CGM.getDiags().Report(DiagID);
4210  continue;
4211  }
4212  // The variable has no definition - no need to add the entry.
4213  if (CE->getVarSize().isZero())
4214  continue;
4215  break;
4216  }
4218  assert(((CGM.getLangOpts().OpenMPIsDevice && !CE->getAddress()) ||
4219  (!CGM.getLangOpts().OpenMPIsDevice && CE->getAddress())) &&
4220  "Declaret target link address is set.");
4221  if (CGM.getLangOpts().OpenMPIsDevice)
4222  continue;
4223  if (!CE->getAddress()) {
4224  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4226  "Offloading entry for declare target variable is incorrect: the "
4227  "address is invalid.");
4228  CGM.getDiags().Report(DiagID);
4229  continue;
4230  }
4231  break;
4232  }
4233  createOffloadEntry(CE->getAddress(), CE->getAddress(),
4234  CE->getVarSize().getQuantity(), Flags,
4235  CE->getLinkage());
4236  } else {
4237  llvm_unreachable("Unsupported entry kind.");
4238  }
4239  }
4240 }
4241 
4242 /// Loads all the offload entries information from the host IR
4243 /// metadata.
     ///
     /// Returns without doing anything unless compiling for the device with a
     /// host IR file configured. If that file cannot be opened or parsed, a
     /// diagnostic is reported and nothing is loaded.
4245  // If we are in target mode, load the metadata from the host IR. This code has
4246  // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
4247 
4248  if (!CGM.getLangOpts().OpenMPIsDevice)
4249  return;
4250 
4251  if (CGM.getLangOpts().OMPHostIRFile.empty())
4252  return;
4253 
      // Read the host IR file; report an error and bail out if that fails.
4254  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
4255  if (auto EC = Buf.getError()) {
4256  CGM.getDiags().Report(diag::err_cannot_open_file)
4257  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4258  return;
4259  }
4260 
      // Parse the buffer as bitcode using a context local to this function.
4261  llvm::LLVMContext C;
4262  auto ME = expectedToErrorOrAndEmitErrors(
4263  C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));
4264 
4265  if (auto EC = ME.getError()) {
4266  unsigned DiagID = CGM.getDiags().getCustomDiagID(
4267  DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
4268  CGM.getDiags().Report(DiagID)
4269  << CGM.getLangOpts().OMPHostIRFile << EC.message();
4270  return;
4271  }
4272 
      // A host module without the named node has no entries to load.
4273  llvm::NamedMDNode *MD = ME.get()->getNamedMetadata("omp_offload.info");
4274  if (!MD)
4275  return;
4276 
      // Every operand of the named node describes one offload entry.
4277  for (llvm::MDNode *MN : MD->operands()) {
      // Reads operand Idx of the current node as a zero-extended integer.
4278  auto &&GetMDInt = [MN](unsigned Idx) {
4279  auto *V = cast<llvm::ConstantAsMetadata>(MN->getOperand(Idx));
4280  return cast<llvm::ConstantInt>(V->getValue())->getZExtValue();
4281  };
4282 
      // Reads operand Idx of the current node as a metadata string.
4283  auto &&GetMDString = [MN](unsigned Idx) {
4284  auto *V = cast<llvm::MDString>(MN->getOperand(Idx));
4285  return V->getString();
4286  };
4287 
      // Operand 0 selects how the remaining operands are interpreted.
4288  switch (GetMDInt(0)) {
4289  default:
4290  llvm_unreachable("Unexpected metadata!");
4291  break;
4295  /*DeviceID=*/GetMDInt(1), /*FileID=*/GetMDInt(2),
4296  /*ParentName=*/GetMDString(3), /*Line=*/GetMDInt(4),
4297  /*Order=*/GetMDInt(5));
4298  break;
4302  /*MangledName=*/GetMDString(1),
4303  static_cast<OffloadEntriesInfoManagerTy::OMPTargetGlobalVarEntryKind>(
4304  /*Flags=*/GetMDInt(2)),
4305  /*Order=*/GetMDInt(3));
4306  break;
4307  }
4308  }
4309 }
4310 
4312  if (!KmpRoutineEntryPtrTy) {
      // Only built while KmpRoutineEntryPtrTy is still unset, so repeated calls
      // do not rebuild the type.
4313  // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
4314  ASTContext &C = CGM.getContext();
4315  QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
4317  KmpRoutineEntryPtrQTy = C.getPointerType(
4318  C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
      // Keep the converted form of the pointer type alongside the QualType.
4319  KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
4320  }
4321 }
4322 
4324  // Make sure the type of the entry is already created. This is the type we
4325  // have to create:
4326  // struct __tgt_offload_entry{
4327  // void *addr; // Pointer to the offload entry info.
4328  // // (function or global)
4329  // char *name; // Name of the function or global.
4330  // size_t size; // Size of the entry info (0 if it is a function).
4331  // int32_t flags; // Flags associated with the entry, e.g. 'link'.
4332  // int32_t reserved; // Reserved, to use by the runtime library.
4333  // };
      // The record is only built while TgtOffloadEntryQTy is still null.
4334  if (TgtOffloadEntryQTy.isNull()) {
4335  ASTContext &C = CGM.getContext();
4336  RecordDecl *RD = C.buildImplicitRecord("__tgt_offload_entry");
4337  RD->startDefinition();
4338  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4340  addFieldToRecordDecl(C, RD, C.getSizeType());
4342  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4344  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4345  RD->completeDefinition();
      // The record carries an implicit packed attribute.
4346  RD->addAttr(PackedAttr::CreateImplicit(C));
4348  }
4349  return TgtOffloadEntryQTy;
4350 }
4351 
4353  // These are the types we need to build:
4354  // struct __tgt_device_image{
4355  // void *ImageStart; // Pointer to the target code start.
4356  // void *ImageEnd; // Pointer to the target code end.
4357  // // We also add the host entries to the device image, as it may be useful
4358  // // for the target runtime to have access to that information.
4359  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all
4360  // // the entries.
4361  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4362  // // entries (non inclusive).
4363  // };
      // The record is only built while TgtDeviceImageQTy is still null.
4364  if (TgtDeviceImageQTy.isNull()) {
4365  ASTContext &C = CGM.getContext();
4366  RecordDecl *RD = C.buildImplicitRecord("__tgt_device_image");
4367  RD->startDefinition();
      // The two void-pointer fields come first (ImageStart and ImageEnd in the
      // layout sketched above).
4368  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4369  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4372  RD->completeDefinition();
4374  }
4375  return TgtDeviceImageQTy;
4376 }
4377 
4379  // struct __tgt_bin_desc{
4380  // int32_t NumDevices; // Number of devices supported.
4381  // __tgt_device_image *DeviceImages; // Arrays of device images
4382  // // (one per device).
4383  // __tgt_offload_entry *EntriesBegin; // Begin of the table with all the
4384  // // entries.
4385  // __tgt_offload_entry *EntriesEnd; // End of the table with all the
4386  // // entries (non inclusive).
4387  // };
4389  ASTContext &C = CGM.getContext();
4390  RecordDecl *RD = C.buildImplicitRecord("__tgt_bin_desc");
4391  RD->startDefinition();
      // A signed 32-bit integer field (NumDevices in the layout sketched above).
4393  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true));
4397  RD->completeDefinition();
4399  }
4400  return TgtBinaryDescriptorQTy;
4401 }
4402 
4403 namespace {
     /// Groups the declarations associated with one privatized variable.
4404 struct PrivateHelpersTy {
4405  PrivateHelpersTy(const VarDecl *Original, const VarDecl *PrivateCopy,
4406  const VarDecl *PrivateElemInit)
4407  : Original(Original), PrivateCopy(PrivateCopy),
4408  PrivateElemInit(PrivateElemInit) {}
      /// The variable being privatized.
4409  const VarDecl *Original;
      /// The variable declared as its private copy.
4410  const VarDecl *PrivateCopy;
      /// Helper variable used for element initialization; callers in this file
      /// pass nullptr for private and lastprivate variables.
4411  const VarDecl *PrivateElemInit;
4412 };
     /// A privatized variable's helpers paired with an alignment.
4413 typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
4414 } // anonymous namespace
4415 
4416 static RecordDecl *
     // Builds the implicit record ".kmp_privates.t" with one field per entry of
     // Privates. Returns nullptr when Privates is empty.
4418  if (!Privates.empty()) {
4419  ASTContext &C = CGM.getContext();
4420  // Build struct .kmp_privates_t. {
4421  // /* private vars */
4422  // };
4423  RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
4424  RD->startDefinition();
4425  for (const auto &Pair : Privates) {
      // The field has the original variable's type with any reference removed.
4426  const VarDecl *VD = Pair.second.Original;
4427  QualType Type = VD->getType().getNonReferenceType();
4428  FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      // Copy every AlignedAttr of the original variable onto the new field.
4429  if (VD->hasAttrs()) {
4430  for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
4431  E(VD->getAttrs().end());
4432  I != E; ++I)
4433  FD->addAttr(*I);
4434  }
4435  }
4436  RD->completeDefinition();
4437  return RD;
4438  }
4439  return nullptr;
4440 }
4441 
4442 static RecordDecl *
     // Builds the implicit record "kmp_task_t" (and the union "kmp_cmplrdata_t"
     // used by two of its fields). The five trailing fields are added only when
     // Kind is a taskloop directive.
4444  QualType KmpInt32Ty,
4445  QualType KmpRoutineEntryPointerQTy) {
4446  ASTContext &C = CGM.getContext();
4447  // Build struct kmp_task_t {
4448  // void * shareds;
4449  // kmp_routine_entry_t routine;
4450  // kmp_int32 part_id;
4451  // kmp_cmplrdata_t data1;
4452  // kmp_cmplrdata_t data2;
4453  // For taskloops additional fields:
4454  // kmp_uint64 lb;
4455  // kmp_uint64 ub;
4456  // kmp_int64 st;
4457  // kmp_int32 liter;
4458  // void * reductions;
4459  // };
      // Union with two members: a kmp_int32 and a routine-entry pointer.
4460  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
4461  UD->startDefinition();
4462  addFieldToRecordDecl(C, UD, KmpInt32Ty);
4463  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
4464  UD->completeDefinition();
4465  QualType KmpCmplrdataTy = C.getRecordType(UD);
      // Fields are added in the order listed in the layout comment above.
4466  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
4467  RD->startDefinition();
4468  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4469  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
4470  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4471  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4472  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
4473  if (isOpenMPTaskLoopDirective(Kind)) {
      // 64-bit unsigned and signed integer types for the loop bounds and stride.
4474  QualType KmpUInt64Ty =
4475  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
4476  QualType KmpInt64Ty =
4477  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
4478  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4479  addFieldToRecordDecl(C, RD, KmpUInt64Ty);
4480  addFieldToRecordDecl(C, RD, KmpInt64Ty);
4481  addFieldToRecordDecl(C, RD, KmpInt32Ty);
4482  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
4483  }
4484  RD->completeDefinition();
4485  return RD;
4486 }
4487 
4488 static RecordDecl *
     // Builds the implicit record "kmp_task_t_with_privates": a KmpTaskTQTy field
     // followed by a privates-record field. The second field is omitted when
     // createPrivatesRecordDecl returns null.
4490  ArrayRef<PrivateDataTy> Privates) {
4491  ASTContext &C = CGM.getContext();
4492  // Build struct kmp_task_t_with_privates {
4493  // kmp_task_t task_data;
4494  // .kmp_privates_t. privates;
4495  // };
4496  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
4497  RD->startDefinition();
4498  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
4499  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
4500  addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
4501  RD->completeDefinition();
4502  return RD;
4503 }
4504 
4505 /// Emit a proxy function which accepts kmp_task_t as the second
4506 /// argument.
4507 /// \code
4508 /// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
4509 /// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
4510 /// For taskloops:
4511 /// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4512 /// tt->reductions, tt->shareds);
4513 /// return 0;
4514 /// }
4515 /// \endcode
4516 static llvm::Function *
4518  OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
4519  QualType KmpTaskTWithPrivatesPtrQTy,
4520  QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
4521  QualType SharedsPtrTy, llvm::Function *TaskFunction,
4522  llvm::Value *TaskPrivatesMap) {
4523  ASTContext &C = CGM.getContext();
      // The proxy takes two parameters: the gtid and the task pointer.
4524  FunctionArgList Args;
4525  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4527  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4528  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4530  Args.push_back(&GtidArg);
4531  Args.push_back(&TaskTypeArg);
4532  const auto &TaskEntryFnInfo =
4533  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4534  llvm::FunctionType *TaskEntryTy =
4535  CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
      // Create the function with internal linkage and mark it non-recursive.
4536  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
4537  auto *TaskEntry = llvm::Function::Create(
4538  TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4539  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
4540  TaskEntry->setDoesNotRecurse();
4541  CodeGenFunction CGF(CGM);
4542  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
4543  Loc, Loc);
4544 
4545  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
4546  // tt,
4547  // For taskloops:
4548  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
4549  // tt->task_data.reductions, tt->task_data.shareds);
4550  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
4551  CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
      // TDBase is the record the task pointer refers to; Base is its first field
      // (the kmp_task_t part).
4552  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4553  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4554  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4555  const auto *KmpTaskTWithPrivatesQTyRD =
4556  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
4557  LValue Base =
4558  CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4559  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
      // part_id is passed by address (the field's pointer), not loaded.
4560  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
4561  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
4562  llvm::Value *PartidParam = PartIdLVal.getPointer();
4563 
4564  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
4565  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
4567  CGF.EmitLoadOfScalar(SharedsLVal, Loc),
4568  CGF.ConvertTypeForMem(SharedsPtrTy));
4569 
      // The privates record is the optional second field; pass a null pointer
      // when the task record has no such field.
4570  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4571  llvm::Value *PrivatesParam;
4572  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
4573  LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
4574  PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4575  PrivatesLVal.getPointer(), CGF.VoidPtrTy);
4576  } else {
4577  PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
4578  }
4579 
      // Arguments passed for every directive kind.
4580  llvm::Value *CommonArgs[] = {GtidParam, PartidParam, PrivatesParam,
4581  TaskPrivatesMap,
4582  CGF.Builder
4584  TDBase.getAddress(), CGF.VoidPtrTy)
4585  .getPointer()};
4586  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
4587  std::end(CommonArgs));
      // Taskloops additionally pass the loaded lower bound, upper bound, stride,
      // last-iteration flag and reductions fields, in that order.
4588  if (isOpenMPTaskLoopDirective(Kind)) {
4589  auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
4590  LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
4591  llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
4592  auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
4593  LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
4594  llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
4595  auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
4596  LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
4597  llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
4598  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4599  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4600  llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
4601  auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
4602  LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
4603  llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
4604  CallArgs.push_back(LBParam);
4605  CallArgs.push_back(UBParam);
4606  CallArgs.push_back(StParam);
4607  CallArgs.push_back(LIParam);
4608  CallArgs.push_back(RParam);
4609  }
      // The shareds pointer is always the last argument.
4610  CallArgs.push_back(SharedsParam);
4611 
4612  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
4613  CallArgs);
      // The proxy always returns 0.
4614  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
4615  CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
4616  CGF.FinishFunction();
4617  return TaskEntry;
4618 }
4619 
4621  SourceLocation Loc,
4622  QualType KmpInt32Ty,
4623  QualType KmpTaskTWithPrivatesPtrQTy,
4624  QualType KmpTaskTWithPrivatesQTy) {
      // Emits a function taking (gtid, task pointer) that pushes a destroy for
      // every field of the task's privates record whose type needs destruction,
      // and returns that function.
4625  ASTContext &C = CGM.getContext();
4626  FunctionArgList Args;
4627  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
4629  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4630  KmpTaskTWithPrivatesPtrQTy.withRestrict(),
4632  Args.push_back(&GtidArg);
4633  Args.push_back(&TaskTypeArg);
4634  const auto &DestructorFnInfo =
4635  CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
4636  llvm::FunctionType *DestructorFnTy =
4637  CGM.getTypes().GetFunctionType(DestructorFnInfo);
4638  std::string Name =
4639  CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
4640  auto *DestructorFn =
4642  Name, &CGM.getModule());
4643  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
4644  DestructorFnInfo);
4645  DestructorFn->setDoesNotRecurse();
4646  CodeGenFunction CGF(CGM);
4647  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
4648  Args, Loc, Loc);
4649 
4651  CGF.GetAddrOfLocalVar(&TaskTypeArg),
4652  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4653  const auto *KmpTaskTWithPrivatesQTyRD =
4654  cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
      // Step to the second field (the privates record) and rebase onto it. This
      // dereferences the field iterator without checking that the field exists.
4655  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4656  Base = CGF.EmitLValueForField(Base, *FI);
4657  for (const auto *Field :
4658  cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
      // Only fields with a non-zero destruction kind get a destroy pushed.
4659  if (QualType::DestructionKind DtorKind =
4660  Field->getType().isDestructedType()) {
4661  LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
4662  CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
4663  }
4664  }
4665  CGF.FinishFunction();
4666  return DestructorFn;
4667 }
4668 
4669 /// Emit a privates mapping function for correct handling of private and
4670 /// firstprivate variables.
4671 /// \code
4672 /// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
4673 /// **noalias priv1,..., <tyn> **noalias privn) {
4674 /// *priv1 = &.privates.priv1;
4675 /// ...;
4676 /// *privn = &.privates.privn;
4677 /// }
4678 /// \endcode
4679 static llvm::Value *
4681  ArrayRef<const Expr *> PrivateVars,
4682  ArrayRef<const Expr *> FirstprivateVars,
4683  ArrayRef<const Expr *> LastprivateVars,
4684  QualType PrivatesQTy,
4685  ArrayRef<PrivateDataTy> Privates) {
4686  ASTContext &C = CGM.getContext();
4687  FunctionArgList Args;
4688  ImplicitParamDecl TaskPrivatesArg(
4689  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4690  C.getPointerType(PrivatesQTy).withConst().withRestrict(),
4692  Args.push_back(&TaskPrivatesArg);
      // PrivateVarsPos maps each variable to the index of its parameter in Args.
      // Counter starts at 1 because Args[0] is the privates pointer.
4693  llvm::DenseMap<const VarDecl *, unsigned> PrivateVarsPos;
4694  unsigned Counter = 1;
      // One parameter per private, then firstprivate, then lastprivate variable.
4695  for (const Expr *E : PrivateVars) {
4696  Args.push_back(ImplicitParamDecl::Create(
4697  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4699  .withConst()
4700  .withRestrict(),
4702  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4703  PrivateVarsPos[VD] = Counter;
4704  ++Counter;
4705  }
4706  for (const Expr *E : FirstprivateVars) {
4707  Args.push_back(ImplicitParamDecl::Create(
4708  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4710  .withConst()
4711  .withRestrict(),
4713  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4714  PrivateVarsPos[VD] = Counter;
4715  ++Counter;
4716  }
4717  for (const Expr *E : LastprivateVars) {
4718  Args.push_back(ImplicitParamDecl::Create(
4719  C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4721  .withConst()
4722  .withRestrict(),
4724  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4725  PrivateVarsPos[VD] = Counter;
4726  ++Counter;
4727  }
4728  const auto &TaskPrivatesMapFnInfo =
4730  llvm::FunctionType *TaskPrivatesMapTy =
4731  CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
4732  std::string Name =
4733  CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
4734  auto *TaskPrivatesMap = llvm::Function::Create(
4735  TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
4736  &CGM.getModule());
4737  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
4738  TaskPrivatesMapFnInfo);
      // When optimizing, replace noinline/optnone with alwaysinline.
4739  if (CGM.getLangOpts().Optimize) {
4740  TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
4741  TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
4742  TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
4743  }
4744  CodeGenFunction CGF(CGM);
4745  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
4746  TaskPrivatesMapFnInfo, Args, Loc, Loc);
4747 
4748  // *privi = &.privates.privi;
4750  CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
4751  TaskPrivatesArg.getType()->castAs<PointerType>());
4752  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
      // Counter now indexes Privates; this relies on the record's fields being in
      // the same order as Privates.
4753  Counter = 0;
4754  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
4755  LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
      // Look up the parameter that belongs to this field's original variable.
4756  const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
4757  LValue RefLVal =
4758  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
4759  LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
4760  RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
      // Store the field's address through the parameter.
4761  CGF.EmitStoreOfScalar(FieldLVal.getPointer(), RefLoadLVal);
4762  ++Counter;
4763  }
4764  CGF.FinishFunction();
4765  return TaskPrivatesMap;
4766 }
4767 
4768 /// Emit initialization for private variables in task-based directives.
     ///
     /// Walks Privates in step with the fields of the privates record (the second
     /// field of the task record). When \p ForDup is set, only private copies
     /// whose initializer is a non-trivial constructor expression are
     /// initialized.
4770  const OMPExecutableDirective &D,
4771  Address KmpTaskSharedsPtr, LValue TDBase,
4772  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4773  QualType SharedsTy, QualType SharedsPtrTy,
4774  const OMPTaskDataTy &Data,
4775  ArrayRef<PrivateDataTy> Privates, bool ForDup) {
4776  ASTContext &C = CGF.getContext();
4777  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
4778  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
4780  ? OMPD_taskloop
4781  : OMPD_task;
4782  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
4783  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
4784  LValue SrcBase;
4785  bool IsTargetTask =
4788  // For target-based directives skip 3 firstprivate arrays BasePointersArray,
4789  // PointersArray and SizesArray. The original variables for these arrays are
4790  // not captured and we get their addresses explicitly.
4791  if ((!IsTargetTask && !Data.FirstprivateVars.empty()) ||
4792  (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
4793  SrcBase = CGF.MakeAddrLValue(
4795  KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
4796  SharedsTy);
4797  }
      // From here on FI iterates the fields inside the privates record.
4798  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
4799  for (const PrivateDataTy &Pair : Privates) {
4800  const VarDecl *VD = Pair.second.PrivateCopy;
4801  const Expr *Init = VD->getAnyInitializer();
      // Skip copies without an initializer; in the ForDup case also skip those
      // whose initializer is not a non-trivial constructor expression.
4802  if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
4803  !CGF.isTrivialInitializer(Init)))) {
4804  LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      // A non-null PrivateElemInit means the copy is initialized from a source
      // lvalue, computed below as SharedRefLValue.
4805  if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
4806  const VarDecl *OriginalVD = Pair.second.Original;
4807  // Check if the variable is the target-based BasePointersArray,
4808  // PointersArray or SizesArray.
4809  LValue SharedRefLValue;
4810  QualType Type = PrivateLValue.getType();
4811  const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
4812  if (IsTargetTask && !SharedField) {
4813  assert(isa<ImplicitParamDecl>(OriginalVD) &&
4814  isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
4815  cast<CapturedDecl>(OriginalVD->getDeclContext())
4816  ->getNumParams() == 0 &&
4817  isa<TranslationUnitDecl>(
4818  cast<CapturedDecl>(OriginalVD->getDeclContext())
4819  ->getDeclContext()) &&
4820  "Expected artificial target data variable.");
4821  SharedRefLValue =
4822  CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
4823  } else {
      // Rebuild the lvalue using the original declaration's alignment.
4824  SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
4825  SharedRefLValue = CGF.MakeAddrLValue(
4826  Address(SharedRefLValue.getPointer(), C.getDeclAlign(OriginalVD)),
4827  SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
4828  SharedRefLValue.getTBAAInfo());
4829  }
4830  if (Type->isArrayType()) {
4831  // Initialize firstprivate array.
4832  if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
4833  // Perform simple memcpy.
4834  CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
4835  } else {
4836  // Initialize firstprivate array using element-by-element
4837  // initialization.
4839  PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
4840  [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
4841  Address SrcElement) {
4842  // Clean up any temporaries needed by the initialization.
4843  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4844  InitScope.addPrivate(
4845  Elem, [SrcElement]() -> Address { return SrcElement; });
4846  (void)InitScope.Privatize();
4847  // Emit initialization for single element.
4849  CGF, &CapturesInfo);
4850  CGF.EmitAnyExprToMem(Init, DestElement,
4851  Init->getType().getQualifiers(),
4852  /*IsInitializer=*/false);
4853  });
4854  }
4855  } else {
      // Non-array case: map Elem to the source address, then emit the
      // initializer into the private field.
4856  CodeGenFunction::OMPPrivateScope InitScope(CGF);
4857  InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
4858  return SharedRefLValue.getAddress();
4859  });
4860  (void)InitScope.Privatize();
4861  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
4862  CGF.EmitExprAsInit(Init, VD, PrivateLValue,
4863  /*capturedByInit=*/false);
4864  }
4865  } else {
4866  CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
4867  }
4868  }
      // Advance to the next field even when this private was skipped.
4869  ++FI;
4870  }
4871 }
4872 
4873 /// Check if duplication function is required for taskloops.
     /// \return true if any private copy in \p Privates has an initializer that
     /// is a non-trivial constructor expression, false otherwise.
4875  ArrayRef<PrivateDataTy> Privates) {
4876  bool InitRequired = false;
4877  for (const PrivateDataTy &Pair : Privates) {
4878  const VarDecl *VD = Pair.second.PrivateCopy;
4879  const Expr *Init = VD->getAnyInitializer();
4880  InitRequired = InitRequired || (Init && isa<CXXConstructExpr>(Init) &&
4881  !CGF.isTrivialInitializer(Init));
      // Stop at the first private copy that requires initialization.
4882  if (InitRequired)
4883  break;
4884  }
4885  return InitRequired;
4886 }
4887 
4888 
4889 /// Emit task_dup function (for initialization of
4890 /// private/firstprivate/lastprivate vars and last_iter flag)
4891 /// \code
4892 /// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
4893 /// lastpriv) {
4894 /// // setup lastprivate flag
4895 /// task_dst->last = lastpriv;
4896 /// // could be constructor calls here...
4897 /// }
4898 /// \endcode
4899 static llvm::Value *
4901  const OMPExecutableDirective &D,
4902  QualType KmpTaskTWithPrivatesPtrQTy,
4903  const RecordDecl *KmpTaskTWithPrivatesQTyRD,
4904  const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
4905  QualType SharedsPtrTy, const OMPTaskDataTy &Data,
4906  ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
4907  ASTContext &C = CGM.getContext();
      // Three parameters: destination task, source task and the lastpriv flag.
4908  FunctionArgList Args;
4909  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4910  KmpTaskTWithPrivatesPtrQTy,
4912  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
4913  KmpTaskTWithPrivatesPtrQTy,
4915  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
4917  Args.push_back(&DstArg);
4918  Args.push_back(&SrcArg);
4919  Args.push_back(&LastprivArg);
4920  const auto &TaskDupFnInfo =
4922  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
4923  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
4924  auto *TaskDup = llvm::Function::Create(
4925  TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
4926  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
4927  TaskDup->setDoesNotRecurse();
4928  CodeGenFunction CGF(CGM);
4929  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
4930  Loc);
4931 
      // TDBase refers to the destination task record.
4932  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4933  CGF.GetAddrOfLocalVar(&DstArg),
4934  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4935  // task_dst->liter = lastpriv;
4936  if (WithLastIter) {
4937  auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
4939  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4940  LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
4941  llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
4942  CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
4943  CGF.EmitStoreOfScalar(Lastpriv, LILVal);
4944  }
4945 
4946  // Emit initial values for private copies (if any).
4947  assert(!Privates.empty());
      // The shareds pointer is read from the source task, and only when there are
      // firstprivate variables; otherwise it stays invalid.
4948  Address KmpTaskSharedsPtr = Address::invalid();
4949  if (!Data.FirstprivateVars.empty()) {
      // This inner TDBase shadows the outer one and refers to the source task.
4950  LValue TDBase = CGF.EmitLoadOfPointerLValue(
4951  CGF.GetAddrOfLocalVar(&SrcArg),
4952  KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
4954  TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
4955  KmpTaskSharedsPtr = Address(
4957  Base, *std::next(KmpTaskTQTyRD->field_begin(),
4958  KmpTaskTShareds)),
4959  Loc),
4960  CGF.getNaturalTypeAlignment(SharedsTy));
4961  }
      // Initialize the destination task's privates (outer TDBase) in ForDup mode.
4962  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
4963  SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
4964  CGF.FinishFunction();
4965  return TaskDup;
4966 }
4967 
4968 /// Checks if destructor function is required to be generated.
4969 /// \return true if cleanups are required, false otherwise.
     ///
     /// Cleanups are required when any field of the privates record (the second
     /// field of \p KmpTaskTWithPrivatesQTyRD) has a destructed type. The second
     /// field is dereferenced without checking that it exists.
4970 static bool
4971 checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD) {
4972  bool NeedsCleanup = false;
4973  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
4974  const auto *PrivateRD = cast<RecordDecl>(FI->getType()->getAsTagDecl());
4975  for (const FieldDecl *FD : PrivateRD->fields()) {
4976  NeedsCleanup = NeedsCleanup || FD->getType().isDestructedType();
      // Stop at the first field that needs cleanup.
4977  if (NeedsCleanup)
4978  break;
4979  }
4980  return NeedsCleanup;
4981 }
4982 
// Builds and initializes the task record for directive D.
// Visible steps: gather the private, firstprivate and lastprivate copies and
// sort them by alignment, build the per-task record type, emit the privates
// mapping function and the proxy task entry, emit the runtime allocation call,
// then fill the allocated record (shareds copy, privates initialization,
// destructors function pointer, priority).
// Returns a TaskResultTy carrying NewTask, TaskEntry, NewTaskNewTaskTTy, TDBase
// and KmpTaskTQTyRD (plus TaskDupFn when it is generated).
// NOTE(review): the inner listing numbers skip lines inside this function
// (4984, 4989, 5028, 5030, 5033, 5036-5037, 5040, 5043, 5048, 5112, 5129, 5132,
// 5135, 5146, 5162, 5183). The declarator line and several statements appear
// to be absent from this copy -- confirm against the upstream file.
4983 CGOpenMPRuntime::TaskResultTy
4985  const OMPExecutableDirective &D,
4986  llvm::Function *TaskFunction, QualType SharedsTy,
4987  Address Shareds, const OMPTaskDataTy &Data) {
4988  ASTContext &C = CGM.getContext();
4990  // Aggregate privates and sort them by the alignment.
// NOTE(review): the declaration of 'Privates' is not visible here (listing
// line 4989 is absent).
4991  auto I = Data.PrivateCopies.begin();
4992  for (const Expr *E : Data.PrivateVars) {
4993  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
4994  Privates.emplace_back(
4995  C.getDeclAlign(VD),
4996  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
4997  /*PrivateElemInit=*/nullptr));
4998  ++I;
4999  }
5000  I = Data.FirstprivateCopies.begin();
5001  auto IElemInitRef = Data.FirstprivateInits.begin();
// Firstprivates are the only kind that also records an element-init variable.
5002  for (const Expr *E : Data.FirstprivateVars) {
5003  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5004  Privates.emplace_back(
5005  C.getDeclAlign(VD),
5006  PrivateHelpersTy(
5007  VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5008  cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
5009  ++I;
5010  ++IElemInitRef;
5011  }
5012  I = Data.LastprivateCopies.begin();
5013  for (const Expr *E : Data.LastprivateVars) {
5014  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
5015  Privates.emplace_back(
5016  C.getDeclAlign(VD),
5017  PrivateHelpersTy(VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
5018  /*PrivateElemInit=*/nullptr));
5019  ++I;
5020  }
// Descending alignment order; the stable sort keeps the insertion order among
// entries with equal alignment.
5021  llvm::stable_sort(Privates, [](PrivateDataTy L, PrivateDataTy R) {
5022  return L.first > R.first;
5023  });
5024  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
5025  // Build type kmp_routine_entry_t (if not built yet).
5026  emitKmpRoutineEntryT(KmpInt32Ty);
5027  // Build type kmp_task_t (if not built yet).
// NOTE(review): the opening condition of this if/else and the assignments to
// the cached record types are among the absent listing lines, so the branch
// structure below is incomplete in this copy.
5029  if (SavedKmpTaskloopTQTy.isNull()) {
5031  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5032  }
5034  } else {
5035  assert((D.getDirectiveKind() == OMPD_task ||
5038  "Expected taskloop, task or target directive");
5039  if (SavedKmpTaskTQTy.isNull()) {
5041  CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
5042  }
5044  }
5045  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
5046  // Build particular struct kmp_task_t for the given task.
5047  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
5049  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
5050  QualType KmpTaskTWithPrivatesPtrQTy =
5051  C.getPointerType(KmpTaskTWithPrivatesQTy);
5052  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
5053  llvm::Type *KmpTaskTWithPrivatesPtrTy =
5054  KmpTaskTWithPrivatesTy->getPointerTo();
5055  llvm::Value *KmpTaskTWithPrivatesTySize =
5056  CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
5057  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
5058 
5059  // Emit initial values for private copies (if any).
5060  llvm::Value *TaskPrivatesMap = nullptr;
// The privates-map pointer type is taken from the fourth parameter of the
// outlined task function.
5061  llvm::Type *TaskPrivatesMapTy =
5062  std::next(TaskFunction->arg_begin(), 3)->getType();
5063  if (!Privates.empty()) {
5064  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
5065  TaskPrivatesMap = emitTaskPrivateMappingFunction(
5066  CGM, Loc, Data.PrivateVars, Data.FirstprivateVars, Data.LastprivateVars,
5067  FI->getType(), Privates);
5068  TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5069  TaskPrivatesMap, TaskPrivatesMapTy);
5070  } else {
// No privates: pass a null mapping function of the expected pointer type.
5071  TaskPrivatesMap = llvm::ConstantPointerNull::get(
5072  cast<llvm::PointerType>(TaskPrivatesMapTy));
5073  }
5074  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
5075  // kmp_task_t *tt);
5076  llvm::Function *TaskEntry = emitProxyTaskFunction(
5077  CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5078  KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
5079  TaskPrivatesMap);
5080 
5081  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
5082  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
5083  // kmp_routine_entry_t *task_entry);
5084  // Task flags. Format is taken from
5085  // https://github.com/llvm/llvm-project/blob/master/openmp/runtime/src/kmp.h,
5086  // description of kmp_tasking_flags struct.
5087  enum {
5088  TiedFlag = 0x1,
5089  FinalFlag = 0x2,
5090  DestructorsFlag = 0x8,
5091  PriorityFlag = 0x20
5092  };
// 'Flags' collects the bits known at compile time; the 'final' bit may depend
// on a runtime value and is merged in through TaskFlags below.
5093  unsigned Flags = Data.Tied ? TiedFlag : 0;
5094  bool NeedsCleanup = false;
5095  if (!Privates.empty()) {
5096  NeedsCleanup = checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD);
5097  if (NeedsCleanup)
5098  Flags = Flags | DestructorsFlag;
5099  }
5100  if (Data.Priority.getInt())
5101  Flags = Flags | PriorityFlag;
// When Data.Final carries a pointer, the final flag is chosen with a select on
// that value; otherwise its integer part decides at compile time.
5102  llvm::Value *TaskFlags =
5103  Data.Final.getPointer()
5104  ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
5105  CGF.Builder.getInt32(FinalFlag),
5106  CGF.Builder.getInt32(/*C=*/0))
5107  : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
5108  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
5109  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
5110  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
5111  getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
5113  TaskEntry, KmpRoutineEntryPtrTy)};
5114  llvm::Value *NewTask;
// With a 'nowait' clause the device id is appended to the allocation
// arguments before the runtime call is emitted.
// NOTE(review): the callee lines of both EmitRuntimeCall statements below are
// absent from this copy (listing lines 5129 and 5132).
5115  if (D.hasClausesOfKind<OMPNowaitClause>()) {
5116  // Check if we have any device clause associated with the directive.
5117  const Expr *Device = nullptr;
5118  if (auto *C = D.getSingleClause<OMPDeviceClause>())
5119  Device = C->getDevice();
5120  // Emit device ID if any otherwise use default value.
5121  llvm::Value *DeviceID;
5122  if (Device)
5123  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
5124  CGF.Int64Ty, /*isSigned=*/true);
5125  else
5126  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
5127  AllocArgs.push_back(DeviceID);
5128  NewTask = CGF.EmitRuntimeCall(
5130  } else {
5131  NewTask = CGF.EmitRuntimeCall(
5133  }
5134  llvm::Value *NewTaskNewTaskTTy =
5136  NewTask, KmpTaskTWithPrivatesPtrTy);
5137  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
5138  KmpTaskTWithPrivatesQTy);
// TDBase addresses the first field of the task-with-privates record.
5139  LValue TDBase =
5140  CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
5141  // Fill the data in the resulting kmp_task_t record.
5142  // Copy shareds if there are any.
5143  Address KmpTaskSharedsPtr = Address::invalid();
// The copy is emitted only when the shareds record has at least one field;
// otherwise KmpTaskSharedsPtr stays invalid.
5144  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
5145  KmpTaskSharedsPtr =
5147  CGF.EmitLValueForField(
5148  TDBase, *std::next(KmpTaskTQTyRD->field_begin(),
5149  KmpTaskTShareds)),
5150  Loc),
5151  CGF.getNaturalTypeAlignment(SharedsTy));
5152  LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
5153  LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
5154  CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
5155  }
5156  // Emit initial values for private copies (if any).
5157  TaskResultTy Result;
5158  if (!Privates.empty()) {
5159  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
5160  SharedsTy, SharedsPtrTy, Data, Privates,
5161  /*ForDup=*/false);
// NOTE(review): the first half of the condition guarding the task-dup function
// is absent here (listing line 5162); only its second operand is visible.
5163  (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
5164  Result.TaskDupFn = emitTaskDupFunction(
5165  CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
5166  KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
5167  /*WithLastIter=*/!Data.LastprivateVars.empty());
5168  }
5169  }
5170  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
5171  enum { Priority = 0, Destructors = 1 };
5172  // Provide pointer to function with destructors for privates.
5173  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
5174  const RecordDecl *KmpCmplrdataUD =
5175  (*FI)->getType()->getAsUnionType()->getDecl();
5176  if (NeedsCleanup) {
5177  llvm::Value *DestructorFn = emitDestructorsFunction(
5178  CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
5179  KmpTaskTWithPrivatesQTy);
5180  LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
5181  LValue DestructorsLV = CGF.EmitLValueForField(
5182  Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
5184  DestructorFn, KmpRoutineEntryPtrTy),
5185  DestructorsLV);
5186  }
5187  // Set priority.
// The priority is stored through the Data2 field, using the union declaration
// obtained from the Data1 field above.
5188  if (Data.Priority.getInt()) {
5189  LValue Data2LV = CGF.EmitLValueForField(
5190  TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
5191  LValue PriorityLV = CGF.EmitLValueForField(
5192  Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
5193  CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
5194  }
5195  Result.NewTask = NewTask;
5196  Result.TaskEntry = TaskEntry;
5197  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
5198  Result.TDBase = TDBase;
5199  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
5200  return Result;
5201 }
5202 
// Emits the code that launches the task prepared by emitTaskInit for
// directive D.
// Visible steps: build the kmp_depend_info array for Data.Dependences (if
// any), prepare the argument lists for the runtime calls, and define two code
// generators. ThenCodeGen hands the task to the runtime. ElseCodeGen waits on
// the dependences and calls the task entry directly between
// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0. With an 'if'
// clause both are passed to emitOMPIfClause; without one only ThenCodeGen is
// emitted.
// NOTE(review): the declarator line of this function is absent from this copy
// (listing line 5203), as is the callee of the with-dependences runtime call
// (listing line 5337) -- confirm against the upstream file.
5204  const OMPExecutableDirective &D,
5205  llvm::Function *TaskFunction,
5206  QualType SharedsTy, Address Shareds,
5207  const Expr *IfCond,
5208  const OMPTaskDataTy &Data) {
5209  if (!CGF.HaveInsertPoint())
5210  return;
5211 
5212  TaskResultTy Result =
5213  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5214  llvm::Value *NewTask = Result.NewTask;
5215  llvm::Function *TaskEntry = Result.TaskEntry;
5216  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
5217  LValue TDBase = Result.TDBase;
5218  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
5219  ASTContext &C = CGM.getContext();
5220  // Process list of dependences.
5221  Address DependenciesArray = Address::invalid();
5222  unsigned NumDependencies = Data.Dependences.size();
5223  if (NumDependencies) {
5224  // Dependence kind for RTL.
5225  enum RTLDependenceKindTy { DepIn = 0x01, DepInOut = 0x3, DepMutexInOutSet = 0x4 };
// Field positions inside the kmp_depend_info record built below.
5226  enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
5227  RecordDecl *KmpDependInfoRD;
// The flags field is an unsigned integer as wide as the target's bool type.
5228  QualType FlagsTy =
5229  C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
5230  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
// The record is built once and cached in KmpDependInfoTy; later calls reuse it.
5231  if (KmpDependInfoTy.isNull()) {
5232  KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
5233  KmpDependInfoRD->startDefinition();
5234  addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
5235  addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
5236  addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
5237  KmpDependInfoRD->completeDefinition();
5238  KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
5239  } else {
5240  KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
5241  }
5242  // Define type kmp_depend_info[<Dependences.size()>];
5243  QualType KmpDependInfoArrayTy = C.getConstantArrayType(
5244  KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
5245  ArrayType::Normal, /*IndexTypeQuals=*/0);
5246  // kmp_depend_info[<Dependences.size()>] deps;
5247  DependenciesArray =
5248  CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
5249  for (unsigned I = 0; I < NumDependencies; ++I) {
5250  const Expr *E = Data.Dependences[I].second;
5251  LValue Addr = CGF.EmitLValue(E);
5252  llvm::Value *Size;
5253  QualType Ty = E->getType();
// For an array section the length is the address one element past the upper
// bound minus the address of the lower bound; otherwise it is the type size.
5254  if (const auto *ASE =
5255  dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
5256  LValue UpAddrLVal =
5257  CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
5258  llvm::Value *UpAddr =
5259  CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
5260  llvm::Value *LowIntPtr =
5261  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
5262  llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
5263  Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
5264  } else {
5265  Size = CGF.getTypeSize(Ty);
5266  }
5267  LValue Base = CGF.MakeAddrLValue(
5268  CGF.Builder.CreateConstArrayGEP(DependenciesArray, I),
5269  KmpDependInfoTy);
5270  // deps[i].base_addr = &<Dependences[i].second>;
5271  LValue BaseAddrLVal = CGF.EmitLValueForField(
5272  Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
5273  CGF.EmitStoreOfScalar(
5274  CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
5275  BaseAddrLVal);
5276  // deps[i].len = sizeof(<Dependences[i].second>);
5277  LValue LenLVal = CGF.EmitLValueForField(
5278  Base, *std::next(KmpDependInfoRD->field_begin(), Len));
5279  CGF.EmitStoreOfScalar(Size, LenLVal);
5280  // deps[i].flags = <Dependences[i].first>;
5281  RTLDependenceKindTy DepKind;
5282  switch (Data.Dependences[I].first) {
5283  case OMPC_DEPEND_in:
5284  DepKind = DepIn;
5285  break;
5286  // Out and InOut dependencies must use the same code.
5287  case OMPC_DEPEND_out:
5288  case OMPC_DEPEND_inout:
5289  DepKind = DepInOut;
5290  break;
5291  case OMPC_DEPEND_mutexinoutset:
5292  DepKind = DepMutexInOutSet;
5293  break;
// The remaining kinds are treated as unreachable for a task dependence.
5294  case OMPC_DEPEND_source:
5295  case OMPC_DEPEND_sink:
5296  case OMPC_DEPEND_unknown:
5297  llvm_unreachable("Unknown task dependence type");
5298  }
5299  LValue FlagsLVal = CGF.EmitLValueForField(
5300  Base, *std::next(KmpDependInfoRD->field_begin(), Flags));
5301  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
5302  FlagsLVal);
5303  }
// Replace the array address by a void* pointer to its first element.
5304  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5305  CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0), CGF.VoidPtrTy);
5306  }
5307 
5308  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5309  // libcall.
5310  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
5311  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
5312  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
5313  // list is not empty
5314  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5315  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
5316  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
// DepTaskArgs is filled only when there are dependences; the no-alias list is
// always passed as a count of 0 and a null pointer.
5317  llvm::Value *DepTaskArgs[7];
5318  if (NumDependencies) {
5319  DepTaskArgs[0] = UpLoc;
5320  DepTaskArgs[1] = ThreadID;
5321  DepTaskArgs[2] = NewTask;
5322  DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
5323  DepTaskArgs[4] = DependenciesArray.getPointer();
5324  DepTaskArgs[5] = CGF.Builder.getInt32(0);
5325  DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5326  }
// The argument arrays are captured by reference in both generators below.
5327  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, NumDependencies,
5328  &TaskArgs,
5329  &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
// For an untied task the part id field is set to 0 before the task is handed
// to the runtime.
5330  if (!Data.Tied) {
5331  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
5332  LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
5333  CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
5334  }
5335  if (NumDependencies) {
5336  CGF.EmitRuntimeCall(
5338  } else {
5339  CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
5340  TaskArgs);
5341  }
5342  // Check if parent region is untied and build return for untied task;
5343  if (auto *Region =
5344  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
5345  Region->emitUntiedSwitch(CGF);
5346  };
5347 
// Same layout as DepTaskArgs but without the task pointer.
5348  llvm::Value *DepWaitTaskArgs[6];
5349  if (NumDependencies) {
5350  DepWaitTaskArgs[0] = UpLoc;
5351  DepWaitTaskArgs[1] = ThreadID;
5352  DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
5353  DepWaitTaskArgs[3] = DependenciesArray.getPointer();
5354  DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
5355  DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
5356  }
5357  auto &&ElseCodeGen = [&TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
5358  NumDependencies, &DepWaitTaskArgs,
5359  Loc](CodeGenFunction &CGF, PrePostActionTy &) {
5360  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5361  CodeGenFunction::RunCleanupsScope LocalScope(CGF);
5362  // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
5363  // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
5364  // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
5365  // is specified.
5366  if (NumDependencies)
5367  CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
5368  DepWaitTaskArgs);
5369  // Call proxy_task_entry(gtid, new_task);
5370  auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
5371  Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
5372  Action.Enter(CGF);
5373  llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
5374  CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
5375  OutlinedFnArgs);
5376  };
5377 
5378  // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
5379  // kmp_task_t *new_task);
5380  // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
5381  // kmp_task_t *new_task);
5382  RegionCodeGenTy RCG(CodeGen);
5383  CommonActionTy Action(
5384  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_begin_if0), TaskArgs,
5385  RT.createRuntimeFunction(OMPRTL__kmpc_omp_task_complete_if0), TaskArgs);
5386  RCG.setAction(Action);
5387  RCG(CGF);
5388  };
5389 
// Without an 'if' clause only the Then generator is emitted.
5390  if (IfCond) {
5391  emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
5392  } else {
5393  RegionCodeGenTy ThenRCG(ThenCodeGen);
5394  ThenRCG(CGF);
5395  }
5396 }
5397 
// Emits the taskloop launch for loop directive D.
// Visible steps: initialize the task with emitTaskInit, store the initializers
// of the loop's lower-bound, upper-bound and stride variables and the
// reductions pointer into the task record, then assemble the argument list
// described in the __kmpc_taskloop comment below.
// NOTE(review): the declarator line (listing line 5398), the condition that
// selects the task_dup argument (5473) and the statement that consumes
// TaskArgs (5476) are absent from this copy -- confirm against the upstream
// file.
5399  const OMPLoopDirective &D,
5400  llvm::Function *TaskFunction,
5401  QualType SharedsTy, Address Shareds,
5402  const Expr *IfCond,
5403  const OMPTaskDataTy &Data) {
5404  if (!CGF.HaveInsertPoint())
5405  return;
5406  TaskResultTy Result =
5407  emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
5408  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
5409  // libcall.
5410  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
5411  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
5412  // sched, kmp_uint64 grainsize, void *task_dup);
5413  llvm::Value *ThreadID = getThreadID(CGF, Loc);
5414  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
// if_val is the 'if' condition converted to int, or the constant 1 when the
// directive has no 'if' clause.
5415  llvm::Value *IfVal;
5416  if (IfCond) {
5417  IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
5418  /*isSigned=*/true);
5419  } else {
5420  IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
5421  }
5422 
// Lower bound, upper bound and stride are written straight into the task
// record from the initializers of the directive's helper variables.
5423  LValue LBLVal = CGF.EmitLValueForField(
5424  Result.TDBase,
5425  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
5426  const auto *LBVar =
5427  cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
5428  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
5429  /*IsInitializer=*/true);
5430  LValue UBLVal = CGF.EmitLValueForField(
5431  Result.TDBase,
5432  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
5433  const auto *UBVar =
5434  cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
5435  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
5436  /*IsInitializer=*/true);
5437  LValue StLVal = CGF.EmitLValueForField(
5438  Result.TDBase,
5439  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
5440  const auto *StVar =
5441  cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
5442  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
5443  /*IsInitializer=*/true);
5444  // Store reductions address.
5445  LValue RedLVal = CGF.EmitLValueForField(
5446  Result.TDBase,
5447  *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
5448  if (Data.Reductions) {
5449  CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
5450  } else {
// No reductions: the field is null-initialized.
5451  CGF.EmitNullInitialization(RedLVal.getAddress(),
5452  CGF.getContext().VoidPtrTy);
5453  }
// Values passed as the 'sched' argument: NoSchedule when Data.Schedule has no
// value, otherwise NumTasks or Grainsize depending on its integer part.
5454  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
5455  llvm::Value *TaskArgs[] = {
5456  UpLoc,
5457  ThreadID,
5458  Result.NewTask,
5459  IfVal,
5460  LBLVal.getPointer(),
5461  UBLVal.getPointer(),
5462  CGF.EmitLoadOfScalar(StLVal, Loc),
5463  llvm::ConstantInt::getSigned(
5464  CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
5465  llvm::ConstantInt::getSigned(
5466  CGF.IntTy, Data.Schedule.getPointer()
5467  ? Data.Schedule.getInt() ? NumTasks : Grainsize
5468  : NoSchedule),
5469  Data.Schedule.getPointer()
5470  ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
5471  /*isSigned=*/false)
5472  : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
5474  Result.TaskDupFn, CGF.VoidPtrTy)
5475  : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
5477 }
5478 
5479 /// Emit reduction operation for each element of array (required for
5480 /// array sections) LHS op = RHS.
5481 /// \param Type Type of array.
5482 /// \param LHSVar Variable on the left side of the reduction operation
5483 /// (references element of array in original variable).
5484 /// \param RHSVar Variable on the right side of the reduction operation
5485 /// (references element of array in original variable).
5486 /// \param RedOpGen Generator of reduction operation with use of LHSVar and
5487 /// RHSVar.
5489  CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
5490  const VarDecl *RHSVar,
5491  const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
5492  const Expr *, const Expr *)> &RedOpGen,
5493  const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
5494  const Expr *UpExpr = nullptr) {
5495  // Perform element-by-element initialization.
5496  QualType ElementTy;
5497  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
5498  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);
5499 
5500  // Drill down to the base element type on both arrays.
5501  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
5502  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);
5503 
5504  llvm::Value *RHSBegin = RHSAddr.getPointer();
5505  llvm::Value *LHSBegin = LHSAddr.getPointer();
5506  // Cast from pointer to array type to pointer to single element.
5507  llvm::Value *LHSEnd = CGF.Builder.CreateGEP(LHSBegin, NumElements);
5508  // The basic structure here is a while-do loop.
5509  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
5510  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
5511  llvm::Value *IsEmpty =
5512  CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
5513  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
5514 
5515  // Enter the loop body, making that address the current address.
5516  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
5517  CGF.EmitBlock(BodyBB);
5518 
5519  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);
5520 
5521  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
5522  RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
5523  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
5524  Address RHSElementCurrent =
5525  Address(RHSElementPHI,
5526  RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5527 
5528  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
5529  LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
5530  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
5531  Address LHSElementCurrent =
5532  Address(LHSElementPHI,
5533  LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));
5534 
5535  // Emit copy.
5537  Scope.addPrivate(LHSVar, [=]() { return LHSElementCurrent; });
5538  Scope.addPrivate(RHSVar, [=]() { return RHSElementCurrent; });
5539  Scope.Privatize();
5540  RedOpGen(CGF, XExpr, EExpr, UpExpr);
5541  Scope.ForceCleanup();
5542 
5543  // Shift the address forward by one element.
5544  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
5545  LHSElementPHI, /*Idx0=*/1, "omp.arraycpy.dest.element");
5546  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
5547  RHSElementPHI, /*Idx0=*/1, "omp.arraycpy.src.element");
5548  // Check whether we've reached the end.
5549  llvm::Value *Done =
5550  CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
5551  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
5552  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
5553  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());
5554 
5555  // Done.
5556  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
5557 }
5558 
5559 /// Emit reduction combiner. If the combiner is a simple expression emit it as
5560 /// is, otherwise consider it as combiner of UDR decl and emit it as a call of
5561 /// UDR combiner function.
5563  const Expr *ReductionOp) {
5564  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
5565  if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
5566  if (const auto *DRE =
5567  dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
5568  if (const auto *DRD =
5569  dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
5570  std::pair<llvm::Function *, llvm::Function *> Reduction =
5571  CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
5572  RValue Func = RValue::get(Reduction.first);
5573  CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
5574  CGF.EmitIgnoredExpr(ReductionOp);
5575  return;
5576  }
5577  CGF.EmitIgnoredExpr(ReductionOp);
5578 }
5579 
// Emits the helper 'void reduction_func(void *LHSArg, void *RHSArg)' and
// returns it. The two arguments are cast to ArgsType and treated as arrays of
// pointers; for every entry of ReductionOps the LHS/RHS variables are remapped
// to the corresponding array elements and the combiner is emitted.
// NOTE(review): several lines are absent from this copy (listing lines 5580,
// 5589, 5591, 5595, 5597-5598, 5600, 5607, 5610, 5617, 5640, 5654), including
// the declarator line, the creation of 'Fn', and the declarations of 'LHS',
// 'RHS' and 'Scope' -- confirm against the upstream file.
5581  SourceLocation Loc, llvm::Type *ArgsType, ArrayRef<const Expr *> Privates,
5582  ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
5583  ArrayRef<const Expr *> ReductionOps) {
5584  ASTContext &C = CGM.getContext();
5585 
5586  // void reduction_func(void *LHSArg, void *RHSArg);
5587  FunctionArgList Args;
5588  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5590  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
5592  Args.push_back(&LHSArg);
5593  Args.push_back(&RHSArg);
5594  const auto &CGFI =
5596  std::string Name = getName({"omp", "reduction", "reduction_func"});
5599  &CGM.getModule());
5601  Fn->setDoesNotRecurse();
5602  CodeGenFunction CGF(CGM);
5603  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
5604 
5605  // Dst = (void*[n])(LHSArg);
5606  // Src = (void*[n])(RHSArg);
5608  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
5609  ArgsType), CGF.getPointerAlign());
5611  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
5612  ArgsType), CGF.getPointerAlign());
5613 
5614  // ...
5615  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
5616  // ...
// First pass: register the remapping of every LHS/RHS variable. Idx is the
// slot in the pointer arrays; it runs ahead of I because a variably modified
// private occupies one extra slot.
5618  auto IPriv = Privates.begin();
5619  unsigned Idx = 0;
5620  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
5621  const auto *RHSVar =
5622  cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
5623  Scope.addPrivate(RHSVar, [&CGF, RHS, Idx, RHSVar]() {
5624  return emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar);
5625  });
5626  const auto *LHSVar =
5627  cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
5628  Scope.addPrivate(LHSVar, [&CGF, LHS, Idx, LHSVar]() {
5629  return emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar);
5630  });
5631  QualType PrivTy = (*IPriv)->getType();
5632  if (PrivTy->isVariablyModifiedType()) {
5633  // Get array size and emit VLA type.
// The extra slot is loaded from the LHS array and converted from pointer to
// integer to serve as the VLA size.
5634  ++Idx;
5635  Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
5636  llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
5637  const VariableArrayType *VLA =
5638  CGF.getContext().getAsVariableArrayType(PrivTy);
5639  const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
5641  CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
5642  CGF.EmitVariablyModifiedType(PrivTy);
5643  }
5644  }
5645  Scope.Privatize();
// Second pass: emit one combiner per reduction operation, walking Privates,
// LHSExprs and RHSExprs in lockstep.
5646  IPriv = Privates.begin();
5647  auto ILHS = LHSExprs.begin();
5648  auto IRHS = RHSExprs.begin();
5649  for (const Expr *E : ReductionOps) {
5650  if ((*IPriv)->getType()->isArrayType()) {
5651  // Emit reduction for array section.
5652  const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5653  const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
// NOTE(review): the callee of the call whose arguments follow is on the absent
// listing line 5654.
5655  CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5656  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5657  emitReductionCombiner(CGF, E);
5658  });
5659  } else {
5660  // Emit reduction for array subscript or single variable.
5661  emitReductionCombiner(CGF, E);
5662  }
5663  ++IPriv;
5664  ++ILHS;
5665  ++IRHS;
5666  }
5667  Scope.ForceCleanup();
5668  CGF.FinishFunction();
5669  return Fn;
5670 }
5671 
5673  const Expr *ReductionOp,
5674  const Expr *PrivateRef,
5675  const DeclRefExpr *LHS,
5676  const DeclRefExpr *RHS) {
5677  if (PrivateRef->getType()->isArrayType()) {
5678  // Emit reduction for array section.
5679  const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
5680  const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
5682  CGF, PrivateRef->getType(), LHSVar, RHSVar,
5683  [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
5684  emitReductionCombiner(CGF, ReductionOp);
5685  });
5686  } else {
5687  // Emit reduction for array subscript or single variable.
5688  emitReductionCombiner(CGF, ReductionOp);
5689  }
5690 }
5691 
5693  ArrayRef<const Expr *> Privates,
5694  ArrayRef<const Expr *> LHSExprs,
5695  ArrayRef<const Expr *> RHSExprs,
5696  ArrayRef<const Expr *> ReductionOps,
5697  ReductionOptionsTy Options) {
5698  if (!CGF.HaveInsertPoint())
5699  return;
5700 
5701  bool WithNowait = Options.WithNowait;
5702  bool SimpleReduction = Options.SimpleReduction;
5703 
5704  // Next code should be emitted for reduction:
5705  //
5706  // static kmp_critical_name lock = { 0 };
5707  //
5708  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5709  // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5710  // ...
5711  // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5712  // *(Type<n>-1*)rhs[<n>-1]);
5713  // }
5714  //
5715  // ...
5716  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5717  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5718  // RedList, reduce_func, &<lock>)) {
5719  // case 1:
5720  // ...
5721  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5722  // ...
5723  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5724  // break;
5725  // case 2:
5726  // ...
5727  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5728  // ...
5729  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5730  // break;
5731  // default:;
5732  // }
5733  //
5734  // if SimpleReduction is true, only the next code is generated:
5735  // ...
5736  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5737  // ...
5738 
5739  ASTContext &C = CGM.getContext();
5740 
5741  if (SimpleReduction) {
5743  auto IPriv = Privates.begin();
5744  auto ILHS = LHSExprs.begin();
5745  auto IRHS = RHSExprs.begin();
5746  for (const Expr *E : ReductionOps) {
5747  emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5748  cast<DeclRefExpr>(*IRHS));
5749  ++IPriv;
5750  ++ILHS;
5751  ++IRHS;
5752  }
5753  return;
5754  }
5755 
5756  // 1. Build a list of reduction variables.
5757  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5758  auto Size = RHSExprs.size();
5759  for (const Expr *E : Privates) {
5760  if (E->getType()->isVariablyModifiedType())
5761  // Reserve place for array size.
5762  ++Size;
5763  }
5764  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5765  QualType ReductionArrayTy =
5767  /*IndexTypeQuals=*/0);
5768  Address ReductionList =
5769  CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
5770  auto IPriv = Privates.begin();
5771  unsigned Idx = 0;
5772  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5773  Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5774  CGF.Builder.CreateStore(
5776  CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
5777  Elem);
5778  if ((*IPriv)->getType()->isVariablyModifiedType()) {
5779  // Store array size.
5780  ++Idx;
5781  Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
5782  llvm::Value *Size = CGF.Builder.CreateIntCast(
5783  CGF.getVLASize(
5784  CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
5785  .NumElts,
5786  CGF.SizeTy, /*isSigned=*/false);
5787  CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
5788  Elem);
5789  }
5790  }
5791 
5792  // 2. Emit reduce_func().
5793  llvm::Function *ReductionFn = emitReductionFunction(
5794  Loc, CGF.ConvertTypeForMem(ReductionArrayTy)->getPointerTo(), Privates,
5795  LHSExprs, RHSExprs, ReductionOps);
5796 
5797  // 3. Create static kmp_critical_name lock = { 0 };
5798  std::string Name = getName({"reduction"});
5799  llvm::Value *Lock = getCriticalRegionLock(Name);
5800 
5801  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5802  // RedList, reduce_func, &<lock>);
5803  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
5804  llvm::Value *ThreadId = getThreadID(CGF, Loc);
5805  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
5807  ReductionList.getPointer(), CGF.VoidPtrTy);
5808  llvm::Value *Args[] = {
5809  IdentTLoc, // ident_t *<loc>
5810  ThreadId, // i32 <gtid>
5811  CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
5812  ReductionArrayTySize, // size_type sizeof(RedList)
5813  RL, // void *RedList
5814  ReductionFn, // void (*) (void *, void *) <reduce_func>
5815  Lock // kmp_critical_name *&<lock>
5816  };
5817  llvm::Value *Res = CGF.EmitRuntimeCall(
5820  Args);
5821 
5822  // 5. Build switch(res)
5823  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
5824  llvm::SwitchInst *SwInst =
5825  CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);
5826 
5827  // 6. Build case 1:
5828  // ...
5829  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5830  // ...
5831  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5832  // break;
5833  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
5834  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
5835  CGF.EmitBlock(Case1BB);
5836 
5837  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5838  llvm::Value *EndArgs[] = {
5839  IdentTLoc, // ident_t *<loc>
5840  ThreadId, // i32 <gtid>
5841  Lock // kmp_critical_name *&<lock>
5842  };
5843  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
5844  CodeGenFunction &CGF, PrePostActionTy &Action) {
5845  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5846  auto IPriv = Privates.begin();
5847  auto ILHS = LHSExprs.begin();
5848  auto IRHS = RHSExprs.begin();
5849  for (const Expr *E : ReductionOps) {
5850  RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
5851  cast<DeclRefExpr>(*IRHS));
5852  ++IPriv;
5853  ++ILHS;
5854  ++IRHS;
5855  }
5856  };
5857  RegionCodeGenTy RCG(CodeGen);
5858  CommonActionTy Action(
5859  nullptr, llvm::None,
5862  EndArgs);
5863  RCG.setAction(Action);
5864  RCG(CGF);
5865 
5866  CGF.EmitBranch(DefaultBB);
5867 
5868  // 7. Build case 2:
5869  // ...
5870  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5871  // ...
5872  // break;
5873  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
5874  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
5875  CGF.EmitBlock(Case2BB);
5876 
5877  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
5878  CodeGenFunction &CGF, PrePostActionTy &Action) {
5879  auto ILHS = LHSExprs.begin();
5880  auto IRHS = RHSExprs.begin();
5881  auto IPriv = Privates.begin();
5882  for (const Expr *E : ReductionOps) {
5883  const Expr *XExpr = nullptr;
5884  const Expr *EExpr = nullptr;
5885  const Expr *UpExpr = nullptr;
5886  BinaryOperatorKind BO = BO_Comma;
5887  if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
5888  if (BO->getOpcode() == BO_Assign) {
5889  XExpr = BO->getLHS();
5890  UpExpr = BO->getRHS();
5891  }
5892  }
5893  // Try to emit update expression as a simple atomic.
5894  const Expr *RHSExpr = UpExpr;
5895  if (RHSExpr) {
5896  // Analyze RHS part of the whole expression.
5897  if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
5898  RHSExpr->IgnoreParenImpCasts())) {
5899  // If this is a conditional operator, analyze its condition for
5900  // min/max reduction operator.
5901  RHSExpr = ACO->getCond();
5902  }
5903  if (const auto *BORHS =
5904  dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
5905  EExpr = BORHS->getRHS();
5906  BO = BORHS->getOpcode();
5907  }
5908  }
5909  if (XExpr) {
5910  const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5911  auto &&AtomicRedGen = [BO, VD,
5912  Loc](CodeGenFunction &CGF, const Expr *XExpr,
5913  const Expr *EExpr, const Expr *UpExpr) {
5914  LValue X = CGF.EmitLValue(XExpr);
5915  RValue E;
5916  if (EExpr)
5917  E = CGF.EmitAnyExpr(EExpr);
5918  CGF.EmitOMPAtomicSimpleUpdateExpr(
5919  X, E, BO, /*IsXLHSInRHSPart=*/true,
5920  llvm::AtomicOrdering::Monotonic, Loc,
5921  [&CGF, UpExpr, VD, Loc](RValue XRValue) {
5922  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
5923  PrivateScope.addPrivate(
5924  VD, [&CGF, VD, XRValue, Loc]() {
5925  Address LHSTemp = CGF.CreateMemTemp(VD->getType());
5926  CGF.emitOMPSimpleStore(
5927  CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
5928  VD->getType().getNonReferenceType(), Loc);
5929  return LHSTemp;
5930  });
5931  (void)PrivateScope.Privatize();
5932  return CGF.EmitAnyExpr(UpExpr);
5933  });
5934  };
5935  if ((*IPriv)->getType()->isArrayType()) {
5936  // Emit atomic reduction for array section.
5937  const auto *RHSVar =
5938  cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5939  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
5940  AtomicRedGen, XExpr, EExpr, UpExpr);
5941  } else {
5942  // Emit atomic reduction for array subscript or single variable.
5943  AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
5944  }
5945  } else {
5946  // Emit as a critical region.
5947  auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
5948  const Expr *, const Expr *) {
5949  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
5950  std::string Name = RT.getName({"atomic_reduction"});
5951  RT.emitCriticalRegion(
5952  CGF, Name,
5953  [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
5954  Action.Enter(CGF);
5955  emitReductionCombiner(CGF, E);
5956  },
5957  Loc);
5958  };
5959  if ((*IPriv)->getType()->isArrayType()) {
5960  const auto *LHSVar =
5961  cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
5962  const auto *RHSVar =
5963  cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
5964  EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
5965  CritRedGen);
5966  } else {
5967  CritRedGen(CGF, nullptr, nullptr, nullptr);
5968  }
5969  }
5970  ++ILHS;
5971  ++IRHS;
5972  ++IPriv;
5973  }
5974  };
5975  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
5976  if (!WithNowait) {
5977  // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
5978  llvm::Value *EndArgs[] = {
5979  IdentTLoc, // ident_t *<loc>
5980  ThreadId, // i32 <gtid>
5981  Lock // kmp_critical_name *&<lock>
5982  };
5983  CommonActionTy Action(nullptr, llvm::None,
5984  createRuntimeFunction(OMPRTL__kmpc_end_reduce),
5985  EndArgs);
5986  AtomicRCG.setAction(Action);
5987  AtomicRCG(CGF);
5988  } else {
5989  AtomicRCG(CGF);
5990  }
5991 
5992  CGF.EmitBranch(DefaultBB);
5993  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
5994 }
5995 
5996 /// Generates unique name for artificial threadprivate variables.
5997 /// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5998 static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5999  const Expr *Ref) {
6000  SmallString<256> Buffer;
6001  llvm::raw_svector_ostream Out(Buffer);
6002  const clang::DeclRefExpr *DE;
6003  const VarDecl *D = ::getBaseDecl(Ref, DE);
6004  if (!D)
6005  D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
6006  D = D->getCanonicalDecl();
6007  std::string Name = CGM.getOpenMPRuntime().getName(
6008  {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
6009  Out << Prefix << Name << "_"
6010  << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
6011  return Out.str();
6012 }
6013 
6014 /// Emits reduction initializer function:
6015 /// \code
6016 /// void @.red_init(void* %arg) {
6017 /// %0 = bitcast void* %arg to <type>*
6018 /// store <type> <init>, <type>* %0
6019 /// ret void
6020 /// }
6021 /// \endcode
/// \param Loc Source location used when starting the function and when loading
/// the item size.
/// \param RCG Reduction codegen helper queried for the sizes, the reference
/// expression and the initialization of item \p N.
/// \param N Index of the reduction item inside \p RCG.
/// \returns \c Fn, the function whose body is generated here.
// NOTE(review): this listing skips original lines 6022, 6028, 6031 and 6034
// (the start of the signature and parts of three statements); restore them
// from the upstream file before building.
6023  SourceLocation Loc,
6024  ReductionCodeGen &RCG, unsigned N) {
6025  ASTContext &C = CGM.getContext();
6026  FunctionArgList Args;
6027  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6029  Args.emplace_back(&Param);
6030  const auto &FnInfo =
6032  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6033  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
6035  Name, &CGM.getModule());
6036  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6037  Fn->setDoesNotRecurse();
6038  CodeGenFunction CGF(CGM);
6039  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer value stored in the single void* parameter; it is used
  // below as the address of the private item to initialize.
6040  Address PrivateAddr = CGF.EmitLoadOfPointer(
6041  CGF.GetAddrOfLocalVar(&Param),
6042  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6043  llvm::Value *Size = nullptr;
6044  // If the size of the reduction item is non-constant, load it from global
6045  // threadprivate variable.
6046  if (RCG.getSizes(N).second) {
6047  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6048  CGF, CGM.getContext().getSizeType(),
6049  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6050  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6051  CGM.getContext().getSizeType(), Loc);
6052  }
6053  RCG.emitAggregateType(CGF, N, Size);
6054  LValue SharedLVal;
6055  // If initializer uses initializer from declare reduction construct, emit a
6056  // pointer to the address of the original reduction item (required by reduction
6057  // initializer)
6058  if (RCG.usesReductionInitializer(N)) {
6059  Address SharedAddr =
6060  CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6061  CGF, CGM.getContext().VoidPtrTy,
6062  generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6063  SharedAddr = CGF.EmitLoadOfPointer(
6064  SharedAddr,
6065  CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
6066  SharedLVal = CGF.MakeAddrLValue(SharedAddr, CGM.getContext().VoidPtrTy);
6067  } else {
  // No custom initializer: pass an lvalue built from a null void* constant.
6068  SharedLVal = CGF.MakeNaturalAlignAddrLValue(
6069  llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
6070  CGM.getContext().VoidPtrTy);
6071  }
6072  // Emit the initializer:
6073  // %0 = bitcast void* %arg to <type>*
6074  // store <type> <init>, <type>* %0
  // The callback passed last always returns false.
  // NOTE(review): its meaning is defined by ReductionCodeGen::emitInitialization,
  // which is not visible here - confirm.
6075  RCG.emitInitialization(CGF, N, PrivateAddr, SharedLVal,
6076  [](CodeGenFunction &) { return false; });
6077  CGF.FinishFunction();
6078  return Fn;
6079 }
6080 
6081 /// Emits reduction combiner function:
6082 /// \code
6083 /// void @.red_comb(void* %arg0, void* %arg1) {
6084 /// %lhs = bitcast void* %arg0 to <type>*
6085 /// %rhs = bitcast void* %arg1 to <type>*
6086 /// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
6087 /// store <type> %2, <type>* %lhs
6088 /// ret void
6089 /// }
6090 /// \endcode
/// \param Loc Source location used when starting the function and when loading
/// the item size.
/// \param RCG Reduction codegen helper queried for the sizes and the reference
/// expression of item \p N.
/// \param N Index of the reduction item inside \p RCG.
/// \param ReductionOp Combiner expression that forms the function body.
/// \param LHS Must be a DeclRefExpr to a VarDecl; remapped to the first
/// argument.
/// \param RHS Must be a DeclRefExpr to a VarDecl; remapped to the second
/// argument.
/// \param PrivateRef Forwarded unchanged to emitSingleReductionCombiner.
/// \returns \c Fn, the function whose body is generated here.
// NOTE(review): this listing skips original lines 6091, 6102, 6104, 6108 and
// 6111 (the start of the signature and parts of four statements); restore
// them from the upstream file before building.
6092  SourceLocation Loc,
6093  ReductionCodeGen &RCG, unsigned N,
6094  const Expr *ReductionOp,
6095  const Expr *LHS, const Expr *RHS,
6096  const Expr *PrivateRef) {
6097  ASTContext &C = CGM.getContext();
6098  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
6099  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
6100  FunctionArgList Args;
6101  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
6103  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
  // Argument order is fixed here: ParamInOut first, ParamIn second.
6105  Args.emplace_back(&ParamInOut);
6106  Args.emplace_back(&ParamIn);
6107  const auto &FnInfo =
6109  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6110  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
6112  Name, &CGM.getModule());
6113  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6114  Fn->setDoesNotRecurse();
6115  CodeGenFunction CGF(CGM);
6116  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
6117  llvm::Value *Size = nullptr;
6118  // If the size of the reduction item is non-constant, load it from global
6119  // threadprivate variable.
6120  if (RCG.getSizes(N).second) {
6121  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6122  CGF, CGM.getContext().getSizeType(),
6123  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6124  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6125  CGM.getContext().getSizeType(), Loc);
6126  }
6127  RCG.emitAggregateType(CGF, N, Size);
6128  // Remap lhs and rhs variables to the addresses of the function arguments.
6129  // %lhs = bitcast void* %arg0 to <type>*
6130  // %rhs = bitcast void* %arg1 to <type>*
6131  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6132  PrivateScope.addPrivate(LHSVD, [&C, &CGF, &ParamInOut, LHSVD]() {
6133  // Pull out the pointer to the variable.
6134  Address PtrAddr = CGF.EmitLoadOfPointer(
6135  CGF.GetAddrOfLocalVar(&ParamInOut),
6136  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6137  return CGF.Builder.CreateElementBitCast(
6138  PtrAddr, CGF.ConvertTypeForMem(LHSVD->getType()));
6139  });
6140  PrivateScope.addPrivate(RHSVD, [&C, &CGF, &ParamIn, RHSVD]() {
6141  // Pull out the pointer to the variable.
6142  Address PtrAddr = CGF.EmitLoadOfPointer(
6143  CGF.GetAddrOfLocalVar(&ParamIn),
6144  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6145  return CGF.Builder.CreateElementBitCast(
6146  PtrAddr, CGF.ConvertTypeForMem(RHSVD->getType()));
6147  });
6148  PrivateScope.Privatize();
6149  // Emit the combiner body:
6150  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
6151  // store <type> %2, <type>* %lhs
6152  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
6153  CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
6154  cast<DeclRefExpr>(RHS));
6155  CGF.FinishFunction();
6156  return Fn;
6157 }
6158 
6159 /// Emits reduction finalizer function:
6160 /// \code
6161 /// void @.red_fini(void* %arg) {
6162 /// %0 = bitcast void* %arg to <type>*
6163 /// <destroy>(<type>* %0)
6164 /// ret void
6165 /// }
6166 /// \endcode
/// \param Loc Source location used when starting the function and when loading
/// the item size.
/// \param RCG Reduction codegen helper queried for cleanups, sizes and the
/// reference expression of item \p N.
/// \param N Index of the reduction item inside \p RCG.
/// \returns nullptr when \c RCG.needCleanups(N) is false; otherwise \c Fn, the
/// function whose body is generated here.
// NOTE(review): this listing skips original lines 6167, 6175, 6178 and 6181
// (the start of the signature and parts of three statements); restore them
// from the upstream file before building.
6168  SourceLocation Loc,
6169  ReductionCodeGen &RCG, unsigned N) {
  // Nothing to destroy for this item: no finalizer function is generated.
6170  if (!RCG.needCleanups(N))
6171  return nullptr;
6172  ASTContext &C = CGM.getContext();
6173  FunctionArgList Args;
6174  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
6176  Args.emplace_back(&Param);
6177  const auto &FnInfo =
6179  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
6180  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
6182  Name, &CGM.getModule());
6183  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
6184  Fn->setDoesNotRecurse();
6185  CodeGenFunction CGF(CGM);
6186  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Load the pointer value stored in the single void* parameter; it is the
  // address handed to emitCleanups below.
6187  Address PrivateAddr = CGF.EmitLoadOfPointer(
6188  CGF.GetAddrOfLocalVar(&Param),
6189  C.getPointerType(C.VoidPtrTy).castAs<PointerType>());
6190  llvm::Value *Size = nullptr;
6191  // If the size of the reduction item is non-constant, load it from global
6192  // threadprivate variable.
6193  if (RCG.getSizes(N).second) {
6194  Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
6195  CGF, CGM.getContext().getSizeType(),
6196  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6197  Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
6198  CGM.getContext().getSizeType(), Loc);
6199  }
6200  RCG.emitAggregateType(CGF, N, Size);
6201  // Emit the finalizer body:
6202  // <destroy>(<type>* %0)
6203  RCG.emitCleanups(CGF, N, PrivateAddr);
6204  CGF.FinishFunction();
6205  return Fn;
6206 }
6207 
// Fills a temporary array of kmp_task_red_input_t records, one per entry of
// Data.ReductionVars, and returns the result of a runtime call that receives
// the thread id, the record count and (per the comment near the end) the data
// pointer. Returns nullptr when there is no insert point or when
// Data.ReductionVars is empty.
// NOTE(review): this listing skips original lines 6208, 6241, 6247, 6256,
// 6288, 6307 and 6310 (the start of the signature and parts of several
// statements, including the runtime function being called - presumably
// __kmpc_task_reduction_init, as named in the comment below); restore them
// from the upstream file before building.
6209  CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
6210  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
6211  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
6212  return nullptr;
6213 
6214  // Build typedef struct:
6215  // kmp_task_red_input {
6216  // void *reduce_shar; // shared reduction item
6217  // size_t reduce_size; // size of data item
6218  // void *reduce_init; // data initialization routine
6219  // void *reduce_fini; // data finalization routine
6220  // void *reduce_comb; // data combiner routine
6221  // kmp_task_red_flags_t flags; // flags for additional info from compiler
6222  // } kmp_task_red_input_t;
6223  ASTContext &C = CGM.getContext();
6224  RecordDecl *RD = C.buildImplicitRecord("kmp_task_red_input_t");
6225  RD->startDefinition();
  // Fields are added in the same order as in the struct sketched above.
6226  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6227  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
6228  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6229  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6230  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
6231  const FieldDecl *FlagsFD = addFieldToRecordDecl(
6232  C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
6233  RD->completeDefinition();
6234  QualType RDType = C.getRecordType(RD);
6235  unsigned Size = Data.ReductionVars.size();
6236  llvm::APInt ArraySize(/*numBits=*/64, Size);
6237  QualType ArrayRDType = C.getConstantArrayType(
6238  RDType, ArraySize, ArrayType::Normal, /*IndexTypeQuals=*/0);
6239  // kmp_task_red_input_t .rd_input.[Size];
6240  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
6242  Data.ReductionOps);
6243  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
6244  // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
6245  llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
6246  llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
6248  TaskRedInput.getPointer(), Idxs,
6249  /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
6250  ".rd_input.gep.");
6251  LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
6252  // ElemLVal.reduce_shar = &Shareds[Cnt];
6253  LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
6254  RCG.emitSharedLValue(CGF, Cnt);
6255  llvm::Value *CastedShared =
6257  CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
6258  RCG.emitAggregateType(CGF, Cnt);
  // ElemLVal.reduce_size = first value returned by getSizes (SizeValInChars),
  // cast to size_t.
6259  llvm::Value *SizeValInChars;
6260  llvm::Value *SizeVal;
6261  std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
6262  // We use delayed creation/initialization for VLAs, array sections and
6263  // custom reduction initializations. It is required because runtime does not
6264  // provide the way to pass the sizes of VLAs/array sections to
6265  // initializer/combiner/finalizer functions and does not pass the pointer to
6266  // original reduction item to the initializer. Instead threadprivate global
6267  // variables are used to store these values and use them in the functions.
6268  bool DelayedCreation = !!SizeVal;
6269  SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
6270  /*isSigned=*/false);
6271  LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
6272  CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
6273  // ElemLVal.reduce_init = init;
6274  LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
6275  llvm::Value *InitAddr =
6276  CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
6277  CGF.EmitStoreOfScalar(InitAddr, InitLVal);
6278  DelayedCreation = DelayedCreation || RCG.usesReductionInitializer(Cnt);
6279  // ElemLVal.reduce_fini = fini;
6280  LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
  // emitReduceFiniFunction returns nullptr when the item needs no cleanups; a
  // null void* is stored in that case.
6281  llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
6282  llvm::Value *FiniAddr = Fini
6283  ? CGF.EmitCastToVoidPtr(Fini)
6284  : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
6285  CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
6286  // ElemLVal.reduce_comb = comb;
6287  LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
6289  CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
6290  RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
6291  CGF.EmitStoreOfScalar(CombAddr, CombLVal);
6292  // ElemLVal.flags = DelayedCreation ? 1 : 0;
6293  LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
6294  if (DelayedCreation) {
6295  CGF.EmitStoreOfScalar(
6296  llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
6297  FlagsLVal);
6298  } else
6299  CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
6300  }
6301  // Build call void *__kmpc_task_reduction_init(int gtid, int num_data, void
6302  // *data);
6303  llvm::Value *Args[] = {
6304  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6305  /*isSigned=*/true),
6306  llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
6308  CGM.VoidPtrTy)};
6309  return CGF.EmitRuntimeCall(
6311 }
6312 
// Stores per-item values into artificial threadprivate variables: the item
// size when it is not a compile-time constant, and a value related to the
// original item when a custom reduction initializer is used. The variable
// names ("reduction_size" / "reduction" prefixes) match the ones loaded by the
// init/comb/fini functions above.
// NOTE(review): this listing skips original lines 6313, 6323, 6330, 6334 and
// 6335 (the start of the signature, both address declarations and the value
// stored in the second store); restore them from the upstream file before
// building.
6314  SourceLocation Loc,
6315  ReductionCodeGen &RCG,
6316  unsigned N) {
6317  auto Sizes = RCG.getSizes(N);
6318  // Emit threadprivate global variable if the type is non-constant
6319  // (Sizes.second != nullptr).
6320  if (Sizes.second) {
6321  llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
6322  /*isSigned=*/false);
6324  CGF, CGM.getContext().getSizeType(),
6325  generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
6326  CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
6327  }
6328  // Store address of the original reduction item if custom initializer is used.
6329  if (RCG.usesReductionInitializer(N)) {
6331  CGF, CGM.getContext().VoidPtrTy,
6332  generateUniqueName(CGM, "reduction", RCG.getRefExpr(N)));
6333  CGF.Builder.CreateStore(
6336  SharedAddr, /*IsVolatile=*/false);
6337  }
6338 }
6339 
// Emits a runtime call taking the thread id, ReductionsPtr and a third
// void*-typed argument, and wraps the returned pointer in an Address that
// reuses the alignment of SharedLVal.
// NOTE(review): this listing skips original lines 6340, 6350 and 6354 (the
// start of the signature, the start of the third argument and the runtime
// function being called - presumably __kmpc_task_reduction_get_th_data, as
// named in the comment below); restore them from the upstream file before
// building.
6341  SourceLocation Loc,
6342  llvm::Value *ReductionsPtr,
6343  LValue SharedLVal) {
6344  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
6345  // *d);
6346  llvm::Value *Args[] = {
6347  CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
6348  /*isSigned=*/true),
6349  ReductionsPtr,
6351  CGM.VoidPtrTy)};
6352  return Address(
6353  CGF.EmitRuntimeCall(
6355  SharedLVal.getAlignment());
6356 }
6357 
// Prepares the (location, thread id) arguments for a taskwait runtime call
// and, when the current captured-statement info is an OpenMP region, asks that
// region to emit its untied switch. Does nothing without an insert point.
// NOTE(review): this listing skips original lines 6358 and 6366 (the start of
// the signature and the statement that presumably emits the
// __kmpc_omp_taskwait call named in the comment below); restore them from the
// upstream file before building.
6359  SourceLocation Loc) {
6360  if (!CGF.HaveInsertPoint())
6361  return;
6362  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
6363  // global_tid);
6364  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
6365  // Ignore return result until untied tasks are supported.
6367  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
6368  Region->emitUntiedSwitch(CGF);
6369 }
6370 
// Emits the body of a directive in place: an InlinedOpenMPRegionRAII object
// is created for the duration of the call and the body is then emitted
// through CGF.CapturedStmtInfo. Does nothing without an insert point.
// NOTE(review): this listing skips original line 6371 (the start of the
// signature); restore it from the upstream file before building.
6372  OpenMPDirectiveKind InnerKind,
6373  const RegionCodeGenTy &CodeGen,
6374  bool HasCancel) {
6375  if (!CGF.HaveInsertPoint())
6376  return;
  // Region must be constructed before EmitBody is called.
  // NOTE(review): presumably its constructor installs the CapturedStmtInfo
  // used on the next line - confirm against InlinedOpenMPRegionRAII.
6377  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel);
6378  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
6379 }
6380 
// Numeric cancellation kinds; getCancellationKind's result is passed as an
// i32 argument to the cancel / cancellation-point runtime calls in this file.
// NOTE(review): this listing skips original line 6382 (presumably the
// `enum RTCancelKind {` header - the type name is used by
// getCancellationKind); restore it from the upstream file before building.
6381 namespace {
6383  CancelNoreq = 0,
6384  CancelParallel = 1,
6385  CancelLoop = 2,
6386  CancelSections = 3,
6387  CancelTaskgroup = 4
6388 };
6389 } // anonymous namespace
6390 
// Maps the directive kind in CancelRegion to an RTCancelKind value:
// OMPD_parallel -> CancelParallel, OMPD_for -> CancelLoop,
// OMPD_sections -> CancelSections, anything else -> CancelTaskgroup.
// The last case asserts that the kind is OMPD_taskgroup, so when assertions
// are compiled out any other kind silently maps to CancelTaskgroup.
// NOTE(review): this listing skips original line 6391 (the signature);
// restore it from the upstream file before building.
6392  RTCancelKind CancelKind = CancelNoreq;
6393  if (CancelRegion == OMPD_parallel)
6394  CancelKind = CancelParallel;
6395  else if (CancelRegion == OMPD_for)
6396  CancelKind = CancelLoop;
6397  else if (CancelRegion == OMPD_sections)
6398  CancelKind = CancelSections;
6399  else {
6400  assert(CancelRegion == OMPD_taskgroup);
6401  CancelKind = CancelTaskgroup;
6402  }
6403  return CancelKind;
6404 }
6405 
// Emits a cancellation-point runtime call followed by a conditional branch:
// a non-null result leaves the construct through the cancel destination of
// the enclosing region's directive kind, otherwise execution continues.
// Nothing is emitted without an insert point, when the current captured
// statement info is not an OpenMP region, or when the region has no cancel
// and CancelRegion is not OMPD_taskgroup.
// NOTE(review): this listing skips original lines 6406 and 6423 (the start of
// the signature and the arguments of EmitRuntimeCall - presumably the
// __kmpc_cancellationpoint function named in the comment below); restore them
// from the upstream file before building.
6407  CodeGenFunction &CGF, SourceLocation Loc,
6408  OpenMPDirectiveKind CancelRegion) {
6409  if (!CGF.HaveInsertPoint())
6410  return;
6411  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
6412  // global_tid, kmp_int32 cncl_kind);
6413  if (auto *OMPRegionInfo =
6414  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
6415  // For 'cancellation point taskgroup', the task region info may not have a
6416  // cancel. This may instead happen in another adjacent task.
6417  if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
6418  llvm::Value *Args[] = {
6419  emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
6420  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6421  // The call result is tested below: a non-null value exits the construct.
6422  llvm::Value *Result = CGF.EmitRuntimeCall(
6424  // if (__kmpc_cancellationpoint()) {
6425  // exit from construct;
6426  // }
6427  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6428  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6429  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6430  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6431  CGF.EmitBlock(ExitBB);
6432  // exit from construct;
6433  CodeGenFunction::JumpDest CancelDest =
6434  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6435  CGF.EmitBranchThroughCleanup(CancelDest);
6436  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6437  }
6438  }
6439 }
6440 
// Emits a __kmpc_cancel runtime call followed by a conditional branch: a
// non-null result leaves the construct through the cancel destination of the
// enclosing region's directive kind, otherwise execution continues. When
// IfCond is non-null the whole sequence is emitted as the 'then' part of an
// if clause whose 'else' part is empty. Nothing is emitted without an insert
// point or when the current captured statement info is not an OpenMP region.
// NOTE(review): this listing skips original line 6441 (the start of the
// signature); restore it from the upstream file before building.
6442  const Expr *IfCond,
6443  OpenMPDirectiveKind CancelRegion) {
6444  if (!CGF.HaveInsertPoint())
6445  return;
6446  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
6447  // kmp_int32 cncl_kind);
6448  if (auto *OMPRegionInfo =
6449  dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
  // The generator fetches the runtime from the CodeGenFunction it is given
  // instead of capturing `this`.
6450  auto &&ThenGen = [Loc, CancelRegion, OMPRegionInfo](CodeGenFunction &CGF,
6451  PrePostActionTy &) {
6452  CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
6453  llvm::Value *Args[] = {
6454  RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
6455  CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
6456  // The call result is tested below: a non-null value exits the construct.
6457  llvm::Value *Result = CGF.EmitRuntimeCall(
6458  RT.createRuntimeFunction(OMPRTL__kmpc_cancel), Args);
6459  // if (__kmpc_cancel()) {
6460  // exit from construct;
6461  // }
6462  llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
6463  llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
6464  llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
6465  CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
6466  CGF.EmitBlock(ExitBB);
6467  // exit from construct;
6468  CodeGenFunction::JumpDest CancelDest =
6469  CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
6470  CGF.EmitBranchThroughCleanup(CancelDest);
6471  CGF.EmitBlock(ContBB, /*IsFinished=*/true);
6472  };
6473  if (IfCond) {
6474  emitOMPIfClause(CGF, IfCond, ThenGen,
6475  [](CodeGenFunction &, PrePostActionTy &) {});
6476  } else {
6477  RegionCodeGenTy ThenRCG(ThenGen);
6478  ThenRCG(CGF);
6479  }
6480  }
6481 }
6482 
// Records that a target region has been emitted (HasEmittedTargetRegion) and
// forwards all arguments unchanged to emitTargetOutlinedFunctionHelper.
// ParentName must be non-empty (checked by assert only).
// NOTE(review): this listing skips original line 6483 (the start of the
// signature); restore it from the upstream file before building.
6484  const OMPExecutableDirective &D, StringRef ParentName,
6485  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6486  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6487  assert(!ParentName.empty() && "Invalid target region parent name!");
6488  HasEmittedTargetRegion = true;
6489  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6490  IsOffloadEntry, CodeGen);
6491 }
6492 
// Generates the outlined function for the OMPD_target captured statement of D
// and stores it in OutlinedFn. When IsOffloadEntry is true it also sets
// OutlinedFnID (the function address when compiling for the device, a fresh
// weak constant i8 global otherwise) and registers the entry; when it is
// false the function returns right after outlining and OutlinedFnID is left
// untouched.
// NOTE(review): this listing skips original lines 6493, 6555 and 6557 (the
// start of the signature and the beginning and end of the final registration
// call); restore them from the upstream file before building.
6494  const OMPExecutableDirective &D, StringRef ParentName,
6495  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6496  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6497  // Create a unique name for the entry function using the source location
6498  // information of the current target region. The name will be something like:
6499  //
6500  // __omp_offloading_DD_FFFF_PP_lBB
6501  //
6502  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
6503  // mangled name of the function that encloses the target region and BB is the
6504  // line number of the target region.
6505 
6506  unsigned DeviceID;
6507  unsigned FileID;
6508  unsigned Line;
6509  getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), DeviceID, FileID,
6510  Line);
6511  SmallString<64> EntryFnName;
6512  {
  // Device and file IDs are printed in hexadecimal, the line in decimal.
6513  llvm::raw_svector_ostream OS(EntryFnName);
6514  OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6515  << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6516  }
6517 
6518  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
6519 
6520  CodeGenFunction CGF(CGM, true);
6521  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
6522  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6523 
6524  OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS);
6525 
6526  // If this target outline function is not an offload entry, we don't need to
6527  // register it.
6528  if (!IsOffloadEntry)
6529  return;
6530 
6531  // The target region ID is used by the runtime library to identify the current
6532  // target region, so it only has to be unique and not necessarily point to
6533  // anything. It could be the pointer to the outlined function that implements
6534  // the target region, but we aren't using that so that the compiler doesn't
6535  // need to keep that, and could therefore inline the host function if proven
6536  // worthwhile during optimization. On the other hand, if emitting code for the
6537  // device, the ID has to be the function address so that it can be retrieved from
6538  // the offloading entry and launched by the runtime library. We also mark the
6539  // outlined function to have external linkage in case we are emitting code for
6540  // the device, because these functions will be entry points to the device.
6541 
6542  if (CGM.getLangOpts().OpenMPIsDevice) {
6543  OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
6544  OutlinedFn->setLinkage(llvm::GlobalValue::WeakAnyLinkage);
6545  OutlinedFn->setDSOLocal(false);
6546  } else {
6547  std::string Name = getName({EntryFnName, "region_id"});
6548  OutlinedFnID = new llvm::GlobalVariable(
6549  CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
6550  llvm::GlobalValue::WeakAnyLinkage,
6551  llvm::Constant::getNullValue(CGM.Int8Ty), Name);
6552  }
6553 
6554  // Register the information for the entry associated with this target region.
6556  DeviceID, FileID, ParentName, Line, OutlinedFn, OutlinedFnID,
6558 }
6559 
6560 /// Checks if the expression is constant or does not have non-trivial function
6561 /// calls.
/// The visible part of the result additionally requires that \p E has no side
/// effects, counting possible ones (IncludePossibleEffects is true).
// NOTE(review): this listing skips original line 6565 (the `return` keyword
// and the first operand of the condition); restore it from the upstream file
// before building.
6562 static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6563  // We can skip constant expressions.
6564  // We can skip expressions with trivial calls or simple expressions.
6566  !E->hasNonTrivialCall(Ctx)) &&
6567  !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6568 }
6569 
// Looks through (possibly nested) compound statements in Body for the single
// statement that matters. Trivial expressions, a fixed set of ignorable
// statements and declaration statements whose declarations are all ignorable
// are skipped. Returns the one remaining statement (after IgnoreContainers),
// nullptr when a compound statement holds more than one such statement, and
// nullptr as well when nothing remains.
// NOTE(review): this listing skips original line 6570 (the start of the
// signature, which presumably declares the Ctx parameter used below); restore
// it from the upstream file before building.
6571  const Stmt *Body) {
6572  const Stmt *Child = Body->IgnoreContainers();
  // Keep descending while the current candidate is itself a compound statement.
6573  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
6574  Child = nullptr;
6575  for (const Stmt *S : C->body()) {
6576  if (const auto *E = dyn_cast<Expr>(S)) {
6577  if (isTrivial(Ctx, E))
6578  continue;
6579  }
6580  // Some of the statements can be ignored.
6581  if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
6582  isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
6583  continue;
6584  // Analyze declarations.
6585  if (const auto *DS = dyn_cast<DeclStmt>(S)) {
  // The statement is skipped only if every declaration in it is ignorable:
  // one of the listed declaration kinds, or a variable that is constexpr or
  // has a trivial/reference type with no initializer or a trivial one.
6586  if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
6587  if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
6588  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
6589  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
6590  isa<UsingDirectiveDecl>(D) ||
6591  isa<OMPDeclareReductionDecl>(D) ||
6592  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
6593  return true;
6594  const auto *VD = dyn_cast<VarDecl>(D);
6595  if (!VD)
6596  return false;
6597  return VD->isConstexpr() ||
6598  ((VD->getType().isTrivialType(Ctx) ||
6599  VD->getType()->isReferenceType()) &&
6600  (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
6601  }))
6602  continue;
6603  }
6604  // Found multiple children - cannot get the one child only.
6605  if (Child)
6606  return nullptr;
6607  Child = S;
6608  }
6609  if (Child)
6610  Child = Child->IgnoreContainers();
6611  }
6612  return Child;
6613 }
6614 
6615 /// Emit the number of teams for a target directive. Inspect the num_teams
6616 /// clause associated with a teams construct combined or closely nested
6617 /// with the target directive.
6618 ///
6619 /// Emit a team of size one for directives such as 'target parallel' that
6620 /// have no associated teams construct.
6621 ///
6622 /// Otherwise, return nullptr.
     ///
     /// Summary of the visible return paths (every non-null result is built as a
     /// 32-bit integer):
     ///  - num_teams clause found: its expression, emitted as a scalar and
     ///    converted to Int32Ty with a signed integer cast.
     ///  - nested teams directive without num_teams, or a single nested
     ///    directive that is neither teams, parallel nor simd: constant 0.
     ///  - parallel or simd directive (combined with, or nested in, the target):
     ///    constant 1.
     ///  - plain 'target' whose ChildStmt is null or not an
     ///    OMPExecutableDirective: nullptr.
     /// NOTE(review): 0 presumably tells the consumer that no team count was
     /// requested - confirm against the caller.
6623 static llvm::Value *
6625  const OMPExecutableDirective &D) {
      // Only valid during host code generation and only for target execution
      // directives; both preconditions are asserted.
6626  assert(!CGF.getLangOpts().OpenMPIsDevice &&
6627  "Clauses associated with the teams directive expected to be emitted "
6628  "only for the host!");
6629  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6630  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6631  "Expected target-based executable directive.");
6632  CGBuilderTy &Bld = CGF.Builder;
6633  switch (DirectiveKind) {
6634  case OMPD_target: {
      // Nothing is combined into a plain 'target', so look at the statement
      // captured by its innermost region instead.
6635  const auto *CS = D.getInnermostCapturedStmt();
6636  const auto *Body =
6637  CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
      // NOTE(review): the initializer of ChildStmt is on a line missing from
      // this listing; presumably it extracts the single child of Body.
6638  const Stmt *ChildStmt =
6640  if (const auto *NestedDir =
6641  dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
6642  if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
6643  if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
      // Emit the nested clause expression while CGInfo/CapInfoRAII are alive.
      // Presumably this lets variables captured by CS resolve - TODO confirm.
6644  CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6645  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6646  const Expr *NumTeams =
6647  NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6648  llvm::Value *NumTeamsVal =
6649  CGF.EmitScalarExpr(NumTeams,
6650  /*IgnoreResultAssign*/ true);
6651  return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6652  /*isSigned=*/true);
6653  }
      // Nested teams directive without a num_teams clause.
6654  return Bld.getInt32(0);
6655  }
      // A nested parallel or simd directive: exactly one team.
6656  if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
6657  isOpenMPSimdDirective(NestedDir->getDirectiveKind()))
6658  return Bld.getInt32(1);
      // Any other single nested directive.
6659  return Bld.getInt32(0);
6660  }
      // ChildStmt is null or is not an OpenMP executable directive.
6661  return nullptr;
6662  }
6663  case OMPD_target_teams:
6664  case OMPD_target_teams_distribute:
6665  case OMPD_target_teams_distribute_simd:
6666  case OMPD_target_teams_distribute_parallel_for:
6667  case OMPD_target_teams_distribute_parallel_for_simd: {
      // NOTE(review): the line that opens the scope below is missing from this
      // listing; judging by the body it tests for a num_teams clause on D.
6669  CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
6670  const Expr *NumTeams =
6671  D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
6672  llvm::Value *NumTeamsVal =
6673  CGF.EmitScalarExpr(NumTeams,
6674  /*IgnoreResultAssign*/ true);
6675  return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
6676  /*isSigned=*/true);
6677  }
      // Fallback when the scope above did not return.
6678  return Bld.getInt32(0);
6679  }
      // Combined target directives without a teams part: a single team.
6680  case OMPD_target_parallel:
6681  case OMPD_target_parallel_for:
6682  case OMPD_target_parallel_for_simd:
6683  case OMPD_target_simd:
6684  return Bld.getInt32(1);
      // Every remaining directive kind leaves the switch and reaches the
      // llvm_unreachable below.
6685  case OMPD_parallel:
6686  case OMPD_for:
6687  case OMPD_parallel_for:
6688  case OMPD_parallel_sections:
6689  case OMPD_for_simd:
6690  case OMPD_parallel_for_simd:
6691  case OMPD_cancel:
6692  case OMPD_cancellation_point:
6693  case OMPD_ordered:
6694  case OMPD_threadprivate:
6695  case OMPD_allocate:
6696  case OMPD_task:
6697  case OMPD_simd:
6698  case OMPD_sections:
6699  case OMPD_section:
6700  case OMPD_single:
6701  case OMPD_master:
6702  case OMPD_critical:
6703  case OMPD_taskyield:
6704  case OMPD_barrier:
6705  case OMPD_taskwait:
6706  case OMPD_taskgroup:
6707  case OMPD_atomic:
6708  case OMPD_flush:
6709  case OMPD_teams:
6710  case OMPD_target_data:
6711  case OMPD_target_exit_data:
6712  case OMPD_target_enter_data:
6713  case OMPD_distribute:
6714  case OMPD_distribute_simd:
6715  case OMPD_distribute_parallel_for:
6716  case OMPD_distribute_parallel_for_simd:
6717  case OMPD_teams_distribute:
6718  case OMPD_teams_distribute_simd:
6719  case OMPD_teams_distribute_parallel_for:
6720  case OMPD_teams_distribute_parallel_for_simd:
6721  case OMPD_target_update:
6722  case OMPD_declare_simd:
6723  case OMPD_declare_target:
6724  case OMPD_end_declare_target:
6725  case OMPD_declare_reduction:
6726  case OMPD_declare_mapper:
6727  case OMPD_taskloop:
6728  case OMPD_taskloop_simd:
6729  case OMPD_requires:
6730  case OMPD_unknown:
6731  break;
6732  }
6733  llvm_unreachable("Unexpected directive kind.");
6734 }
6735 
6737  llvm::Value *DefaultThreadLimitVal) {
      // Helper called as getNumThreads(CGF, CS, <limit>) elsewhere in this file.
      // It derives a thread count from the directive held in Child:
      //  - parallel directive: <if-cond> ? min(num_threads, limit) : 1, where a
      //    missing num_threads clause yields DefaultThreadLimitVal, or constant
      //    0 if that is null;
      //  - simd directive: constant 1;
      //  - any other executable directive: DefaultThreadLimitVal unchanged,
      //    which may be null;
      //  - Child is null or not an OMPExecutableDirective:
      //    DefaultThreadLimitVal, or constant 0 if it is null.
      // NOTE(review): Child is declared on a line missing from this listing;
      // the visible arguments suggest it is computed from CS's captured
      // statement.
6739  CGF.getContext(), CS->getCapturedStmt());
6740  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6741  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
6742  llvm::Value *NumThreads = nullptr;
6743  llvm::Value *CondVal = nullptr;
6744  // Handle the if clause. If an if clause is present, the number of threads is
6745  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6746  if (Dir->hasClausesOfKind<OMPIfClause>()) {
6747  CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6748  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      // Pick the first if clause with no name modifier or the 'parallel'
      // modifier.
6749  const OMPIfClause *IfClause = nullptr;
6750  for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
6751  if (C->getNameModifier() == OMPD_unknown ||
6752  C->getNameModifier() == OMPD_parallel) {
6753  IfClause = C;
6754  break;
6755  }
6756  }
6757  if (IfClause) {
6758  const Expr *Cond = IfClause->getCondition();
6759  bool Result;
      // A condition that folds to false returns one thread right away. One
      // that folds to true leaves CondVal null, so no select is emitted.
6760  if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6761  if (!Result)
6762  return CGF.Builder.getInt32(1);
6763  } else {
      // Emit the clause's pre-init declarations before evaluating the
      // condition. Decls carrying OMPCaptureNoInitAttr only get an alloca plus
      // cleanups; all others go through EmitVarDecl.
      // NOTE(review): two lines of this branch are missing from the listing,
      // including the declaration of 'Emission'.
6765  if (const auto *PreInit =
6766  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
6767  for (const auto *I : PreInit->decls()) {
6768  if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6769  CGF.EmitVarDecl(cast<VarDecl>(*I));
6770  } else {
6772  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6773  CGF.EmitAutoVarCleanups(Emission);
6774  }
6775  }
6776  }
6777  CondVal = CGF.EvaluateExprAsBool(Cond);
6778  }
6779  }
6780  }
6781  // Check the value of the num_threads clause iff the if clause was not
6782  // specified or did not evaluate to false.
6783  if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
6784  CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6785  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6786  const auto *NumThreadsClause =
6787  Dir->getSingleClause<OMPNumThreadsClause>();
      // NOTE(review): the line that starts the statement continued below is
      // missing from this listing.
6789  CGF, NumThreadsClause->getNumThreads()->getSourceRange());
      // Same pre-init handling as for the if clause above.
6790  if (const auto *PreInit =
6791  cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
6792  for (const auto *I : PreInit->decls()) {
6793  if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6794  CGF.EmitVarDecl(cast<VarDecl>(*I));
6795  } else {
6797  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6798  CGF.EmitAutoVarCleanups(Emission);
6799  }
6800  }
6801  }
6802  NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
      // The value is converted to i32 with an unsigned cast, then clamped to
      // the default limit with an unsigned comparison.
6803  NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
6804  /*isSigned=*/false);
6805  if (DefaultThreadLimitVal)
6806  NumThreads = CGF.Builder.CreateSelect(
6807  CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
6808  DefaultThreadLimitVal, NumThreads);
6809  } else {
      // No num_threads clause: use the default limit, or 0 if there is none.
6810  NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
6811  : CGF.Builder.getInt32(0);
6812  }
6813  // Process the condition of the if clause: one thread when it is false.
6814  if (CondVal) {
6815  NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
6816  CGF.Builder.getInt32(1));
6817  }
6818  return NumThreads;
6819  }
      // A simd directive that is not also a parallel directive: one thread.
6820  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
6821  return CGF.Builder.getInt32(1);
      // Other directive: pass the (possibly null) default through so the
      // caller can tell that nothing was decided here.
6822  return DefaultThreadLimitVal;
6823  }
6824  return DefaultThreadLimitVal ? DefaultThreadLimitVal
6825  : CGF.Builder.getInt32(0);
6826 }
6827 
6828 /// Emit the number of threads for a target directive. Inspect the
6829 /// thread_limit clause associated with a teams construct combined or closely
6830 /// nested with the target directive.
6831 ///
6832 /// Emit the num_threads clause for directives such as 'target parallel' that
6833 /// have no associated teams construct.
6834 ///
6835 /// Otherwise, return nullptr.
     ///
     /// Summary of the visible behavior per directive kind:
     ///  - 'target simd', 'target teams distribute simd': constant 1.
     ///  - combined forms containing 'parallel': the unsigned minimum of the
     ///    num_threads and thread_limit values, constant 0 when neither value
     ///    was produced, and 1 when the applicable if clause is false (folded
     ///    at compile time, or selected at run time).
     ///  - 'target', 'target teams': delegate to getNumThreads on the captured
     ///    bodies, passing the thread limit found so far as the default, and
     ///    fall back to that limit or constant 0.
     ///  - 'target teams distribute': returns getNumThreads's result directly,
     ///    which can be null.
     /// Clause values are converted to Int32Ty with unsigned integer casts.
6836 static llvm::Value *
6838  const OMPExecutableDirective &D) {
      // Only valid during host code generation and only for target execution
      // directives; both preconditions are asserted.
6839  assert(!CGF.getLangOpts().OpenMPIsDevice &&
6840  "Clauses associated with the teams directive expected to be emitted "
6841  "only for the host!");
6842  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6843  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6844  "Expected target-based executable directive.");
6845  CGBuilderTy &Bld = CGF.Builder;
6846  llvm::Value *ThreadLimitVal = nullptr;
6847  llvm::Value *NumThreadsVal = nullptr;
6848  switch (DirectiveKind) {
6849  case OMPD_target: {
6850  const CapturedStmt *CS = D.getInnermostCapturedStmt();
      // ThreadLimitVal is still null here, so getNumThreads returns null only
      // when the nested directive is neither parallel nor simd.
6851  if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6852  return NumThreads;
      // NOTE(review): the declaration of Child is on a line missing from this
      // listing; the visible arguments suggest it is computed from CS's
      // captured statement.
6854  CGF.getContext(), CS->getCapturedStmt());
6855  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      // Record a thread_limit clause found on the nested directive.
6856  if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
6857  CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6858  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6859  const auto *ThreadLimitClause =
6860  Dir->getSingleClause<OMPThreadLimitClause>();
6862  CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
      // Emit pre-init declarations first. Decls carrying OMPCaptureNoInitAttr
      // only get an alloca plus cleanups.
6863  if (const auto *PreInit =
6864  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
6865  for (const auto *I : PreInit->decls()) {
6866  if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6867  CGF.EmitVarDecl(cast<VarDecl>(*I));
6868  } else {
6870  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
6871  CGF.EmitAutoVarCleanups(Emission);
6872  }
6873  }
6874  }
6875  llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6876  ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6877  ThreadLimitVal =
6878  Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6879  }
      // A teams directive that is not also a distribute directive: step into
      // its body and continue with the directive found there, if any.
6880  if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
6881  !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
6882  CS = Dir->getInnermostCapturedStmt();
6884  CGF.getContext(), CS->getCapturedStmt());
6885  Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
6886  }
      // A non-simd distribute directive: ask getNumThreads about its body,
      // with the thread limit found so far as the default.
6887  if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
6888  !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
6889  CS = Dir->getInnermostCapturedStmt();
6890  if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6891  return NumThreads;
6892  }
6893  if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
6894  return Bld.getInt32(1);
6895  }
6896  return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6897  }
6898  case OMPD_target_teams: {
      // NOTE(review): the line that opens the scope below is missing from this
      // listing; judging by the body it tests for a thread_limit clause on D.
6900  CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6901  const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6902  llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6903  ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6904  ThreadLimitVal =
6905  Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6906  }
6907  const CapturedStmt *CS = D.getInnermostCapturedStmt();
6908  if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6909  return NumThreads;
6911  CGF.getContext(), CS->getCapturedStmt());
      // Only a nested directive whose kind is exactly OMPD_distribute is
      // looked into.
6912  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
6913  if (Dir->getDirectiveKind() == OMPD_distribute) {
6914  CS = Dir->getInnermostCapturedStmt();
6915  if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
6916  return NumThreads;
6917  }
6918  }
6919  return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
6920  }
6921  case OMPD_target_teams_distribute:
      // NOTE(review): the line that opens the scope below is missing from this
      // listing. The result of getNumThreads is returned unchecked and may be
      // null.
6923  CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6924  const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6925  llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6926  ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6927  ThreadLimitVal =
6928  Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6929  }
6930  return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
6931  case OMPD_target_parallel:
6932  case OMPD_target_parallel_for:
6933  case OMPD_target_parallel_for_simd:
6934  case OMPD_target_teams_distribute_parallel_for:
6935  case OMPD_target_teams_distribute_parallel_for_simd: {
6936  llvm::Value *CondVal = nullptr;
6937  // Handle the if clause. If an if clause is present, the number of threads is
6938  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
6939  if (D.hasClausesOfKind<OMPIfClause>()) {
      // Pick the first if clause with no name modifier or the 'parallel'
      // modifier.
6940  const OMPIfClause *IfClause = nullptr;
6941  for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6942  if (C->getNameModifier() == OMPD_unknown ||
6943  C->getNameModifier() == OMPD_parallel) {
6944  IfClause = C;
6945  break;
6946  }
6947  }
6948  if (IfClause) {
6949  const Expr *Cond = IfClause->getCondition();
6950  bool Result;
      // A condition that folds to false returns one thread right away. One
      // that folds to true leaves CondVal null.
6951  if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
6952  if (!Result)
6953  return Bld.getInt32(1);
6954  } else {
6956  CondVal = CGF.EvaluateExprAsBool(Cond);
6957  }
6958  }
6959  }
      // NOTE(review): the lines that open the next two scopes are missing from
      // this listing; judging by the bodies they test for thread_limit and
      // num_threads clauses on D.
6961  CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6962  const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6963  llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
6964  ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
6965  ThreadLimitVal =
6966  Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
6967  }
6969  CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6970  const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6971  llvm::Value *NumThreads = CGF.EmitScalarExpr(
6972  NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
6973  NumThreadsVal =
6974  Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      // Keep the smaller of num_threads and the thread limit (unsigned
      // comparison), or num_threads alone when no limit was recorded.
6975  ThreadLimitVal = ThreadLimitVal
6976  ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
6977  ThreadLimitVal),
6978  NumThreadsVal, ThreadLimitVal)
6979  : NumThreadsVal;
6980  }
6981  if (!ThreadLimitVal)
6982  ThreadLimitVal = Bld.getInt32(0);
      // A run-time if condition selects between the computed value and 1.
6983  if (CondVal)
6984  return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
6985  return ThreadLimitVal;
6986  }
6987  case OMPD_target_teams_distribute_simd:
6988  case OMPD_target_simd:
6989  return Bld.getInt32(1);
      // Every remaining directive kind leaves the switch and reaches the
      // llvm_unreachable below.
6990  case OMPD_parallel:
6991  case OMPD_for:
6992  case OMPD_parallel_for:
6993  case OMPD_parallel_sections:
6994  case OMPD_for_simd:
6995  case OMPD_parallel_for_simd:
6996  case OMPD_cancel:
6997  case OMPD_cancellation_point:
6998  case OMPD_ordered:
6999  case OMPD_threadprivate:
7000  case OMPD_allocate:
7001  case OMPD_task:
7002  case OMPD_simd:
7003  case OMPD_sections:
7004  case OMPD_section:
7005  case OMPD_single:
7006  case OMPD_master:
7007  case OMPD_critical:
7008  case OMPD_taskyield:
7009  case OMPD_barrier:
7010  case OMPD_taskwait:
7011  case OMPD_taskgroup:
7012  case OMPD_atomic:
7013  case OMPD_flush:
7014  case OMPD_teams:
7015  case OMPD_target_data:
7016  case OMPD_target_exit_data:
7017  case OMPD_target_enter_data:
7018  case OMPD_distribute:
7019  case OMPD_distribute_simd:
7020  case OMPD_distribute_parallel_for:
7021  case OMPD_distribute_parallel_for_simd:
7022  case OMPD_teams_distribute:
7023  case OMPD_teams_distribute_simd:
7024  case OMPD_teams_distribute_parallel_for:
7025  case OMPD_teams_distribute_parallel_for_simd:
7026  case OMPD_target_update:
7027  case OMPD_declare_simd:
7028  case OMPD_declare_target:
7029  case OMPD_end_declare_target:
7030  case OMPD_declare_reduction:
7031  case OMPD_declare_mapper:
7032  case OMPD_taskloop:
7033  case OMPD_taskloop_simd:
7034  case OMPD_requires:
7035  case OMPD_unknown:
7036  break;
7037  }
7038  llvm_unreachable("Unsupported directive kind.");
7039 }
7040 
7041 namespace {
7043 
7044 // Utility to handle information from clauses associated with a given
7045 // construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
7046 // It provides a convenient interface to obtain the information and generate
7047 // code for that information.
7048 class MappableExprsHandler {
7049 public:
7050  /// Values for bit flags used to specify the mapping type for
7051  /// offloading.
      /// The underlying type is uint64_t. getMapTypeBits in this class combines
      /// these values with |=.
7052  enum OpenMPOffloadMappingFlags : uint64_t {
7053  /// No flags
7054  OMP_MAP_NONE = 0x0,
7055  /// Allocate memory on the device and move data from host to device.
7056  OMP_MAP_TO = 0x01,
7057  /// Allocate memory on the device and move data from device to host.
7058  OMP_MAP_FROM = 0x02,
7059  /// Always perform the requested mapping action on the element, even
7060  /// if it was already mapped before.
7061  OMP_MAP_ALWAYS = 0x04,
7062  /// Delete the element from the device environment, ignoring the
7063  /// current reference count associated with the element.
7064  OMP_MAP_DELETE = 0x08,
7065  /// The element being mapped is a pointer-pointee pair; both the
7066  /// pointer and the pointee should be mapped.
7067  OMP_MAP_PTR_AND_OBJ = 0x10,
7068  /// This flag signals that the base address of an entry should be
7069  /// passed to the target kernel as an argument.
7070  OMP_MAP_TARGET_PARAM = 0x20,
7071  /// Signal that the runtime library has to return the device pointer
7072  /// in the current position for the data being mapped. Used when we have the
7073  /// use_device_ptr clause.
7074  OMP_MAP_RETURN_PARAM = 0x40,
7075  /// This flag signals that the reference being passed is a pointer to
7076  /// private data.
7077  OMP_MAP_PRIVATE = 0x80,
7078  /// Pass the element to the device by value.
7079  OMP_MAP_LITERAL = 0x100,
7080  /// Implicit map
7081  OMP_MAP_IMPLICIT = 0x200,
7082  /// The 16 MSBs of the flags indicate whether the entry is a member of some
7083  /// struct/class.
      /// This value is the mask for bits 48-63. NOTE(review): the field
      /// presumably holds the position of the parent struct entry, as in the
      /// MEMBER_OF(n) notation of generateInfoForComponentList - confirm.
7084  OMP_MAP_MEMBER_OF = 0xffff000000000000,
      // NOTE(review): presumably this macro is what permits bitwise operators
      // on the enum - confirm against llvm/ADT/BitmaskEnum.h.
7085  LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
7086  };
7087 
7088  /// Class that associates information with a base pointer to be passed to the
7089  /// runtime library.
      /// It pairs the llvm::Value of the base pointer with an optional
      /// declaration that can be changed after construction.
7090  class BasePointerInfo {
7091  /// The base pointer.
7092  llvm::Value *Ptr = nullptr;
7093  /// The base declaration that refers to this device pointer, or null if
7094  /// there is none.
7095  const ValueDecl *DevPtrDecl = nullptr;
7096 
7097  public:
      /// The constructor is not explicit, so a plain llvm::Value * converts
      /// implicitly into a BasePointerInfo with a null declaration.
7098  BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
7099  : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
      /// Dereferencing yields the wrapped base pointer value.
7100  llvm::Value *operator*() const { return Ptr; }
      /// Returns the associated declaration, or null if none was set.
7101  const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
      /// Replaces the associated declaration.
7102  void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
7103  };
7104 
      /// Array types used for the map information; generateInfoForComponentList
      /// takes them as its BasePointers, Pointers/Sizes and Types parameters
      /// respectively.
7105  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
7106  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
7107  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
7108 
7109  /// Map between a struct and its lowest & highest elements which have been
7110  /// mapped.
7111  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
7112  /// HE(FieldIndex, Pointer)}
      /// Both pairs default to field index 0 with an invalid address.
      /// NOTE(review): one member line of this struct is missing from this
      /// listing.
7113  struct StructRangeInfoTy {
7114  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
7115  0, Address::invalid()};
7116  std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
7117  0, Address::invalid()};
7119  };
7120 
7121 private:
7122  /// Kind that defines how a device pointer has to be returned.
      /// Bundles the expression components of one map entry with its map type,
      /// map modifiers, and the ReturnDevicePointer / IsImplicit flags.
      /// NOTE(review): the declarations of the Components and MapType members
      /// and the first constructor parameter are on lines missing from this
      /// listing; they are visible only through the initializer list below.
7123  struct MapInfo {
      // ArrayRef member: presumably a non-owning view, so the referenced
      // modifier storage must outlive this object - TODO confirm.
7126  ArrayRef<OpenMPMapModifierKind> MapModifiers;
7127  bool ReturnDevicePointer = false;
7128  bool IsImplicit = false;
7129 
7130  MapInfo() = default;
      /// Member-wise constructor; each argument initializes the member of the
      /// same name.
7131  MapInfo(
7133  OpenMPMapClauseKind MapType,
7134  ArrayRef<OpenMPMapModifierKind> MapModifiers,
7135  bool ReturnDevicePointer, bool IsImplicit)
7136  : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
7137  ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit) {}
7138  };
7139 
7140  /// If use_device_ptr is used on a pointer which is a struct member and there
7141  /// is no map information about it, then emission of that entry is deferred
7142  /// until the whole struct has been processed.
      /// This record stores the expression and declaration of one such deferred
      /// entry. There is no default constructor; both pointers must be passed.
7143  struct DeferredDevicePtrEntryTy {
7144  const Expr *IE = nullptr;
7145  const ValueDecl *VD = nullptr;
7146 
7147  DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD)
7148  : IE(IE), VD(VD) {}
7149  };
7150 
7151  /// Directive from where the map clauses were extracted.
7152  const OMPExecutableDirective &CurDir;
7153 
7154  /// Function the directive is being generated for.
7155  CodeGenFunction &CGF;
7156 
7157  /// Set of all first private variables in the current directive.
7158  /// bool data is set to true if the variable is implicitly marked as
7159  /// firstprivate, false otherwise.
7160  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
7161 
7162  /// Map between device pointer declarations and their expression components.
7163  /// The key value for declarations in 'this' is null.
7164  llvm::DenseMap<
7165  const ValueDecl *,
7167  DevPointersMap;
7168 
7169  llvm::Value *getExprTypeSize(const Expr *E) const {
      // Returns the size value (as produced by CGF.getTypeSize) for the entity
      // designated by the mappable expression E:
      //  - reference type: size of the referenced type;
      //  - array section with a colon but no length: size of the base type;
      //  - array section with neither colon nor length: size of one element;
      //  - array section with a length: length * element size;
      //  - anything else: size of the canonical expression type.
7170  QualType ExprTy = E->getType().getCanonicalType();
7171 
7172  // Reference types are ignored for mapping purposes.
7173  if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
7174  ExprTy = RefTy->getPointeeType().getCanonicalType();
7175 
7176  // Given that an array section is considered a built-in type, we need to
7177  // do the calculation based on the length of the section instead of relying
7178  // on CGF.getTypeSize(E->getType()).
7179  if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      // NOTE(review): the line declaring BaseTy is missing from this listing;
      // it is computed from the section's base expression with parentheses
      // and implicit casts stripped.
7181  OAE->getBase()->IgnoreParenImpCasts())
7182  .getCanonicalType();
7183 
7184  // If there is no length associated with the expression, that means we
7185  // are using the whole length of the base.
7186  if (!OAE->getLength() && OAE->getColonLoc().isValid())
7187  return CGF.getTypeSize(BaseTy);
7188 
      // Element size comes from the pointee type for pointer bases and from
      // the element type for array bases.
7189  llvm::Value *ElemSize;
7190  if (const auto *PTy = BaseTy->getAs<PointerType>()) {
7191  ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
7192  } else {
      // NOTE(review): cast<> presumably already asserts on a type mismatch,
      // which would make the null check below redundant - confirm.
7193  const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
7194  assert(ATy && "Expecting array type if not a pointer type.");
7195  ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
7196  }
7197 
7198  // If we don't have a length at this point, that is because we have an
7199  // array section with a single element.
7200  if (!OAE->getLength())
7201  return ElemSize;
7202 
      // The length is converted to SizeTy with an unsigned cast and multiplied
      // by the element size using a NUW multiply.
7203  llvm::Value *LengthVal = CGF.EmitScalarExpr(OAE->getLength());
7204  LengthVal =
7205  CGF.Builder.CreateIntCast(LengthVal, CGF.SizeTy, /*isSigned=*/false);
7206  return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
7207  }
7208  return CGF.getTypeSize(ExprTy);
7209  }
7210 
7211  /// Return the corresponding bits for a given map clause modifier. Add
7212  /// a flag marking the map as a pointer if requested. Add a flag marking the
7213  /// map as the first one of a series of maps that relate to the same map
7214  /// expression.
      ///
      /// Bits contributed by each argument:
      ///  - IsImplicit: OMP_MAP_IMPLICIT, otherwise the result starts from
      ///    OMP_MAP_NONE;
      ///  - MapType: to -> TO, from -> FROM, tofrom -> TO | FROM,
      ///    delete -> DELETE, alloc/release -> nothing; OMPC_MAP_unknown is
      ///    treated as unreachable;
      ///  - AddPtrFlag: OMP_MAP_PTR_AND_OBJ;
      ///  - AddIsTargetParamFlag: OMP_MAP_TARGET_PARAM;
      ///  - MapModifiers containing OMPC_MAP_MODIFIER_always: OMP_MAP_ALWAYS.
7215  OpenMPOffloadMappingFlags getMapTypeBits(
7216  OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7217  bool IsImplicit, bool AddPtrFlag, bool AddIsTargetParamFlag) const {
7218  OpenMPOffloadMappingFlags Bits =
7219  IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
7220  switch (MapType) {
7221  case OMPC_MAP_alloc:
7222  case OMPC_MAP_release:
7223  // alloc and release are the default behavior in the runtime library, i.e.
7224  // if we don't pass any bits, alloc/release is what the runtime is
7225  // going to do. Therefore, we don't need to signal anything for these two
7226  // type modifiers.
7227  break;
7228  case OMPC_MAP_to:
7229  Bits |= OMP_MAP_TO;
7230  break;
7231  case OMPC_MAP_from:
7232  Bits |= OMP_MAP_FROM;
7233  break;
7234  case OMPC_MAP_tofrom:
7235  Bits |= OMP_MAP_TO | OMP_MAP_FROM;
7236  break;
7237  case OMPC_MAP_delete:
7238  Bits |= OMP_MAP_DELETE;
7239  break;
7240  case OMPC_MAP_unknown:
7241  llvm_unreachable("Unexpected map type!");
7242  }
7243  if (AddPtrFlag)
7244  Bits |= OMP_MAP_PTR_AND_OBJ;
7245  if (AddIsTargetParamFlag)
7246  Bits |= OMP_MAP_TARGET_PARAM;
      // Linear search of the modifier list for 'always'.
7247  if (llvm::find(MapModifiers, OMPC_MAP_MODIFIER_always)
7248  != MapModifiers.end())
7249  Bits |= OMP_MAP_ALWAYS;
7250  return Bits;
7251  }
7252 
7253  /// Return true if the provided expression is a final array section. A
7254  /// final array section is one whose length can't be proved to be one.
      ///
      /// Returns false when \a E is not an array section, when the section has
      /// no colon, or when its length is known to be exactly 1 (a constant
      /// length expression, or a constant-size array base when no length is
      /// given). Returns true in every other case.
7255  bool isFinalArraySectionExpression(const Expr *E) const {
7256  const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
7257 
7258  // It is not an array section and therefore not a unity-size one.
7259  if (!OASE)
7260  return false;
7261 
7262  // An array section with no colon always refers to a single element.
7263  if (OASE->getColonLoc().isInvalid())
7264  return false;
7265 
7266  const Expr *Length = OASE->getLength();
7267 
7268  // If we don't have a length we have to check if the array has size 1
7269  // for this dimension. Also, we should always expect a length if the
7270  // base type is pointer.
7271  if (!Length) {
      // NOTE(review): the line declaring BaseQTy is missing from this listing;
      // it is computed from the section's base expression with parentheses
      // and implicit casts stripped.
7273  OASE->getBase()->IgnoreParenImpCasts())
7274  .getCanonicalType();
7275  if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
7276  return ATy->getSize().getSExtValue() != 1;
7277  // If we don't have a constant dimension length, we have to consider
7278  // the current section as having any size, so it is not necessarily
7279  // unitary. If it happens to be unity size, that's the user's fault.
7280  return true;
7281  }
7282 
7283  // Check if the length evaluates to 1.
7284  Expr::EvalResult Result;
7285  if (!Length->EvaluateAsInt(Result, CGF.getContext()))
7286  return true; // Can have more than size 1.
7287 
7288  llvm::APSInt ConstLength = Result.Val.getInt();
7289  return ConstLength.getSExtValue() != 1;
7290  }
7291 
7292  /// Generate the base pointers, section pointers, sizes and map type
7293  /// bits for the provided map type, map modifier, and expression components.
7294  /// \a IsFirstComponent should be set to true if the provided set of
7295  /// components is the first associated with a capture.
7296  void generateInfoForComponentList(
7297  OpenMPMapClauseKind MapType,
7298  ArrayRef<OpenMPMapModifierKind> MapModifiers,
7300  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
7301  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
7302  StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7303  bool IsImplicit,
7304  ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7305  OverlappedElements = llvm::None) const {
7306  // The following summarizes what has to be generated for each map and the
7307  // types below. The generated information is expressed in this order:
7308  // base pointer, section pointer, size, flags
7309  // (to add to the ones that come from the map type and modifier).
7310  //
7311  // double d;
7312  // int i[100];
7313  // float *p;
7314  //
7315  // struct S1 {
7316  // int i;
7317  // float f[50];
7318  // }
7319  // struct S2 {
7320  // int i;
7321  // float f[50];
7322  // S1 s;
7323  // double *p;
7324  // struct S2 *ps;
7325  // }
7326  // S2 s;
7327  // S2 *ps;
7328  //
7329  // map(d)
7330  // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7331  //
7332  // map(i)
7333  // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7334  //
7335  // map(i[1:23])
7336  // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7337  //
7338  // map(p)
7339  // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7340  //
7341  // map(p[1:24])
7342  // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7343  //
7344  // map(s)
7345  // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7346  //
7347  // map(s.i)
7348  // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7349  //
7350  // map(s.s.f)
7351  // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7352  //
7353  // map(s.p)
7354  // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7355  //
7356  // map(to: s.p[:22])
7357  // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7358  // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7359  // &(s.p), &(s.p[0]), 22*sizeof(double),
7360  // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7361  // (*) alloc space for struct members, only this is a target parameter
7362  // (**) map the pointer (nothing to be mapped in this example) (the compiler
7363  // optimizes this entry out, same in the examples below)
7364  // (***) map the pointee (map: to)
7365  //
7366  // map(s.ps)
7367  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7368  //
7369  // map(from: s.ps->s.i)
7370  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7371  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7372  // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7373  //
7374  // map(to: s.ps->ps)
7375  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7376  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7377  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7378  //
7379  // map(s.ps->ps->ps)
7380  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7381  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7382  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7383  // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7384  //
7385  // map(to: s.ps->ps->s.f[:22])
7386  // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7387  // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7388  // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7389  // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7390  //
7391  // map(ps)
7392  // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7393  //
7394  // map(ps->i)
7395  // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7396  //
7397  // map(ps->s.f)
7398  // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7399  //
7400  // map(from: ps->p)
7401  // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7402  //
7403  // map(to: ps->p[:22])
7404  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7405  // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7406  // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7407  //
7408  // map(ps->ps)
7409  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7410  //
7411  // map(from: ps->ps->s.i)
7412  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7413  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7414  // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7415  //
7416  // map(from: ps->ps->ps)
7417  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7418  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7419  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7420  //
7421  // map(ps->ps->ps->ps)
7422  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7423  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7424  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7425  // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7426  //
7427  // map(to: ps->ps->ps->s.f[:22])
7428  // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7429  // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7430  // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7431  // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7432  //
7433  // map(to: s.f[:22]) map(from: s.p[:33])
7434  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7435  // sizeof(double*) (*), TARGET_PARAM
7436  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7437  // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7438  // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7439  // (*) allocate contiguous space needed to fit all mapped members even if
7440  // we allocate space for members not mapped (in this example,
7441  // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7442  // them as well because they fall between &s.f[0] and &s.p)
7443  //
7444  // map(from: s.f[:22]) map(to: ps->p[:33])
7445  // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7446  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7447  // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7448  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7449  // (*) the struct this entry pertains to is the 2nd element in the list of
7450  // arguments, hence MEMBER_OF(2)
7451  //
7452  // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7453  // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7454  // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7455  // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7456  // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7457  // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7458  // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7459  // (*) the struct this entry pertains to is the 4th element in the list
7460  // of arguments, hence MEMBER_OF(4)
7461 
7462  // Track if the map information being generated is the first for a capture.
7463  bool IsCaptureFirstInfo = IsFirstComponentList;
7464  // When the variable is on a declare target link or in a to clause with
7465  // unified memory, a reference is needed to hold the host/device address
7466  // of the variable.
7467  bool RequiresReference = false;
7468 
7469  // Scan the components from the base to the complete expression.
7470  auto CI = Components.rbegin();
7471  auto CE = Components.rend();
7472  auto I = CI;
7473 
7474  // Track if the map information being generated is the first for a list of
7475  // components.
7476  bool IsExpressionFirstInfo = true;
7477  Address BP = Address::invalid();
7478  const Expr *AssocExpr = I->getAssociatedExpression();
7479  const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
7480  const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
7481 
7482  if (isa<MemberExpr>(AssocExpr)) {
7483  // The base is the 'this' pointer. The content of the pointer is going
7484  // to be the base of the field being mapped.
7485  BP = CGF.LoadCXXThisAddress();
7486  } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
7487  (OASE &&
7488  isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
7489  BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7490  } else {
7491  // The base is the reference to the variable.
7492  // BP = &Var.
7493  BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
7494  if (const auto *VD =
7495  dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
7497  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7498  if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7499  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
7500  CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7501  RequiresReference = true;
7502  BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7503  }
7504  }
7505  }
7506 
7507  // If the variable is a pointer and is being dereferenced (i.e. is not
7508  // the last component), the base has to be the pointer itself, not its
7509  // reference. References are ignored for mapping purposes.
7510  QualType Ty =
7511  I->getAssociatedDeclaration()->getType().getNonReferenceType();
7512  if (Ty->isAnyPointerType() && std::next(I) != CE) {
7513  BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
7514 
7515  // We do not need to generate individual map information for the
7516  // pointer, it can be associated with the combined storage.
7517  ++I;
7518  }
7519  }
7520 
7521  // Track whether a component of the list should be marked as MEMBER_OF some
7522  // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7523  // in a component list should be marked as MEMBER_OF, all subsequent entries
7524  // do not belong to the base struct. E.g.
7525  // struct S2 s;
7526  // s.ps->ps->ps->f[:]
7527  // (1) (2) (3) (4)
7528  // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7529  // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7530  // is the pointee of ps(2) which is not member of struct s, so it should not
7531  // be marked as such (it is still PTR_AND_OBJ).
7532  // The variable is initialized to false so that PTR_AND_OBJ entries which
7533  // are not struct members are not considered (e.g. array of pointers to
7534  // data).
7535  bool ShouldBeMemberOf = false;
7536 
7537  // Variable keeping track of whether or not we have encountered a component
7538  // in the component list which is a member expression. Useful when we have a
7539  // pointer or a final array section, in which case it is the previous
7540  // component in the list which tells us whether we have a member expression.
7541  // E.g. X.f[:]
7542  // While processing the final array section "[:]" it is "f" which tells us
7543  // whether we are dealing with a member of a declared struct.
7544  const MemberExpr *EncounteredME = nullptr;
7545 
7546  for (; I != CE; ++I) {
7547  // If the current component is member of a struct (parent struct) mark it.
7548  if (!EncounteredME) {
7549  EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
7550  // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7551  // as MEMBER_OF the parent struct.
7552  if (EncounteredME)
7553  ShouldBeMemberOf = true;
7554  }
7555 
7556  auto Next = std::next(I);
7557 
7558  // We need to generate the addresses and sizes if this is the last
7559  // component, if the component is a pointer or if it is an array section
7560  // whose length can't be proved to be one. If this is a pointer, it
7561  // becomes the base address for the following components.
7562 
7563  // A final array section is one whose length can't be proved to be one.
7564  bool IsFinalArraySection =
7565  isFinalArraySectionExpression(I->getAssociatedExpression());
7566 
7567  // Get information on whether the element is a pointer. Have to do a
7568  // special treatment for array sections given that they are built-in
7569  // types.
7570  const auto *OASE =
7571  dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
7572  bool IsPointer =
7574  .getCanonicalType()
7575  ->isAnyPointerType()) ||
7576  I->getAssociatedExpression()->getType()->isAnyPointerType();
7577 
7578  if (Next == CE || IsPointer || IsFinalArraySection) {
7579  // If this is not the last component, we expect the pointer to be
7580  // associated with an array expression or member expression.
7581  assert((Next == CE ||
7582  isa<MemberExpr>(Next->getAssociatedExpression()) ||
7583  isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7584  isa<OMPArraySectionExpr>(Next->getAssociatedExpression())) &&
7585  "Unexpected expression");
7586 
7587  Address LB =
7588  CGF.EmitOMPSharedLValue(I->getAssociatedExpression()).getAddress();
7589 
7590  // If this component is a pointer inside the base struct then we don't
7591  // need to create any entry for it - it will be combined with the object
7592  // it is pointing to into a single PTR_AND_OBJ entry.
7593  bool IsMemberPointer =
7594  IsPointer && EncounteredME &&
7595  (dyn_cast<MemberExpr>(I->getAssociatedExpression()) ==
7596  EncounteredME);
7597  if (!OverlappedElements.empty()) {
7598  // Handle base element with the info for overlapped elements.
7599  assert(!PartialStruct.Base.isValid() && "The base element is set.");
7600  assert(Next == CE &&
7601  "Expected last element for the overlapped elements.");
7602  assert(!IsPointer &&
7603  "Unexpected base element with the pointer type.");
7604  // Mark the whole struct as the struct that requires allocation on the
7605  // device.
7606  PartialStruct.LowestElem = {0, LB};
7607  CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7608  I->getAssociatedExpression()->getType());
7609  Address HB = CGF.Builder.CreateConstGEP(
7611  CGF.VoidPtrTy),
7612  TypeSize.getQuantity() - 1);
7613  PartialStruct.HighestElem = {
7614  std::numeric_limits<decltype(
7615  PartialStruct.HighestElem.first)>::max(),
7616  HB};
7617  PartialStruct.Base = BP;
7618  // Emit data for non-overlapped data.
7619  OpenMPOffloadMappingFlags Flags =
7620  OMP_MAP_MEMBER_OF |
7621  getMapTypeBits(MapType, MapModifiers, IsImplicit,
7622  /*AddPtrFlag=*/false,
7623  /*AddIsTargetParamFlag=*/false);
7624  LB = BP;
7625  llvm::Value *Size = nullptr;
7626  // Do bitcopy of all non-overlapped structure elements.
7628  Component : OverlappedElements) {
7629  Address ComponentLB = Address::invalid();
7631  Component) {
7632  if (MC.getAssociatedDeclaration()) {
7633  ComponentLB =
7634  CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7635  .getAddress();
7636  Size = CGF.Builder.CreatePtrDiff(
7637  CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
7638  CGF.EmitCastToVoidPtr(LB.getPointer()));
7639  break;
7640  }
7641  }
7642  BasePointers.push_back(BP.getPointer());
7643  Pointers.push_back(LB.getPointer());
7644  Sizes.push_back(CGF.Builder.CreateIntCast(Size, CGF.Int64Ty,
7645  /*isSigned=*/true));
7646  Types.push_back(Flags);
7647  LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7648  }
7649  BasePointers.push_back(BP.getPointer());
7650  Pointers.push_back(LB.getPointer());
7651  Size = CGF.Builder.CreatePtrDiff(
7652  CGF.EmitCastToVoidPtr(
7653  CGF.Builder.CreateConstGEP(HB, 1).getPointer()),
7654  CGF.EmitCastToVoidPtr(LB.getPointer()));
7655  Sizes.push_back(
7656  CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7657  Types.push_back(Flags);
7658  break;
7659  }
7660  llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
7661  if (!IsMemberPointer) {
7662  BasePointers.push_back(BP.getPointer());
7663  Pointers.push_back(LB.getPointer());
7664  Sizes.push_back(
7665  CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7666 
7667  // We need to add a pointer flag for each map that comes from the
7668  // same expression except for the first one. We also need to signal
7669  // this map is the first one that relates with the current capture
7670  // (there is a set of entries for each capture).
7671  OpenMPOffloadMappingFlags Flags = getMapTypeBits(
7672  MapType, MapModifiers, IsImplicit,
7673  !IsExpressionFirstInfo || RequiresReference,
7674  IsCaptureFirstInfo && !RequiresReference);
7675 
7676  if (!IsExpressionFirstInfo) {
7677  // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7678  // then we reset the TO/FROM/ALWAYS/DELETE flags.
7679  if (IsPointer)
7680  Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
7681  OMP_MAP_DELETE);
7682 
7683  if (ShouldBeMemberOf) {
7684  // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7685  // should be later updated with the correct value of MEMBER_OF.
7686  Flags |= OMP_MAP_MEMBER_OF;
7687  // From now on, all subsequent PTR_AND_OBJ entries should not be
7688  // marked as MEMBER_OF.
7689  ShouldBeMemberOf = false;
7690  }
7691  }
7692 
7693  Types.push_back(Flags);
7694  }
7695 
7696  // If we have encountered a member expression so far, keep track of the
7697  // mapped member. If the parent is "*this", then the value declaration
7698  // is nullptr.
7699  if (EncounteredME) {
7700  const auto *FD = dyn_cast<FieldDecl>(EncounteredME->getMemberDecl());
7701  unsigned FieldIndex = FD->getFieldIndex();
7702 
7703  // Update info about the lowest and highest elements for this struct
7704  if (!PartialStruct.Base.isValid()) {
7705  PartialStruct.LowestElem = {FieldIndex, LB};
7706  PartialStruct.HighestElem = {FieldIndex, LB};
7707  PartialStruct.Base = BP;
7708  } else if (FieldIndex < PartialStruct.LowestElem.first) {
7709  PartialStruct.LowestElem = {FieldIndex, LB};
7710  } else if (FieldIndex > PartialStruct.HighestElem.first) {
7711  PartialStruct.HighestElem = {FieldIndex, LB};
7712  }
7713  }
7714 
7715  // If we have a final array section, we are done with this expression.
7716  if (IsFinalArraySection)
7717  break;
7718 
7719  // The pointer becomes the base for the next element.
7720  if (Next != CE)
7721  BP = LB;
7722 
7723  IsExpressionFirstInfo = false;
7724  IsCaptureFirstInfo = false;
7725  }
7726  }
7727  }
7728 
7729  /// Return the adjusted map modifiers if the declaration a capture refers to
7730  /// appears in a first-private clause. This is expected to be used only with
7731  /// directives that start with 'target'.
7732  MappableExprsHandler::OpenMPOffloadMappingFlags
7733  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7734  assert(Cap.capturesVariable() && "Expected capture by reference only!");
7735 
7736  // A first private variable captured by reference will use only the
7737  // 'private ptr' and 'map to' flag. Return the right flags if the captured
7738  // declaration is known as first-private in this handler.
7739  if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
7740  if (Cap.getCapturedVar()->getType().isConstant(CGF.getContext()) &&
7742  return MappableExprsHandler::OMP_MAP_ALWAYS |
7743  MappableExprsHandler::OMP_MAP_TO;
7744  if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7745  return MappableExprsHandler::OMP_MAP_TO |
7746  MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
7747  return MappableExprsHandler::OMP_MAP_PRIVATE |
7748  MappableExprsHandler::OMP_MAP_TO;
7749  }
7750  return MappableExprsHandler::OMP_MAP_TO |
7751  MappableExprsHandler::OMP_MAP_FROM;
7752  }
7753 
7754  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
7755  // Member of is given by the 16 MSB of the flag, so rotate by 48 bits.
7756  return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
7757  << 48);
7758  }
7759 
7760  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
7761  OpenMPOffloadMappingFlags MemberOfFlag) {
7762  // If the entry is PTR_AND_OBJ but has not been marked with the special
7763  // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
7764  // marked as MEMBER_OF.
7765  if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
7766  ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
7767  return;
7768 
7769  // Reset the placeholder value to prepare the flag for the assignment of the
7770  // proper MEMBER_OF value.
7771  Flags &= ~OMP_MAP_MEMBER_OF;
7772  Flags |= MemberOfFlag;
7773  }
7774 
7775  void getPlainLayout(const CXXRecordDecl *RD,
7777  bool AsBase) const {
7778  const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7779 
7780  llvm::StructType *St =
7781  AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7782 
7783  unsigned NumElements = St->getNumElements();
7785  llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7786  RecordLayout(NumElements);
7787 
7788  // Fill bases.
7789  for (const auto &I : RD->bases()) {
7790  if (I.isVirtual())
7791  continue;
7792  const auto *Base = I.getType()->getAsCXXRecordDecl();
7793  // Ignore empty bases.
7794  if (Base->isEmpty() || CGF.getContext()
7797  .isZero())
7798  continue;
7799 
7800  unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
7801  RecordLayout[FieldIndex] = Base;
7802  }
7803  // Fill in virtual bases.
7804  for (const auto &I : RD->vbases()) {
7805  const auto *Base = I.getType()->getAsCXXRecordDecl();
7806  // Ignore empty bases.
7807  if (Base->isEmpty())
7808  continue;
7809  unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
7810  if (RecordLayout[FieldIndex])
7811  continue;
7812  RecordLayout[FieldIndex] = Base;
7813  }
7814  // Fill in all the fields.
7815  assert(!RD->isUnion() && "Unexpected union.");
7816  for (const auto *Field : RD->fields()) {
7817  // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
7818  // will fill in later.)
7819  if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
7820  unsigned FieldIndex = RL.getLLVMFieldNo(Field);
7821  RecordLayout[FieldIndex] = Field;
7822  }
7823  }
7824  for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
7825  &Data : RecordLayout) {
7826  if (Data.isNull())
7827  continue;
7828  if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
7829  getPlainLayout(Base, Layout, /*AsBase=*/true);
7830  else
7831  Layout.push_back(Data.get<const FieldDecl *>());
7832  }
7833  }
7834 
7835 public:
7836  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
7837  : CurDir(Dir), CGF(CGF) {
7838  // Extract firstprivate clause information.
7839  for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
7840  for (const auto *D : C->varlists())
7841  FirstPrivateDecls.try_emplace(
7842  cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
7843  // Extract device pointer clause information.
7844  for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
7845  for (auto L : C->component_lists())
7846  DevPointersMap[L.first].push_back(L.second);
7847  }
7848 
7849  /// Generate code for the combined entry if we have a partially mapped struct
7850  /// and take care of the mapping flags of the arguments corresponding to
7851  /// individual struct members.
7852  void emitCombinedEntry(MapBaseValuesArrayTy &BasePointers,
7853  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7854  MapFlagsArrayTy &Types, MapFlagsArrayTy &CurTypes,
7855  const StructRangeInfoTy &PartialStruct) const {
7856  // Base is the base of the struct
7857  BasePointers.push_back(PartialStruct.Base.getPointer());
7858  // Pointer is the address of the lowest element
7859  llvm::Value *LB = PartialStruct.LowestElem.second.getPointer();
7860  Pointers.push_back(LB);
7861  // Size is (addr of {highest+1} element) - (addr of lowest element)
7862  llvm::Value *HB = PartialStruct.HighestElem.second.getPointer();
7863  llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(HB, /*Idx0=*/1);
7864  llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
7865  llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
7866  llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
7867  llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
7868  /*isSigned=*/false);
7869  Sizes.push_back(Size);
7870  // Map type is always TARGET_PARAM
7871  Types.push_back(OMP_MAP_TARGET_PARAM);
7872  // Remove TARGET_PARAM flag from the first element
7873  (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
7874 
7875  // All other current entries will be MEMBER_OF the combined entry
7876  // (except for PTR_AND_OBJ entries which do not have a placeholder value
7877  // 0xFFFF in the MEMBER_OF field).
7878  OpenMPOffloadMappingFlags MemberOfFlag =
7879  getMemberOfFlag(BasePointers.size() - 1);
7880  for (auto &M : CurTypes)
7881  setCorrectMemberOfFlag(M, MemberOfFlag);
7882  }
7883 
7884  /// Generate all the base pointers, section pointers, sizes and map
7885  /// types for the extracted mappable expressions. Also, for each item that
7886  /// relates with a device pointer, a pair of the relevant declaration and
7887  /// index where it occurs is appended to the device pointers info array.
7888  void generateAllInfo(MapBaseValuesArrayTy &BasePointers,
7889  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
7890  MapFlagsArrayTy &Types) const {
7891  // We have to process the component lists that relate with the same
7892  // declaration in a single chunk so that we can generate the map flags
7893  // correctly. Therefore, we organize all lists in a map.
7894  llvm::MapVector<const ValueDecl *, SmallVector<MapInfo, 8>> Info;
7895 
7896  // Helper function to fill the information map for the different supported
7897  // clauses.
7898  auto &&InfoGen = [&Info](
7899  const ValueDecl *D,
7901  OpenMPMapClauseKind MapType,
7902  ArrayRef<OpenMPMapModifierKind> MapModifiers,
7903  bool ReturnDevicePointer, bool IsImplicit) {
7904  const ValueDecl *VD =
7905  D ? cast<ValueDecl>(D->getCanonicalDecl()) : nullptr;
7906  Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer,
7907  IsImplicit);
7908  };
7909 
7910  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7911  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>())
7912  for (const auto &L : C->component_lists()) {
7913  InfoGen(L.first, L.second, C->getMapType(), C->getMapTypeModifiers(),
7914  /*ReturnDevicePointer=*/false, C->isImplicit());
7915  }
7916  for (const auto *C : this->CurDir.getClausesOfKind<OMPToClause>())
7917  for (const auto &L : C->component_lists()) {
7918  InfoGen(L.first, L.second, OMPC_MAP_to, llvm::None,
7919  /*ReturnDevicePointer=*/false, C->isImplicit());
7920  }
7921  for (const auto *C : this->CurDir.getClausesOfKind<OMPFromClause>())
7922  for (const auto &L : C->component_lists()) {
7923  InfoGen(L.first, L.second, OMPC_MAP_from, llvm::None,
7924  /*ReturnDevicePointer=*/false, C->isImplicit());
7925  }
7926 
7927  // Look at the use_device_ptr clause information and mark the existing map
7928  // entries as such. If there is no map information for an entry in the
7929  // use_device_ptr list, we create one with map type 'alloc' and zero size
7930  // section. It is the user fault if that was not mapped before. If there is
7931  // no map information and the pointer is a struct member, then we defer the
7932  // emission of that entry until the whole struct has been processed.
7933  llvm::MapVector<const ValueDecl *, SmallVector<DeferredDevicePtrEntryTy, 4>>
7934  DeferredInfo;
7935 
7936  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
7937  for (const auto *C :
7938  this->CurDir.getClausesOfKind<OMPUseDevicePtrClause>()) {
7939  for (const auto &L : C->component_lists()) {
7940  assert(!L.second.empty() && "Not expecting empty list of components!");
7941  const ValueDecl *VD = L.second.back().getAssociatedDeclaration();
7942  VD = cast<ValueDecl>(VD->getCanonicalDecl());
7943  const Expr *IE = L.second.back().getAssociatedExpression();
7944  // If the first component is a member expression, we have to look into
7945  // 'this', which maps to null in the map of map information. Otherwise
7946  // look directly for the information.
7947  auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
7948 
7949  // We potentially have map information for this declaration already.
7950  // Look for the first set of components that refer to it.
7951  if (It != Info.end()) {
7952  auto CI = std::find_if(
7953  It->second.begin(), It->second.end(), [VD](const MapInfo &MI) {
7954  return MI.Components.back().getAssociatedDeclaration() == VD;
7955  });
7956  // If we found a map entry, signal that the pointer has to be returned
7957  // and move on to the next declaration.
7958  if (CI != It->second.end()) {
7959  CI->ReturnDevicePointer = true;
7960  continue;
7961  }
7962  }
7963 
7964  // We didn't find any match in our map information - generate a zero
7965  // size array section - if the pointer is a struct member we defer this
7966  // action until the whole struct has been processed.
7967  // FIXME: MSVC 2013 seems to require this-> to find member CGF.
7968  if (isa<MemberExpr>(IE)) {
7969  // Insert the pointer into Info to be processed by
7970  // generateInfoForComponentList. Because it is a member pointer
7971  // without a pointee, no entry will be generated for it, therefore
7972  // we need to generate one after the whole struct has been processed.
7973  // Nonetheless, generateInfoForComponentList must be called to take
7974  // the pointer into account for the calculation of the range of the
7975  // partial struct.
7976  InfoGen(nullptr, L.second, OMPC_MAP_unknown, llvm::None,
7977  /*ReturnDevicePointer=*/false, C->isImplicit());
7978  DeferredInfo[nullptr].emplace_back(IE, VD);
7979  } else {
7980  llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
7981  this->CGF.EmitLValue(IE), IE->getExprLoc());
7982  BasePointers.emplace_back(Ptr, VD);
7983  Pointers.push_back(Ptr);
7984  Sizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
7985  Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_TARGET_PARAM);
7986  }
7987  }
7988  }
7989 
7990  for (const auto &M : Info) {
7991  // We need to know when we generate information for the first component
7992  // associated with a capture, because the mapping flags depend on it.
7993  bool IsFirstComponentList = true;
7994 
7995  // Temporary versions of arrays
7996  MapBaseValuesArrayTy CurBasePointers;
7997  MapValuesArrayTy CurPointers;
7998  MapValuesArrayTy CurSizes;
7999  MapFlagsArrayTy CurTypes;
8000  StructRangeInfoTy PartialStruct;
8001 
8002  for (const MapInfo &L : M.second) {
8003  assert(!L.Components.empty() &&
8004  "Not expecting declaration with no component lists.");
8005 
8006  // Remember the current base pointer index.
8007  unsigned CurrentBasePointersIdx = CurBasePointers.size();
8008  // FIXME: MSVC 2013 seems to require this-> to find the member method.
8009  this->generateInfoForComponentList(
8010  L.MapType, L.MapModifiers, L.Components, CurBasePointers,
8011  CurPointers, CurSizes, CurTypes, PartialStruct,
8012  IsFirstComponentList, L.IsImplicit);
8013 
8014  // If this entry relates with a device pointer, set the relevant
8015  // declaration and add the 'return pointer' flag.
8016  if (L.ReturnDevicePointer) {
8017  assert(CurBasePointers.size() > CurrentBasePointersIdx &&
8018  "Unexpected number of mapped base pointers.");
8019 
8020  const ValueDecl *RelevantVD =
8021  L.Components.back().getAssociatedDeclaration();
8022  assert(RelevantVD &&
8023  "No relevant declaration related with device pointer??");
8024 
8025  CurBasePointers[CurrentBasePointersIdx].setDevicePtrDecl(RelevantVD);
8026  CurTypes[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
8027  }
8028  IsFirstComponentList = false;
8029  }
8030 
8031  // Append any pending zero-length pointers which are struct members and
8032  // used with use_device_ptr.
8033  auto CI = DeferredInfo.find(M.first);
8034  if (CI != DeferredInfo.end()) {
8035  for (const DeferredDevicePtrEntryTy &L : CI->second) {
8036  llvm::Value *BasePtr = this->CGF.EmitLValue(L.IE).getPointer();
8037  llvm::Value *Ptr = this->CGF.EmitLoadOfScalar(
8038  this->CGF.EmitLValue(L.IE), L.IE->getExprLoc());
8039  CurBasePointers.emplace_back(BasePtr, L.VD);
8040  CurPointers.push_back(Ptr);
8041  CurSizes.push_back(llvm::Constant::getNullValue(this->CGF.Int64Ty));
8042  // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the placeholder
8043  // value MEMBER_OF=FFFF so that the entry is later updated with the
8044  // correct value of MEMBER_OF.
8045  CurTypes.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
8046  OMP_MAP_MEMBER_OF);
8047  }
8048  }
8049 
8050  // If there is an entry in PartialStruct it means we have a struct with
8051  // individual members mapped. Emit an extra combined entry.
8052  if (PartialStruct.Base.isValid())
8053  emitCombinedEntry(BasePointers, Pointers, Sizes, Types, CurTypes,
8054  PartialStruct);
8055 
8056  // We need to append the results of this capture to what we already have.
8057  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8058  Pointers.append(CurPointers.begin(), CurPointers.end());
8059  Sizes.append(CurSizes.begin(), CurSizes.end());
8060  Types.append(CurTypes.begin(), CurTypes.end());
8061  }
8062  }
8063 
8064  /// Emit capture info for lambdas for variables captured by reference.
8065  void generateInfoForLambdaCaptures(
8066  const ValueDecl *VD, llvm::Value *Arg, MapBaseValuesArrayTy &BasePointers,
8067  MapValuesArrayTy &Pointers, MapValuesArrayTy &Sizes,
8068  MapFlagsArrayTy &Types,
8069  llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
8070  const auto *RD = VD->getType()
8071  .getCanonicalType()
8073  ->getAsCXXRecordDecl();
8074  if (!RD || !RD->isLambda())
8075  return;
8076  Address VDAddr = Address(Arg, CGF.getContext().getDeclAlign(VD));
8077  LValue VDLVal = CGF.MakeAddrLValue(
8078  VDAddr, VD->getType().getCanonicalType().getNonReferenceType());
8079  llvm::DenseMap<const VarDecl *, FieldDecl *> Captures;
8080  FieldDecl *ThisCapture = nullptr;
8081  RD->getCaptureFields(Captures, ThisCapture);
8082  if (ThisCapture) {
8083  LValue ThisLVal =
8084  CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
8085  LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
8086  LambdaPointers.try_emplace(ThisLVal.getPointer(), VDLVal.getPointer());
8087  BasePointers.push_back(ThisLVal.getPointer());
8088  Pointers.push_back(ThisLValVal.getPointer());
8089  Sizes.push_back(
8090  CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8091  CGF.Int64Ty, /*isSigned=*/true));
8092  Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8093  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8094  }
8095  for (const LambdaCapture &LC : RD->captures()) {
8096  if (!LC.capturesVariable())
8097  continue;
8098  const VarDecl *VD = LC.getCapturedVar();
8099  if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
8100  continue;
8101  auto It = Captures.find(VD);
8102  assert(It != Captures.end() && "Found lambda capture without field.");
8103  LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
8104  if (LC.getCaptureKind() == LCK_ByRef) {
8105  LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
8106  LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8107  BasePointers.push_back(VarLVal.getPointer());
8108  Pointers.push_back(VarLValVal.getPointer());
8109  Sizes.push_back(CGF.Builder.CreateIntCast(
8110  CGF.getTypeSize(
8112  CGF.Int64Ty, /*isSigned=*/true));
8113  } else {
8114  RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
8115  LambdaPointers.try_emplace(VarLVal.getPointer(), VDLVal.getPointer());
8116  BasePointers.push_back(VarLVal.getPointer());
8117  Pointers.push_back(VarRVal.getScalarVal());
8118  Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
8119  }
8120  Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8121  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
8122  }
8123  }
8124 
8125  /// Set correct indices for lambdas captures.
8126  void adjustMemberOfForLambdaCaptures(
8127  const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8128  MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8129  MapFlagsArrayTy &Types) const {
8130  for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8131  // Set correct member_of idx for all implicit lambda captures.
8132  if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
8133  OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
8134  continue;
8135  llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
8136  assert(BasePtr && "Unable to find base lambda address.");
8137  int TgtIdx = -1;
8138  for (unsigned J = I; J > 0; --J) {
8139  unsigned Idx = J - 1;
8140  if (Pointers[Idx] != BasePtr)
8141  continue;
8142  TgtIdx = Idx;
8143  break;
8144  }
8145  assert(TgtIdx != -1 && "Unable to find parent lambda.");
8146  // All other current entries will be MEMBER_OF the combined entry
8147  // (except for PTR_AND_OBJ entries which do not have a placeholder value
8148  // 0xFFFF in the MEMBER_OF field).
8149  OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
8150  setCorrectMemberOfFlag(Types[I], MemberOfFlag);
8151  }
8152  }
8153 
8154  /// Generate the base pointers, section pointers, sizes and map types
8155  /// associated to a given capture.
      ///
      /// \param Cap capture being mapped; must not capture a variable array type
      /// (asserted below).
      /// \param Arg value recorded as both base pointer and pointer when the
      /// captured declaration is found in DevPointersMap.
      /// \param BasePointers output array of base pointers.
      /// \param Pointers output array of section pointers.
      /// \param Sizes output array of sizes.
      /// \param Types output array of map-type flags.
      /// \param PartialStruct forwarded unchanged to generateInfoForComponentList.
      ///
      /// Nothing is appended to the output arrays when the declaration is not in
      /// DevPointersMap and no map clause of the current directive provides a
      /// component list for it.
8156  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
8157  llvm::Value *Arg,
8158  MapBaseValuesArrayTy &BasePointers,
8159  MapValuesArrayTy &Pointers,
8160  MapValuesArrayTy &Sizes, MapFlagsArrayTy &Types,
8161  StructRangeInfoTy &PartialStruct) const {
8162  assert(!Cap->capturesVariableArrayType() &&
8163  "Not expecting to generate map info for a variable array type!");
8164 
8165  // Canonical declaration of the captured variable; null for a 'this' capture.
8166  const ValueDecl *VD = Cap->capturesThis()
8167  ? nullptr
8168  : Cap->getCapturedVar()->getCanonicalDecl();
8169 
8170  // If this declaration appears in an is_device_ptr clause we just have to
8171  // pass the pointer by value. If it is a reference to a declaration, we just
8172  // pass its value.
8173  if (DevPointersMap.count(VD)) {
8174  BasePointers.emplace_back(Arg, VD);
8175  Pointers.push_back(Arg);
8176  Sizes.push_back(
8177  CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
8178  CGF.Int64Ty, /*isSigned=*/true));
8179  Types.push_back(OMP_MAP_LITERAL | OMP_MAP_TARGET_PARAM);
8180  return;
8181  }
8182 
      // Each MapData entry holds one component list of VD together with the map
      // type, map-type modifiers and implicit flag of the clause it came from
      // (see the emplace_back below).
8183  using MapData =
8185  OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool>;
8186  SmallVector<MapData, 4> DeclComponentLists;
8187  // FIXME: MSVC 2013 seems to require this-> to find member CurDir.
8188  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8189  for (const auto &L : C->decl_component_lists(VD)) {
8190  assert(L.first == VD &&
8191  "We got information for the wrong declaration??");
8192  assert(!L.second.empty() &&
8193  "Not expecting declaration with no component lists.");
8194  DeclComponentLists.emplace_back(L.second, C->getMapType(),
8195  C->getMapTypeModifiers(),
8196  C->isImplicit());
8197  }
8198  }
8199 
8200  // Find overlapping elements (including the offset from the base element).
      // OverlappedData is keyed by the address of the entry whose component list
      // was exhausted first in the comparison below; the mapped value collects
      // the component lists of the other entry of every such pair. The keys
      // point into DeclComponentLists, which is not modified after this point.
8201  llvm::SmallDenseMap<
8202  const MapData *,
8205  4>
8206  OverlappedData;
8207  size_t Count = 0;
8208  for (const MapData &L : DeclComponentLists) {
8209  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8210  OpenMPMapClauseKind MapType;
8211  ArrayRef<OpenMPMapModifierKind> MapModifiers;
8212  bool IsImplicit;
8213  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
      // After the increment, slice(Count) yields exactly the entries that follow
      // L, so every unordered pair is compared once.
8214  ++Count;
8215  for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
8216  OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
      // Only Components1 is read from L1 in this loop; MapType, MapModifiers and
      // IsImplicit are overwritten here but not used afterwards in this loop.
8217  std::tie(Components1, MapType, MapModifiers, IsImplicit) = L1;
      // Walk both lists in reverse order until they diverge or one runs out.
8218  auto CI = Components.rbegin();
8219  auto CE = Components.rend();
8220  auto SI = Components1.rbegin();
8221  auto SE = Components1.rend();
8222  for (; CI != CE && SI != SE; ++CI, ++SI) {
8223  if (CI->getAssociatedExpression()->getStmtClass() !=
8224  SI->getAssociatedExpression()->getStmtClass())
8225  break;
8226  // Are we dealing with different variables/fields?
8227  if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
8228  break;
8229  }
8230  // Found overlapping if, at least for one component, reached the head of
8231  // the components list.
8232  if (CI == CE || SI == SE) {
8233  assert((CI != CE || SI != SE) &&
8234  "Unexpected full match of the mapping components.");
      // The exhausted list is the base; the longer list is recorded under it.
8235  const MapData &BaseData = CI == CE ? L : L1;
8236  OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
8237  SI == SE ? Components : Components1;
8238  auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
8239  OverlappedElements.getSecond().push_back(SubData);
8240  }
8241  }
8242  }
8243  // Sort the overlapped elements for each item.
      // Layout is only filled when there is something to sort. It receives the
      // result of getPlainLayout for a C++ record, or the record's fields in
      // declaration order otherwise.
8245  if (!OverlappedData.empty()) {
8246  if (const auto *CRD =
8247  VD->getType().getCanonicalType()->getAsCXXRecordDecl())
8248  getPlainLayout(CRD, Layout, /*AsBase=*/false);
8249  else {
8250  const auto *RD = VD->getType().getCanonicalType()->getAsRecordDecl();
8251  Layout.append(RD->field_begin(), RD->field_end());
8252  }
8253  }
8254  for (auto &Pair : OverlappedData) {
      // Ordering used by the comparator: find the first position (in reverse
      // order) where the two lists differ. Identical lists are equivalent, a
      // list that ran out sorts before the longer one, fields with the same
      // parent compare by field index, and fields with different parents
      // compare by which of the two is found first in Layout.
8255  llvm::sort(
8256  Pair.getSecond(),
8257  [&Layout](
8258  OMPClauseMappableExprCommon::MappableExprComponentListRef First,
8259  OMPClauseMappableExprCommon::MappableExprComponentListRef
8260  Second) {
8261  auto CI = First.rbegin();
8262  auto CE = First.rend();
8263  auto SI = Second.rbegin();
8264  auto SE = Second.rend();
8265  for (; CI != CE && SI != SE; ++CI, ++SI) {
8266  if (CI->getAssociatedExpression()->getStmtClass() !=
8267  SI->getAssociatedExpression()->getStmtClass())
8268  break;
8269  // Are we dealing with different variables/fields?
8270  if (CI->getAssociatedDeclaration() !=
8271  SI->getAssociatedDeclaration())
8272  break;
8273  }
8274 
8275  // Lists contain the same elements.
8276  if (CI == CE && SI == SE)
8277  return false;
8278 
8279  // List with fewer elements is less than list with more elements.
8280  if (CI == CE || SI == SE)
8281  return CI == CE;
8282 
8283  const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
8284  const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
8285  if (FD1->getParent() == FD2->getParent())
8286  return FD1->getFieldIndex() < FD2->getFieldIndex();
      // NOTE(review): It is dereferenced without comparing against
      // Layout.end(); presumably at least one of FD1/FD2 is always present in
      // Layout -- confirm.
8287  const auto It =
8288  llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
8289  return FD == FD1 || FD == FD2;
8290  });
8291  return *It == FD1;
8292  });
8293  }
8294 
8295  // Associated with a capture, because the mapping flags depend on it.
8296  // Go through all of the elements with the overlapped elements.
8297  for (const auto &Pair : OverlappedData) {
8298  const MapData &L = *Pair.getFirst();
8299  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8300  OpenMPMapClauseKind MapType;
8301  ArrayRef<OpenMPMapModifierKind> MapModifiers;
8302  bool IsImplicit;
8303  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8304  ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
8305  OverlappedComponents = Pair.getSecond();
      // Every base entry with overlapped elements is emitted with the
      // first-component-list flag set.
8306  bool IsFirstComponentList = true;
8307  generateInfoForComponentList(MapType, MapModifiers, Components,
8308  BasePointers, Pointers, Sizes, Types,
8309  PartialStruct, IsFirstComponentList,
8310  IsImplicit, OverlappedComponents);
8311  }
8312  // Go through other elements without overlapped elements.
      // The flag can only be true for the first entry visited, and only when no
      // overlapped entry was emitted above. It is cleared after the first
      // iteration even if that entry was skipped as a key of OverlappedData.
8313  bool IsFirstComponentList = OverlappedData.empty();
8314  for (const MapData &L : DeclComponentLists) {
8315  OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8316  OpenMPMapClauseKind MapType;
8317  ArrayRef<OpenMPMapModifierKind> MapModifiers;
8318  bool IsImplicit;
8319  std::tie(Components, MapType, MapModifiers, IsImplicit) = L;
8320  auto It = OverlappedData.find(&L);
8321  if (It == OverlappedData.end())
8322  generateInfoForComponentList(MapType, MapModifiers, Components,
8323  BasePointers, Pointers, Sizes, Types,
8324  PartialStruct, IsFirstComponentList,
8325  IsImplicit);
8326  IsFirstComponentList = false;
8327  }
8328  }
8329 
8330  /// Generate the base pointers, section pointers, sizes and map types
8331  /// associated with the declare target link variables.
      ///
      /// Visits every component list of every map clause on the current directive
      /// and emits map information only for lists whose declaration is a VarDecl
      /// reported as MT_Link by isDeclareTargetDeclaration. Nothing is emitted
      /// when the OpenMP runtime object reports hasRequiresUnifiedSharedMemory().
      ///
      /// \param BasePointers output array of base pointers.
      /// \param Pointers output array of section pointers.
      /// \param Sizes output array of sizes.
      /// \param Types output array of map-type flags.
8332  void generateInfoForDeclareTargetLink(MapBaseValuesArrayTy &BasePointers,
8333  MapValuesArrayTy &Pointers,
8334  MapValuesArrayTy &Sizes,
8335  MapFlagsArrayTy &Types) const {
8336  // Map other list items in the map clause which are not captured variables
8337  // but "declare target link" global variables.
8338  for (const auto *C : this->CurDir.getClausesOfKind<OMPMapClause>()) {
8339  for (const auto &L : C->component_lists()) {
      // Skip lists with no associated declaration.
8340  if (!L.first)
8341  continue;
      // Skip declarations that are not variables.
8342  const auto *VD = dyn_cast<VarDecl>(L.first);
8343  if (!VD)
8344  continue;
      // Res is tested for presence and then compared against MT_Link below.
8346  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
8347  if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8348  !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link)
8349  continue;
      // Each list is processed on its own with a fresh PartialStruct and with
      // the first-component-list flag set.
8350  StructRangeInfoTy PartialStruct;
8351  generateInfoForComponentList(
8352  C->getMapType(), C->getMapTypeModifiers(), L.second, BasePointers,
8353  Pointers, Sizes, Types, PartialStruct,
8354  /*IsFirstComponentList=*/true, C->isImplicit());
8355  assert(!PartialStruct.Base.isValid() &&
8356  "No partial structs for declare target link expected.");
8357  }
8358  }
8359  }
8360 
8361  /// Generate the default map information for a given capture \a CI,
8362  /// record field declaration \a RI and captured value \a CV.
      ///
      /// Exactly one entry is appended to each of the four output arrays. The
      /// appended map type always has OMP_MAP_TARGET_PARAM set, and additionally
      /// OMP_MAP_IMPLICIT unless FirstPrivateDecls holds a false value for the
      /// captured variable.
      ///
      /// \param CI capture to map: 'this', a by-copy capture, or a by-reference
      /// capture (the last case is asserted).
      /// \param RI field of the captured record that corresponds to \a CI; its
      /// type determines the recorded size.
      /// \param CV value emitted for the capture.
      /// \param CurBasePointers output array of base pointers.
      /// \param CurPointers output array of section pointers.
      /// \param CurSizes output array of sizes (64-bit integers).
      /// \param CurMapTypes output array of map-type flags.
8363  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
8364  const FieldDecl &RI, llvm::Value *CV,
8365  MapBaseValuesArrayTy &CurBasePointers,
8366  MapValuesArrayTy &CurPointers,
8367  MapValuesArrayTy &CurSizes,
8368  MapFlagsArrayTy &CurMapTypes) const {
8369  bool IsImplicit = true;
8370  // Do the default mapping.
8371  if (CI.capturesThis()) {
      // 'this': map the pointee of the field's pointer type, to and from.
8372  CurBasePointers.push_back(CV);
8373  CurPointers.push_back(CV);
8374  const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
8375  CurSizes.push_back(
8376  CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
8377  CGF.Int64Ty, /*isSigned=*/true));
8378  // Default map type.
8379  CurMapTypes.push_back(OMP_MAP_TO | OMP_MAP_FROM);
8380  } else if (CI.capturesVariableByCopy()) {
8381  CurBasePointers.push_back(CV);
8382  CurPointers.push_back(CV);
8383  if (!RI.getType()->isAnyPointerType()) {
8384  // We have to signal to the runtime captures passed by value that are
8385  // not pointers.
8386  CurMapTypes.push_back(OMP_MAP_LITERAL);
8387  CurSizes.push_back(CGF.Builder.CreateIntCast(
8388  CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
8389  } else {
8390  // Pointers are implicitly mapped with a zero size and no flags
8391  // (other than first map that is added for all implicit maps).
8392  CurMapTypes.push_back(OMP_MAP_NONE);
8393  CurSizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
8394  }
      // A FirstPrivateDecls entry overrides the implicit flag.
8395  const VarDecl *VD = CI.getCapturedVar();
8396  auto I = FirstPrivateDecls.find(VD);
8397  if (I != FirstPrivateDecls.end())
8398  IsImplicit = I->getSecond();
8399  } else {
8400  assert(CI.capturesVariable() && "Expected captured reference.");
      // By-reference capture: the size is that of the referenced type.
8401  const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
8402  QualType ElementType = PtrTy->getPointeeType();
8403  CurSizes.push_back(CGF.Builder.CreateIntCast(
8404  CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
8405  // The default map type for a scalar/complex type is 'to' because by
8406  // default the value doesn't have to be retrieved. For an aggregate
8407  // type, the default is 'tofrom'.
8408  CurMapTypes.push_back(getMapModifiersForPrivateClauses(CI));
8409  const VarDecl *VD = CI.getCapturedVar();
8410  auto I = FirstPrivateDecls.find(VD);
      // Variables listed in FirstPrivateDecls whose type is constant are copied
      // into the address returned by registerTargetFirstprivateCopy, and that
      // address is mapped instead of CV.
8411  if (I != FirstPrivateDecls.end() &&
8412  VD->getType().isConstant(CGF.getContext())) {
8413  llvm::Constant *Addr =
8414  CGF.CGM.getOpenMPRuntime().registerTargetFirstprivateCopy(CGF, VD);
8415  // Copy the value of the original variable to the new global copy.
8416  CGF.Builder.CreateMemCpy(
8417  CGF.MakeNaturalAlignAddrLValue(Addr, ElementType).getAddress(),
8418  Address(CV, CGF.getContext().getTypeAlignInChars(ElementType)),
8419  CurSizes.back(), /*IsVolatile=*/false);
8420  // Use new global variable as the base pointers.
8421  CurBasePointers.push_back(Addr);
8422  CurPointers.push_back(Addr);
8423  } else {
8424  CurBasePointers.push_back(CV);
      // For a FirstPrivateDecls entry of pointer type, the section pointer is
      // the result of EmitLoadOfReference on an lvalue built from CV, while
      // the base pointer stays CV.
8425  if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
8426  Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
8427  CV, ElementType, CGF.getContext().getDeclAlign(VD),
8429  CurPointers.push_back(PtrAddr.getPointer());
8430  } else {
8431  CurPointers.push_back(CV);
8432  }
8433  }
8434  if (I != FirstPrivateDecls.end())
8435  IsImplicit = I->getSecond();
8436  }
8437  // Every default map produces a single argument which is a target parameter.
8438  CurMapTypes.back() |= OMP_MAP_TARGET_PARAM;
8439 
8440  // Add flag stating this is an implicit map.
8441  if (IsImplicit)
8442  CurMapTypes.back() |= OMP_MAP_IMPLICIT;
8443  }
8444 };
8445 } // anonymous namespace
8446 
8447 /// Emit the arrays used to pass the captures and map information to the
8448 /// offloading runtime library. If there is no map or capture information,
8449 /// return nullptr by reference.
     ///
     /// The body reads CGF, BasePointers, Pointers, Sizes and MapTypes and writes
     /// its results into \p Info: the array information is cleared first,
     /// NumberOfPtrs is set to BasePointers.size(), and, when that count is
     /// non-zero, BasePointersArray, PointersArray, SizesArray and MapTypesArray
     /// are filled in. When Info.requiresDevicePointerInfo() holds, the address
     /// of each base-pointer slot that has a device pointer declaration is also
     /// recorded in Info.CaptureDeviceAddrMap.
8450 static void
8456  CGOpenMPRuntime::TargetDataInfo &Info) {
8457  CodeGenModule &CGM = CGF.CGM;
8458  ASTContext &Ctx = CGF.getContext();
8459 
8460  // Reset the array information.
8461  Info.clearArrayInfo();
8462  Info.NumberOfPtrs = BasePointers.size();
8463 
8464  if (Info.NumberOfPtrs) {
8465  // Detect if we have any capture size requiring runtime evaluation of the
8466  // size so that a constant array could be eventually used.
8467  bool hasRuntimeEvaluationCaptureSize = false;
8468  for (llvm::Value *S : Sizes)
8469  if (!isa<llvm::Constant>(S)) {
8470  hasRuntimeEvaluationCaptureSize = true;
8471  break;
8472  }
8473 
      // Array type void*[NumberOfPtrs], shared by the base-pointer and pointer
      // temporaries.
8474  llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
8475  QualType PointerArrayType =
8476  Ctx.getConstantArrayType(Ctx.VoidPtrTy, PointerNumAP, ArrayType::Normal,
8477  /*IndexTypeQuals=*/0);
8478 
8479  Info.BasePointersArray =
8480  CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
8481  Info.PointersArray =
8482  CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
8483 
8484  // If we don't have any VLA types or other types that require runtime
8485  // evaluation, we can use a constant array for the map sizes, otherwise we
8486  // need to fill up the arrays as we do for the pointers.
8487  QualType Int64Ty =
8488  Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
8489  if (hasRuntimeEvaluationCaptureSize) {
8490  QualType SizeArrayType =
8491  Ctx.getConstantArrayType(Int64Ty, PointerNumAP, ArrayType::Normal,
8492  /*IndexTypeQuals=*/0);
8493  Info.SizesArray =
8494  CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
8495  } else {
8496  // We expect all the sizes to be constant, so we collect them to create
8497  // a constant array.
8499  for (llvm::Value *S : Sizes)
8500  ConstSizes.push_back(cast<llvm::Constant>(S));
8501 
      // The sizes live in a private, constant, unnamed_addr global.
8502  auto *SizesArrayInit = llvm::ConstantArray::get(
8503  llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
8504  std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
8505  auto *SizesArrayGbl = new llvm::GlobalVariable(
8506  CGM.getModule(), SizesArrayInit->getType(),
8507  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8508  SizesArrayInit, Name);
8509  SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8510  Info.SizesArray = SizesArrayGbl;
8511  }
8512 
8513  // The map types are always constant so we don't need to generate code to
8514  // fill arrays. Instead, we create an array constant.
8515  SmallVector<uint64_t, 4> Mapping(MapTypes.size(), 0);
8516  llvm::copy(MapTypes, Mapping.begin());
8517  llvm::Constant *MapTypesArrayInit =
8518  llvm::ConstantDataArray::get(CGF.Builder.getContext(), Mapping);
8519  std::string MaptypesName =
8520  CGM.getOpenMPRuntime().getName({"offload_maptypes"});
8521  auto *MapTypesArrayGbl = new llvm::GlobalVariable(
8522  CGM.getModule(), MapTypesArrayInit->getType(),
8523  /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage,
8524  MapTypesArrayInit, MaptypesName);
8525  MapTypesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
8526  Info.MapTypesArray = MapTypesArrayGbl;
8527 
      // Store every base pointer, pointer and (only when sizes are not all
      // constant) size into its array slot.
      // NOTE(review): the statements that introduce BP, P and S begin on lines
      // not present in this listing; from the visible arguments they presumably
      // address element I of the respective array -- confirm.
8528  for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
8529  llvm::Value *BPVal = *BasePointers[I];
8531  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8532  Info.BasePointersArray, 0, I);
8534  BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
8535  Address BPAddr(BP, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8536  CGF.Builder.CreateStore(BPVal, BPAddr);
8537 
      // Remember where the base pointer of a device-pointer declaration was
      // stored.
8538  if (Info.requiresDevicePointerInfo())
8539  if (const ValueDecl *DevVD = BasePointers[I].getDevicePtrDecl())
8540  Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);
8541 
8542  llvm::Value *PVal = Pointers[I];
8544  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8545  Info.PointersArray, 0, I);
8547  P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
8548  Address PAddr(P, Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
8549  CGF.Builder.CreateStore(PVal, PAddr);
8550 
8551  if (hasRuntimeEvaluationCaptureSize) {
8553  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8554  Info.SizesArray,
8555  /*Idx0=*/0,
8556  /*Idx1=*/I);
8557  Address SAddr(S, Ctx.getTypeAlignInChars(Int64Ty));
      // Sizes are stored as signed 64-bit integers.
8558  CGF.Builder.CreateStore(
8559  CGF.Builder.CreateIntCast(Sizes[I], CGM.Int64Ty, /*isSigned=*/true),
8560  SAddr);
8561  }
8562  }
8563  }
8564 }
8565 /// Emit the arguments to be passed to the runtime library based on the
8566 /// arrays of pointers, sizes and map types.
     ///
     /// \param CGF function in which the address computations are emitted.
     /// \param BasePointersArrayArg [out] argument for the base-pointers array.
     /// \param PointersArrayArg [out] argument for the pointers array.
     /// \param SizesArrayArg [out] argument for the sizes array.
     /// \param MapTypesArrayArg [out] argument for the map-types array.
     /// \param Info provides NumberOfPtrs and the four arrays.
     ///
     /// When Info.NumberOfPtrs is non-zero each output is an in-bounds GEP with
     /// indices (0, 0) into the corresponding array of \p Info; otherwise each
     /// output is a null pointer constant of the matching pointer type.
8568  CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg,
8569  llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg,
8570  llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info) {
8571  CodeGenModule &CGM = CGF.CGM;
8572  if (Info.NumberOfPtrs) {
8573  BasePointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8574  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8575  Info.BasePointersArray,
8576  /*Idx0=*/0, /*Idx1=*/0);
8577  PointersArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8578  llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
8579  Info.PointersArray,
8580  /*Idx0=*/0,
8581  /*Idx1=*/0);
8582  SizesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8583  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs), Info.SizesArray,
8584  /*Idx0=*/0, /*Idx1=*/0);
8585  MapTypesArrayArg = CGF.Builder.CreateConstInBoundsGEP2_32(
8586  llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
8587  Info.MapTypesArray,
8588  /*Idx0=*/0,
8589  /*Idx1=*/0);
8590  } else {
      // Nothing to pass: hand null pointers of the expected types to the
      // runtime call.
8591  BasePointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8592  PointersArrayArg = llvm::ConstantPointerNull::get(CGM.VoidPtrPtrTy);
8593  SizesArrayArg = llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8594  MapTypesArrayArg =
8595  llvm::ConstantPointerNull::get(CGM.Int64Ty->getPointerTo());
8596  }
8597 }
8598 
8599 /// Check for inner distribute directive.
     ///
     /// Inspects the statement found inside the innermost captured statement of
     /// the directive D. If that statement is an OpenMP executable directive, the
     /// result depends on the kind of D:
     ///  - 'target': the nested directive is returned if it is a distribute
     ///    directive; if it is 'teams', the directive nested one level further
     ///    down is returned when that one is a distribute directive.
     ///  - 'target teams': the nested directive is returned if it is a
     ///    distribute directive.
     ///  - 'target parallel', 'target simd', 'target parallel for' and
     ///    'target parallel for simd': nullptr.
     ///  - every other kind hits llvm_unreachable.
     /// In all remaining cases nullptr is returned.
8600 static const OMPExecutableDirective *
8602  const auto *CS = D.getInnermostCapturedStmt();
8603  const auto *Body =
8604  CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
      // NOTE(review): the initializer of ChildStmt is on a line missing from this
      // listing; presumably it mirrors the getSingleCompoundChild call used for
      // the nested 'teams' case below -- confirm.
8605  const Stmt *ChildStmt =
8607 
8608  if (const auto *NestedDir =
8609  dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8610  OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
8611  switch (D.getDirectiveKind()) {
8612  case OMPD_target:
8613  if (isOpenMPDistributeDirective(DKind))
8614  return NestedDir;
8615  if (DKind == OMPD_teams) {
      // Look through 'teams' at the directive nested inside it.
8616  Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
8617  /*IgnoreCaptured=*/true);
8618  if (!Body)
8619  return nullptr;
8620  ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
8621  if (const auto *NND =
8622  dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
8623  DKind = NND->getDirectiveKind();
8624  if (isOpenMPDistributeDirective(DKind))
8625  return NND;
8626  }
8627  }
8628  return nullptr;
8629  case OMPD_target_teams:
8630  if (isOpenMPDistributeDirective(DKind))
8631  return NestedDir;
8632  return nullptr;
8633  case OMPD_target_parallel:
8634  case OMPD_target_simd:
8635  case OMPD_target_parallel_for:
8636  case OMPD_target_parallel_for_simd:
8637  return nullptr;
      // All remaining directive kinds are not expected here.
8638  case OMPD_target_teams_distribute:
8639  case OMPD_target_teams_distribute_simd:
8640  case OMPD_target_teams_distribute_parallel_for:
8641  case OMPD_target_teams_distribute_parallel_for_simd:
8642  case OMPD_parallel:
8643  case OMPD_for:
8644  case OMPD_parallel_for:
8645  case OMPD_parallel_sections:
8646  case OMPD_for_simd:
8647  case OMPD_parallel_for_simd:
8648  case OMPD_cancel:
8649  case OMPD_cancellation_point:
8650  case OMPD_ordered:
8651  case OMPD_threadprivate:
8652  case OMPD_allocate:
8653  case OMPD_task:
8654  case OMPD_simd:
8655  case OMPD_sections:
8656  case OMPD_section:
8657  case OMPD_single:
8658  case OMPD_master:
8659  case OMPD_critical:
8660  case OMPD_taskyield:
8661  case OMPD_barrier:
8662  case OMPD_taskwait:
8663  case OMPD_taskgroup:
8664  case OMPD_atomic:
8665  case OMPD_flush:
8666  case OMPD_teams:
8667  case OMPD_target_data:
8668  case OMPD_target_exit_data:
8669  case OMPD_target_enter_data:
8670  case OMPD_distribute:
8671  case OMPD_distribute_simd:
8672  case OMPD_distribute_parallel_for:
8673  case OMPD_distribute_parallel_for_simd:
8674  case OMPD_teams_distribute:
8675  case OMPD_teams_distribute_simd:
8676  case OMPD_teams_distribute_parallel_for:
8677  case OMPD_teams_distribute_parallel_for_simd:
8678  case OMPD_target_update:
8679  case OMPD_declare_simd:
8680  case OMPD_declare_target:
8681  case OMPD_end_declare_target:
8682  case OMPD_declare_reduction:
8683  case OMPD_declare_mapper:
8684  case OMPD_taskloop:
8685  case OMPD_taskloop_simd:
8686  case OMPD_requires:
8687  case OMPD_unknown:
8688  llvm_unreachable("Unexpected directive.");
8689  }
8690  }
8691 
8692  return nullptr;
8693 }
8694 
8696  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
8697  const llvm::function_ref<llvm::Value *(
8698  CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
      // Emits a runtime call that receives a device ID and the iteration count
      // produced by SizeEmitter for the loop directive TD. The call is emitted as
      // an inlined OMPD_unknown region. Nothing is emitted when TD is null.
      //
      // CGF:         function receiving the emitted code.
      // D:           directive TD is initialised from.
      // Device:      optional device expression; when null, OMP_DEVICEID_UNDEF
      //              is passed as the device ID.
      // SizeEmitter: callback producing the iteration-count value.
8700  const OMPExecutableDirective *TD = &D;
8701  // Get nested teams distribute kind directive, if any.
      // NOTE(review): the lines that follow the comment above are missing from
      // this listing; presumably they may reassign TD, which would explain the
      // null check below -- confirm.
8704  if (!TD)
8705  return;
8706  const auto *LD = cast<OMPLoopDirective>(TD);
8707  auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
8708  PrePostActionTy &) {
8709  llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
8710 
8711  // Emit device ID if any.
      // The device expression is converted to a signed 64-bit integer.
8712  llvm::Value *DeviceID;
8713  if (Device)
8714  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8715  CGF.Int64Ty, /*isSigned=*/true);
8716  else
8717  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8718 
8719  llvm::Value *Args[] = {DeviceID, NumIterations};
8720  CGF.EmitRuntimeCall(
8722  };
8723  emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
8724 }
8725 
8727  const OMPExecutableDirective &D,
8728  llvm::Function *OutlinedFn,
8729  llvm::Value *OutlinedFnID,
8730  const Expr *IfCond, const Expr *Device) {
      // Emits the host-side code for a target region.
      //
      // D:            the target directive being emitted.
      // OutlinedFn:   host version of the region; must be non-null (asserted).
      // OutlinedFnID: ID of the target region; when null, only the host version
      //               is executed.
      // IfCond:       optional 'if' clause condition selecting between the
      //               offloading path and the host path.
      // Device:       optional device expression.
      //
      // Four code generators are defined below:
      //  - ThenGen emits the offloading runtime call and falls back to calling
      //    OutlinedFn when that call returns a non-null value.
      //  - ElseGen calls OutlinedFn directly.
      //  - TargetThenGen gathers the map information for all captures, emits the
      //    offloading arrays and then runs ThenGen.
      //  - TargetElseGen runs ElseGen.
      // When D has a depend clause, ThenGen/ElseGen are emitted through
      // EmitOMPTargetTaskBasedDirective instead of as an inlined directive.
8731  if (!CGF.HaveInsertPoint())
8732  return;
8733 
8734  assert(OutlinedFn && "Invalid outlined function!");
8735 
8736  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>();
8738  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
      // Emit the captured values of the target region once, up front.
8739  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
8740  PrePostActionTy &) {
8741  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8742  };
8743  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
8744 
8746  llvm::Value *MapTypesArray = nullptr;
8747  // Fill up the pointer arrays and transfer execution to the device.
8748  auto &&ThenGen = [this, Device, OutlinedFn, OutlinedFnID, &D, &InputInfo,
8749  &MapTypesArray, &CS, RequiresOuterTask,
8750  &CapturedVars](CodeGenFunction &CGF, PrePostActionTy &) {
8751  // On top of the arrays that were filled up, the target offloading call
8752  // takes as arguments the device id as well as the host pointer. The host
8753  // pointer is used by the runtime library to identify the current target
8754  // region, so it only has to be unique and not necessarily point to
8755  // anything. It could be the pointer to the outlined function that
8756  // implements the target region, but we aren't using that so that the
8757  // compiler doesn't need to keep that, and could therefore inline the host
8758  // function if proven worthwhile during optimization.
8759 
8760  // From this point on, we need to have an ID of the target region defined.
8761  assert(OutlinedFnID && "Invalid outlined function ID!");
8762 
8763  // Emit device ID if any.
8764  llvm::Value *DeviceID;
8765  if (Device) {
8766  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
8767  CGF.Int64Ty, /*isSigned=*/true);
8768  } else {
8769  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
8770  }
8771 
8772  // Emit the number of elements in the offloading arrays.
8773  llvm::Value *PointerNum =
8774  CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
8775 
8776  // Return value of the runtime offloading call.
8777  llvm::Value *Return;
8778 
8779  llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
8780  llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);
8781 
8782  bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
8783  // The target region is an outlined function launched by the runtime
8784  // via calls __tgt_target() or __tgt_target_teams().
8785  //
8786  // __tgt_target() launches a target region with one team and one thread,
8787  // executing a serial region. This master thread may in turn launch
8788  // more threads within its team upon encountering a parallel region,
8789  // however, no additional teams can be launched on the device.
8790  //
8791  // __tgt_target_teams() launches a target region with one or more teams,
8792  // each with one or more threads. This call is required for target
8793  // constructs such as:
8794  // 'target teams'
8795  // 'target' / 'teams'
8796  // 'target teams distribute parallel for'
8797  // 'target parallel'
8798  // and so on.
8799  //
8800  // Note that on the host and CPU targets, the runtime implementation of
8801  // these calls simply call the outlined function without forking threads.
8802  // The outlined functions themselves have runtime calls to
8803  // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
8804  // the compiler in emitTeamsCall() and emitParallelCall().
8805  //
8806  // In contrast, on the NVPTX target, the implementation of
8807  // __tgt_target_teams() launches a GPU kernel with the requested number
8808  // of teams and threads so no additional calls to the runtime are required.
8809  if (NumTeams) {
8810  // If we have NumTeams defined this means that we have an enclosed teams
8811  // region. Therefore we also expect to have NumThreads defined. These two
8812  // values should be defined in the presence of a teams directive,
8813  // regardless of having any clauses associated. If the user is using teams
8814  // but no clauses, these two values will be the default that should be
8815  // passed to the runtime library - a 32-bit integer with the value zero.
8816  assert(NumThreads && "Thread limit expression should be available along "
8817  "with number of teams.");
8818  llvm::Value *OffloadingArgs[] = {DeviceID,
8819  OutlinedFnID,
8820  PointerNum,
8821  InputInfo.BasePointersArray.getPointer(),
8822  InputInfo.PointersArray.getPointer(),
8823  InputInfo.SizesArray.getPointer(),
8824  MapTypesArray,
8825  NumTeams,
8826  NumThreads};
8827  Return = CGF.EmitRuntimeCall(
8830  OffloadingArgs);
8831  } else {
      // No teams information: the same arguments are passed, minus the team and
      // thread counts.
8832  llvm::Value *OffloadingArgs[] = {DeviceID,
8833  OutlinedFnID,
8834  PointerNum,
8835  InputInfo.BasePointersArray.getPointer(),
8836  InputInfo.PointersArray.getPointer(),
8837  InputInfo.SizesArray.getPointer(),
8838  MapTypesArray};
8839  Return = CGF.EmitRuntimeCall(
8841  : OMPRTL__tgt_target),
8842  OffloadingArgs);
8843  }
8844 
8845  // Check the error code and execute the host version if required.
      // A non-null return value is treated as failure.
8846  llvm::BasicBlock *OffloadFailedBlock =
8847  CGF.createBasicBlock("omp_offload.failed");
8848  llvm::BasicBlock *OffloadContBlock =
8849  CGF.createBasicBlock("omp_offload.cont");
8850  llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
8851  CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
8852 
8853  CGF.EmitBlock(OffloadFailedBlock);
      // With an outer task the captured values are regenerated in this context
      // before the host call.
8854  if (RequiresOuterTask) {
8855  CapturedVars.clear();
8856  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8857  }
8858  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8859  CGF.EmitBranch(OffloadContBlock);
8860 
8861  CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
8862  };
8863 
8864  // Notify that the host version must be executed.
8865  auto &&ElseGen = [this, &D, OutlinedFn, &CS, &CapturedVars,
8866  RequiresOuterTask](CodeGenFunction &CGF,
8867  PrePostActionTy &) {
8868  if (RequiresOuterTask) {
8869  CapturedVars.clear();
8870  CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
8871  }
8872  emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
8873  };
8874 
8875  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
8876  &CapturedVars, RequiresOuterTask,
8877  &CS](CodeGenFunction &CGF, PrePostActionTy &) {
8878  // Fill up the arrays with all the captured variables.
8883 
8884  // Get mappable expression information.
8885  MappableExprsHandler MEHandler(D, CGF);
8886  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
8887 
      // Walk the captures, the fields of the captured record and the emitted
      // captured values in lock step.
8888  auto RI = CS.getCapturedRecordDecl()->field_begin();
8889  auto CV = CapturedVars.begin();
8890  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
8891  CE = CS.capture_end();
8892  CI != CE; ++CI, ++RI, ++CV) {
8897  MappableExprsHandler::StructRangeInfoTy PartialStruct;
8898 
8899  // VLA sizes are passed to the outlined region by copy and do not have map
8900  // information associated.
8901  if (CI->capturesVariableArrayType()) {
8902  CurBasePointers.push_back(*CV);
8903  CurPointers.push_back(*CV);
8904  CurSizes.push_back(CGF.Builder.CreateIntCast(
8905  CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
8906  // Copy to the device as an argument. No need to retrieve it.
8907  CurMapTypes.push_back(MappableExprsHandler::OMP_MAP_LITERAL |
8908  MappableExprsHandler::OMP_MAP_TARGET_PARAM |
8909  MappableExprsHandler::OMP_MAP_IMPLICIT);
8910  } else {
8911  // If we have any information in the map clause, we use it, otherwise we
8912  // just do a default mapping.
8913  MEHandler.generateInfoForCapture(CI, *CV, CurBasePointers, CurPointers,
8914  CurSizes, CurMapTypes, PartialStruct);
8915  if (CurBasePointers.empty())
8916  MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurBasePointers,
8917  CurPointers, CurSizes, CurMapTypes);
8918  // Generate correct mapping for variables captured by reference in
8919  // lambdas.
8920  if (CI->capturesVariable())
8921  MEHandler.generateInfoForLambdaCaptures(
8922  CI->getCapturedVar(), *CV, CurBasePointers, CurPointers, CurSizes,
8923  CurMapTypes, LambdaPointers);
8924  }
8925  // We expect to have at least an element of information for this capture.
8926  assert(!CurBasePointers.empty() &&
8927  "Non-existing map pointer for capture!");
8928  assert(CurBasePointers.size() == CurPointers.size() &&
8929  CurBasePointers.size() == CurSizes.size() &&
8930  CurBasePointers.size() == CurMapTypes.size() &&
8931  "Inconsistent map information sizes!");
8932 
8933  // If there is an entry in PartialStruct it means we have a struct with
8934  // individual members mapped. Emit an extra combined entry.
      // The combined entry is written to the overall arrays before this
      // capture's own entries are appended below.
8935  if (PartialStruct.Base.isValid())
8936  MEHandler.emitCombinedEntry(BasePointers, Pointers, Sizes, MapTypes,
8937  CurMapTypes, PartialStruct);
8938 
8939  // We need to append the results of this capture to what we already have.
8940  BasePointers.append(CurBasePointers.begin(), CurBasePointers.end());
8941  Pointers.append(CurPointers.begin(), CurPointers.end());
8942  Sizes.append(CurSizes.begin(), CurSizes.end());
8943  MapTypes.append(CurMapTypes.begin(), CurMapTypes.end());
8944  }
8945  // Adjust MEMBER_OF flags for the lambdas captures.
8946  MEHandler.adjustMemberOfForLambdaCaptures(LambdaPointers, BasePointers,
8947  Pointers, MapTypes);
8948  // Map other list items in the map clause which are not captured variables
8949  // but "declare target link" global variables.
8950  MEHandler.generateInfoForDeclareTargetLink(BasePointers, Pointers, Sizes,
8951  MapTypes);
8952 
8953  TargetDataInfo Info;
8954  // Fill up the arrays and create the arguments.
8955  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
8957  Info.PointersArray, Info.SizesArray,
8958  Info.MapTypesArray, Info);
      // Publish the results to the variables captured by ThenGen.
8959  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
8960  InputInfo.BasePointersArray =
8962  InputInfo.PointersArray =
8964  InputInfo.SizesArray = Address(Info.SizesArray, CGM.getPointerAlign());
8965  MapTypesArray = Info.MapTypesArray;
8966  if (RequiresOuterTask)
8967  CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
8968  else
8969  emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
8970  };
8971 
8972  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
8973  CodeGenFunction &CGF, PrePostActionTy &) {
8974  if (RequiresOuterTask) {
      // NOTE(review): InputInfo is not in this lambda's capture list, so the
      // name used below presumably refers to a local declared on a line missing
      // from this listing -- confirm.
8976  CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
8977  } else {
8978  emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
8979  }
8980  };
8981 
8982  // If we have a target function ID it means that we need to support
8983  // offloading, otherwise, just execute on the host. We need to execute on host
8984  // regardless of the conditional in the if clause if, e.g., the user does not
8985  // specify target triples.
8986  if (OutlinedFnID) {
8987  if (IfCond) {
8988  emitOMPIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
8989  } else {
8990  RegionCodeGenTy ThenRCG(TargetThenGen);
8991  ThenRCG(CGF);
8992  }
8993  } else {
8994  RegionCodeGenTy ElseRCG(TargetElseGen);
8995  ElseRCG(CGF);
8996  }
8997 }
8998 
9000  StringRef ParentName) {
9001  if (!S)
9002  return;
9003 
9004  // Codegen OMP target directives that offload compute to the device.
9005  bool RequiresDeviceCodegen =
9006  isa<OMPExecutableDirective>(S) &&
9008  cast<OMPExecutableDirective>(S)->getDirectiveKind());
9009 
9010  if (RequiresDeviceCodegen) {
9011  const auto &E = *cast<OMPExecutableDirective>(S);
9012  unsigned DeviceID;
9013  unsigned FileID;
9014  unsigned Line;
9016  FileID, Line);
9017 
9018  // Is this a target region that should not be emitted as an entry point? If
9019  // so just signal we are done with this target region.
9021  ParentName, Line))
9022  return;
9023 
9024  switch (E.getDirectiveKind()) {
9025  case OMPD_target:
9027  cast<OMPTargetDirective>(E));
9028  break;
9029  case OMPD_target_parallel:
9031  CGM, ParentName, cast<OMPTargetParallelDirective>(E));
9032  break;
9033  case OMPD_target_teams:
9035  CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
9036  break;
9037  case OMPD_target_teams_distribute:
9039  CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
9040  break;
9041  case OMPD_target_teams_distribute_simd:
9043  CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
9044  break;
9045  case OMPD_target_parallel_for:
9047  CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
9048  break;
9049  case OMPD_target_parallel_for_simd:
9051  CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
9052  break;
9053  case OMPD_target_simd:
9055  CGM, ParentName, cast<OMPTargetSimdDirective>(E));
9056  break;
9057  case OMPD_target_teams_distribute_parallel_for:
9059  CGM, ParentName,
9060  cast<OMPTargetTeamsDistributeParallelForDirective>(E));
9061  break;
9062  case OMPD_target_teams_distribute_parallel_for_simd:
9065  CGM, ParentName,
9066  cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
9067  break;
9068  case OMPD_parallel:
9069  case OMPD_for:
9070  case OMPD_parallel_for:
9071  case OMPD_parallel_sections:
9072  case OMPD_for_simd:
9073  case OMPD_parallel_for_simd:
9074  case OMPD_cancel:
9075  case OMPD_cancellation_point:
9076  case OMPD_ordered:
9077  case OMPD_threadprivate:
9078  case OMPD_allocate:
9079  case OMPD_task:
9080  case OMPD_simd:
9081  case OMPD_sections:
9082  case OMPD_section:
9083  case OMPD_single:
9084  case OMPD_master:
9085  case OMPD_critical:
9086  case OMPD_taskyield:
9087  case OMPD_barrier:
9088  case OMPD_taskwait:
9089  case OMPD_taskgroup:
9090  case OMPD_atomic:
9091  case OMPD_flush:
9092  case OMPD_teams:
9093  case OMPD_target_data:
9094  case OMPD_target_exit_data:
9095  case OMPD_target_enter_data:
9096  case OMPD_distribute:
9097  case OMPD_distribute_simd:
9098  case OMPD_distribute_parallel_for:
9099  case OMPD_distribute_parallel_for_simd:
9100  case OMPD_teams_distribute:
9101  case OMPD_teams_distribute_simd:
9102  case OMPD_teams_distribute_parallel_for:
9103  case OMPD_teams_distribute_parallel_for_simd:
9104  case OMPD_target_update:
9105  case OMPD_declare_simd:
9106  case OMPD_declare_target:
9107  case OMPD_end_declare_target:
9108  case OMPD_declare_reduction:
9109  case OMPD_declare_mapper:
9110  case OMPD_taskloop:
9111  case OMPD_taskloop_simd:
9112  case OMPD_requires:
9113  case OMPD_unknown:
9114  llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9115  }
9116  return;
9117  }
9118 
9119  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
9120  if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
9121  return;
9122 
9124  E->getInnermostCapturedStmt()->getCapturedStmt(), ParentName);
9125  return;
9126  }
9127 
9128  // If this is a lambda function, look into its body.
9129  if (const auto *L = dyn_cast<LambdaExpr>(S))
9130  S = L->getBody();
9131 
9132  // Keep looking for target regions recursively.
9133  for (const Stmt *II : S->children())
9134  scanForTargetRegionsFunctions(II, ParentName);
9135 }
9136 
9138  // If emitting code for the host, we do not process FD here. Instead we do
9139  // the normal code generation.
9140  if (!CGM.getLangOpts().OpenMPIsDevice)
9141  return false;
9142 
9143  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
9144  StringRef Name = CGM.getMangledName(GD);
9145  // Try to detect target regions in the function.
9146  if (const auto *FD = dyn_cast<FunctionDecl>(VD))
9147  scanForTargetRegionsFunctions(FD->getBody(), Name);
9148 
9149  // Do not emit the function if it is not marked as declare target.
9150  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
9151  AlreadyEmittedTargetFunctions.count(Name) == 0;
9152 }
9153 
9155  if (!CGM.getLangOpts().OpenMPIsDevice)
9156  return false;
9157 
9158  // Check if there are Ctors/Dtors in this declaration and look for target
9159  // regions in it. We use the complete variant to produce the kernel name
9160  // mangling.
9161  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
9162  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9163  for (const CXXConstructorDecl *Ctor : RD->ctors()) {
9164  StringRef ParentName =
9166  scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
9167  }
9168  if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
9169  StringRef ParentName =
9171  scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
9172  }
9173  }
9174 
9175  // Do not emit the variable if it is not marked as declare target.
9177  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9178  cast<VarDecl>(GD.getDecl()));
9179  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
9180  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9182  DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
9183  return true;
9184  }
9185  return false;
9186 }
9187 
9188 llvm::Constant *
9190  const VarDecl *VD) {
9191  assert(VD->getType().isConstant(CGM.getContext()) &&
9192  "Expected constant variable.");
9193  StringRef VarName;
9194  llvm::Constant *Addr;
9195  llvm::GlobalValue::LinkageTypes Linkage;
9196  QualType Ty = VD->getType();
9197  SmallString<128> Buffer;
9198  {
9199  unsigned DeviceID;
9200  unsigned FileID;
9201  unsigned Line;
9203  FileID, Line);
9204  llvm::raw_svector_ostream OS(Buffer);
9205  OS << "__omp_offloading_firstprivate_" << llvm::format("_%x", DeviceID)
9206  << llvm::format("_%x_", FileID) << VD->getName() << "_l" << Line;
9207  VarName = OS.str();
9208  }
9210  Addr =
9213  cast<llvm::GlobalValue>(Addr)->setLinkage(Linkage);
9214  CharUnits VarSize = CGM.getContext().getTypeSizeInChars(Ty);
9215  CGM.addCompilerUsedGlobal(cast<llvm::GlobalValue>(Addr));
9217  VarName, Addr, VarSize,
9219  return Addr;
9220 }
9221 
9223  llvm::Constant *Addr) {
9225  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9226  if (!Res) {
9227  if (CGM.getLangOpts().OpenMPIsDevice) {
9228  // Register non-target variables being emitted in device code (debug info
9229  // may cause this).
9230  StringRef VarName = CGM.getMangledName(VD);
9231  EmittedNonTargetVariables.try_emplace(VarName, Addr);
9232  }
9233  return;
9234  }
9235  // Register declare target variables.
9237  StringRef VarName;
9238  CharUnits VarSize;
9239  llvm::GlobalValue::LinkageTypes Linkage;
9240 
9241  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9244  VarName = CGM.getMangledName(VD);
9246  VarSize = CGM.getContext().getTypeSizeInChars(VD->getType());
9247  assert(!VarSize.isZero() && "Expected non-zero size of the variable");
9248  } else {
9249  VarSize = CharUnits::Zero();
9250  }
9251  Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
9252  // Temp solution to prevent optimizations of the internal variables.
9253  if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
9254  std::string RefName = getName({VarName, "ref"});
9255  if (!CGM.GetGlobalValue(RefName)) {
9256  llvm::Constant *AddrRef =
9257  getOrCreateInternalVariable(Addr->getType(), RefName);
9258  auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
9259  GVAddrRef->setConstant(/*Val=*/true);
9260  GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
9261  GVAddrRef->setInitializer(Addr);
9262  CGM.addCompilerUsedGlobal(GVAddrRef);
9263  }
9264  }
9265  } else {
9266  assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
9267  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9269  "Declare target attribute must link or to with unified memory.");
9270  if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
9272  else
9274 
9275  if (CGM.getLangOpts().OpenMPIsDevice) {
9276  VarName = Addr->getName();
9277  Addr = nullptr;
9278  } else {
9279  VarName = getAddrOfDeclareTargetVar(VD).getName();
9280  Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
9281  }
9282  VarSize = CGM.getPointerSize();
9283  Linkage = llvm::GlobalValue::WeakAnyLinkage;
9284  }
9285 
9287  VarName, Addr, VarSize, Flags, Linkage);
9288 }
9289 
9291  if (isa<FunctionDecl>(GD.getDecl()) ||
9292  isa<OMPDeclareReductionDecl>(GD.getDecl()))
9293  return emitTargetFunctions(GD);
9294 
9295  return emitTargetGlobalVariable(GD);
9296 }
9297 
9299  for (const VarDecl *VD : DeferredGlobalVariables) {
9301  OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
9302  if (!Res)
9303  continue;
9304  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9306  CGM.EmitGlobal(VD);
9307  } else {
9308  assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
9309  (*Res == OMPDeclareTargetDeclAttr::MT_To &&
9311  "Expected link clause or to clause with unified memory.");
9312  (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
9313  }
9314  }
9315 }
9316 
9318  CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
9320  " Expected target-based directive.");
9321 }
9322 
9324  const OMPRequiresDecl *D) {
9325  for (const OMPClause *Clause : D->clauselists()) {
9326  if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
9328  break;
9329  }
9330  }
9331 }
9332 
9334  LangAS &AS) {
9335  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
9336  return false;
9337  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
9338  switch(A->getAllocatorType()) {
9339  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
9340  // Not supported, fallback to the default mem space.
9341  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
9342  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
9343  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
9344  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
9345  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
9346  case OMPAllocateDeclAttr::OMPConstMemAlloc:
9347  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
9348  AS = LangAS::Default;
9349  return true;
9350  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
9351  llvm_unreachable("Expected predefined allocator for the variables with the "
9352  "static storage.");
9353  }
9354  return false;
9355 }
9356 
9359 }
9360 
9362  CodeGenModule &CGM)
9363  : CGM(CGM) {
9364  if (CGM.getLangOpts().OpenMPIsDevice) {
9365  SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
9366  CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
9367  }
9368 }
9369 
9371  if (CGM.getLangOpts().OpenMPIsDevice)
9372  CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
9373 }
9374 
9376  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
9377  return true;
9378 
9379  StringRef Name = CGM.getMangledName(GD);
9380  const auto *D = cast<FunctionDecl>(GD.getDecl());
9381  // Do not emit the function if it is marked as declare target, as it was
9382  // already emitted.
9383  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
9384  if (D->hasBody() && AlreadyEmittedTargetFunctions.count(Name) == 0) {
9385  if (auto *F = dyn_cast_or_null<llvm::Function>(CGM.GetGlobalValue(Name)))
9386  return !F->isDeclaration();
9387  return false;
9388  }
9389  return true;
9390  }
9391 
9392  return !AlreadyEmittedTargetFunctions.insert(Name).second;
9393 }
9394 
9396  // If we don't have entries or if we are emitting code for the device, we
9397  // don't need to do anything.
9398  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
9399  CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
9403  return nullptr;
9404 
9405  // Create and register the function that handles the requires directives.
9406  ASTContext &C = CGM.getContext();
9407 
9408  llvm::Function *RequiresRegFn;
9409  {
9410  CodeGenFunction CGF(CGM);
9411  const auto &FI = CGM.getTypes().arrangeNullaryFunction();
9412  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
9413  std::string ReqName = getName({"omp_offloading", "requires_reg"});
9414  RequiresRegFn = CGM.CreateGlobalInitOrDestructFunction(FTy, ReqName, FI);
9415  CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
9416  OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
9417  // TODO: check for other requires clauses.
9418  // The requires directive takes effect only when a target region is
9419  // present in the compilation unit. Otherwise it is ignored and not
9420  // passed to the runtime. This prevents the runtime from throwing an error
9421  // for mismatching requires clauses across compilation units that don't
9422  // contain at least 1 target region.
9423  assert((HasEmittedTargetRegion ||
9426  "Target or declare target region expected.");
9428  Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
9430  llvm::ConstantInt::get(CGM.Int64Ty, Flags));
9431  CGF.FinishFunction();
9432  }
9433  return RequiresRegFn;
9434 }
9435 
9437  // If we have offloading in the current module, we need to emit the entries
9438  // now and register the offloading descriptor.
9440 
9441  // Create and register the offloading binary descriptors. This is the main
9442  // entity that captures all the information about offloading in the current
9443  // compilation unit.
9445 }
9446 
9448  const OMPExecutableDirective &D,
9449  SourceLocation Loc,
9450  llvm::Function *OutlinedFn,
9451  ArrayRef<llvm::Value *> CapturedVars) {
9452  if (!CGF.HaveInsertPoint())
9453  return;
9454 
9455  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9457 
9458  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
9459  llvm::Value *Args[] = {
9460  RTLoc,
9461  CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
9462  CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
9464  RealArgs.append(std::begin(Args), std::end(Args));
9465  RealArgs.append(CapturedVars.begin(), CapturedVars.end());
9466 
9467  llvm::FunctionCallee RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_teams);
9468  CGF.EmitRuntimeCall(RTLFn, RealArgs);
9469 }
9470 
9472  const Expr *NumTeams,
9473  const Expr *ThreadLimit,
9474  SourceLocation Loc) {
9475  if (!CGF.HaveInsertPoint())
9476  return;
9477 
9478  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
9479 
9480  llvm::Value *NumTeamsVal =
9481  NumTeams
9482  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
9483  CGF.CGM.Int32Ty, /* isSigned = */ true)
9484  : CGF.Builder.getInt32(0);
9485 
9486  llvm::Value *ThreadLimitVal =
9487  ThreadLimit
9488  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
9489  CGF.CGM.Int32Ty, /* isSigned = */ true)
9490  : CGF.Builder.getInt32(0);
9491 
9492  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
9493  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
9494  ThreadLimitVal};
9496  PushNumTeamsArgs);
9497 }
9498 
9500  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9501  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
9502  if (!CGF.HaveInsertPoint())
9503  return;
9504 
9505  // Action used to replace the default codegen action and turn privatization
9506  // off.
9507  PrePostActionTy NoPrivAction;
9508 
9509  // Generate the code for the opening of the data environment. Capture all the
9510  // arguments of the runtime call by reference because they are used in the
9511  // closing of the region.
9512  auto &&BeginThenGen = [this, &D, Device, &Info,
9513  &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
9514  // Fill up the arrays with all the mapped variables.
9519 
9520  // Get map clause information.
9521  MappableExprsHandler MCHandler(D, CGF);
9522  MCHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9523 
9524  // Fill up the arrays and create the arguments.
9525  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9526 
9527  llvm::Value *BasePointersArrayArg = nullptr;
9528  llvm::Value *PointersArrayArg = nullptr;
9529  llvm::Value *SizesArrayArg = nullptr;
9530  llvm::Value *MapTypesArrayArg = nullptr;
9531  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9532  SizesArrayArg, MapTypesArrayArg, Info);
9533 
9534  // Emit device ID if any.
9535  llvm::Value *DeviceID = nullptr;
9536  if (Device) {
9537  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9538  CGF.Int64Ty, /*isSigned=*/true);
9539  } else {
9540  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9541  }
9542 
9543  // Emit the number of elements in the offloading arrays.
9544  llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9545 
9546  llvm::Value *OffloadingArgs[] = {
9547  DeviceID, PointerNum, BasePointersArrayArg,
9548  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9550  OffloadingArgs);
9551 
9552  // If device pointer privatization is required, emit the body of the region
9553  // here. It will have to be duplicated: with and without privatization.
9554  if (!Info.CaptureDeviceAddrMap.empty())
9555  CodeGen(CGF);
9556  };
9557 
9558  // Generate code for the closing of the data region.
9559  auto &&EndThenGen = [this, Device, &Info](CodeGenFunction &CGF,
9560  PrePostActionTy &) {
9561  assert(Info.isValid() && "Invalid data environment closing arguments.");
9562 
9563  llvm::Value *BasePointersArrayArg = nullptr;
9564  llvm::Value *PointersArrayArg = nullptr;
9565  llvm::Value *SizesArrayArg = nullptr;
9566  llvm::Value *MapTypesArrayArg = nullptr;
9567  emitOffloadingArraysArgument(CGF, BasePointersArrayArg, PointersArrayArg,
9568  SizesArrayArg, MapTypesArrayArg, Info);
9569 
9570  // Emit device ID if any.
9571  llvm::Value *DeviceID = nullptr;
9572  if (Device) {
9573  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9574  CGF.Int64Ty, /*isSigned=*/true);
9575  } else {
9576  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9577  }
9578 
9579  // Emit the number of elements in the offloading arrays.
9580  llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);
9581 
9582  llvm::Value *OffloadingArgs[] = {
9583  DeviceID, PointerNum, BasePointersArrayArg,
9584  PointersArrayArg, SizesArrayArg, MapTypesArrayArg};
9586  OffloadingArgs);
9587  };
9588 
9589  // If we need device pointer privatization, we need to emit the body of the
9590  // region with no privatization in the 'else' branch of the conditional.
9591  // Otherwise, we don't have to do anything.
9592  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
9593  PrePostActionTy &) {
9594  if (!Info.CaptureDeviceAddrMap.empty()) {
9595  CodeGen.setAction(NoPrivAction);
9596  CodeGen(CGF);
9597  }
9598  };
9599 
9600  // We don't have to do anything to close the region if the if clause evaluates
9601  // to false.
9602  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};
9603 
9604  if (IfCond) {
9605  emitOMPIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
9606  } else {
9607  RegionCodeGenTy RCG(BeginThenGen);
9608  RCG(CGF);
9609  }
9610 
9611  // If we don't require privatization of device pointers, we emit the body in
9612  // between the runtime calls. This avoids duplicating the body code.
9613  if (Info.CaptureDeviceAddrMap.empty()) {
9614  CodeGen.setAction(NoPrivAction);
9615  CodeGen(CGF);
9616  }
9617 
9618  if (IfCond) {
9619  emitOMPIfClause(CGF, IfCond, EndThenGen, EndElseGen);
9620  } else {
9621  RegionCodeGenTy RCG(EndThenGen);
9622  RCG(CGF);
9623  }
9624 }
9625 
9627  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
9628  const Expr *Device) {
9629  if (!CGF.HaveInsertPoint())
9630  return;
9631 
9632  assert((isa<OMPTargetEnterDataDirective>(D) ||
9633  isa<OMPTargetExitDataDirective>(D) ||
9634  isa<OMPTargetUpdateDirective>(D)) &&
9635  "Expecting either target enter, exit data, or update directives.");
9636 
9638  llvm::Value *MapTypesArray = nullptr;
9639  // Generate the code for the opening of the data environment.
9640  auto &&ThenGen = [this, &D, Device, &InputInfo,
9641  &MapTypesArray](CodeGenFunction &CGF, PrePostActionTy &) {
9642  // Emit device ID if any.
9643  llvm::Value *DeviceID = nullptr;
9644  if (Device) {
9645  DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
9646  CGF.Int64Ty, /*isSigned=*/true);
9647  } else {
9648  DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
9649  }
9650 
9651  // Emit the number of elements in the offloading arrays.
9652  llvm::Constant *PointerNum =
9653  CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
9654 
9655  llvm::Value *OffloadingArgs[] = {DeviceID,
9656  PointerNum,
9657  InputInfo.BasePointersArray.getPointer(),
9658  InputInfo.PointersArray.getPointer(),
9659  InputInfo.SizesArray.getPointer(),
9660  MapTypesArray};
9661 
9662  // Select the right runtime function call for each expected standalone
9663  // directive.
9664  const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
9665  OpenMPRTLFunction RTLFn;
9666  switch (D.getDirectiveKind()) {
9667  case OMPD_target_enter_data:
9668  RTLFn = HasNowait ? OMPRTL__tgt_target_data_begin_nowait
9670  break;
9671  case OMPD_target_exit_data:
9672  RTLFn = HasNowait ? OMPRTL__tgt_target_data_end_nowait
9674  break;
9675  case OMPD_target_update:
9676  RTLFn = HasNowait ? OMPRTL__tgt_target_data_update_nowait
9678  break;
9679  case OMPD_parallel:
9680  case OMPD_for:
9681  case OMPD_parallel_for:
9682  case OMPD_parallel_sections:
9683  case OMPD_for_simd:
9684  case OMPD_parallel_for_simd:
9685  case OMPD_cancel:
9686  case OMPD_cancellation_point:
9687  case OMPD_ordered:
9688  case OMPD_threadprivate:
9689  case OMPD_allocate:
9690  case OMPD_task:
9691  case OMPD_simd:
9692  case OMPD_sections:
9693  case OMPD_section:
9694  case OMPD_single:
9695  case OMPD_master:
9696  case OMPD_critical:
9697  case OMPD_taskyield:
9698  case OMPD_barrier:
9699  case OMPD_taskwait:
9700  case OMPD_taskgroup:
9701  case OMPD_atomic:
9702  case OMPD_flush:
9703  case OMPD_teams:
9704  case OMPD_target_data:
9705  case OMPD_distribute:
9706  case OMPD_distribute_simd:
9707  case OMPD_distribute_parallel_for:
9708  case OMPD_distribute_parallel_for_simd:
9709  case OMPD_teams_distribute:
9710  case OMPD_teams_distribute_simd:
9711  case OMPD_teams_distribute_parallel_for:
9712  case OMPD_teams_distribute_parallel_for_simd:
9713  case OMPD_declare_simd:
9714  case OMPD_declare_target:
9715  case OMPD_end_declare_target:
9716  case OMPD_declare_reduction:
9717  case OMPD_declare_mapper:
9718  case OMPD_taskloop:
9719  case OMPD_taskloop_simd:
9720  case OMPD_target:
9721  case OMPD_target_simd:
9722  case OMPD_target_teams_distribute:
9723  case OMPD_target_teams_distribute_simd:
9724  case OMPD_target_teams_distribute_parallel_for:
9725  case OMPD_target_teams_distribute_parallel_for_simd:
9726  case OMPD_target_teams:
9727  case OMPD_target_parallel:
9728  case OMPD_target_parallel_for:
9729  case OMPD_target_parallel_for_simd:
9730  case OMPD_requires:
9731  case OMPD_unknown:
9732  llvm_unreachable("Unexpected standalone target data directive.");
9733  break;
9734  }
9735  CGF.EmitRuntimeCall(createRuntimeFunction(RTLFn), OffloadingArgs);
9736  };
9737 
9738  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray](
9739  CodeGenFunction &CGF, PrePostActionTy &) {
9740  // Fill up the arrays with all the mapped variables.
9745 
9746  // Get map clause information.
9747  MappableExprsHandler MEHandler(D, CGF);
9748  MEHandler.generateAllInfo(BasePointers, Pointers, Sizes, MapTypes);
9749 
9750  TargetDataInfo Info;
9751  // Fill up the arrays and create the arguments.
9752  emitOffloadingArrays(CGF, BasePointers, Pointers, Sizes, MapTypes, Info);
9754  Info.PointersArray, Info.SizesArray,
9755  Info.MapTypesArray, Info);
9756  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9757  InputInfo.BasePointersArray =
9759  InputInfo.PointersArray =
9760  Address(Info.PointersArray, CGM.getPointerAlign());
9761  InputInfo.SizesArray =
9762  Address(Info.SizesArray, CGM.getPointerAlign());
9763  MapTypesArray = Info.MapTypesArray;
9764  if (D.hasClausesOfKind<OMPDependClause>())
9765  CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9766  else
9767  emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9768  };
9769 
9770  if (IfCond) {
9771  emitOMPIfClause(CGF, IfCond, TargetThenGen,
9772  [](CodeGenFunction &CGF, PrePostActionTy &) {});
9773  } else {
9774  RegionCodeGenTy ThenRCG(TargetThenGen);
9775  ThenRCG(CGF);
9776  }
9777 }
9778 
9779 namespace {
9780  /// Kind of parameter in a function with 'declare simd' directive.
9781  enum ParamKindTy { LinearWithVarStride, Linear, Uniform, Vector };
9782  /// Attribute set of the parameter.
9783  struct ParamAttrTy {
9784  ParamKindTy Kind = Vector;
9785  llvm::APSInt StrideOrArg;
9786  llvm::APSInt Alignment;
9787  };
9788 } // namespace
9789 
9790 static unsigned evaluateCDTSize(const FunctionDecl *FD,
9791  ArrayRef<ParamAttrTy> ParamAttrs) {
9792  // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
9793  // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
9794  // of that clause. The VLEN value must be power of 2.
9795  // In other case the notion of the function`s "characteristic data type" (CDT)
9796  // is used to compute the vector length.
9797  // CDT is defined in the following order:
9798  // a) For non-void function, the CDT is the return type.
9799  // b) If the function has any non-uniform, non-linear parameters, then the
9800  // CDT is the type of the first such parameter.
9801  // c) If the CDT determined by a) or b) above is struct, union, or class
9802  // type which is pass-by-value (except for the type that maps to the
9803  // built-in complex data type), the characteristic data type is int.
9804  // d) If none of the above three cases is applicable, the CDT is int.
9805  // The VLEN is then determined based on the CDT and the size of vector
9806  // register of that ISA for which current vector version is generated. The
9807  // VLEN is computed using the formula below:
9808  // VLEN = sizeof(vector_register) / sizeof(CDT),
9809  // where vector register size specified in section 3.2.1 Registers and the
9810  // Stack Frame of original AMD64 ABI document.
9811  QualType RetType = FD->getReturnType();
9812  if (RetType.isNull())
9813  return 0;
9814  ASTContext &C = FD->getASTContext();
9815  QualType CDT;
9816  if (!RetType.isNull() && !RetType->isVoidType()) {
9817  CDT = RetType;
9818  } else {
9819  unsigned Offset = 0;
9820  if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
9821  if (ParamAttrs[Offset].Kind == Vector)
9822  CDT = C.getPointerType(C.getRecordType(MD->getParent()));
9823  ++Offset;
9824  }
9825  if (CDT.isNull()) {
9826  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
9827  if (ParamAttrs[I + Offset].Kind == Vector) {
9828  CDT = FD->getParamDecl(I)->getType();
9829  break;
9830  }
9831  }
9832  }
9833  }
9834  if (CDT.isNull())
9835  CDT = C.IntTy;
9836  CDT = CDT->getCanonicalTypeUnqualified();
9837  if (CDT->isRecordType() || CDT->isUnionType())
9838  CDT = C.IntTy;
9839  return C.getTypeSize(CDT);
9840 }
9841 
9842 static void
9843 emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
9844  const llvm::APSInt &VLENVal,
9845  ArrayRef<ParamAttrTy> ParamAttrs,
9846  OMPDeclareSimdDeclAttr::BranchStateTy State) {
9847  struct ISADataTy {
9848  char ISA;
9849  unsigned VecRegSize;
9850  };
9851  ISADataTy ISAData[] = {
9852  {
9853  'b', 128
9854  }, // SSE
9855  {
9856  'c', 256
9857  }, // AVX
9858  {
9859  'd', 256
9860  }, // AVX2
9861  {
9862  'e', 512
9863  }, // AVX512
9864  };
9866  switch (State) {
9867  case OMPDeclareSimdDeclAttr::BS_Undefined:
9868  Masked.push_back('N');
9869  Masked.push_back('M');
9870  break;
9871  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
9872  Masked.push_back('N');
9873  break;
9874  case OMPDeclareSimdDeclAttr::BS_Inbranch:
9875  Masked.push_back('M');
9876  break;
9877  }
9878  for (char Mask : Masked) {
9879  for (const ISADataTy &Data : ISAData) {
9880  SmallString<256> Buffer;
9881  llvm::raw_svector_ostream Out(Buffer);
9882  Out << "_ZGV" << Data.ISA << Mask;
9883  if (!VLENVal) {
9884  unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
9885  assert(NumElts && "Non-zero simdlen/cdtsize expected");
9886  Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
9887  } else {
9888  Out << VLENVal;
9889  }
9890  for (const ParamAttrTy &ParamAttr : ParamAttrs) {
9891  switch (ParamAttr.Kind){
9892  case LinearWithVarStride:
9893  Out << 's' << ParamAttr.StrideOrArg;
9894  break;
9895  case Linear:
9896  Out << 'l';
9897  if (!!ParamAttr.StrideOrArg)
9898  Out << ParamAttr.StrideOrArg;
9899  break;
9900  case Uniform:
9901  Out << 'u';
9902  break;
9903  case Vector:
9904  Out << 'v';
9905  break;
9906  }
9907  if (!!ParamAttr.Alignment)
9908  Out << 'a' << ParamAttr.Alignment;
9909  }
9910  Out << '_' << Fn->getName();
9911  Fn->addFnAttr(Out.str());
9912  }
9913  }
9914 }
9915 
9916 // These are the functions needed to mangle the names of the
9917 // vector functions generated by the compiler, according to the rules
9918 // defined in the "Vector Function ABI specifications for AArch64",
9919 // available at
9920 // https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
9921 
9922 /// Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
9923 ///
9924 /// TODO: Need to implement the behavior for reference marked with a
9925 /// var or no linear modifiers (1.b in the section). For this, we
9926 /// need to extend ParamKindTy to support the linear modifiers.
9927 static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
9928  QT = QT.getCanonicalType();
9929 
9930  if (QT->isVoidType())
9931  return false;
9932 
9933  if (Kind == ParamKindTy::Uniform)
9934  return false;
9935 
9936  if (Kind == ParamKindTy::Linear)
9937  return false;
9938 
9939  // TODO: Handle linear references with modifiers
9940 
9941  if (Kind == ParamKindTy::LinearWithVarStride)
9942  return false;
9943 
9944  return true;
9945 }
9946 
9947 /// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
9948 static bool getAArch64PBV(QualType QT, ASTContext &C) {
9949  QT = QT.getCanonicalType();
9950  unsigned Size = C.getTypeSize(QT);
9951 
9952  // Only scalars and complex within 16 bytes wide set PVB to true.
9953  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
9954  return false;
9955 
9956  if (QT->isFloatingType())
9957  return true;
9958 
9959  if (QT->isIntegerType())
9960  return true;
9961 
9962  if (QT->isPointerType())
9963  return true;
9964 
9965  // TODO: Add support for complex types (section 3.1.2, item 2).
9966 
9967  return false;
9968 }
9969 
9970 /// Computes the lane size (LS) of a return type or of an input parameter,
9971 /// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
9972 /// TODO: Add support for references, section 3.2.1, item 1.
9973 static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
9974  if (getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
9976  if (getAArch64PBV(PTy, C))
9977  return C.getTypeSize(PTy);
9978  }
9979  if (getAArch64PBV(QT, C))
9980  return C.getTypeSize(QT);
9981 
9982  return C.getTypeSize(C.getUIntPtrType());
9983 }
9984 
9985 // Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
9986 // signature of the scalar function, as defined in 3.2.2 of the
9987 // AAVFABI.
9988 static std::tuple<unsigned, unsigned, bool>
9989 getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
9990  QualType RetType = FD->getReturnType().getCanonicalType();
9991 
9992  ASTContext &C = FD->getASTContext();
9993 
9994  bool OutputBecomesInput = false;
9995 
9997  if (!RetType->isVoidType()) {
9998  Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
9999  if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
10000  OutputBecomesInput = true;
10001  }
10002  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10003  QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
10004  Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
10005  }
10006 
10007  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10008  // The LS of a function parameter / return value can only be a power
10009  // of 2, starting from 8 bits, up to 128.
10010  assert(std::all_of(Sizes.begin(), Sizes.end(),
10011  [](unsigned Size) {
10012  return Size == 8 || Size == 16 || Size == 32 ||
10013  Size == 64 || Size == 128;
10014  }) &&
10015  "Invalid size");
10016 
10017  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
10018  *std::max_element(std::begin(Sizes), std::end(Sizes)),
10019  OutputBecomesInput);
10020 }
10021 
10022 /// Mangle the parameter part of the vector function name according to
10023 /// their OpenMP classification. The mangling function is defined in
10024 /// section 3.5 of the AAVFABI.
10025 static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10026  SmallString<256> Buffer;
10027  llvm::raw_svector_ostream Out(Buffer);
10028  for (const auto &ParamAttr : ParamAttrs) {
10029  switch (ParamAttr.Kind) {
10030  case LinearWithVarStride:
10031  Out << "ls" << ParamAttr.StrideOrArg;
10032  break;
10033  case Linear:
10034  Out << 'l';
10035  // Don't print the step value if it is not present or if it is
10036  // equal to 1.
10037  if (!!ParamAttr.StrideOrArg && ParamAttr.StrideOrArg != 1)
10038  Out << ParamAttr.StrideOrArg;
10039  break;
10040  case Uniform:
10041  Out << 'u';
10042  break;
10043  case Vector:
10044  Out << 'v';
10045  break;
10046  }
10047 
10048  if (!!ParamAttr.Alignment)
10049  Out << 'a' << ParamAttr.Alignment;
10050  }
10051 
10052  return Out.str();
10053 }
10054 
10055 // Function used to add the attribute. The parameter `VLEN` is
10056 // templated to allow the use of "x" when targeting scalable functions
10057 // for SVE.
10058 template <typename T>
10059 static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10060  char ISA, StringRef ParSeq,
10061  StringRef MangledName, bool OutputBecomesInput,
10062  llvm::Function *Fn) {
10063  SmallString<256> Buffer;
10064  llvm::raw_svector_ostream Out(Buffer);
10065  Out << Prefix << ISA << LMask << VLEN;
10066  if (OutputBecomesInput)
10067  Out << "v";
10068  Out << ParSeq << "_" << MangledName;
10069  Fn->addFnAttr(Out.str());
10070 }
10071 
10072 // Helper function to generate the Advanced SIMD names depending on
10073 // the value of the NDS when simdlen is not present.
10074 static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10075  StringRef Prefix, char ISA,
10076  StringRef ParSeq, StringRef MangledName,
10077  bool OutputBecomesInput,
10078  llvm::Function *Fn) {
10079  switch (NDS) {
10080  case 8:
10081  addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10082  OutputBecomesInput, Fn);
10083  addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
10084  OutputBecomesInput, Fn);
10085  break;
10086  case 16:
10087  addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10088  OutputBecomesInput, Fn);
10089  addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
10090  OutputBecomesInput, Fn);
10091  break;
10092  case 32:
10093  addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10094  OutputBecomesInput, Fn);
10095  addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
10096  OutputBecomesInput, Fn);
10097  break;
10098  case 64:
10099  case 128:
10100  addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
10101  OutputBecomesInput, Fn);
10102  break;
10103  default:
10104  llvm_unreachable("Scalar type is too wide.");
10105  }
10106 }
10107 
10108 /// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10110  CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10111  ArrayRef<ParamAttrTy> ParamAttrs,
10112  OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10113  char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10114 
10115  // Get basic data for building the vector signature.
10116  const auto Data = getNDSWDS(FD, ParamAttrs);
10117  const unsigned NDS = std::get<0>(Data);
10118  const unsigned WDS = std::get<1>(Data);
10119  const bool OutputBecomesInput = std::get<2>(Data);
10120 
10121  // Check the values provided via `simdlen` by the user.
10122  // 1. A `simdlen(1)` doesn't produce vector signatures,
10123  if (UserVLEN == 1) {
10124  unsigned DiagID = CGM.getDiags().getCustomDiagID(
10126  "The clause simdlen(1) has no effect when targeting aarch64.");
10127  CGM.getDiags().Report(SLoc, DiagID);
10128  return;
10129  }
10130 
10131  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10132  // Advanced SIMD output.
10133  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
10134  unsigned DiagID = CGM.getDiags().getCustomDiagID(
10135  DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
10136  "power of 2 when targeting Advanced SIMD.");
10137  CGM.getDiags().Report(SLoc, DiagID);
10138  return;
10139  }
10140 
10141  // 3. Section 3.4.1. SVE fixed length must obey the architectural
10142  // limits.
10143  if (ISA == 's' && UserVLEN != 0) {
10144  if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10145  unsigned DiagID = CGM.getDiags().getCustomDiagID(
10146  DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
10147  "lanes in the architectural constraints "
10148  "for SVE (min is 128-bit, max is "
10149  "2048-bit, by steps of 128-bit)");
10150  CGM.getDiags().Report(SLoc, DiagID) << WDS;
10151  return;
10152  }
10153  }
10154 
10155  // Sort out parameter sequence.
10156  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10157  StringRef Prefix = "_ZGV";
10158  // Generate simdlen from user input (if any).
10159  if (UserVLEN) {
10160  if (ISA == 's') {
10161  // SVE generates only a masked function.
10162  addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10163  OutputBecomesInput, Fn);
10164  } else {
10165  assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10166  // Advanced SIMD generates one or two functions, depending on
10167  // the `[not]inbranch` clause.
10168  switch (State) {
10169  case OMPDeclareSimdDeclAttr::BS_Undefined:
10170  addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10171  OutputBecomesInput, Fn);
10172  addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10173  OutputBecomesInput, Fn);
10174  break;
10175  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10176  addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
10177  OutputBecomesInput, Fn);
10178  break;
10179  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10180  addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
10181  OutputBecomesInput, Fn);
10182  break;
10183  }
10184  }
10185  } else {
10186  // If no user simdlen is provided, follow the AAVFABI rules for
10187  // generating the vector length.
10188  if (ISA == 's') {
10189  // SVE, section 3.4.1, item 1.
10190  addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
10191  OutputBecomesInput, Fn);
10192  } else {
10193  assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10194  // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10195  // two vector names depending on the use of the clause
10196  // `[not]inbranch`.
10197  switch (State) {
10198  case OMPDeclareSimdDeclAttr::BS_Undefined:
10199  addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10200  OutputBecomesInput, Fn);
10201  addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10202  OutputBecomesInput, Fn);
10203  break;
10204  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10205  addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
10206  OutputBecomesInput, Fn);
10207  break;
10208  case OMPDeclareSimdDeclAttr::BS_Inbranch:
10209  addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
10210  OutputBecomesInput, Fn);
10211  break;
10212  }
10213  }
10214  }
10215 }
10216 
10218  llvm::Function *Fn) {
10219  ASTContext &C = CGM.getContext();
10220  FD = FD->getMostRecentDecl();
10221  // Map params to their positions in function decl.
10222  llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10223  if (isa<CXXMethodDecl>(FD))
10224  ParamPositions.try_emplace(FD, 0);
10225  unsigned ParamPos = ParamPositions.size();
10226  for (const ParmVarDecl *P : FD->parameters()) {
10227  ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
10228  ++ParamPos;
10229  }
10230  while (FD) {
10231  for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
10232  llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
10233  // Mark uniform parameters.
10234  for (const Expr *E : Attr->uniforms()) {
10235  E = E->IgnoreParenImpCasts();
10236  unsigned Pos;
10237  if (isa<CXXThisExpr>(E)) {
10238  Pos = ParamPositions[FD];
10239  } else {
10240  const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10241  ->getCanonicalDecl();
10242  Pos = ParamPositions[PVD];
10243  }
10244  ParamAttrs[Pos].Kind = Uniform;
10245  }
10246  // Get alignment info.
10247  auto NI = Attr->alignments_begin();
10248  for (const Expr *E : Attr->aligneds()) {
10249  E = E->IgnoreParenImpCasts();
10250  unsigned Pos;
10251  QualType ParmTy;
10252  if (isa<CXXThisExpr>(E)) {
10253  Pos = ParamPositions[FD];
10254  ParmTy = E->getType();
10255  } else {
10256  const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10257  ->getCanonicalDecl();
10258  Pos = ParamPositions[PVD];
10259  ParmTy = PVD->getType();
10260  }
10261  ParamAttrs[Pos].Alignment =
10262  (*NI)
10263  ? (*NI)->EvaluateKnownConstInt(C)
10264  : llvm::APSInt::getUnsigned(
10266  .getQuantity());
10267  ++NI;
10268  }
10269  // Mark linear parameters.
10270  auto SI = Attr->steps_begin();
10271  auto MI = Attr->modifiers_begin();
10272  for (const Expr *E : Attr->linears()) {
10273  E = E->IgnoreParenImpCasts();
10274  unsigned Pos;
10275  if (isa<CXXThisExpr>(E)) {
10276  Pos = ParamPositions[FD];
10277  } else {
10278  const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
10279  ->getCanonicalDecl();
10280  Pos = ParamPositions[PVD];
10281  }
10282  ParamAttrTy &ParamAttr = ParamAttrs[Pos];
10283  ParamAttr.Kind = Linear;
10284  if (*SI) {
10285  Expr::EvalResult Result;
10286  if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
10287  if (const auto *DRE =
10288  cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
10289  if (const auto *StridePVD = cast<ParmVarDecl>(DRE->getDecl())) {
10290  ParamAttr.Kind = LinearWithVarStride;
10291  ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(
10292  ParamPositions[StridePVD->getCanonicalDecl()]);
10293  }
10294  }
10295  } else {
10296  ParamAttr.StrideOrArg = Result.Val.getInt();
10297  }
10298  }
10299  ++SI;
10300  ++MI;
10301  }
10302  llvm::APSInt VLENVal;
10303  SourceLocation ExprLoc;
10304  const Expr *VLENExpr = Attr->getSimdlen();
10305  if (VLENExpr) {
10306  VLENVal = VLENExpr->EvaluateKnownConstInt(C);
10307  ExprLoc = VLENExpr->getExprLoc();
10308  }
10309  OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
10310  if (CGM.getTriple().getArch() == llvm::Triple::x86 ||
10311  CGM.getTriple().getArch() == llvm::Triple::x86_64) {
10312  emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
10313  } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
10314  unsigned VLEN = VLENVal.getExtValue();
10315  StringRef MangledName = Fn->getName();
10316  if (CGM.getTarget().hasFeature("sve"))
10317  emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10318  MangledName, 's', 128, Fn, ExprLoc);
10319  if (CGM.getTarget().hasFeature("neon"))
10320  emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
10321  MangledName, 'n', 128, Fn, ExprLoc);
10322  }
10323  }
10324  FD = FD->getPreviousDecl();
10325  }
10326 }
10327 
10328 namespace {
10329 /// Cleanup action for doacross support.
10330 class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
10331 public:
10332  static const int DoacrossFinArgs = 2;
10333 
10334 private:
10335  llvm::FunctionCallee RTLFn;
10336  llvm::Value *Args[DoacrossFinArgs];
10337 
10338 public:
10339  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
10340  ArrayRef<llvm::Value *> CallArgs)
10341  : RTLFn(RTLFn) {
10342  assert(CallArgs.size() == DoacrossFinArgs);
10343  std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10344  }
10345  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10346  if (!CGF.HaveInsertPoint())
10347  return;
10348  CGF.EmitRuntimeCall(RTLFn, Args);
10349  }
10350 };
10351 } // namespace
10352 
10354  const OMPLoopDirective &D,
10355  ArrayRef<Expr *> NumIterations) {
10356  if (!CGF.HaveInsertPoint())
10357  return;
10358 
10359  ASTContext &C = CGM.getContext();
10360  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
10361  RecordDecl *RD;
10362  if (KmpDimTy.isNull()) {
10363  // Build struct kmp_dim { // loop bounds info casted to kmp_int64
10364  // kmp_int64 lo; // lower
10365  // kmp_int64 up; // upper
10366  // kmp_int64 st; // stride
10367  // };
10368  RD = C.buildImplicitRecord("kmp_dim");
10369  RD->startDefinition();
10370  addFieldToRecordDecl(C, RD, Int64Ty);
10371  addFieldToRecordDecl(C, RD, Int64Ty);
10372  addFieldToRecordDecl(C, RD, Int64Ty);
10373  RD->completeDefinition();
10374  KmpDimTy = C.getRecordType(RD);
10375  } else {
10376  RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
10377  }
10378  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
10379  QualType ArrayTy =
10381 
10382  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
10383  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
10384  enum { LowerFD = 0, UpperFD, StrideFD };
10385  // Fill dims with data.
10386  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
10387  LValue DimsLVal = CGF.MakeAddrLValue(
10388  CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
10389  // dims.upper = num_iterations;
10390  LValue UpperLVal = CGF.EmitLValueForField(
10391  DimsLVal, *std::next(RD->field_begin(), UpperFD));
10392  llvm::Value *NumIterVal =
10393  CGF.EmitScalarConversion(CGF.EmitScalarExpr(NumIterations[I]),
10394  D.getNumIterations()->getType(), Int64Ty,
10395  D.getNumIterations()->getExprLoc());
10396  CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
10397  // dims.stride = 1;
10398  LValue StrideLVal = CGF.EmitLValueForField(
10399  DimsLVal, *std::next(RD->field_begin(), StrideFD));
10400  CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
10401  StrideLVal);
10402  }
10403 
10404  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
10405  // kmp_int32 num_dims, struct kmp_dim * dims);
10406  llvm::Value *Args[] = {
10407  emitUpdateLocation(CGF, D.getBeginLoc()),
10408  getThreadID(CGF, D.getBeginLoc()),
10409  llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
10411  CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
10412  CGM.VoidPtrTy)};
10413 
10414  llvm::FunctionCallee RTLFn =
10416  CGF.EmitRuntimeCall(RTLFn, Args);
10417  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
10418  emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
10419  llvm::FunctionCallee FiniRTLFn =
10421  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10422  llvm::makeArrayRef(FiniArgs));
10423 }
10424 
10426  const OMPDependClause *C) {
10427  QualType Int64Ty =
10428  CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
10429  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
10430  QualType ArrayTy = CGM.getContext().getConstantArrayType(
10431  Int64Ty, Size, ArrayType::Normal, 0);
10432  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
10433  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
10434  const Expr *CounterVal = C->getLoopData(I);
10435  assert(CounterVal);
10436  llvm::Value *CntVal = CGF.EmitScalarConversion(
10437  CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
10438  CounterVal->getExprLoc());
10439  CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
10440  /*Volatile=*/false, Int64Ty);
10441  }
10442  llvm::Value *Args[] = {
10443  emitUpdateLocation(CGF, C->getBeginLoc()),
10444  getThreadID(CGF, C->getBeginLoc()),
10445  CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
10446  llvm::FunctionCallee RTLFn;
10447  if (C->getDependencyKind() == OMPC_DEPEND_source) {
10449  } else {
10450  assert(C->getDependencyKind() == OMPC_DEPEND_sink);
10452  }
10453  CGF.EmitRuntimeCall(RTLFn, Args);
10454 }
10455 
10457  llvm::FunctionCallee Callee,
10458  ArrayRef<llvm::Value *> Args) const {
10459  assert(Loc.isValid() && "Outlined function call location must be valid.");
10460  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
10461 
10462  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
10463  if (Fn->doesNotThrow()) {
10464  CGF.EmitNounwindRuntimeCall(Fn, Args);
10465  return;
10466  }
10467  }
10468  CGF.EmitRuntimeCall(Callee, Args);
10469 }
10470 
10472  CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
10473  ArrayRef<llvm::Value *> Args) const {
10474  emitCall(CGF, Loc, OutlinedFn, Args);
10475 }
10476 
10478  if (const auto *FD = dyn_cast<FunctionDecl>(D))
10479  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
10481 }
10482 
10484  const VarDecl *NativeParam,
10485  const VarDecl *TargetParam) const {
10486  return CGF.GetAddrOfLocalVar(NativeParam);
10487 }
10488 
10489 namespace {
10490 /// Cleanup action for allocate support.
10491 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
10492 public:
10493  static const int CleanupArgs = 3;
10494 
10495 private:
10496  llvm::FunctionCallee RTLFn;
10497  llvm::Value *Args[CleanupArgs];
10498 
10499 public:
10500  OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
10501  ArrayRef<llvm::Value *> CallArgs)
10502  : RTLFn(RTLFn) {
10503  assert(CallArgs.size() == CleanupArgs &&
10504  "Size of arguments does not match.");
10505  std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
10506  }
10507  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
10508  if (!CGF.HaveInsertPoint())
10509  return;
10510  CGF.EmitRuntimeCall(RTLFn, Args);
10511  }
10512 };
10513 } // namespace
10514 
10516  const VarDecl *VD) {
10517  if (!VD)
10518  return Address::invalid();
10519  const VarDecl *CVD = VD->getCanonicalDecl();
10520  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
10521  return Address::invalid();
10522  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
10523  // Use the default allocation.
10524  if (AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
10525  !AA->getAllocator())
10526  return Address::invalid();
10527  llvm::Value *Size;
10528  CharUnits Align = CGM.getContext().getDeclAlign(CVD);
10529  if (CVD->getType()->isVariablyModifiedType()) {
10530  Size = CGF.getTypeSize(CVD->getType());
10531  // Align the size: ((size + align - 1) / align) * align
10532  Size = CGF.Builder.CreateNUWAdd(
10533  Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
10534  Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
10535  Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
10536  } else {
10537  CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
10538  Size = CGM.getSize(Sz.alignTo(Align));
10539  }
10540  llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
10541  assert(AA->getAllocator() &&
10542  "Expected allocator expression for non-default allocator.");
10543  llvm::Value *Allocator = CGF.EmitScalarExpr(AA->getAllocator());
10544  // According to the standard, the original allocator type is an enum (integer).
10545  // Convert to pointer type, if required.
10546  if (Allocator->getType()->isIntegerTy())
10547  Allocator = CGF.Builder.CreateIntToPtr(Allocator, CGM.VoidPtrTy);
10548  else if (Allocator->getType()->isPointerTy())
10549  Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(Allocator,
10550  CGM.VoidPtrTy);
10551  llvm::Value *Args[] = {ThreadID, Size, Allocator};
10552 
10553  llvm::Value *Addr =
10555  CVD->getName() + ".void.addr");
10556  llvm::Value *FiniArgs[OMPAllocateCleanupTy::CleanupArgs] = {ThreadID, Addr,
10557  Allocator};
10558  llvm::FunctionCallee FiniRTLFn = createRuntimeFunction(OMPRTL__kmpc_free);
10559 
10560  CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
10561  llvm::makeArrayRef(FiniArgs));
10563  Addr,
10564  CGF.ConvertTypeForMem(CGM.getContext().getPointerType(CVD->getType())),
10565  CVD->getName() + ".addr");
10566  return Address(Addr, Align);
10567 }
10568 
10570  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10571  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
10572  llvm_unreachable("Not supported in SIMD-only mode");
10573 }
10574 
10576  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10577  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
10578  llvm_unreachable("Not supported in SIMD-only mode");
10579 }
10580 
10582  const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
10583  const VarDecl *PartIDVar, const VarDecl *TaskTVar,
10584  OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
10585  bool Tied, unsigned &NumberOfParts) {
10586  llvm_unreachable("Not supported in SIMD-only mode");
10587 }
10588 
10590  SourceLocation Loc,
10591  llvm::Function *OutlinedFn,
10592  ArrayRef<llvm::Value *> CapturedVars,
10593  const Expr *IfCond) {
10594  llvm_unreachable("Not supported in SIMD-only mode");
10595 }
10596 
10598  CodeGenFunction &CGF, StringRef CriticalName,
10599  const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
10600  const Expr *Hint) {
10601  llvm_unreachable("Not supported in SIMD-only mode");
10602 }
10603 
10605  const RegionCodeGenTy &MasterOpGen,
10606  SourceLocation Loc) {
10607  llvm_unreachable("Not supported in SIMD-only mode");
10608 }
10609 
10611  SourceLocation Loc) {
10612  llvm_unreachable("Not supported in SIMD-only mode");
10613 }
10614 
10616  CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
10617  SourceLocation Loc) {
10618  llvm_unreachable("Not supported in SIMD-only mode");
10619 }
10620 
10622  CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
10623  SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
10624  ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
10625  ArrayRef<const Expr *> AssignmentOps) {
10626  llvm_unreachable("Not supported in SIMD-only mode");
10627 }
10628 
10630  const RegionCodeGenTy &OrderedOpGen,
10631  SourceLocation Loc,
10632  bool IsThreads) {
10633  llvm_unreachable("Not supported in SIMD-only mode");
10634 }
10635 
10637  SourceLocation Loc,
10638  OpenMPDirectiveKind Kind,
10639  bool EmitChecks,
10640  bool ForceSimpleCall) {
10641  llvm_unreachable("Not supported in SIMD-only mode");
10642 }
10643 
10645  CodeGenFunction &CGF, SourceLocation Loc,
10646  const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
10647  bool Ordered, const DispatchRTInput &DispatchValues) {
10648  llvm_unreachable("Not supported in SIMD-only mode");
10649 }
10650 
10653  const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
10654  llvm_unreachable("Not supported in SIMD-only mode");
10655 }
10656 
10658  CodeGenFunction &CGF, SourceLocation Loc,
10659  OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
10660  llvm_unreachable("Not supported in SIMD-only mode");
10661 }
10662 
10664  SourceLocation Loc,
10665  unsigned IVSize,
10666  bool IVSigned) {
10667  llvm_unreachable("Not supported in SIMD-only mode");
10668 }
10669 
10671  SourceLocation Loc,
10672  OpenMPDirectiveKind DKind) {
10673  llvm_unreachable("Not supported in SIMD-only mode");
10674 }
10675 
10677  SourceLocation Loc,
10678  unsigned IVSize, bool IVSigned,
10679  Address IL, Address LB,
10680  Address UB, Address ST) {
10681  llvm_unreachable("Not supported in SIMD-only mode");
10682 }
10683 
10685  llvm::Value *NumThreads,
10686  SourceLocation Loc) {
10687  llvm_unreachable("Not supported in SIMD-only mode");
10688 }
10689 
10691  OpenMPProcBindClauseKind ProcBind,
10692  SourceLocation Loc) {
10693  llvm_unreachable("Not supported in SIMD-only mode");
10694 }
10695 
10697  const VarDecl *VD,
10698  Address VDAddr,
10699  SourceLocation Loc) {
10700  llvm_unreachable("Not supported in SIMD-only mode");
10701 }
10702 
10704  const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
10705  CodeGenFunction *CGF) {
10706  llvm_unreachable("Not supported in SIMD-only mode");
10707 }
10708 
10710  CodeGenFunction &CGF, QualType VarType, StringRef Name) {
10711  llvm_unreachable("Not supported in SIMD-only mode");
10712 }
10713 
10716  SourceLocation Loc) {
10717  llvm_unreachable("Not supported in SIMD-only mode");
10718 }
10719 
10721  const OMPExecutableDirective &D,
10722  llvm::Function *TaskFunction,
10723  QualType SharedsTy, Address Shareds,
10724  const Expr *IfCond,
10725  const OMPTaskDataTy &Data) {
10726  llvm_unreachable("Not supported in SIMD-only mode");
10727 }
10728 
10730  CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
10731  llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
10732  const Expr *IfCond, const OMPTaskDataTy &Data) {
10733  llvm_unreachable("Not supported in SIMD-only mode");
10734 }
10735 
10739  ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
10740  assert(Options.SimpleReduction && "Only simple reduction is expected.");
10741  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
10742  ReductionOps, Options);
10743 }
10744 
10747  ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
10748  llvm_unreachable("Not supported in SIMD-only mode");
10749 }
10750 
10752  SourceLocation Loc,
10753  ReductionCodeGen &RCG,
10754  unsigned N) {
10755  llvm_unreachable("Not supported in SIMD-only mode");
10756 }
10757 
10759  SourceLocation Loc,
10760  llvm::Value *ReductionsPtr,
10761  LValue SharedLVal) {
10762  llvm_unreachable("Not supported in SIMD-only mode");
10763 }
10764 
10766  SourceLocation Loc) {
10767  llvm_unreachable("Not supported in SIMD-only mode");
10768 }
10769 
10771  CodeGenFunction &CGF, SourceLocation Loc,
10772  OpenMPDirectiveKind CancelRegion) {
10773  llvm_unreachable("Not supported in SIMD-only mode");
10774 }
10775 
10777  SourceLocation Loc, const Expr *IfCond,
10778  OpenMPDirectiveKind CancelRegion) {
10779  llvm_unreachable("Not supported in SIMD-only mode");
10780 }
10781 
10783  const OMPExecutableDirective &D, StringRef ParentName,
10784  llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
10785  bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
10786  llvm_unreachable("Not supported in SIMD-only mode");
10787 }
10788 
10790  const OMPExecutableDirective &D,
10791  llvm::Function *OutlinedFn,
10792  llvm::Value *OutlinedFnID,
10793  const Expr *IfCond,
10794  const Expr *Device) {
10795  llvm_unreachable("Not supported in SIMD-only mode");
10796 }
10797 
10799  llvm_unreachable("Not supported in SIMD-only mode");
10800 }
10801 
10803  llvm_unreachable("Not supported in SIMD-only mode");
10804 }
10805 
10807  return false;
10808 }
10809 
10811  return nullptr;
10812 }
10813 
10815  const OMPExecutableDirective &D,
10816  SourceLocation Loc,
10817  llvm::Function *OutlinedFn,
10818  ArrayRef<llvm::Value *> CapturedVars) {
10819  llvm_unreachable("Not supported in SIMD-only mode");
10820 }
10821 
10823  const Expr *NumTeams,
10824  const Expr *ThreadLimit,
10825  SourceLocation Loc) {
10826  llvm_unreachable("Not supported in SIMD-only mode");
10827 }
10828 
10830  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10831  const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) {
10832  llvm_unreachable("Not supported in SIMD-only mode");
10833 }
10834 
10836  CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10837  const Expr *Device) {
10838  llvm_unreachable("Not supported in SIMD-only mode");
10839 }
10840 
10842  const OMPLoopDirective &D,
10843  ArrayRef<Expr *> NumIterations) {
10844  llvm_unreachable("Not supported in SIMD-only mode");
10845 }
10846 
10848  const OMPDependClause *C) {
10849  llvm_unreachable("Not supported in SIMD-only mode");
10850 }
10851 
10852 const VarDecl *
10854  const VarDecl *NativeParam) const {
10855  llvm_unreachable("Not supported in SIMD-only mode");
10856 }
10857 
10858 Address
10860  const VarDecl *NativeParam,
10861  const VarDecl *TargetParam) const {
10862  llvm_unreachable("Not supported in SIMD-only mode");
10863 }
const CGFunctionInfo & arrangeBuiltinFunctionDeclaration(QualType resultType, const FunctionArgList &args)
A builtin function is a freestanding function using the default C conventions.
Definition: CGCall.cpp:652
RecordDecl * buildImplicitRecord(StringRef Name, RecordDecl::TagKind TK=TTK_Struct) const
Create a new implicit TU-level CXXRecordDecl or RecordDecl declaration.
TBAAAccessInfo getTBAAInfoForSubobject(LValue Base, QualType AccessType)
getTBAAInfoForSubobject - Get TBAA information for an access with a given base lvalue.
void pushTerminate()
Push a terminate handler on the stack.
Definition: CGCleanup.cpp:252
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, llvm::Type *BaseLVType, CharUnits BaseLVAlignment, llvm::Value *Addr)
CharUnits alignTo(const CharUnits &Align) const
alignTo - Returns the next integer (mod 2**64) that is greater than or equal to this quantity and is ...
Definition: CharUnits.h:183
virtual void EmitBody(CodeGenFunction &CGF, const Stmt *S)
Emit the captured statement body.
This represents '#pragma omp task' directive.
Definition: StmtOpenMP.h:1851
static const Decl * getCanonicalDecl(const Decl *D)
Represents a function declaration or definition.
Definition: Decl.h:1748
llvm::IntegerType * IntTy
int
This represents 'thread_limit' clause in the '#pragma omp ...' directive.
bool hasRequiresUnifiedSharedMemory() const
Return whether the unified_shared_memory has been specified.
External linkage, which indicates that the entity can be referred to from other translation units...
Definition: Linkage.h:59
virtual Address getAddressOfLocalVariable(CodeGenFunction &CGF, const VarDecl *VD)
Gets the OpenMP-specific address of the local variable.
Expr * getUpperBoundVariable() const
Definition: StmtOpenMP.h:913
Other implicit parameter.
Definition: Decl.h:1524
i32 captured_struct **param SharedsTy A type which contains references the shared variables *param Shareds Context with the list of shared variables from the p *TaskFunction *param Data Additional data for task generation like final list of privates etc *TaskResultTy emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const OMPTaskDataTy &Data)
QualType TgtDeviceImageQTy
struct __tgt_device_image{ void *ImageStart; // Pointer to the target code start. ...
Complete object ctor.
Definition: ABI.h:25
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
PointerType - C99 6.7.5.1 - Pointer Declarators.
Definition: Type.h:2569
CanQualType VoidPtrTy
Definition: ASTContext.h:1042
Scheduling data for loop-based OpenMP directives.
Definition: OpenMPKinds.h:156
Destroyer * getDestroyer(QualType::DestructionKind destructionKind)
Definition: CGDecl.cpp:2032
A (possibly-)qualified type.
Definition: Type.h:643
base_class_range bases()
Definition: DeclCXX.h:825
llvm::Function * emitReductionFunction(SourceLocation Loc, llvm::Type *ArgsType, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps)
Emits reduction function.
bool isArrayType() const
Definition: Type.h:6440
llvm::Type * ConvertTypeForMem(QualType T)
void EmitBranchOnBoolExpr(const Expr *Cond, llvm::BasicBlock *TrueBlock, llvm::BasicBlock *FalseBlock, uint64_t TrueCount)
EmitBranchOnBoolExpr - Emit a branch on a boolean condition (e.g.
const CodeGenOptions & getCodeGenOpts() const
void EmitVarDecl(const VarDecl &D)
EmitVarDecl - Emit a local variable declaration.
Definition: CGDecl.cpp:165
Address CreateMemTemp(QualType T, const Twine &Name="tmp", Address *Alloca=nullptr)
CreateMemTemp - Create a temporary memory object of the given type, with appropriate alignment and cas...
Definition: CGExpr.cpp:139
CGRecordLayout - This class handles struct and union layout info while lowering AST types to LLVM typ...
bool HaveInsertPoint() const
HaveInsertPoint - True if an insertion point is defined.
const RecordDecl * KmpTaskTQTyRD
static void EmitOMPTargetParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForSimdDirective &S)
Emit device code for the target parallel for simd directive.
void emitSingleReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp, const Expr *PrivateRef, const DeclRefExpr *LHS, const DeclRefExpr *RHS)
Emits single reduction combiner.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
The standard implementation of ConstantInitBuilder used in Clang.
const ASTRecordLayout & getASTRecordLayout(const RecordDecl *D) const
Get or compute information about the layout of the specified record (struct/union/class) D...
Stmt - This represents one statement.
Definition: Stmt.h:66
Expr * getLowerBoundVariable() const
Definition: StmtOpenMP.h:905
OpenMPOffloadingRequiresDirFlags
Values for bit flags for marking which requires clauses have been used.
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
virtual void emitUserDefinedReduction(CodeGenFunction *CGF, const OMPDeclareReductionDecl *D)
Emit code for the specified user defined reduction construct.
Expr * getLoopData(unsigned NumLoop)
Get the loop data.
bool capturesThis() const
Determine whether this capture handles the C++ 'this' pointer.
Definition: Stmt.h:3388
VarDecl * getCapturedVar() const
Retrieve the declaration of the variable being captured.
Definition: Stmt.cpp:1183
bool emitTargetGlobal(GlobalDecl GD) override
Emit the global GD if it is meaningful for the target.
CharUnits getAlignOfGlobalVarInChars(QualType T) const
Return the alignment in characters that should be given to a global variable with type T...
QualType getTgtBinaryDescriptorQTy()
Returns __tgt_bin_desc type.
bool hasNonTrivialCall(const ASTContext &Ctx) const
Determine whether this expression involves a call to any function that is not trivial.
Definition: Expr.cpp:3646
SmallVector< std::pair< OpenMPDependClauseKind, const Expr * >, 4 > Dependences
CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, StringRef Separator)
Constructor allowing to redefine the name separator for the variables.
bool isRecordType() const
Definition: Type.h:6464
void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues) override
This is used for non static scheduled types and when the ordered clause is present on the loop constr...
void clearLocThreadIdInsertPt(CodeGenFunction &CGF)
Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const override
Gets the address of the native argument basing on the address of the target-specific parameter...
SmallVector< const Expr *, 4 > LastprivateCopies
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:88
specific_attr_iterator - Iterates over a subrange of an AttrVec, only providing attributes that are o...
Definition: AttrIterator.h:34
SourceLocation getBeginLoc() const
Returns starting location of directive kind.
Definition: StmtOpenMP.h:224
static llvm::Value * getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, llvm::Value *DefaultThreadLimitVal)
static llvm::Value * emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of teams for a target directive.
virtual llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data)
Emit a code for initialization of task reduction clause.
const RecordType * getAsStructureType() const
Definition: Type.cpp:521
llvm::SmallDenseSet< const VarDecl * > DeferredGlobalVariables
List of variables that can become declare target implicitly and, thus, must be emitted.
llvm::Value * getTypeSize(QualType Ty)
Returns calculated size of the specified type.
llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP teams directive D.
VarDecl * getDefinition(ASTContext &)
Get the real (not just tentative) definition for this declaration.
Definition: Decl.cpp:2166
This represents 'if' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:422
static llvm::Value * emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc, ArrayRef< const Expr *> PrivateVars, ArrayRef< const Expr *> FirstprivateVars, ArrayRef< const Expr *> LastprivateVars, QualType PrivatesQTy, ArrayRef< PrivateDataTy > Privates)
Emit a privates mapping function for correct handling of private and firstprivate variables...
virtual Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name)
Creates artificial threadprivate variable with name Name and type VarType.
StringRef P
CapturedStmt * getInnermostCapturedStmt()
Get innermost captured statement for the construct.
Definition: StmtOpenMP.h:282
unsigned getFieldIndex() const
Returns the index of this field within its record, as appropriate for passing to ASTRecordLayout::get...
Definition: Decl.cpp:3950
Call to void __kmpc_threadprivate_register( ident_t *, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor);.
QualType getNonReferenceType() const
If Type is a reference type (e.g., const int&), returns the type that the reference refers to ("const...
Definition: Type.h:6333
ReductionCodeGen(ArrayRef< const Expr *> Shareds, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> ReductionOps)
static RecordDecl * createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef< PrivateDataTy > Privates)
bool isOpenMPTargetDataManagementDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target data offload directive.
The base class of the type hierarchy.
Definition: Type.h:1433
void EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit=false)
EmitStoreThroughLValue - Store the specified rvalue into the specified lvalue, where both are guarant...
Definition: CGExpr.cpp:1926
void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options) override
Emit a code for reduction clause.
void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device) override
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
Definition: Diagnostic.h:1297
Represents an array type, per C99 6.7.5.2 - Array Declarators.
Definition: Type.h:2844
virtual const FieldDecl * lookup(const VarDecl *VD) const
Lookup the captured field decl for a variable.
llvm::Value * PointersArray
The array of section pointers passed to the runtime library.
virtual void clear()
virtual void completeDefinition()
Note that the definition of this type is now complete.
Definition: Decl.cpp:4310
bool isZero() const
isZero - Test whether the quantity equals zero.
Definition: CharUnits.h:115
QualType withConst() const
Definition: Type.h:815
bool markAsGlobalTarget(GlobalDecl GD)
Marks the declaration as already emitted for the device code and returns true, if it was marked alrea...
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:693
The l-value was an access to a declared entity or something equivalently strong, like the address of ...
struct with the values to be passed to the dispatch runtime function
capture_const_range captures() const
Definition: DeclCXX.h:1253
Address EmitLoadOfPointer(Address Ptr, const PointerType *PtrTy, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr)
Definition: CGExpr.cpp:2341
Expr * getCondition() const
Returns condition.
Definition: OpenMPClause.h:490
LValue EmitLValueForFieldInitialization(LValue Base, const FieldDecl *Field)
EmitLValueForFieldInitialization - Like EmitLValueForField, except that if the Field is a reference...
Definition: CGExpr.cpp:4115
const Expr * getAnyInitializer() const
Get the initializer for this variable, no matter which declaration it is attached to...
Definition: Decl.h:1209
virtual void checkArchForUnifiedAddressing(const OMPRequiresDecl *D)
Perform check on requires decl to ensure that target architecture supports unified addressing...
Describes the capture of a variable or of this, or of a C++1y init-capture.
Definition: LambdaCapture.h:25
void scanForTargetRegionsFunctions(const Stmt *S, StringRef ParentName)
Start scanning from statement S and emit all target regions found along the way.
llvm::IntegerType * Int8Ty
i8, i16, i32, and i64
Represents a C++ constructor within a class.
Definition: DeclCXX.h:2574
static llvm::Value * emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D)
Emit the number of threads for a target directive.
static void EmitOMPAggregateReduction(CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar, const VarDecl *RHSVar, const llvm::function_ref< void(CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *)> &RedOpGen, const Expr *XExpr=nullptr, const Expr *EExpr=nullptr, const Expr *UpExpr=nullptr)
Emit reduction operation for each element of array (required for array sections) LHS op = RHS...
bool ConstantFoldsToSimpleInteger(const Expr *Cond, bool &Result, bool AllowLabels=false)
ConstantFoldsToSimpleInteger - If the specified expression does not fold to a constant, or if it does but contains a label, return false.
void EmitOMPCopy(QualType OriginalType, Address DestAddr, Address SrcAddr, const VarDecl *DestVD, const VarDecl *SrcVD, const Expr *Copy)
Emit proper copying of data from one variable to another.
OpenMPSchedType
Schedule types for 'omp for' loops (these enumerators are taken from the enum sched_type in kmp...
bool isTrivialType(const ASTContext &Context) const
Return true if this is a trivial type per (C++0x [basic.types]p9)
Definition: Type.cpp:2188
SmallVector< const Expr *, 4 > ReductionCopies
SourceLocation getEndLoc() const
Returns ending location of directive.
Definition: StmtOpenMP.h:226
Address GetAddrOfLocalVar(const VarDecl *VD)
GetAddrOfLocalVar - Return the address of a local variable.
static const Stmt * getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body)
Checks if the Body is the CompoundStmt and returns its child statement iff there is only one that is ...
llvm::Function * GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S)
Represents a variable declaration or definition.
Definition: Decl.h:812
Objects with "hidden" visibility are not seen by the dynamic linker.
Definition: Visibility.h:36
llvm::Value * getThreadID(CodeGenFunction &CGF, SourceLocation Loc)
Gets thread id value for the current thread.
QualType getReturnType() const
Definition: Decl.h:2329
This represents 'num_threads' clause in the '#pragma omp ...' directive.
Definition: OpenMPClause.h:585
const T * getAs() const
Member-template getAs<specific type>.
Definition: Type.h:6851
virtual void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device)
Emit the target offloading code associated with D.
The "union" keyword.
Definition: Type.h:5109
Extra information about a function prototype.
Definition: Type.h:3799
LangAS
Defines the address space values used by the address space qualifier of QualType. ...
Definition: AddressSpaces.h:25
llvm::GlobalVariable * finishAndCreateGlobal(As &&...args)
Given that this builder was created by beginning an array or struct directly on a ConstantInitBuilder...
virtual llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST)
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
bool field_empty() const
Definition: Decl.h:3849
DiagnosticsEngine & getDiags() const
void EmitVariablyModifiedType(QualType Ty)
EmitVLASize - Capture all the sizes for the VLA expressions in the given variably-modified type and s...
llvm::Value * getPointer() const
Definition: Address.h:37
llvm::Type * ConvertTypeForMem(QualType T)
ConvertTypeForMem - Convert type T into a llvm::Type.
static llvm::GlobalVariable * createGlobalStruct(CodeGenModule &CGM, QualType Ty, bool IsConstant, ArrayRef< llvm::Constant *> Data, const Twine &Name, As &&... Args)
Represents a parameter to a function.
Definition: Decl.h:1564
Linkage
Describes the different kinds of linkage (C++ [basic.link], C99 6.2.2) that an entity may have...
Definition: Linkage.h:23
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix, const Expr *Ref)
Generates unique name for artificial threadprivate variables.
void createOffloadEntriesAndInfoMetadata()
Creates all the offload entries in the current compilation unit along with the associated metadata...
llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned...
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static unsigned evaluateCDTSize(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
std::string getName(ArrayRef< StringRef > Parts) const
Get the platform-specific name separator.
static void EmitOMPTargetParallelDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelDirective &S)
A jump destination is an abstract label, branching to which may require a jump out through normal cle...
Struct that keeps all the relevant information that should be kept throughout a 'target data' region...
QualType getTgtOffloadEntryQTy()
Returns __tgt_offload_entry type.
virtual void emitTargetDataStandAloneCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device)
Emit the data mapping/movement code associated with the directive D that should be of the form 'targe...
SmallVector< const Expr *, 4 > PrivateVars
Represents a struct/union/class.
Definition: Decl.h:3626
clauselist_range clauselists()
Definition: DeclOpenMP.h:390
llvm::DenseMap< const VarDecl *, FieldDecl * > LambdaCaptureFields
Source[4] in Fortran, do not use for C++.
llvm::StringMap< llvm::WeakTrackingVH > EmittedNonTargetVariables
List of the global variables with their addresses that should not be emitted for the target...
static llvm::Value * emitDestructorsFunction(CodeGenModule &CGM, SourceLocation Loc, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy)
void emitDestroy(Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray)
emitDestroy - Immediately perform the destruction of the given object.
Definition: CGDecl.cpp:2107
bool HasEmittedTargetRegion
Flag for keeping track of whether a target region has been emitted.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy, LValue BaseLV)
virtual llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP parallel directive D.
Address getAddress() const
Definition: CGValue.h:326
\param TaskFunction An LLVM function with type void (*)(i32 /*gtid*/, i32 /*part_id*/, captured_struct */*__context*/). \param SharedsTy A type which contains references to the shared variables. \param Shareds Context with the list of shared variables from the \p TaskFunction. \param IfCond Not a nullptr if 'if' clause was specified, nullptr otherwise. \param Data Additional data for task generation like final list of privates etc.
virtual void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
llvm::Type * ConvertType(QualType T)
ConvertType - Convert type T into a llvm::Type.
virtual void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc)
Emits a master region.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:154
LineState State
virtual void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned)
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
void EmitExprAsInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit)
EmitExprAsInit - Emits the code necessary to initialize a location in memory with the given initializ...
Definition: CGDecl.cpp:1873
Call to void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, void *data...
The scope used to remap some variables as private in the OpenMP loop body (or other captured region e...
field_range fields() const
Definition: Decl.h:3841
SmallVector< const Expr *, 4 > LastprivateVars
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:263
Address EmitLoadOfReference(LValue RefLVal, LValueBaseInfo *PointeeBaseInfo=nullptr, TBAAAccessInfo *PointeeTBAAInfo=nullptr)
Definition: CGExpr.cpp:2319
virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C)
Emit code for doacross ordered directive with 'depend' clause.
Represents a member of a struct/union/class.
Definition: Decl.h:2607
CharUnits getAlignment() const
Definition: CGValue.h:315
static std::string mangleVectorParameters(ArrayRef< ParamAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
const CapturedStmt * getCapturedStmt(OpenMPDirectiveKind RegionKind) const
Returns the captured statement associated with the component region within the (combined) directive...
Definition: StmtOpenMP.h:265
An RAII object to set (and then clear) a mapping for an OpaqueValueExpr.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C)
Computes the lane size (LS) of a return type or of an input parameter, as defined by LS(P) in 3...
might be used in Fortran
void setLocThreadIdInsertPt(CodeGenFunction &CGF, bool AtCurrentPoint=false)
std::pair< llvm::Value *, llvm::Value * > getSizes(unsigned N) const
Returns the size of the reduction item (in chars and total number of elements in the item)...
LValue EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, bool IsLowerBound=true)
Definition: CGExpr.cpp:3634
unsigned getNonVirtualBaseLLVMFieldNo(const CXXRecordDecl *RD) const
static void emitOffloadingArrays(CodeGenFunction &CGF, MappableExprsHandler::MapBaseValuesArrayTy &BasePointers, MappableExprsHandler::MapValuesArrayTy &Pointers, MappableExprsHandler::MapValuesArrayTy &Sizes, MappableExprsHandler::MapFlagsArrayTy &MapTypes, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arrays used to pass the captures and map information to the offloading runtime library...
void startDefinition()
Starts the definition of this tag declaration.
Definition: Decl.cpp:4030
bool isReferenceType() const
Definition: Type.h:6396
This represents clause 'map' in the '#pragma omp ...' directives.
InitKind getInitializerKind() const
Get initializer kind.
Definition: DeclOpenMP.h:173
void EmitOMPAggregateAssign(Address DestAddr, Address SrcAddr, QualType OriginalType, const llvm::function_ref< void(Address, Address)> CopyGen)
Perform element by element copying of arrays with type OriginalType from SrcAddr to DestAddr using co...
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const
Check if the specified ScheduleKind is dynamic.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array, unsigned Index, const VarDecl *Var)
Given an array of pointers to variables, project the address of a given variable. ...
This represents clause 'to' in the '#pragma omp ...' directives.
static CharUnits Zero()
Zero - Construct a CharUnits quantity of zero.
Definition: CharUnits.h:52
clang::CharUnits operator*(clang::CharUnits::QuantityType Scale, const clang::CharUnits &CU)
Definition: CharUnits.h:207
static void emitAArch64DeclareSimdFunction(CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName, char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc)
Emit vector function attributes for AArch64, as defined in the AAVFABI.
llvm::Type * getKmpc_MicroPointerTy()
Returns pointer to kmpc_micro type.
OpenMPDirectiveKind getDirectiveKind() const
Definition: StmtOpenMP.h:300
__DEVICE__ int max(int __a, int __b)
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Decl.h:738
This is a common base class for loop directives ('omp simd', 'omp for', 'omp for simd' etc...
Definition: StmtOpenMP.h:418
void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc) override
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
virtual bool emitTargetFunctions(GlobalDecl GD)
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
An r-value expression (a pr-value in the C++11 taxonomy) produces a temporary value.
Definition: Specifiers.h:124
CharUnits GetTargetTypeStoreSize(llvm::Type *Ty) const
Return the store size, in character units, of the given LLVM type.
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
ComplexPairTy EmitLoadOfComplex(LValue src, SourceLocation loc)
EmitLoadOfComplex - Load a complex number from the specified l-value.
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:742
ArrayRef< ParmVarDecl * > parameters() const
Definition: Decl.h:2289
bool EvaluateAsBooleanCondition(bool &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsBooleanCondition - Return true if this is a constant which we can fold and convert to a boo...
OpenMPDistScheduleClauseKind
OpenMP attributes for 'dist_schedule' clause.
Definition: OpenMPKinds.h:124
void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc) override
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
virtual void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr *> NumIterations)
Emit initialization for doacross loop nesting support.
BinaryOperatorKind
Expr * getInitializer()
Get initializer expression (if specified) of the declare reduction construct.
Definition: DeclOpenMP.h:170
QualType TgtOffloadEntryQTy
Type struct __tgt_offload_entry{ void *addr; // Pointer to the offload entry info.
static void buildStructValue(ConstantStructBuilder &Fields, CodeGenModule &CGM, const RecordDecl *RD, const CGRecordLayout &RL, ArrayRef< llvm::Constant *> Data)
static llvm::Function * emitParallelOrTeamsOutlinedFunction(CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen)
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr, QualType Type, bool EmitDeclareReductionInit, const Expr *Init, const OMPDeclareReductionDecl *DRD, Address SrcAddr=Address::invalid())
Emit initialization of arrays of complex types.
llvm::Function * emitRegistrationFunction() override
Creates the offloading descriptor in the event any target region was emitted in the current module an...
static bool getAArch64MTV(QualType QT, ParamKindTy Kind)
Maps To Vector (MTV), as defined in 3.1.1 of the AAVFABI.
OpenMPScheduleClauseKind Schedule
Definition: OpenMPKinds.h:157
Address CreateElementBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Cast the element type of the given address to a different type, preserving information like the align...
Definition: CGBuilder.h:156
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:37
bool Ordered
true if loop is ordered, false otherwise.
void emitForOrderedIterationEnd(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned) override
Call the appropriate runtime routine to notify that we finished iteration of the ordered loop with th...
bool isOpenMPTeamsDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a teams-kind directive.
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:582
llvm::CallInst * EmitRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
virtual void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info)
Emit the target data mapping code associated with D.
const ArrayType * getAsArrayTypeUnsafe() const
A variant of getAs<> for array types which silently discards qualifiers from the outermost type...
Definition: Type.h:6902
void emitCleanups(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Emits cleanup code for the reduction item.
RAII for correct setting/restoring of CapturedStmtInfo.
const Stmt * getPreInitStmt() const
Get pre-initialization statement for the clause.
Definition: OpenMPClause.h:129
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:66
void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emits code for a taskyield directive.
llvm::StringSet ThreadPrivateWithDefinition
Set of threadprivate variables with the generated initializer.
bool empty() const
Return true if there are no entries defined.
child_range children()
Definition: Stmt.cpp:212
String describing the source location.
void emitMasterRegion(CodeGenFunction &CGF, const RegionCodeGenTy &MasterOpGen, SourceLocation Loc) override
Emits a master region.
virtual std::pair< llvm::Function *, llvm::Function * > getUserDefinedReduction(const OMPDeclareReductionDecl *D)
Get combiner/initializer for the specified user-defined reduction, if any.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
OpenMPScheduleClauseModifier M2
Definition: OpenMPKinds.h:159
bool needsEHCleanup(QualType::DestructionKind kind)
Determines whether an EH cleanup is required to destroy a type with the given destruction kind...
void emitTargetCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond, const Expr *Device) override
Emit the target offloading code associated with D.
SmallVector< const Expr *, 4 > PrivateCopies
llvm::CallInst * CreateMemCpy(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:274
llvm::StructType * getBaseSubobjectLLVMType() const
Return the "base subobject" LLVM type associated with this record.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
void operator()(CodeGenFunction &CGF) const
bool isConstexpr() const
Whether this variable is (C++11) constexpr.
Definition: Decl.h:1386
CharUnits getDeclAlign(const Decl *D, bool ForAlignof=false) const
Return a conservative estimate of the alignment of the specified decl D.
bool isLambda() const
Determine whether this class describes a lambda function object.
Definition: DeclCXX.h:1198
llvm::StringSet AlreadyEmittedTargetFunctions
List of the emitted functions.
Address adjustPrivateAddress(CodeGenFunction &CGF, unsigned N, Address PrivateAddr)
Adjusts PrivatedAddr for using instead of the original variable address in normal operations...
Expr * getSizeExpr() const
Definition: Type.h:3023
Scope - A scope is a transient data structure that is used while parsing the program.
Definition: Scope.h:40
const Type * getTypePtr() const
Retrieves a pointer to the underlying (unqualified) type.
Definition: Type.h:6142
virtual void emitDeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn)
Marks function Fn with properly mangled versions of vector functions.
field_iterator field_begin() const
Definition: Decl.cpp:4301
bool usesReductionInitializer(unsigned N) const
Returns true if the initialization of the reduction item uses initializer from declare reduction cons...
static std::tuple< unsigned, unsigned, bool > getNDSWDS(const FunctionDecl *FD, ArrayRef< ParamAttrTy > ParamAttrs)
bool isOpenMPTargetExecutionDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a target code offload directive.
llvm::BasicBlock * createBasicBlock(const Twine &name="", llvm::Function *parent=nullptr, llvm::BasicBlock *before=nullptr)
createBasicBlock - Create an LLVM basic block.
ArrayRef< MappableComponent > MappableExprComponentListRef
void EmitIgnoredExpr(const Expr *E)
EmitIgnoredExpr - Emit an expression in a context which ignores the result.
Definition: CGExpr.cpp:182
void addCompilerUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.compiler.used metadata.
SmallVector< const Expr *, 4 > FirstprivateCopies
OpenMPDependClauseKind getDependencyKind() const
Get dependency type.
static void emitPrivatesInit(CodeGenFunction &CGF, const OMPExecutableDirective &D, Address KmpTaskSharedsPtr, LValue TDBase, const RecordDecl *KmpTaskTWithPrivatesQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool ForDup)
Emit initialization for private variables in task-based directives.
static int addMonoNonMonoModifier(OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2)
DiagnosticsEngine & getDiagnostics() const
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static llvm::Function * emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty, const Expr *CombinerInitializer, const VarDecl *In, const VarDecl *Out, bool IsCombiner)
virtual void emitFunctionProlog(CodeGenFunction &CGF, const Decl *D)
Emits OpenMP-specific function prolog.
static void emitOffloadingArraysArgument(CodeGenFunction &CGF, llvm::Value *&BasePointersArrayArg, llvm::Value *&PointersArrayArg, llvm::Value *&SizesArrayArg, llvm::Value *&MapTypesArrayArg, CGOpenMPRuntime::TargetDataInfo &Info)
Emit the arguments to be passed to the runtime library based on the arrays of pointers, sizes and map types.
Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
static const OMPDeclareReductionDecl * getReductionInit(const Expr *ReductionOp)
Check if the combiner is a call to UDR combiner and if it is so return the UDR decl used for reductio...
llvm::Value * emitUpdateLocation(CodeGenFunction &CGF, SourceLocation Loc, unsigned Flags=0)
Emits object of ident_t type with info for source location.
bool isOpenMPWorksharingDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a worksharing directive.
static IntegerLiteral * Create(const ASTContext &C, const llvm::APInt &V, QualType type, SourceLocation l)
Returns a new integer literal with value 'V' and type 'type'.
Definition: Expr.cpp:919
SmallVector< const Expr *, 4 > ReductionOps
This represents clause 'is_device_ptr' in the '#pragma omp ...' directives.
SmallVector< const Expr *, 4 > ReductionVars
virtual bool hasAllocateAttributeForGlobalVar(const VarDecl *VD, LangAS &AS)
Checks if the variable has associated OMPAllocateDeclAttr attribute with the predefined allocator and...
virtual void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind)
Call the appropriate runtime routine to notify that we finished all the work with current loop...
virtual Decl * getCanonicalDecl()
Retrieves the "canonical" declaration of the given declaration.
Definition: DeclBase.h:877
This represents clause 'from' in the '#pragma omp ...' directives.
LValue EmitLValueForField(LValue Base, const FieldDecl *Field)
Definition: CGExpr.cpp:3973
const VarDecl * translateParameter(const FieldDecl *FD, const VarDecl *NativeParam) const override
Translates the native parameter of outlined function if this is required for target.
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:4561
NodeId Parent
Definition: ASTDiff.cpp:191
OpenMP 4.0 [2.4, Array Sections].
Definition: ExprOpenMP.h:44
bool hasAttr() const
Definition: DeclBase.h:542
bool isValid() const
Definition: Address.h:35
AutoVarEmission EmitAutoVarAlloca(const VarDecl &var)
EmitAutoVarAlloca - Emit the alloca and debug information for a local variable.
Definition: CGDecl.cpp:1381
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1636
void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr) override
Emits a critical region.
Describes the capture of either a variable, or 'this', or variable-length array type.
Definition: Stmt.h:3363
const CodeGen::CGBlockInfo * BlockInfo
ArrayBuilder beginArray(llvm::Type *eltTy=nullptr)
virtual Address getParameterAddress(CodeGenFunction &CGF, const VarDecl *NativeParam, const VarDecl *TargetParam) const
Gets the address of the native argument based on the address of the target-specific parameter...
CGBlockInfo - Information to generate a block literal.
Definition: CGBlocks.h:152
void EmitAnyExprToMem(const Expr *E, Address Location, Qualifiers Quals, bool IsInitializer)
EmitAnyExprToMem - Emits the code necessary to evaluate an arbitrary expression into the given memory...
Definition: CGExpr.cpp:223
OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
LValueBaseInfo getBaseInfo() const
Definition: CGValue.h:318
RValue - This trivial value class is used to represent the result of an expression that is evaluated...
Definition: CGValue.h:38
static llvm::Function * emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpTaskTWithPrivatesPtrQTy, QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy, QualType SharedsPtrTy, llvm::Function *TaskFunction, llvm::Value *TaskPrivatesMap)
Emit a proxy function which accepts kmp_task_t as the second argument.
bool needCleanups(unsigned N)
Returns true if the private copy requires cleanups.
Class intended to support codegen of all kinds of reduction clauses.
llvm::FunctionCallee createRuntimeFunction(unsigned Function)
Returns specified OpenMP runtime function.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:178
Expr * getCombiner()
Get combiner expression of the declare reduction construct.
Definition: DeclOpenMP.h:152
TypeSourceInfo * getTrivialTypeSourceInfo(QualType T, SourceLocation Loc=SourceLocation()) const
Allocate a TypeSourceInfo where all locations have been initialized to a given location, which defaults to the empty location.
virtual bool isStaticChunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static chunked.
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
unsigned getNumLoops() const
Get number of loops associated with the clause.
This represents '#pragma omp requires...' directive.
Definition: DeclOpenMP.h:345
virtual void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, uint64_t Size, int32_t Flags, llvm::GlobalValue::LinkageTypes Linkage)
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags...
unsigned Offset
Definition: Format.cpp:1713
llvm::CallingConv::ID getRuntimeCC() const
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3319
QualType getTgtDeviceImageQTy()
Returns __tgt_device_image type.
This represents implicit clause 'depend' for the '#pragma omp task' directive.
virtual unsigned getDefaultFirstprivateAddressSpace() const
Returns default address space for the constant firstprivates, 0 by default.
KmpTaskTFields
Indexes of fields for type kmp_task_t.
void emitDeferredTargetDecls() const
Emit deferred declare target variables marked for deferred emission.
static TypeEvaluationKind getEvaluationKind(QualType T)
getEvaluationKind - Return the TypeEvaluationKind of QualType T.
virtual void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Required to resolve existing problems in the runtime.
virtual void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc)
Emit flush of the variables specified in 'omp flush' directive.
static void createConstantGlobalStructAndAddToParent(CodeGenModule &CGM, QualType Ty, ArrayRef< llvm::Constant *> Data, T &Parent)
bool addPrivate(const VarDecl *LocalVD, const llvm::function_ref< Address()> PrivateGen)
Registers LocalVD variable as a private and apply PrivateGen function for it to generate correspondin...
void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc) override
Emit code for 'taskwait' directive.
Represent the declaration of a variable (in which case it is an lvalue) a function (in which case it ...
Definition: Decl.h:636
This represents one expression.
Definition: Expr.h:108
Allow any unmodeled side effect.
Definition: Expr.h:609
static Address invalid()
Definition: Address.h:34
void loadOffloadInfoMetadata()
Loads all the offload entries information from the host IR metadata.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF, const OMPDeclareReductionDecl *DRD, const Expr *InitOp, Address Private, Address Original, QualType Ty)
static llvm::Value * emitReduceCombFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N, const Expr *ReductionOp, const Expr *LHS, const Expr *RHS, const Expr *PrivateRef)
Emits reduction combiner function:
const AnnotatedLine * Line
Enters a new scope for capturing cleanups, all of which will be executed once the scope is exited...
Stmt * IgnoreContainers(bool IgnoreCaptured=false)
Skip no-op (attributed, compound) container stmts and skip captured stmt at the top, if IgnoreCaptured is true.
Definition: Stmt.cpp:122
bool isOpenMPParallelDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a parallel-kind directive.
const CGFunctionInfo & arrangeNullaryFunction()
A nullary function is a freestanding function of type 'void ()'.
Definition: CGCall.cpp:694
std::string OMPHostIRFile
Name of the IR file that contains the result of the OpenMP target host code generation.
Definition: LangOptions.h:247
void emitKmpRoutineEntryT(QualType KmpInt32Ty)
Build type kmp_routine_entry_t (if not built yet).
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:6916
llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts) override
Emits outlined function for the OpenMP task directive D.
unsigned getLine() const
Return the presumed line number of this location.
llvm::Value * emitForNext(CodeGenFunction &CGF, SourceLocation Loc, unsigned IVSize, bool IVSigned, Address IL, Address LB, Address UB, Address ST) override
Call __kmpc_dispatch_next( ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter, kmp_int[32|64] *p_lowe...
unsigned IVSize
Size of the iteration variable in bits.
static bool checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD)
Checks if destructor function is required to be generated.
#define V(N, I)
Definition: ASTContext.h:2907
void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps) override
Emits a single region.
Represents a C++ destructor within a class.
Definition: DeclCXX.h:2838
VariableCaptureKind getCaptureKind() const
Determine the kind of capture.
Definition: Stmt.cpp:1179
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion)
QualType SavedKmpTaskloopTQTy
Saved kmp_task_t for taskloop-based directive.
static RecordDecl * createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind, QualType KmpInt32Ty, QualType KmpRoutineEntryPointerQTy)
VlaSizePair getVLASize(const VariableArrayType *vla)
Returns an LLVM value that corresponds to the size, in non-variably-sized elements, of a variable length array type, plus that largest non-variably-sized element type.
field_iterator field_end() const
Definition: Decl.h:3844
llvm::PointerType * getType() const
Return the type of the pointer value.
Definition: Address.h:43
CharUnits getTypeAlignInChars(QualType T) const
Return the ABI-specified alignment of a (complete) type T, in characters.
Address getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF, QualType VarType, StringRef Name) override
Creates artificial threadprivate variable with name Name and type VarType.
DeclContext * getDeclContext()
Definition: DeclBase.h:438
static llvm::iterator_range< specific_clause_iterator< SpecificClause > > getClausesOfKind(ArrayRef< OMPClause *> Clauses)
Definition: StmtOpenMP.h:186
llvm::FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned...
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:62
void emitFlush(CodeGenFunction &CGF, ArrayRef< const Expr *> Vars, SourceLocation Loc) override
Emit flush of the variables specified in 'omp flush' directive.
This represents 'ordered' clause in the '#pragma omp ...' directive.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind, bool Chunked, bool Ordered)
Map the OpenMP loop schedule to the runtime enumeration.
QualType getType() const
Definition: Expr.h:137
llvm::GlobalValue::LinkageTypes getLLVMLinkageVarDefinition(const VarDecl *VD, bool IsConstant)
Returns LLVM linkage for a declarator.
DefinitionKind hasDefinition(ASTContext &) const
Check whether this variable is defined in this translation unit.
Definition: Decl.cpp:2175
TagDecl * getAsTagDecl() const
Retrieves the TagDecl that this type refers to, either because the type is a TagType or because it is...
Definition: Type.cpp:1644
Provides LLVM's BitmaskEnum facility to enumeration types declared in namespace clang.
SmallVector< const Expr *, 4 > FirstprivateVars
QualType getConstantArrayType(QualType EltTy, const llvm::APInt &ArySize, ArrayType::ArraySizeModifier ASM, unsigned IndexTypeQuals) const
Return the unique reference to the type for a constant array of the specified element type...
TBAAAccessInfo getTBAAInfo() const
Definition: CGValue.h:307
CharUnits alignmentOfArrayElement(CharUnits elementSize) const
Given that this is the alignment of the first element of an array, return the minimum alignment of an...
Definition: CharUnits.h:196
static llvm::Value * emitCopyprivateCopyFunction(CodeGenModule &CGM, llvm::Type *ArgsType, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps, SourceLocation Loc)
virtual Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc)
Returns address of the threadprivate variable for the current thread.
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
QualType getRecordType(const RecordDecl *Decl) const
Represents an unpacked "presumed" location which can be presented to the user.
void Emit(CodeGenFunction &CGF, Flags) override
Emit the cleanup.
virtual void emitTargetOutlinedFunctionHelper(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Helper to emit outlined function for 'target' directive.
void registerTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, llvm::Constant *Addr, llvm::Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
bool isOpenMPTaskLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a taskloop directive.
QualType getFunctionType(QualType ResultTy, ArrayRef< QualType > Args, const FunctionProtoType::ExtProtoInfo &EPI) const
Return a normal function type with a typed argument list.
Definition: ASTContext.h:1382
llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr) override
Emit code for initialization of a threadprivate variable.
llvm::Value * EmitCastToVoidPtr(llvm::Value *value)
Emit a cast to void* in the appropriate address space.
Definition: CGExpr.cpp:50
Allow UB that we can give a value, but not arbitrary unmodeled side effects.
Definition: Expr.h:607
const Type * getBaseElementTypeUnsafe() const
Get the base element type of this type, potentially discarding type qualifiers.
Definition: Type.h:6791
Expr * getInitPriv()
Get Priv variable of the initializer.
Definition: DeclOpenMP.h:180
This represents clause 'firstprivate' in the '#pragma omp ...' directives.
ValueDecl * getDecl()
Definition: Expr.h:1217
void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc) override
Emit a taskgroup region.
QualType KmpTaskTQTy
const Qualifiers & getQuals() const
Definition: CGValue.h:310
Address getAddrOfThreadPrivate(CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, SourceLocation Loc) override
Returns address of the threadprivate variable for the current thread.
bool isUnionType() const
Definition: Type.cpp:475
const LangOptions & getLangOpts() const
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
ASTContext & getContext() const
bool isNull() const
Return true if this QualType doesn&#39;t point to a type yet.
Definition: Type.h:708
OpenMPProcBindClauseKind
OpenMP attributes for &#39;proc_bind&#39; clause.
Definition: OpenMPKinds.h:50
static bool getAArch64PBV(QualType QT, ASTContext &C)
Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
const SourceManager & SM
Definition: Format.cpp:1572
virtual void emitCriticalRegion(CodeGenFunction &CGF, StringRef CriticalName, const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc, const Expr *Hint=nullptr)
Emits a critical region.
VarDecl * getCanonicalDecl() override
Retrieves the "canonical" declaration of the given declaration.
Definition: Decl.cpp:2060
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:40
bool hasClausesOfKind() const
Returns true if the current directive has one or more clauses of a specific kind. ...
Definition: StmtOpenMP.h:218
virtual bool emitTargetGlobalVariable(GlobalDecl GD)
Emit the global variable if it is a valid device global variable.
void finishAndAddTo(AggregateBuilderBase &parent)
Given that this builder was created by beginning an array or struct component on the given parent bui...
AttrVec & getAttrs()
Definition: DeclBase.h:490
CanQualType getCanonicalTypeUnqualified() const
virtual bool hasBody() const
Returns true if this Decl represents a declaration for a body of code, such as a function or method d...
Definition: DeclBase.h:992
bool hasAttrs() const
Definition: DeclBase.h:484
QualType KmpDependInfoTy
Type typedef struct kmp_depend_info { kmp_intptr_t base_addr; size_t len; struct { bool in:1; bool ou...
LValue EmitLoadOfPointerLValue(Address Ptr, const PointerType *PtrTy)
Definition: CGExpr.cpp:2351
QualType TgtBinaryDescriptorQTy
struct __tgt_bin_desc{ int32_t NumDevices; // Number of devices supported.
The l-value was considered opaque, so the alignment was determined from a type.
RecordDecl * getDecl() const
Definition: Type.h:4448
const char * getFilename() const
Return the presumed filename of this location.
Address CreateConstGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = T* ...
Definition: CGBuilder.h:232
virtual Address getAddrOfDeclareTargetVar(const VarDecl *VD)
Returns the address of the variable marked as declare target with link clause OR as declare target wi...
const SpecificClause * getSingleClause() const
Gets a single clause of the specified kind associated with the current directive iff there is only on...
Definition: StmtOpenMP.h:204
static void emitReductionCombiner(CodeGenFunction &CGF, const Expr *ReductionOp)
Emit reduction combiner.
Expr * getStrideVariable() const
Definition: StmtOpenMP.h:921
This represents 'num_teams' clause in the '#pragma omp ...' directive.
OpaqueValueExpr - An expression referring to an opaque object of a fixed type and value class...
Definition: Expr.h:1045
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:141
virtual void registerTargetGlobalVariable(const VarDecl *VD, llvm::Constant *Addr)
Checks if the provided variable declaration VD is a declare target variable and registers it when emitting cod...
#define false
Definition: stdbool.h:17
Kind
CanProxy< U > castAs() const
This captures a statement into a function.
Definition: Stmt.h:3350
QualType getCanonicalType() const
Definition: Type.h:6181
IdentFieldIndex
decl_type * getPreviousDecl()
Return the previous declaration of this declaration or NULL if this is the first declaration.
Definition: Redeclarable.h:203
llvm::Function * emitRequiresDirectiveRegFun()
Creates and returns a registration function for when at least one requires directive was used in the...
static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind)
Returns default flags for the barriers depending on the directive, for which this barrier is going to ...
unsigned getColumn() const
Return the presumed column number of this location.
static with chunk adjustment (e.g., simd)
void pushDestroy(QualType::DestructionKind dtorKind, Address addr, QualType type)
pushDestroy - Push the standard destructor for the given type as at least a normal cleanup...
Definition: CGDecl.cpp:2059
void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond, const RegionCodeGenTy &ThenGen, const RegionCodeGenTy &ElseGen)
Emits code for OpenMP 'if' clause using specified CodeGen function.
void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancel' construct.
Encodes a location in the source.
static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D)
Check for inner distribute directive.
llvm::FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned...
llvm::Type * getIdentTyPointerTy()
Returns pointer to ident_t type.
llvm::Value * MapTypesArray
The array of map types passed to the runtime library.
This represents '#pragma omp declare reduction ...' directive.
Definition: DeclOpenMP.h:102
QualType getUIntPtrType() const
Return a type compatible with "uintptr_t" (C99 7.18.1.4), as defined by the target.
unsigned getOpenMPDefaultSimdAlign(QualType T) const
Get default simd alignment of the specified complete type in bits.
llvm::Value * EvaluateExprAsBool(const Expr *E)
EvaluateExprAsBool - Perform the usual unary conversions on the specified expression and compare the ...
Definition: CGExpr.cpp:164
llvm::PointerIntPair< llvm::Value *, 1, bool > Final
bool isVariablyModifiedType() const
Whether this type is a variably-modified type (C99 6.7.5).
Definition: Type.h:2124
virtual void emitSingleRegion(CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen, SourceLocation Loc, ArrayRef< const Expr *> CopyprivateVars, ArrayRef< const Expr *> DestExprs, ArrayRef< const Expr *> SrcExprs, ArrayRef< const Expr *> AssignmentOps)
Emits a single region.
void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond) override
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
Address CreateConstInBoundsGEP2_32(Address Addr, unsigned Idx0, unsigned Idx1, const llvm::Twine &Name="")
Definition: CGBuilder.h:258
virtual void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc)
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false) override
Emit an implicit/explicit barrier for OpenMP threads.
This is a basic class for representing single OpenMP executable directive.
Definition: StmtOpenMP.h:32
bool emitTargetGlobalVariable(GlobalDecl GD) override
Emit the global variable if it is a valid device global variable.
Lower bound for 'ordered' versions.
ASTContext & getASTContext() const LLVM_READONLY
Definition: DeclBase.cpp:376
const Decl * getDecl() const
Definition: GlobalDecl.h:76
virtual void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancellation point' construct.
OpenMPDirectiveKind
OpenMP directives.
Definition: OpenMPKinds.h:22
bool capturesVariable() const
Determine whether this capture handles a variable (by reference).
Definition: Stmt.h:3391
void emitDoacrossInit(CodeGenFunction &CGF, const OMPLoopDirective &D, ArrayRef< Expr *> NumIterations) override
Emit initialization for doacross loop nesting support.
Address CreateConstArrayGEP(Address Addr, uint64_t Index, const llvm::Twine &Name="")
Given addr = [n x T]* ...
Definition: CGBuilder.h:198
Set if the nonmonotonic schedule modifier was present.
virtual llvm::Function * emitThreadPrivateVarDefinition(const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit, CodeGenFunction *CGF=nullptr)
Emit code for initialization of a threadprivate variable.
OpenMPLinearClauseKind Modifier
Modifier of 'linear' clause.
Definition: OpenMPClause.h:101
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2312
static void EmitOMPTargetSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S)
Emit device code for the target simd directive.
void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) override
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
virtual void emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars, const Expr *IfCond)
Emits code for parallel or serial call of the OutlinedFn with variables captured in a record which ad...
This is a basic class for representing single OpenMP clause.
Definition: OpenMPClause.h:50
void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
Emits the task call for directive D. TaskFunction: an LLVM function whose parameter list ends with (i32, captured_struct *). SharedsTy: a type which contains references to the shared variables. Shareds: context with the list of shared variables from the TaskFunction. IfCond: not a nullptr if an 'if' clause was specified, nullptr otherwise. Data: additional data for task generation, like final state, list of privates, etc.
bool isTrivialInitializer(const Expr *Init)
Determine whether the given initializer is trivial in the sense that it requires no code to be genera...
Definition: CGDecl.cpp:1653
CanQualType VoidTy
Definition: ASTContext.h:1014
bool IVSigned
Sign of the iteration variable.
void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) override
Call the appropriate runtime routine to initialize it before start of loop.
void emitAggregateType(CodeGenFunction &CGF, unsigned N)
Emits the code for the variably-modified type, if required.
static void EmitOMPTargetTeamsDistributeParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForDirective &S)
bool isAnyPointerType() const
Definition: Type.h:6388
This declaration is only a declaration.
Definition: Decl.h:1146
unsigned size() const
Return number of entries defined so far.
virtual void Enter(CodeGenFunction &CGF)
An aligned address.
Definition: Address.h:24
llvm::FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned...
void StartFunction(GlobalDecl GD, QualType RetTy, llvm::Function *Fn, const CGFunctionInfo &FnInfo, const FunctionArgList &Args, SourceLocation Loc=SourceLocation(), SourceLocation StartLoc=SourceLocation())
Emit code for the start of a function.
Stmt * getCapturedStmt()
Retrieve the statement being captured.
Definition: Stmt.h:3451
void emitTargetDataCalls(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond, const Expr *Device, const RegionCodeGenTy &CodeGen, TargetDataInfo &Info) override
Emit the target data mapping code associated with D.
DestructionKind isDestructedType() const
Returns a nonzero value if objects of this type require non-trivial work to clean up after...
Definition: Type.h:1157
llvm::Value * getCriticalRegionLock(StringRef CriticalName)
Returns corresponding lock object for the specified critical region name.
unsigned getCustomDiagID(Level L, const char(&FormatString)[N])
Return an ID for a diagnostic with the specified format string and level.
Definition: Diagnostic.h:778
void emitThreadPrivateVarInit(CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor, llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc)
Emits initialization code for the threadprivate variables.
Complete object dtor.
Definition: ABI.h:35
JumpDest getJumpDestInCurrentScope(llvm::BasicBlock *Target)
The given basic block lies in the current EH scope, but may be a target of a potentially scope-crossi...
virtual void emitProcBindClause(CodeGenFunction &CGF, OpenMPProcBindClauseKind ProcBind, SourceLocation Loc)
Emit call to void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, int proc_bind) to generat...
virtual void emitTaskyieldCall(CodeGenFunction &CGF, SourceLocation Loc)
Emits code for a taskyield directive.
void EmitAggregateAssign(LValue Dest, LValue Src, QualType EltTy)
Emit an aggregate assignment.
QualType getType() const
Definition: CGValue.h:263
CharUnits getNaturalTypeAlignment(QualType T, LValueBaseInfo *BaseInfo=nullptr, TBAAAccessInfo *TBAAInfo=nullptr, bool forPointeeType=false)
bool hasTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum) const
Return true if a target region entry with the provided information exists.
virtual void functionFinished(CodeGenFunction &CGF)
Cleans up references to the objects in finished function.
virtual void emitOutlinedFunctionCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits call of the outlined function with the provided arguments, translating these arguments to corre...
const Expr * getRefExpr(unsigned N) const
Returns the base declaration of the reduction item.
void FinishFunction(SourceLocation EndLoc=SourceLocation())
FinishFunction - Complete IR generation of the current function.
void emitTaskReductionFixups(CodeGenFunction &CGF, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N) override
Required to resolve existing problems in the runtime.
Expr * getInitOrig()
Get Orig variable of the initializer.
Definition: DeclOpenMP.h:177
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:215
void registerDeviceGlobalVarEntryInfo(StringRef VarName, llvm::Constant *Addr, CharUnits VarSize, OMPTargetGlobalVarEntryKind Flags, llvm::GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
Struct with the values to be passed to the static runtime function.
ConstantAddress GetAddrOfConstantCString(const std::string &Str, const char *GlobalName=nullptr)
Returns a pointer to a character array containing the literal and a terminating '\0' character...
llvm::Constant * GetAddrOfGlobal(GlobalDecl GD, ForDefinition_t IsForDefinition=NotForDefinition)
static void EmitOMPTargetTeamsDistributeDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeDirective &S)
Emit device code for the target teams distribute directive.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
JumpDest getOMPCancelDestination(OpenMPDirectiveKind Kind)
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads) override
Emit an ordered region.
void addUsedGlobal(llvm::GlobalValue *GV)
Add a global to a list to be added to the llvm.used metadata.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:358
llvm::Value * getScalarVal() const
getScalarVal() - Return the Value* of this scalar value.
Definition: CGValue.h:58
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
static void emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn, const llvm::APSInt &VLENVal, ArrayRef< ParamAttrTy > ParamAttrs, OMPDeclareSimdDeclAttr::BranchStateTy State)
CanQualType CharTy
Definition: ASTContext.h:1016
void setAction(PrePostActionTy &Action) const
CGFunctionInfo - Class to encapsulate the information about a function definition.
This class organizes the cross-function state that is used while generating LLVM code.
QualType withRestrict() const
Definition: Type.h:831
CGOpenMPRuntime & getOpenMPRuntime()
Return a reference to the configured OpenMP runtime.
static const VarDecl * getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE)
StructBuilder beginStruct(llvm::StructType *ty=nullptr)
OpenMPScheduleClauseModifier
OpenMP modifiers for 'schedule' clause.
Definition: OpenMPKinds.h:66
LValue EmitOMPSharedLValue(const Expr *E)
Emits the lvalue for the expression with possibly captured variable.
llvm::GlobalValue * GetGlobalValue(StringRef Ref)
Dataflow Directional Tag Classes.
static ApplyDebugLocation CreateDefaultArtificial(CodeGenFunction &CGF, SourceLocation TemporaryLocation)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:749
Class provides a way to call simple version of codegen for OpenMP region, or an advanced with possibl...
LValue EmitLoadOfReferenceLValue(LValue RefLVal)
Definition: CGExpr.cpp:2332
static llvm::Value * emitReduceInitFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction initializer function:
This represents 'device' clause in the '#pragma omp ...' directive.
bool isValid() const
Return true if this is a valid SourceLocation object.
bool emitTargetFunctions(GlobalDecl GD) override
Emit the target regions enclosed in GD function definition or the function itself in case it is a val...
... i32 captured_struct *; \param SharedsTy A type which contains references to the shared variables. \param Shareds Context with the list of shared variables from the \p TaskFunction. \param IfCond Not a nullptr if 'if' clause was specified, nullptr otherwise. \param Data Additional data for task generation like final list of privates etc.
virtual void emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data)
DeclContext - This is used only as base class of specific decl types that can act as declaration cont...
Definition: DeclBase.h:1271
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:92
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:580
static std::string getName(const CallEvent &Call)
virtual bool emitTargetGlobal(GlobalDecl GD)
Emit the global GD if it is meaningful for the target.
virtual void emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc, const Expr *IfCond, OpenMPDirectiveKind CancelRegion)
Emit code for 'cancel' construct.
std::vector< llvm::Triple > OMPTargetTriples
Triples of the OpenMP targets that the host code codegen should take into account in order to generat...
Definition: LangOptions.h:243
OpenMPScheduleClauseModifier M1
Definition: OpenMPKinds.h:158
llvm::Value * LB
Loop lower bound.
A basic class for pre|post-action for advanced codegen sequence for OpenMP region.
LValue getSharedLValue(unsigned N) const
Returns LValue for the reduction item.
const Expr * getInit() const
Definition: Decl.h:1219
llvm::Constant * getPointer() const
Definition: Address.h:83
void initializeTargetRegionEntryInfo(unsigned DeviceID, unsigned FileID, StringRef ParentName, unsigned LineNum, unsigned Order)
Initialize target region entry.
llvm::Function * createOffloadingBinaryDescriptorRegistration()
Creates and registers offloading binary descriptor for the current compilation unit.
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:69
bool isOpenMPSimdDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a simd directive.
virtual void registerGlobalDtor(CodeGenFunction &CGF, const VarDecl &D, llvm::FunctionCallee Dtor, llvm::Constant *Addr)=0
Emit code to force the execution of a destructor during global teardown.
llvm::PointerIntPair< llvm::Value *, 1, bool > Priority
RTCancelKind
llvm::Constant * EmitNullConstant(QualType T)
Return the result of value-initializing the given type, i.e.
PresumedLoc getPresumedLoc(SourceLocation Loc, bool UseLineDirectives=true) const
Returns the "presumed" location that a SourceLocation specifies.
llvm::Value * UB
Loop upper bound.
llvm::Value * Chunk
Chunk size specified using 'schedule' clause (nullptr if chunk was not specified) ...
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:107
llvm::Module & getModule() const
llvm::Value * Chunk
Value of the chunk for the static_chunked scheduled loop.
Call to void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...
Not really used in Fortran any more.
LValue MakeAddrLValue(Address Addr, QualType T, AlignmentSource Source=AlignmentSource::Type)
virtual void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen)
Emit outlined function for 'target' directive.
llvm::StructType * ConvertRecordDeclType(const RecordDecl *TD)
ConvertRecordDeclType - Lay out a tagged decl type like struct or union.
virtual bool isDefaultLocationConstant() const
Check if the default location must be constant.
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:2942
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc)
Emits address of the word in a memory where current thread id is stored.
void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars) override
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
void emitInitialization(CodeGenFunction &CGF, unsigned N, Address PrivateAddr, LValue SharedLVal, llvm::function_ref< bool(CodeGenFunction &)> DefaultInit)
Performs initialization of the private copy for the reduction item.
JumpDest ReturnBlock
ReturnBlock - Unified return block.
OffloadEntriesInfoManagerTy OffloadEntriesInfoManager
virtual bool isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind, bool Chunked) const
Check if the specified ScheduleKind is static non-chunked.
bool capturesVariableByCopy() const
Determine whether this capture handles a variable by copy.
Definition: Stmt.h:3394
Class that represents a component of a mappable expression.
static void emitForStaticInitCall(CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId, llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule, OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2, const CGOpenMPRuntime::StaticRTInput &Values)
void EmitAutoVarCleanups(const AutoVarEmission &emission)
Definition: CGDecl.cpp:1984
API for captured statement code generation.
virtual bool emitDeclareTargetVarDefinition(const VarDecl *VD, llvm::GlobalVariable *Addr, bool PerformInit)
Emit code for initialization of declare target variable.
virtual void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, SourceLocation Loc, llvm::Function *OutlinedFn, ArrayRef< llvm::Value *> CapturedVars)
Emits code for teams call of the OutlinedFn with variables captured in a record which address is stor...
virtual void emitDistributeStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values)
bool isOpenMPDistributeDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a distribute directive.
virtual StringRef getOutlinedHelperName() const
Get the function name of an outlined region.
static bool classof(const OMPClause *T)
bool HasEmittedDeclareTargetRegion
Flag for keeping track of whether a device routine has been emitted.
ArraySubscriptExpr - [C99 6.5.2.1] Array Subscripting.
Definition: Expr.h:2432
virtual void emitOrderedRegion(CodeGenFunction &CGF, const RegionCodeGenTy &OrderedOpGen, SourceLocation Loc, bool IsThreads)
Emit an ordered region.
This file defines OpenMP AST classes for executable directives and clauses.
static FieldDecl * addFieldToRecordDecl(ASTContext &C, DeclContext *DC, QualType FieldTy)
CodeGenTypes & getTypes() const
StructBuilder beginStruct(llvm::StructType *structTy=nullptr)
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:6677
static void EmitOMPTargetTeamsDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDirective &S)
Emit device code for the target teams directive.
CleanupTy(PrePostActionTy *Action)
T * getAttr() const
Definition: DeclBase.h:538
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:51
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
static bool checkInitIsRequired(CodeGenFunction &CGF, ArrayRef< PrivateDataTy > Privates)
Check if duplication function is required for taskloops.
llvm::Value * EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef< llvm::Value *> IdxList, bool SignedIndices, bool IsSubtraction, SourceLocation Loc, const Twine &Name="")
Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to detect undefined behavior whe...
static void getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc, unsigned &DeviceID, unsigned &FileID, unsigned &LineNum)
Obtain information that uniquely identifies a target entry.
Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal) override
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
llvm::AssertingVH< llvm::Instruction > AllocaInsertPt
AllocaInsertPoint - This is an instruction in the entry block before which we prefer to insert alloca...
OpenMPLocationFlags
Values for bit flags used in the ident_t to describe the fields.
virtual void emitInlinedDirective(CodeGenFunction &CGF, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool HasCancel=false)
Emit code for the directive that does not require outlining.
OpenMPScheduleClauseKind
OpenMP attributes for 'schedule' clause.
Definition: OpenMPKinds.h:58
Expr * getNumIterations() const
Definition: StmtOpenMP.h:953
llvm::StringRef getName() const
Return the IR name of the pointer value.
Definition: Address.h:61
llvm::StructType * getLLVMType() const
Return the "complete object" LLVM type associated with this record.
Base for LValueReferenceType and RValueReferenceType.
Definition: Type.h:2705
bool hasSameType(QualType T1, QualType T2) const
Determine whether the given types T1 and T2 are equivalent.
Definition: ASTContext.h:2296
Entity that registers the offloading constants that were emitted so far.
void SetInternalFunctionAttributes(GlobalDecl GD, llvm::Function *F, const CGFunctionInfo &FI)
Set the attributes on the LLVM function for the given decl and function info.
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee Callee, ArrayRef< llvm::Value *> Args=llvm::None) const
Emits Callee function call with arguments Args with location Loc.
StringRef getMangledName(GlobalDecl GD)
llvm::Constant * getOrCreateInternalVariable(llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace=0)
Gets (if variable with the given name already exists) or creates internal global variable with the spe...
Internal linkage, which indicates that the entity can be referred to from within the translation unit...
Definition: Linkage.h:31
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create or return a runtime function declaration with the specified type and name. ...
void EmitBlock(llvm::BasicBlock *BB, bool IsFinished=false)
EmitBlock - Emit the given block.
Definition: CGStmt.cpp:454
void addDecl(Decl *D)
Add the declaration D into this context.
Definition: DeclBase.cpp:1514
llvm::Constant * registerTargetFirstprivateCopy(CodeGenFunction &CGF, const VarDecl *VD)
Registers provided target firstprivate variable as global on the target.
void getCaptureFields(llvm::DenseMap< const VarDecl *, FieldDecl *> &Captures, FieldDecl *&ThisCapture) const
For a closure type, retrieve the mapping from captured variables and this to the non-static data memb...
Definition: DeclCXX.cpp:1417
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2079
virtual void emitTaskgroupRegion(CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen, SourceLocation Loc)
Emit a taskgroup region.
SourceManager & getSourceManager()
Definition: ASTContext.h:665
virtual llvm::Function * emitRegistrationFunction()
Creates the offloading descriptor in the event any target region was emitted in the current module an...
llvm::ConstantInt * getSize(CharUnits numChars)
Emit the given number of characters as a value of type size_t.
const Decl * CurFuncDecl
CurFuncDecl - Holds the Decl for the current outermost non-closure context.
llvm::iterator_range< specific_attr_iterator< T > > specific_attrs() const
Definition: DeclBase.h:524
virtual void adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF, const OMPExecutableDirective &D) const
Adjust some parameters for the target-based directives, like addresses of the variables captured by r...
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:14445
CharUnits toCharUnitsFromBits(int64_t BitSize) const
Convert a size in bits to a size in characters.
Lower bound for default (unordered) versions.
void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, const Expr *ThreadLimit, SourceLocation Loc) override
Emits call to void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams...
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer...
TranslationUnitDecl * getTranslationUnitDecl() const
Definition: ASTContext.h:1007
MemberExpr - [C99 6.5.2.3] Structure and Union Members.
Definition: Expr.h:2807
virtual void emitReduction(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> Privates, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, ArrayRef< const Expr *> ReductionOps, ReductionOptionsTy Options)
Emit code for reduction clause.
This represents 'nowait' clause in the '#pragma omp ...' directive.
static bool isTrivial(ASTContext &Ctx, const Expr *E)
Checks if the expression is constant or does not have non-trivial function calls. ...
llvm::PointerIntPair< llvm::Value *, 1, bool > Schedule
Represents a C++ struct/union/class.
Definition: DeclCXX.h:300
void EmitBranch(llvm::BasicBlock *Block)
EmitBranch - Emit a branch to the specified basic block from the current insert block, taking care to avoid creation of branches from dummy blocks.
Definition: CGStmt.cpp:474
llvm::Function * CreateGlobalInitOrDestructFunction(llvm::FunctionType *ty, const Twine &name, const CGFunctionInfo &FI, SourceLocation Loc=SourceLocation(), bool TLS=false)
Definition: CGDeclCXX.cpp:337
bool isVoidType() const
Definition: Type.h:6643
llvm::Value * BasePointersArray
The array of base pointer passed to the runtime library.
static QualType getBaseOriginalType(const Expr *Base)
Return original type of the base expression for array section.
Definition: Expr.cpp:4517
static llvm::Value * emitReduceFiniFunction(CodeGenModule &CGM, SourceLocation Loc, ReductionCodeGen &RCG, unsigned N)
Emits reduction finalizer function:
OpenMPOffloadingReservedDeviceIDs
llvm::Type * ConvertType(QualType T)
bool isTLSSupported() const
Whether the target supports thread-local storage.
Definition: TargetInfo.h:1173
void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C) override
Emit code for doacross ordered directive with 'depend' clause.
Privates[]
Gets the list of initial values for linear variables.
Definition: OpenMPClause.h:150
OpenMPMapClauseKind
OpenMP mapping kind for 'map' clause.
Definition: OpenMPKinds.h:91
Qualifiers getQualifiers() const
Retrieve the set of qualifiers applied to this type.
Definition: Type.h:6169
Capturing by reference.
Definition: Lambda.h:37
virtual llvm::Function * emitTeamsOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen)
Emits outlined function for the specified OpenMP teams directive D.
LValue EmitLValue(const Expr *E)
EmitLValue - Emit code to compute a designator that specifies the location of the expression...
Definition: CGExpr.cpp:1244
Address ReturnValue
ReturnValue - The temporary alloca to hold the return value.
A helper class of ConstantInitBuilder, used for building constant struct initializers.
void popTerminate()
Pops a terminate handler off the stack.
Definition: CGCleanup.h:582
CharUnits getNonVirtualSize() const
getNonVirtualSize - Get the non-virtual size (in chars) of an object, which is the size of the object...
Definition: RecordLayout.h:202
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
OpenMPRTLFunction
void addAttr(Attr *A)
Definition: DeclBase.cpp:829
unsigned getVirtualBaseIndex(const CXXRecordDecl *base) const
Return the LLVM field index corresponding to the given virtual base.
virtual void emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind Kind, bool EmitChecks=true, bool ForceSimpleCall=false)
Emit an implicit/explicit barrier for OpenMP threads.
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition: Stmt.cpp:251
void emitTargetOutlinedFunction(const OMPExecutableDirective &D, StringRef ParentName, llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID, bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) override
Emit outlined function for 'target' directive.
RValue EmitLoadOfLValue(LValue V, SourceLocation Loc)
EmitLoadOfLValue - Given an expression that represents a value lvalue, this method emits the address ...
Definition: CGExpr.cpp:1773
static llvm::Value * emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc, const OMPExecutableDirective &D, QualType KmpTaskTWithPrivatesPtrQTy, const RecordDecl *KmpTaskTWithPrivatesQTyRD, const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy, QualType SharedsPtrTy, const OMPTaskDataTy &Data, ArrayRef< PrivateDataTy > Privates, bool WithLastIter)
Emit task_dup function (for initialization of private/firstprivate/lastprivate vars and last_iter fla...
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc, const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned, bool Ordered, const DispatchRTInput &DispatchValues)
Call the appropriate runtime routine to initialize it before start of loop.
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:275
static RecordDecl * createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy, ArrayRef< PrivateDataTy > Privates)
llvm::Function * emitParallelOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) override
Emits outlined function for the specified OpenMP parallel directive D.
CGCapturedStmtInfo * CapturedStmtInfo
llvm::Value * EmitScalarConversion(llvm::Value *Src, QualType SrcTy, QualType DstTy, SourceLocation Loc)
Emit a conversion from the specified type to the specified destination type, both of which are LLVM s...
CGCXXABI & getCXXABI() const
Expr * getCombinerIn()
Get In variable of the combiner.
Definition: DeclOpenMP.h:155
const VariableArrayType * getAsVariableArrayType(QualType T) const
Definition: ASTContext.h:2444
CanQualType IntTy
Definition: ASTContext.h:1023
decl_type * getMostRecentDecl()
Returns the most recent (re)declaration of this declaration.
Definition: Redeclarable.h:225
bool isEvaluatable(const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects) const
isEvaluatable - Call EvaluateAsRValue to see if this expression can be constant folded without side-e...
void EmitAggregateCopy(LValue Dest, LValue Src, QualType EltTy, AggValueSlot::Overlap_t MayOverlap, bool isVolatile=false)
EmitAggregateCopy - Emit an aggregate copy.
Definition: CGExprAgg.cpp:1911
capture_range captures()
Definition: Stmt.h:3485
A reference to a declared variable, function, enum, etc.
Definition: Expr.h:1141
QualType getIntPtrType() const
Return a type compatible with "intptr_t" (C99 7.18.1.4), as defined by the target.
static RValue get(llvm::Value *V)
Definition: CGValue.h:85
bool isUnion() const
Definition: Decl.h:3285
virtual void emitForStaticInit(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind, const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values)
Call the appropriate runtime routine to initialize it before start of loop.
llvm::Value * emitTaskReductionInit(CodeGenFunction &CGF, SourceLocation Loc, ArrayRef< const Expr *> LHSExprs, ArrayRef< const Expr *> RHSExprs, const OMPTaskDataTy &Data) override
Emit code for initialization of task reduction clause.
bool isPointerType() const
Definition: Type.h:6384
static void EmitOMPTargetDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetDirective &S)
Emit device code for the target directive.
void EmitBranchThroughCleanup(JumpDest Dest)
EmitBranchThroughCleanup - Emit a branch from the current insert block through the normal cleanup han...
Definition: CGCleanup.cpp:1043
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:759
ParamKindTy
Kind of parameter in a function with 'declare simd' directive.
bool HasRequiresUnifiedSharedMemory
Flag for keeping track of whether a requires unified_shared_memory directive is present.
static void EmitOMPTargetParallelForDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetParallelForDirective &S)
Emit device code for the target parallel for directive.
QualType KmpDimTy
struct kmp_dim { // loop bounds info casted to kmp_int64 kmp_int64 lo; // lower kmp_int64 up; // uppe...
void emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) override
Call the appropriate runtime routine to notify that we finished all the work with current loop...
QualType getType() const
Definition: Decl.h:647
An l-value expression is a reference to an object with independent storage.
Definition: Specifiers.h:128
bool isFloatingType() const
Definition: Type.cpp:1952
static RValue getAggregate(Address addr, bool isVolatile=false)
Definition: CGValue.h:106
bool isOpenMPLoopDirective(OpenMPDirectiveKind DKind)
Checks if the specified directive is a directive with an associated loop construct.
... i32 captured_struct *; \param SharedsTy A type which contains references to the shared variables. \param Shareds Context with the list of shared variables from the \p TaskFunction. \param IfCond Not a nullptr if 'if' clause was specified, nullptr otherwise. \param Data Additional data for task generation like final list of privates etc.
void emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D, llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds, const Expr *IfCond, const OMPTaskDataTy &Data) override
LValue - This represents an lvalue references.
Definition: CGValue.h:166
virtual unsigned getDefaultLocationReserved2Flags() const
Returns additional flags that can be stored in reserved_2 field of the default location.
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:146
QualType SavedKmpTaskTQTy
Saved kmp_task_t for task directive.
virtual Address getTaskReductionItem(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *ReductionsPtr, LValue SharedLVal)
Get the address of void * type of the private copy of the reduction item specified by the SharedLVal...
Represents a C array with a specified size that is not an integer-constant-expression.
Definition: Type.h:3003
CanQualType BoolTy
Definition: ASTContext.h:1015
bool isConstant(const ASTContext &Ctx) const
Definition: Type.h:778
APSInt & getInt()
Definition: APValue.h:336
virtual void getDefaultScheduleAndChunk(CodeGenFunction &CGF, const OMPLoopDirective &S, OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const
Choose default schedule type and chunk value for the schedule clause.
const LangOptions & getLangOpts() const
llvm::CallInst * EmitNounwindRuntimeCall(llvm::FunctionCallee callee, const Twine &name="")
llvm::StringSet DeclareTargetWithDefinition
Set of declare target variables with the generated initializer.
static FieldDecl * Create(const ASTContext &C, DeclContext *DC, SourceLocation StartLoc, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, TypeSourceInfo *TInfo, Expr *BW, bool Mutable, InClassInitStyle InitStyle)
Definition: Decl.cpp:3882
Address CreatePointerBitCastOrAddrSpaceCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:163
llvm::Value * emitArrayLength(const ArrayType *arrayType, QualType &baseType, Address &addr)
emitArrayLength - Compute the length of an array, even if it's a VLA, and drill down to the base elem...
virtual llvm::Function * emitTaskOutlinedFunction(const OMPExecutableDirective &D, const VarDecl *ThreadIDVar, const VarDecl *PartIDVar, const VarDecl *TaskTVar, OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, bool Tied, unsigned &NumberOfParts)
Emits outlined function for the OpenMP task directive D.
Expr * getCombinerOut()
Get Out variable of the combiner.
Definition: DeclOpenMP.h:158
unsigned getNumParams() const
Return the number of parameters this function must have based on its FunctionType.
Definition: Decl.cpp:3138
const CGRecordLayout & getCGRecordLayout(const RecordDecl *)
getCGRecordLayout - Return record layout info for the given record decl.
bool hasInit() const
Definition: Decl.cpp:2198
No in-class initializer.
Definition: Specifiers.h:258
llvm::Value * getPointer() const
Definition: CGValue.h:322
base_class_range vbases()
Definition: DeclCXX.h:842
This class handles loading and caching of source files into memory.
void emitCancellationPointCall(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind CancelRegion) override
Emit code for 'cancellation point' construct.
A helper class of ConstantInitBuilder, used for building constant array initializers.
void EmitGlobal(GlobalDecl D)
Emit code for a single global function or var decl.
virtual void emitNumThreadsClause(CodeGenFunction &CGF, llvm::Value *NumThreads, SourceLocation Loc)
Emits call to void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads)...
bool capturesVariableArrayType() const
Determine whether this capture handles a variable-length array type.
Definition: Stmt.h:3400
bool isLocalVarDeclOrParm() const
Similar to isLocalVarDecl but also includes parameters.
Definition: Decl.h:1113
Attr - This represents one attribute.
Definition: Attr.h:43
SmallVector< const Expr *, 4 > FirstprivateInits
SourceLocation getLocation() const
Definition: DeclBase.h:429
This represents clause 'use_device_ptr' in the '#pragma omp ...' directives.
static void EmitOMPTargetTeamsDistributeSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeSimdDirective &S)
Emit device code for the target teams distribute simd directive.
void EmitNullInitialization(Address DestPtr, QualType Ty)
EmitNullInitialization - Generate code to set a value of the given type to null, If the type contains...
bool isExternallyVisible() const
Definition: Decl.h:379
llvm::Constant * getOrCreateThreadPrivateCache(const VarDecl *VD)
If the specified mangled name is not in the module, create and return threadprivate cache object...
llvm::Value * SizesArray
The array of sizes passed to the runtime library.
virtual void emitTargetNumIterationsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device, const llvm::function_ref< llvm::Value *(CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter)
Emit code that pushes the trip count of loops associated with constructs 'target teams distribute' an...
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
bool Privatize()
Privatizes local variables previously registered as private.
static void EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(CodeGenModule &CGM, StringRef ParentName, const OMPTargetTeamsDistributeParallelForSimdDirective &S)
Emit device code for the target teams distribute parallel for simd directive.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
virtual void emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc)
Emit code for 'taskwait' directive.
unsigned getLLVMFieldNo(const FieldDecl *FD) const
Return llvm::StructType element number that corresponds to the field FD.
llvm::FunctionType * GetFunctionType(const CGFunctionInfo &Info)
GetFunctionType - Get the LLVM function type for.
Definition: CGCall.cpp:1541