LLVM 19.0.0git
OMPIRBuilder.h
Go to the documentation of this file.
1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
19#include "llvm/IR/DebugLoc.h"
20#include "llvm/IR/IRBuilder.h"
23#include <forward_list>
24#include <map>
25#include <optional>
26
27namespace llvm {
28class CanonicalLoopInfo;
29struct TargetRegionEntryInfo;
30class OffloadEntriesInfoManager;
31class OpenMPIRBuilder;
32
33/// Move the instructions after an InsertPoint to the beginning of another
34/// BasicBlock.
35///
36/// The instructions after \p IP are moved to the beginning of \p New which must
37/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
38/// \p New will be added such that there is no semantic change. Otherwise, the
39/// \p IP insert block remains degenerate and it is up to the caller to insert a
40/// terminator.
41void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
42 bool CreateBranch);
43
44/// Splice the BasicBlock \p New at \p Builder's current insertion point.
45/// \p Builder's new insert location will stick to after the instruction before
46/// the insertion point (instead of moving with the instruction the InsertPoint
47/// stores internally).
48void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
49
50/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
51/// (missing the terminator).
52///
53/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
54/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
55/// is true, a branch to the new successor will be created such that
56/// semantically there is no change; otherwise the block of the insertion point
57/// remains degenerate and it is the caller's responsibility to insert a
58/// terminator. Returns the new successor block.
59BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
60 llvm::Twine Name = {});
61
62/// Split a BasicBlock at \p Builder's insertion point, even if the block is
63/// degenerate (missing the terminator). \p Builder's new insert location will
64/// stick to after the instruction before the insertion point (instead of moving
65/// with the instruction the InsertPoint stores internally).
66BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
67 llvm::Twine Name = {});
68
69/// Split a BasicBlock at \p Builder's insertion point, even if the block is
70/// degenerate (missing the terminator). \p Builder's new insert location will
71/// stick to after the instruction before the insertion point (instead of moving
72/// with the instruction the InsertPoint stores internally). This IRBuilder<> overload declares no default for \p Name.
73BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
74
75/// Like splitBB, but reuses the current block's name for the new name (presumably with \p Suffix, ".split" by default, appended; the implementation is not visible in this header).
76BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
77 llvm::Twine Suffix = ".split");
78
79/// Captures attributes that affect generating LLVM-IR using the
80/// OpenMPIRBuilder and related classes. Note that not all attributes are
81/// required for all classes or functions. In some use cases the configuration
82/// is not necessary at all, because the only functions that are called
83/// are ones that are not dependent on the configuration.
85public:
86 /// Flag to define whether to generate code for the role of the OpenMP host
87 /// (if set to false) or device (if set to true) in an offloading context. It
88 /// is set when the -fopenmp-is-target-device compiler frontend option is
89 /// specified.
90 std::optional<bool> IsTargetDevice;
91
92 /// Flag for specifying if the compilation is done for an accelerator. It is
93 /// set according to the architecture of the target triple and currently only
94 /// true when targeting AMDGPU or NVPTX. Today, these targets can only perform
95 /// the role of an OpenMP target device, so `IsTargetDevice` must also be true
96 /// if `IsGPU` is true. This restriction might be lifted if an accelerator-
97 /// like target with the ability to work as the OpenMP host is added, or if
98 /// the capabilities of the currently supported GPU architectures are
99 /// expanded.
100 std::optional<bool> IsGPU;
101
102 /// Flag for specifying if offloading is mandatory.
103 std::optional<bool> OpenMPOffloadMandatory;
104
105 /// First separator used between the initial two parts of a name.
106 std::optional<StringRef> FirstSeparator;
107 /// Separator used between all of the remaining consecutive parts of a name.
108 std::optional<StringRef> Separator;
109
113 bool HasRequiresReverseOffload,
114 bool HasRequiresUnifiedAddress,
115 bool HasRequiresUnifiedSharedMemory,
116 bool HasRequiresDynamicAllocators);
117
118 // Getter functions that assert if the required values are not present.
119 bool isTargetDevice() const {
120 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
121 return *IsTargetDevice;
122 }
123
124 bool isGPU() const {
125 assert(IsGPU.has_value() && "IsGPU is not set");
126 return *IsGPU;
127 }
128
130 assert(OpenMPOffloadMandatory.has_value() &&
131 "OpenMPOffloadMandatory is not set");
133 }
134
135 bool hasRequiresFlags() const { return RequiresFlags; }
136 bool hasRequiresReverseOffload() const;
137 bool hasRequiresUnifiedAddress() const;
139 bool hasRequiresDynamicAllocators() const;
140
141 /// Returns requires directive clauses as flags compatible with those expected
142 /// by libomptarget.
143 int64_t getRequiresFlags() const;
144
145 // Returns the FirstSeparator if set, otherwise use the default separator
146 // depending on isGPU
148 if (FirstSeparator.has_value())
149 return *FirstSeparator;
150 if (isGPU())
151 return "_";
152 return ".";
153 }
154
155 // Returns the Separator if set, otherwise use the default separator depending
156 // on isGPU
158 if (Separator.has_value())
159 return *Separator;
160 if (isGPU())
161 return "$";
162 return ".";
163 }
164
166 void setIsGPU(bool Value) { IsGPU = Value; }
170
175
176private:
177 /// Flags for specifying which requires directive clauses are present.
178 int64_t RequiresFlags;
179};
180
181/// Data structure to contain the information needed to uniquely identify
182/// a target entry.
184 std::string ParentName;
185 unsigned DeviceID;
186 unsigned FileID;
187 unsigned Line;
188 unsigned Count;
189
192 unsigned FileID, unsigned Line, unsigned Count = 0)
194 Count(Count) {}
195
198 unsigned DeviceID, unsigned FileID,
199 unsigned Line, unsigned Count);
200
202 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
203 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
204 RHS.Count);
205 }
206};
207
208/// Class that manages information about offload code regions and data
210 /// Number of entries registered so far.
211 OpenMPIRBuilder *OMPBuilder;
212 unsigned OffloadingEntriesNum = 0;
213
214public:
215 /// Base class of the entries info.
217 public:
218 /// Kind of a given entry.
219 enum OffloadingEntryInfoKinds : unsigned {
220 /// Entry is a target region.
222 /// Entry is a declare target variable.
224 /// Invalid entry info.
226 };
227
228 protected:
230 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
231 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
232 uint32_t Flags)
233 : Flags(Flags), Order(Order), Kind(Kind) {}
234 ~OffloadEntryInfo() = default;
235
236 public:
237 bool isValid() const { return Order != ~0u; }
238 unsigned getOrder() const { return Order; }
239 OffloadingEntryInfoKinds getKind() const { return Kind; }
240 uint32_t getFlags() const { return Flags; }
241 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
242 Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
244 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
245 Addr = V;
246 }
247 static bool classof(const OffloadEntryInfo *Info) { return true; }
248
249 private:
250 /// Address of the entity that has to be mapped for offloading.
251 WeakTrackingVH Addr;
252
253 /// Flags associated with the device global.
254 uint32_t Flags = 0u;
255
256 /// Order this entry was emitted.
257 unsigned Order = ~0u;
258
260 };
261
262 /// Return true if there are no entries defined.
263 bool empty() const;
264 /// Return number of entries defined so far.
265 unsigned size() const { return OffloadingEntriesNum; }
266
267 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
268
269 //
270 // Target region entries related.
271 //
272
273 /// Kind of the target registry entry.
275 /// Mark the entry as target region.
277 };
278
279 /// Target region entries info.
281 /// Address that can be used as the ID of the entry.
282 Constant *ID = nullptr;
283
284 public:
287 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
288 Constant *ID,
291 ID(ID) {
293 }
294
295 Constant *getID() const { return ID; }
296 void setID(Constant *V) {
297 assert(!ID && "ID has been set before!");
298 ID = V;
299 }
300 static bool classof(const OffloadEntryInfo *Info) {
301 return Info->getKind() == OffloadingEntryInfoTargetRegion;
302 }
303 };
304
305 /// Initialize target region entry.
306 /// This is ONLY needed for DEVICE compilation.
308 unsigned Order);
309 /// Register target region entry.
313 /// Return true if a target region entry with the provided information
314 /// exists.
316 bool IgnoreAddressId = false) const;
317
318 // Return the Name based on \a EntryInfo using the next available Count.
320 const TargetRegionEntryInfo &EntryInfo);
321
322 /// \brief Applies action \a Action on all registered entries.
323 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
324 const OffloadEntryInfoTargetRegion &)>
326 void
328
329 //
330 // Device global variable entries related.
331 //
332
333 /// Kind of the global variable entry.
335 /// Mark the entry as a to declare target.
337 /// Mark the entry as a to declare target link.
339 /// Mark the entry as a declare target enter.
341 /// Mark the entry as having no declare target entry kind.
343 /// Mark the entry as a declare target indirect global.
345 /// Mark the entry as a register requires global.
347 };
348
349 /// Kind of device clause for declare target variables
350 /// and functions
351 /// NOTE: Currently not used as a part of a variable entry
352 /// used for Flang and Clang to interface with the variable
353 /// related registration functions
355 /// The target is marked for all devices
357 /// The target is marked for non-host devices
359 /// The target is marked for host devices
361 /// The target is marked as having no clause
363 };
364
365 /// Device global variable entries info.
367 /// Size of the global variable.
368 int64_t VarSize;
370 const std::string VarName;
371
372 public:
375 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
378 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
379 int64_t VarSize,
382 const std::string &VarName)
384 VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
386 }
387
388 int64_t getVarSize() const { return VarSize; }
389 StringRef getVarName() const { return VarName; }
390 void setVarSize(int64_t Size) { VarSize = Size; }
391 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
392 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
393 static bool classof(const OffloadEntryInfo *Info) {
394 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
395 }
396 };
397
398 /// Initialize device global variable entry.
399 /// This is ONLY used for DEVICE compilation.
402 unsigned Order);
403
404 /// Register device global variable entry.
406 int64_t VarSize,
409 /// Checks if the variable with the given name has been registered already.
411 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
412 }
413 /// Applies action \a Action on all registered entries.
414 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
418
419private:
420 /// Return the count of entries at a particular source location.
421 unsigned
422 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
423
424 /// Update the count of entries at a particular source location.
425 void
426 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
427
429 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
430 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
431 EntryInfo.FileID, EntryInfo.Line, 0);
432 }
433
434 // Count of entries at a location.
435 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
436
437 // Storage for target region entries kind.
438 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
439 OffloadEntriesTargetRegionTy;
440 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
441 /// Storage for device global variable entries kind. The storage is to be
442 /// indexed by mangled name.
444 OffloadEntriesDeviceGlobalVarTy;
445 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
446};
447
448/// An interface to create LLVM-IR for OpenMP directives.
449///
450/// Each OpenMP directive has a corresponding public generator method.
452public:
453 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
454 /// not have an effect on \p M (see initialize)
456 : M(M), Builder(M.getContext()), OffloadInfoManager(this),
457 T(Triple(M.getTargetTriple())) {}
459
460 /// Initialize the internal state, this will put structure types and
461 /// potentially other helpers into the underlying module. Must be called
462 /// before any other method and only once! This internal state includes types
463 /// used in the OpenMPIRBuilder generated from OMPKinds.def.
464 void initialize();
465
467
468 /// Finalize the underlying module, e.g., by outlining regions.
469 /// \param Fn The function to be finalized. If not used,
470 /// all functions are finalized.
471 void finalize(Function *Fn = nullptr);
472
473 /// Add attributes known for \p FnID to \p Fn.
475
476 /// Type used throughout for insertion points.
478
479 /// Create a name using the platform specific separators.
480 /// \param Parts parts of the final name that needs separation
481 /// The created name has a first separator between the first and second part
482 /// and a second separator between all other parts.
483 /// E.g. with FirstSeparator "$" and Separator "." and
484 /// parts: "p1", "p2", "p3", "p4"
485 /// The resulting name is "p1$p2.p3.p4"
486 /// The separators are retrieved from the OpenMPIRBuilderConfig.
487 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
488
489 /// Callback type for variable finalization (think destructors).
490 ///
491 /// \param CodeGenIP is the insertion point at which the finalization code
492 /// should be placed.
493 ///
494 /// A finalize callback knows about all objects that need finalization, e.g.
495 /// destruction, when the scope of the currently generated construct is left
496 /// at the time, and location, the callback is invoked.
497 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
498
500 /// The finalization callback provided by the last in-flight invocation of
501 /// createXXXX for the directive of kind DK.
503
504 /// The directive kind of the innermost directive that has an associated
505 /// region which might require finalization when it is left.
506 omp::Directive DK;
507
508 /// Flag to indicate if the directive is cancellable.
510 };
511
512 /// Push a finalization callback on the finalization stack.
513 ///
514 /// NOTE: Temporary solution until Clang CG is gone.
516 FinalizationStack.push_back(FI);
517 }
518
519 /// Pop the last finalization callback from the finalization stack.
520 ///
521 /// NOTE: Temporary solution until Clang CG is gone.
523
524 /// Callback type for body (=inner region) code generation
525 ///
526 /// The callback takes code locations as arguments, each describing a
527 /// location where additional instructions can be inserted.
528 ///
529 /// The CodeGenIP may be in the middle of a basic block or point to the end of
530 /// it. The basic block may have a terminator or be degenerate. The callback
531 /// function may just insert instructions at that position, but also split the
532 /// block (without the Before argument of BasicBlock::splitBasicBlock such
533 /// that the identity of the split predecessor block is preserved) and insert
534 /// additional control flow, including branches that do not lead back to what
535 /// follows the CodeGenIP. Note that since the callback is allowed to split
536 /// the block, callers must assume that InsertPoints to positions in the
537 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
538 /// such InsertPoints need to be preserved, it can split the block itself
539 /// before calling the callback.
540 ///
541 /// AllocaIP and CodeGenIP must not point to the same position.
542 ///
543 /// \param AllocaIP is the insertion point at which new alloca instructions
544 /// should be placed. The BasicBlock it is pointing to must
545 /// not be split.
546 /// \param CodeGenIP is the insertion point at which the body code should be
547 /// placed.
549 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
550
551 // This is created primarily for the sections construct, as llvm::function_ref
552 // (BodyGenCallbackTy) is not storable (as described in the comments of the
553 // function_ref class - function_ref contains a non-owning reference
554 // to the callable).
556 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
557
558 /// Callback type for loop body code generation.
559 ///
560 /// \param CodeGenIP is the insertion point where the loop's body code must be
561 /// placed. This will be a dedicated BasicBlock with a
562 /// conditional branch from the loop condition check and
563 /// terminated with an unconditional branch to the loop
564 /// latch.
565 /// \param IndVar is the induction variable usable at the insertion point.
567 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
568
569 /// Callback type for variable privatization (think copy & default
570 /// constructor).
571 ///
572 /// \param AllocaIP is the insertion point at which new alloca instructions
573 /// should be placed.
574 /// \param CodeGenIP is the insertion point at which the privatization code
575 /// should be placed.
576 /// \param Original The value being copied/created, should not be used in the
577 /// generated IR.
578 /// \param Inner The equivalent of \p Original that should be used in the
579 /// generated IR; this is equal to \p Original if the value is
580 /// a pointer and can thus be passed directly, otherwise it is
581 /// an equivalent but different value.
582 /// \param ReplVal The replacement value, thus a copy or new created version
583 /// of \p Inner.
584 ///
585 /// \returns The new insertion point where code generation continues and
586 /// \p ReplVal the replacement value.
588 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
589 Value &Inner, Value *&ReplVal)>;
590
591 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
592 /// (filename, line, column, ...).
595 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
598 : IP(IP), DL(DL) {}
601 };
602
603 /// Emitter methods for OpenMP directives.
604 ///
605 ///{
606
607 /// Generator for '#omp barrier'
608 ///
609 /// \param Loc The location where the barrier directive was encountered.
610 /// \param DK The kind of directive that caused the barrier.
611 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
612 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
613 /// should be checked and acted upon.
614 ///
615 /// \returns The insertion point after the barrier.
616 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
617 bool ForceSimpleCall = false,
618 bool CheckCancelFlag = true);
619
620 /// Generator for '#omp cancel'
621 ///
622 /// \param Loc The location where the directive was encountered.
623 /// \param IfCondition The evaluated 'if' clause expression, if any.
624 /// \param CanceledDirective The kind of directive that is canceled.
625 ///
626 /// \returns The insertion point after the cancel.
627 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
628 omp::Directive CanceledDirective);
629
630 /// Generator for '#omp parallel'
631 ///
632 /// \param Loc The insert and source location description.
633 /// \param AllocaIP The insertion points to be used for alloca instructions.
634 /// \param BodyGenCB Callback that will generate the region code.
635 /// \param PrivCB Callback to copy a given variable (think copy constructor).
636 /// \param FiniCB Callback to finalize variable copies.
637 /// \param IfCondition The evaluated 'if' clause expression, if any.
638 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
639 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
640 /// \param IsCancellable Flag to indicate a cancellable parallel region.
641 ///
642 /// \returns The insertion position *after* the parallel.
645 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
646 FinalizeCallbackTy FiniCB, Value *IfCondition,
647 Value *NumThreads, omp::ProcBindKind ProcBind,
648 bool IsCancellable);
649
650 /// Generator for the control flow structure of an OpenMP canonical loop.
651 ///
652 /// This generator operates on the logical iteration space of the loop, i.e.
653 /// the caller only has to provide a loop trip count of the loop as defined by
654 /// base language semantics. The trip count is interpreted as an unsigned
655 /// integer. The induction variable passed to \p BodyGenCB will be of the same
656 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
657 /// convert the logical iteration variable to the loop counter variable in the
658 /// loop body.
659 ///
660 /// \param Loc The insert and source location description. The insert
661 /// location can be between two instructions or the end of a
662 /// degenerate block (e.g. a BB under construction).
663 /// \param BodyGenCB Callback that will generate the loop body code.
664 /// \param TripCount Number of iterations the loop body is executed.
665 /// \param Name Base name used to derive BB and instruction names.
666 ///
667 /// \returns An object representing the created control flow structure which
668 /// can be used for loop-associated directives.
670 LoopBodyGenCallbackTy BodyGenCB,
671 Value *TripCount,
672 const Twine &Name = "loop");
673
674 /// Generator for the control flow structure of an OpenMP canonical loop.
675 ///
676 /// Instead of a logical iteration space, this allows specifying user-defined
677 /// loop counter values using increment, upper- and lower bounds. To
678 /// disambiguate the terminology when counting downwards, instead of lower
679 /// bounds we use \p Start for the loop counter value in the first body
680 /// iteration.
681 ///
682 /// Consider the following limitations:
683 ///
684 /// * A loop counter space over all integer values of its bit-width cannot be
685 /// represented (e.g. using uint8_t, its loop trip count of 256 cannot be
686 /// stored into an 8 bit integer):
687 ///
688 /// DO I = 0, 255, 1
689 ///
690 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
691 /// effectively counting downwards:
692 ///
693 /// for (uint8_t i = 100u; i > 0; i += 127u)
694 ///
695 ///
696 /// TODO: May need to add additional parameters to represent:
697 ///
698 /// * Allow representing downcounting with unsigned integers.
699 ///
700 /// * Sign of the step and the comparison operator might disagree:
701 ///
702 /// for (int i = 0; i < 42; i -= 1u)
703 ///
704 ///
705 /// \param Loc The insert and source location description.
706 /// \param BodyGenCB Callback that will generate the loop body code.
707 /// \param Start Value of the loop counter for the first iteration.
708 /// \param Stop Loop counter values past this will stop the loop.
709 /// \param Step Loop counter increment after each iteration; negative
710 /// means counting down.
711 /// \param IsSigned Whether Start, Stop and Step are signed integers.
712 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
713 /// counter.
714 /// \param ComputeIP Insertion point for instructions computing the trip
715 /// count. Can be used to ensure the trip count is available
716 /// at the outermost loop of a loop nest. If not set,
717 /// defaults to the preheader of the generated loop.
718 /// \param Name Base name used to derive BB and instruction names.
719 ///
720 /// \returns An object representing the created control flow structure which
721 /// can be used for loop-associated directives.
723 LoopBodyGenCallbackTy BodyGenCB,
724 Value *Start, Value *Stop, Value *Step,
725 bool IsSigned, bool InclusiveStop,
726 InsertPointTy ComputeIP = {},
727 const Twine &Name = "loop");
728
729 /// Collapse a loop nest into a single loop.
730 ///
731 /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
732 /// that has the same number of innermost loop iterations as the original loop
733 /// nest. The induction variables of the input loops are derived from the
734 /// collapsed loop's induction variable. This is intended to be used to
735 /// implement OpenMP's collapse clause. Before applying a directive,
736 /// collapseLoops normalizes a loop nest to contain only a single loop and the
737 /// directive's implementation does not need to handle multiple loops itself.
738 /// This does not remove the need to handle all loop nest handling by
739 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
740 /// modifier of the worksharing-loop directive.
741 ///
742 /// Example:
743 /// \code
744 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
745 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
746 /// body(i, j);
747 /// \endcode
748 ///
749 /// After collapsing with Loops={i,j}, the loop is changed to
750 /// \code
751 /// for (int ij = 0; ij < 63; ++ij) {
752 /// int i = ij / 9;
753 /// int j = ij % 9;
754 /// body(i, j);
755 /// }
756 /// \endcode
757 ///
758 /// In the current implementation, the following limitations apply:
759 ///
760 /// * All input loops have an induction variable of the same type.
761 ///
762 /// * The collapsed loop will have the same trip count integer type as the
763 /// input loops. Therefore it is possible that the collapsed loop cannot
764 /// represent all iterations of the input loops. For instance, assuming a
765 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
766 /// theoretical trip count of the collapsed loop would be 2^32 iterations,
767 /// which cannot be represented in a 32-bit integer. Behavior is undefined
768 /// in this case.
769 ///
770 /// * The trip counts of every input loop must be available at \p ComputeIP.
771 /// Non-rectangular loops are not yet supported.
772 ///
773 /// * At each nest level, code between a surrounding loop and its nested loop
774 /// is hoisted into the loop body, and such code will be executed more
775 /// often than before collapsing (or not at all if any inner loop iteration
776 /// has a trip count of 0). This is permitted by the OpenMP specification.
777 ///
778 /// \param DL Debug location for instructions added for collapsing,
779 /// such as instructions to compute/derive the input loop's
780 /// induction variables.
781 /// \param Loops Loops in the loop nest to collapse. Loops are specified
782 /// from outermost-to-innermost and every control flow of a
783 /// loop's body must pass through its directly nested loop.
784 /// \param ComputeIP Where additional instructions that compute the collapsed
785 /// trip count are inserted. If not set, defaults to before the
786 /// generated loop.
787 ///
788 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
791 InsertPointTy ComputeIP);
792
793 /// Get the default alignment value for given target
794 ///
795 /// \param TargetTriple Target triple
796 /// \param Features StringMap which describes extra CPU features
797 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
798 const StringMap<bool> &Features);
799
800 /// Retrieve (or create if non-existent) the address of a declare
801 /// target variable, used in conjunction with registerTargetGlobalVariable
802 /// to create declare target global variables.
803 ///
804 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
805 /// clause used in conjunction with the variable being registered (link,
806 /// to, enter).
807 /// \param DeviceClause - enumerator corresponding to the OpenMP device
808 /// clause used in conjunction with the variable being registered (nohost,
809 /// host, any)
810 /// \param IsDeclaration - boolean stating if the variable being registered
811 /// is a declaration-only and not a definition
812 /// \param IsExternallyVisible - boolean stating if the variable is externally
813 /// visible
814 /// \param EntryInfo - Unique entry information for the value generated
815 /// using getTargetEntryUniqueInfo, used to name generated pointer references
816 /// to the declare target variable
817 /// \param MangledName - the mangled name of the variable being registered
818 /// \param GeneratedRefs - references generated by invocations of
819 /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
820 /// these are required by Clang for book keeping.
821 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
822 /// \param TargetTriple - The OpenMP device target triple we are compiling
823 /// for
824 /// \param LlvmPtrTy - The type of the variable we are generating or
825 /// retrieving an address for
826 /// \param GlobalInitializer - a lambda function which creates a constant
827 /// used for initializing a pointer reference to the variable in certain
828 /// cases. If a nullptr is passed, it will default to utilising the original
829 /// variable to initialize the pointer reference.
830 /// \param VariableLinkage - a lambda function which returns the variables
831 /// linkage type, if unspecified and a nullptr is given, it will instead
832 /// utilise the linkage stored on the existing global variable in the
833 /// LLVMModule.
837 bool IsDeclaration, bool IsExternallyVisible,
838 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
839 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
840 std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
841 std::function<Constant *()> GlobalInitializer,
842 std::function<GlobalValue::LinkageTypes()> VariableLinkage);
843
844 /// Registers a target variable for device or host.
845 ///
846 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
847 /// clause used in conjunction with the variable being registered (link,
848 /// to, enter).
849 /// \param DeviceClause - enumerator corresponding to the OpenMP device
850 /// clause used in conjunction with the variable being registered (nohost,
851 /// host, any)
852 /// \param IsDeclaration - boolean stating if the variable being registered
853 /// is a declaration-only and not a definition
854 /// \param IsExternallyVisible - boolean stating if the variable is externally
855 /// visible
856 /// \param EntryInfo - Unique entry information for the value generated
857 /// using getTargetEntryUniqueInfo, used to name generated pointer references
858 /// to the declare target variable
859 /// \param MangledName - the mangled name of the variable being registered
860 /// \param GeneratedRefs - references generated by invocations of
861 /// registerTargetGlobalVariable these are required by Clang for book
862 /// keeping.
863 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
864 /// \param TargetTriple - The OpenMP device target triple we are compiling
865 /// for
866 /// \param GlobalInitializer - a lambda function which creates a constant
867 /// used for initializing a pointer reference to the variable in certain
868 /// cases. If a nullptr is passed, it will default to utilising the original
869 /// variable to initialize the pointer reference.
870 /// \param VariableLinkage - a lambda function which returns the variable's
871 /// linkage type, if unspecified and a nullptr is given, it will instead
872 /// utilise the linkage stored on the existing global variable in the
873 /// LLVMModule.
874 /// \param LlvmPtrTy - The type of the variable we are generating or
875 /// retrieving an address for
876 /// \param Addr - the original llvm value (addr) of the variable to be
877 /// registered
881 bool IsDeclaration, bool IsExternallyVisible,
882 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
883 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
884 std::vector<Triple> TargetTriple,
885 std::function<Constant *()> GlobalInitializer,
886 std::function<GlobalValue::LinkageTypes()> VariableLinkage,
887 Type *LlvmPtrTy, Constant *Addr);
888
889 /// Get the offset of the OMP_MAP_MEMBER_OF field.
890 unsigned getFlagMemberOffset();
891
892 /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on
893 /// the position given.
894 /// \param Position - A value indicating the position of the parent
895 /// of the member in the kernel argument structure, often retrieved
896 /// by the parents position in the combined information vectors used
897 /// to generate the structure itself. Multiple children (members of)
898 /// with the same parent will use the same returned member flag.
900
901 /// Given an initial flag set, this function modifies it to contain
902 /// the passed in MemberOfFlag generated from the getMemberOfFlag
903 /// function. The results are dependent on the existing flag bits
904 /// set in the original flag set.
905 /// \param Flags - The original set of flags to be modified with the
906 /// passed in MemberOfFlag.
907 /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted
908 /// slightly based on the getMemberOfFlag which adjusts the flag bits
909 /// based on the member's position in its parent.
911 omp::OpenMPOffloadMappingFlags MemberOfFlag);
912
913private:
914 /// Modifies the canonical loop to be a statically-scheduled workshare loop
915 /// which is executed on the device
916 ///
917 /// This takes a \p CLI representing a canonical loop, such as the one
918 /// created by \see createCanonicalLoop and emits additional instructions to
919 /// turn it into a workshare loop. In particular, it calls to an OpenMP
920 /// runtime function in the preheader to call OpenMP device rtl function
921 /// which handles worksharing of loop body iterations.
922 ///
923 /// \param DL Debug location for instructions added for the
924 /// workshare-loop construct itself.
925 /// \param CLI A descriptor of the canonical loop to workshare.
926 /// \param AllocaIP An insertion point for Alloca instructions usable in the
927 /// preheader of the loop.
928 /// \param LoopType Information about type of loop worksharing.
929 /// It corresponds to type of loop workshare OpenMP pragma.
930 ///
931 /// \returns Point where to insert code after the workshare construct.
932 InsertPointTy applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
933 InsertPointTy AllocaIP,
934 omp::WorksharingLoopType LoopType);
935
936 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
937 ///
938 /// This takes a \p CLI representing a canonical loop, such as the one
939 /// created by \p createCanonicalLoop and emits additional instructions to
940 /// turn it into a workshare loop. In particular, it calls to an OpenMP
941 /// runtime function in the preheader to obtain the loop bounds to be used in
942 /// the current thread, updates the relevant instructions in the canonical
943 /// loop and calls to an OpenMP runtime finalization function after the loop.
944 ///
945 /// \param DL Debug location for instructions added for the
946 /// workshare-loop construct itself.
947 /// \param CLI A descriptor of the canonical loop to workshare.
948 /// \param AllocaIP An insertion point for Alloca instructions usable in the
949 /// preheader of the loop.
950 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
951 /// the loop.
952 ///
953 /// \returns Point where to insert code after the workshare construct.
954 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
955 InsertPointTy AllocaIP,
956 bool NeedsBarrier);
957
958 /// Modifies the canonical loop to be a statically-scheduled workshare loop
959 /// with a user-specified chunk size.
960 ///
961 /// \param DL Debug location for instructions added for the
962 /// workshare-loop construct itself.
963 /// \param CLI A descriptor of the canonical loop to workshare.
964 /// \param AllocaIP An insertion point for Alloca instructions usable in
965 /// the preheader of the loop.
966 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
967 /// loop.
968 /// \param ChunkSize The user-specified chunk size.
969 ///
970 /// \returns Point where to insert code after the workshare construct.
971 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
973 InsertPointTy AllocaIP,
974 bool NeedsBarrier,
975 Value *ChunkSize);
976
977 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
978 ///
979 /// This takes a \p CLI representing a canonical loop, such as the one
980 /// created by \p createCanonicalLoop and emits additional instructions to
981 /// turn it into a workshare loop. In particular, it calls to an OpenMP
982 /// runtime function in the preheader to obtain, and then in each iteration
983 /// to update the loop counter.
984 ///
985 /// \param DL Debug location for instructions added for the
986 /// workshare-loop construct itself.
987 /// \param CLI A descriptor of the canonical loop to workshare.
988 /// \param AllocaIP An insertion point for Alloca instructions usable in the
989 /// preheader of the loop.
990 /// \param SchedType Type of scheduling to be passed to the init function.
991 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
992 /// the loop.
993 /// \param Chunk The size of loop chunk considered as a unit when
994 /// scheduling. If \p nullptr, defaults to 1.
995 ///
996 /// \returns Point where to insert code after the workshare construct.
997 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
998 InsertPointTy AllocaIP,
999 omp::OMPScheduleType SchedType,
1000 bool NeedsBarrier,
1001 Value *Chunk = nullptr);
1002
1003 /// Create alternative version of the loop to support if clause
1004 ///
1005 /// OpenMP if clause can require to generate second loop. This loop
1006 /// will be executed when if clause condition is not met. createIfVersion
1007 /// adds branch instruction to the copied loop if \p IfCond is not met.
1008 ///
1009 /// \param Loop Original loop which should be versioned.
1010 /// \param IfCond Value which corresponds to if clause condition
1011 /// \param VMap Value to value map to define relation between
1012 /// original and copied loop values and loop blocks.
1013 /// \param NamePrefix Optional name prefix for if.then if.else blocks.
1014 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
1015 ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
1016
1017public:
1018 /// Modifies the canonical loop to be a workshare loop.
1019 ///
1020 /// This takes a \p CLI representing a canonical loop, such as the one
1021 /// created by \p createCanonicalLoop and emits additional instructions to
1022 /// turn it into a workshare loop. In particular, it calls to an OpenMP
1023 /// runtime function in the preheader to obtain the loop bounds to be used in
1024 /// the current thread, updates the relevant instructions in the canonical
1025 /// loop and calls to an OpenMP runtime finalization function after the loop.
1026 ///
1027 /// The concrete transformation is done by applyStaticWorkshareLoop,
1028 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
1029 /// on the value of \p SchedKind and \p ChunkSize.
1030 ///
1031 /// \param DL Debug location for instructions added for the
1032 /// workshare-loop construct itself.
1033 /// \param CLI A descriptor of the canonical loop to workshare.
1034 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1035 /// preheader of the loop.
1036 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
1037 /// the loop.
1038 /// \param SchedKind Scheduling algorithm to use.
1039 /// \param ChunkSize The chunk size for the inner loop.
1040 /// \param HasSimdModifier Whether the simd modifier is present in the
1041 /// schedule clause.
1042 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
1043 /// the schedule clause.
1044 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
1045 /// present in the schedule clause.
1046 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
1047 /// present.
1048 /// \param LoopType Information about type of loop worksharing.
1049 /// It corresponds to type of loop workshare OpenMP pragma.
1050 ///
1051 /// \returns Point where to insert code after the workshare construct.
1054 bool NeedsBarrier,
1055 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
1056 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1057 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1058 bool HasOrderedClause = false,
1059 omp::WorksharingLoopType LoopType =
1061
1062 /// Tile a loop nest.
1063 ///
1064 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1065 /// \p Loops must be perfectly nested, from outermost to innermost loop
1066 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1067 /// of every loop and every tile size must be usable in the outermost
1068 /// loop's preheader. This implies that the loop nest is rectangular.
1069 ///
1070 /// Example:
1071 /// \code
1072 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
1073 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
1074 /// body(i, j);
1075 /// \endcode
1076 ///
1077 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1078 /// \code
1079 /// for (int i1 = 0; i1 < 3; ++i1)
1080 /// for (int j1 = 0; j1 < 2; ++j1)
1081 /// for (int i2 = 0; i2 < 5; ++i2)
1082 /// for (int j2 = 0; j2 < 7; ++j2)
1083 /// body(i1*5+i2, j1*7+j2);
1084 /// \endcode
1085 ///
1086 /// The returned vector are the loops {i1,j1,i2,j2}. The loops i1 and j1 are
1087 /// referred to the floor, and the loops i2 and j2 are the tiles. Tiling also
1088 /// handles non-constant trip counts, non-constant tile sizes and trip counts
1089 /// that are not multiples of the tile size. In the latter case the tile loop
1090 /// of the last floor-loop iteration will have fewer iterations than specified
1091 /// as its tile size.
1092 ///
1093 ///
1094 /// @param DL Debug location for instructions added by tiling, for
1095 /// instance the floor- and tile trip count computation.
1096 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
1097 /// invalidated by this method, i.e. should not be used after
1098 /// tiling.
1099 /// @param TileSizes For each loop in \p Loops, the tile size for that
1100 /// dimension.
1101 ///
1102 /// \returns A list of generated loops. Contains twice as many loops as the
1103 /// input loop nest; the first half are the floor loops and the
1104 /// second half are the tile loops.
1105 std::vector<CanonicalLoopInfo *>
1107 ArrayRef<Value *> TileSizes);
1108
1109 /// Fully unroll a loop.
1110 ///
1111 /// Instead of unrolling the loop immediately (and duplicating its body
1112 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
1113 /// metadata.
1114 ///
1115 /// \param DL Debug location for instructions added by unrolling.
1116 /// \param Loop The loop to unroll. The loop will be invalidated.
1118
1119 /// Fully or partially unroll a loop. How the loop is unrolled is determined
1120 /// using LLVM's LoopUnrollPass.
1121 ///
1122 /// \param DL Debug location for instructions added by unrolling.
1123 /// \param Loop The loop to unroll. The loop will be invalidated.
1125
1126 /// Partially unroll a loop.
1127 ///
1128 /// The CanonicalLoopInfo of the unrolled loop for use with chained
1129 /// loop-associated directive can be requested using \p UnrolledCLI. Not
1130 /// needing the CanonicalLoopInfo allows more efficient code generation by
1131 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1132 /// A loop-associated directive applied to the unrolled loop needs to know the
1133 /// new trip count which means that if using a heuristically determined unroll
1134 /// factor (\p Factor == 0), that factor must be computed immediately. We are
1135 /// using the same logic as the LoopUnrollPass to derive the unroll factor,
1136 /// but which assumes that some canonicalization has taken place (e.g.
1137 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1138 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1139 ///
1140 /// \param DL Debug location for instructions added by unrolling.
1141 /// \param Loop The loop to unroll. The loop will be invalidated.
1142 /// \param Factor The factor to unroll the loop by. A factor of 0
1143 /// indicates that a heuristic should be used to determine
1144 /// the unroll-factor.
1145 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1146 /// partially unrolled loop. Otherwise, uses loop metadata
1147 /// to defer unrolling to the LoopUnrollPass.
1148 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
1149 CanonicalLoopInfo **UnrolledCLI);
1150
1151 /// Add metadata to simd-ize a loop. If \p IfCond is not nullptr, the loop
1152 /// is cloned. The metadata which prevents vectorization is added to
1153 /// the cloned loop. The cloned loop is executed when \p IfCond is evaluated
1154 /// to false.
1155 ///
1156 /// \param Loop The loop to simd-ize.
1157 /// \param AlignedVars The map which contains pairs of the pointer
1158 /// and its corresponding alignment.
1159 /// \param IfCond The value which corresponds to the if clause
1160 /// condition.
1161 /// \param Order The enum to map order clause.
1162 /// \param Simdlen The Simdlen length to apply to the simd loop.
1163 /// \param Safelen The Safelen length to apply to the simd loop.
1165 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
1166 omp::OrderKind Order, ConstantInt *Simdlen,
1167 ConstantInt *Safelen);
1168
1169 /// Generator for '#omp flush'
1170 ///
1171 /// \param Loc The location where the flush directive was encountered
1172 void createFlush(const LocationDescription &Loc);
1173
1174 /// Generator for '#omp taskwait'
1175 ///
1176 /// \param Loc The location where the taskwait directive was encountered.
1177 void createTaskwait(const LocationDescription &Loc);
1178
1179 /// Generator for '#omp taskyield'
1180 ///
1181 /// \param Loc The location where the taskyield directive was encountered.
1182 void createTaskyield(const LocationDescription &Loc);
1183
1184 /// A struct to pack the relevant information for an OpenMP depend clause.
1185 struct DependData {
1189 explicit DependData() = default;
1191 Value *DepVal)
1193 };
1194
1195 /// Generator for `#omp task`
1196 ///
1197 /// \param Loc The location where the task construct was encountered.
1198 /// \param AllocaIP The insertion point to be used for alloca instructions.
1199 /// \param BodyGenCB Callback that will generate the region code.
1200 /// \param Tied True if the task is tied, false if the task is untied.
1201 /// \param Final i1 value which is `true` if the task is final, `false` if the
1202 /// task is not final.
1203 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1204 /// task is generated, and the encountering thread must
1205 /// suspend the current task region, for which execution
1206 /// cannot be resumed until execution of the structured
1207 /// block that is associated with the generated task is
1208 /// completed.
1209 InsertPointTy createTask(const LocationDescription &Loc,
1210 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
1211 bool Tied = true, Value *Final = nullptr,
1212 Value *IfCondition = nullptr,
1213 SmallVector<DependData> Dependencies = {});
1214
1215 /// Generator for the taskgroup construct
1216 ///
1217 /// \param Loc The location where the taskgroup construct was encountered.
1218 /// \param AllocaIP The insertion point to be used for alloca instructions.
1219 /// \param BodyGenCB Callback that will generate the region code.
1220 InsertPointTy createTaskgroup(const LocationDescription &Loc,
1221 InsertPointTy AllocaIP,
1222 BodyGenCallbackTy BodyGenCB);
1223
1225 std::function<std::tuple<std::string, uint64_t>()>;
1226
1227 /// Creates a unique info for a target entry when provided a filename and
1228 /// line number.
1229 ///
1230 /// \param CallBack A callback function which should return filename the entry
1231 /// resides in as well as the line number for the target entry
1232 /// \param ParentName The name of the parent the target entry resides in, if
1233 /// any.
1236 StringRef ParentName = "");
1237
1238 /// Functions used to generate reductions. Such functions take two Values
1239 /// representing LHS and RHS of the reduction, respectively, and a reference
1240 /// to the value that is updated to refer to the reduction result.
1243
1244 /// Functions used to generate atomic reductions. Such functions take two
1245 /// Values representing pointers to LHS and RHS of the reduction, as well as
1246 /// the element type of these pointers. They are expected to atomically
1247 /// update the LHS to the reduced value.
1250
1251 /// Information about an OpenMP reduction.
1259
1260 /// Reduction element type, must match pointee type of variable.
1262
1263 /// Reduction variable of pointer type.
1265
1266 /// Thread-private partial reduction variable.
1268
1269 /// Callback for generating the reduction body. The IR produced by this will
1270 /// be used to combine two values in a thread-safe context, e.g., under
1271 /// lock or within the same thread, and therefore need not be atomic.
1273
1274 /// Callback for generating the atomic reduction body, may be null. The IR
1275 /// produced by this will be used to atomically combine two values during
1276 /// reduction. If null, the implementation will use the non-atomic version
1277 /// along with the appropriate synchronization mechanisms.
1279 };
1280
1281 // TODO: provide atomic and non-atomic reduction generators for reduction
1282 // operators defined by the OpenMP specification.
1283
1284 /// Generator for '#omp reduction'.
1285 ///
1286 /// Emits the IR instructing the runtime to perform the specific kind of
1287 /// reductions. Expects reduction variables to have been privatized and
1288 /// initialized to reduction-neutral values separately. Emits the calls to
1289 /// runtime functions as well as the reduction function and the basic blocks
1290 /// performing the reduction atomically and non-atomically.
1291 ///
1292 /// The code emitted for the following:
1293 ///
1294 /// \code
1295 /// type var_1;
1296 /// type var_2;
1297 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1298 /// /* body */;
1299 /// \endcode
1300 ///
1301 /// corresponds to the following sketch.
1302 ///
1303 /// \code
1304 /// void _outlined_par() {
1305 /// // N is the number of different reductions.
1306 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1307 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1308 /// _omp_reduction_func,
1309 /// _gomp_critical_user.reduction.var)) {
1310 /// case 1: {
1311 /// var_1 = var_1 <reduction-op> privatized_var_1;
1312 /// var_2 = var_2 <reduction-op> privatized_var_2;
1313 /// // ...
1314 /// __kmpc_end_reduce(...);
1315 /// break;
1316 /// }
1317 /// case 2: {
1318 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
1319 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
1320 /// // ...
1321 /// break;
1322 /// }
1323 /// default: break;
1324 /// }
1325 /// }
1326 ///
1327 /// void _omp_reduction_func(void **lhs, void **rhs) {
1328 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1329 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1330 /// // ...
1331 /// }
1332 /// \endcode
1333 ///
1334 /// \param Loc The location where the reduction was
1335 /// encountered. Must be within the associated
1336 /// directive and after the last local access to the
1337 /// reduction variables.
1338 /// \param AllocaIP An insertion point suitable for allocas usable
1339 /// in reductions.
1340 /// \param ReductionInfos A list of info on each reduction variable.
1341 /// \param IsNoWait A flag set if the reduction is marked as nowait.
1342 /// \param IsByRef A flag set if the reduction is using reference
1343 /// or direct value.
1345 InsertPointTy AllocaIP,
1346 ArrayRef<ReductionInfo> ReductionInfos,
1347 ArrayRef<bool> IsByRef, bool IsNoWait = false);
1348
1349 ///}
1350
1351 /// Return the insertion point used by the underlying IRBuilder.
1353
1354 /// Update the internal location to \p Loc.
1356 Builder.restoreIP(Loc.IP);
1358 return Loc.IP.getBlock() != nullptr;
1359 }
1360
1361 /// Return the function declaration for the runtime function with \p FnID.
1364
1366
1367 /// Return the (LLVM-IR) string describing the source location \p LocStr.
1368 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1369
1370 /// Return the (LLVM-IR) string describing the default source location.
1372
1373 /// Return the (LLVM-IR) string describing the source location identified by
1374 /// the arguments.
1375 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
1376 unsigned Line, unsigned Column,
1377 uint32_t &SrcLocStrSize);
1378
1379 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
1380 /// fallback if \p DL does not specify the function name.
1382 Function *F = nullptr);
1383
1384 /// Return the (LLVM-IR) string describing the source location \p Loc.
1385 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
1386 uint32_t &SrcLocStrSize);
1387
1388 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
1389 /// TODO: Create an enum class for the Reserve2Flags
1390 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
1391 omp::IdentFlag Flags = omp::IdentFlag(0),
1392 unsigned Reserve2Flags = 0);
1393
1394 /// Create a hidden global flag \p Name in the module with initial value \p
1395 /// Value.
1397
1398 /// Generate control flow and cleanup for cancellation.
1399 ///
1400 /// \param CancelFlag Flag indicating if the cancellation is performed.
1401 /// \param CanceledDirective The kind of directive that is canceled.
1402 /// \param ExitCB Extra code to be generated in the exit block.
1403 void emitCancelationCheckImpl(Value *CancelFlag,
1404 omp::Directive CanceledDirective,
1405 FinalizeCallbackTy ExitCB = {});
1406
1407 /// Generate a target region entry call.
1408 ///
1409 /// \param Loc The location at which the request originated and is fulfilled.
1410 /// \param AllocaIP The insertion point to be used for alloca instructions.
1411 /// \param Return Return value of the created function returned by reference.
1412 /// \param DeviceID Identifier for the device via the 'device' clause.
1413 /// \param NumTeams Number of teams for the region via the 'num_teams' clause
1414 /// or 0 if unspecified and -1 if there is no 'teams' clause.
1415 /// \param NumThreads Number of threads via the 'thread_limit' clause.
1416 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
1417 /// \param KernelArgs Array of arguments to the kernel.
1418 InsertPointTy emitTargetKernel(const LocationDescription &Loc,
1419 InsertPointTy AllocaIP, Value *&Return,
1420 Value *Ident, Value *DeviceID, Value *NumTeams,
1421 Value *NumThreads, Value *HostPtr,
1422 ArrayRef<Value *> KernelArgs);
1423
1424 /// Generate a barrier runtime call.
1425 ///
1426 /// \param Loc The location at which the request originated and is fulfilled.
1427 /// \param DK The directive which caused the barrier
1428 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1429 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1430 /// should be checked and acted upon.
1431 ///
1432 /// \returns The insertion point after the barrier.
1433 InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1434 omp::Directive DK, bool ForceSimpleCall,
1435 bool CheckCancelFlag);
1436
1437 /// Generate a flush runtime call.
1438 ///
1439 /// \param Loc The location at which the request originated and is fulfilled.
1440 void emitFlush(const LocationDescription &Loc);
1441
1442 /// The finalization stack made up of finalize callbacks currently in-flight,
1443 /// wrapped into FinalizationInfo objects that reference also the finalization
1444 /// target block and the kind of cancellable directive.
1446
1447 /// Return true if the last entry in the finalization stack is of kind \p DK
1448 /// and cancellable.
1449 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1450 return !FinalizationStack.empty() &&
1451 FinalizationStack.back().IsCancellable &&
1452 FinalizationStack.back().DK == DK;
1453 }
1454
1455 /// Generate a taskwait runtime call.
1456 ///
1457 /// \param Loc The location at which the request originated and is fulfilled.
1458 void emitTaskwaitImpl(const LocationDescription &Loc);
1459
1460 /// Generate a taskyield runtime call.
1461 ///
1462 /// \param Loc The location at which the request originated and is fulfilled.
1463 void emitTaskyieldImpl(const LocationDescription &Loc);
1464
1465 /// Return the current thread ID.
1466 ///
1467 /// \param Ident The ident (ident_t*) describing the query origin.
1469
1470 /// The OpenMPIRBuilder Configuration
1472
1473 /// The underlying LLVM-IR module
1475
1476 /// The LLVM-IR Builder used to create IR.
1478
1479 /// Map to remember source location strings
1481
1482 /// Map to remember existing ident_t*.
1484
1485 /// Info manager to keep track of target regions.
1487
1488 /// The target triple of the underlying module.
1489 const Triple T;
1490
1491 /// Helper that contains information about regions we need to outline
1492 /// during finalization.
1494 using PostOutlineCBTy = std::function<void(Function &)>;
1498
1499 /// Collect all blocks in between EntryBB and ExitBB in both the given
1500 /// vector and set.
1502 SmallVectorImpl<BasicBlock *> &BlockVector);
1503
1504 /// Return the function that contains the region to be outlined.
1505 Function *getFunction() const { return EntryBB->getParent(); }
1506 };
1507
1508 /// Collection of regions that need to be outlined during finalization.
1510
1511 /// A collection of candidate target functions whose constant allocas will
1512 /// attempt to be raised on a call of finalize after all currently enqueued
1513 /// outline infos have been processed.
1515
1516 /// Collection of owned canonical loop objects that eventually need to be
1517 /// free'd.
1518 std::forward_list<CanonicalLoopInfo> LoopInfos;
1519
1520 /// Add a new region that will be outlined later.
1521 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
1522
1523 /// An ordered map of auto-generated variables to their unique names.
1524 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1525 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1526 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1527 /// variables.
1529
1530 /// Computes the size of type in bytes.
1531 Value *getSizeInBytes(Value *BasePtr);
1532
1533 // Emit a branch from the current block to the Target block only if
1534 // the current block has a terminator.
1536
1537 // If BB has no use then delete it and return. Else place BB after the current
1538 // block, if possible, or else at the end of the function. Also add a branch
1539 // from current block to BB if current block does not have a terminator.
1540 void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1541
1542 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
1543 /// Here is the logic:
1544 /// if (Cond) {
1545 /// ThenGen();
1546 /// } else {
1547 /// ElseGen();
1548 /// }
1550 BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1551
1552 /// Create the global variable holding the offload mappings information.
1554 std::string VarName);
1555
1556 /// Create the global variable holding the offload names information.
1559 std::string VarName);
1560
1563 AllocaInst *Args = nullptr;
1565 };
1566
1567 /// Create the allocas instruction used in call to mapper functions.
1569 InsertPointTy AllocaIP, unsigned NumOperands,
1571
1572 /// Create the call for the target mapper function.
1573 /// \param Loc The source location description.
1574 /// \param MapperFunc Function to be called.
1575 /// \param SrcLocInfo Source location information global.
1576 /// \param MaptypesArg The argument types.
1577 /// \param MapnamesArg The argument names.
1578 /// \param MapperAllocas The AllocaInst used for the call.
1579 /// \param DeviceID Device ID for the call.
1580 /// \param NumOperands Number of operands in the call.
1581 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1582 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1583 struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1584 unsigned NumOperands);
1585
1586 /// Container for the arguments used to pass data to the runtime library.
1588 /// The array of base pointer passed to the runtime library.
1590 /// The array of section pointers passed to the runtime library.
1592 /// The array of sizes passed to the runtime library.
1593 Value *SizesArray = nullptr;
1594 /// The array of map types passed to the runtime library for the beginning
1595 /// of the region or for the entire region if there are no separate map
1596 /// types for the region end.
1598 /// The array of map types passed to the runtime library for the end of the
1599 /// region, or nullptr if there are no separate map types for the region
1600 /// end.
1602 /// The array of user-defined mappers passed to the runtime library.
1604 /// The array of original declaration names of mapped pointers sent to the
1605 /// runtime library for debugging
1607
1608 explicit TargetDataRTArgs() {}
1617 };
1618
1619 /// Data structure that contains the needed information to construct the
1620 /// kernel args vector.
1622 /// Number of arguments passed to the runtime library.
1624 /// Arguments passed to the runtime library
1626 /// The number of iterations
1628 /// The number of teams.
1630 /// The number of threads.
1632 /// The size of the dynamic shared memory.
1634 /// True if the kernel has 'no wait' clause.
1636
1637 /// Constructor for TargetKernelArgs
1645 };
1646
1647 /// Create the kernel args vector used by emitTargetKernel. This function
1648 /// creates various constant values that are used in the resulting args
1649 /// vector.
1650 static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
1652 SmallVector<Value *> &ArgsVector);
1653
1654 /// Struct that keeps the information that should be kept throughout
1655 /// a 'target data' region.
1657 /// Set to true if device pointer information has to be obtained.
1658 bool RequiresDevicePointerInfo = false;
1659 /// Set to true if Clang emits separate runtime calls for the beginning and
1660 /// end of the region. These calls might have separate map type arrays.
1661 bool SeparateBeginEndCalls = false;
1662
1663 public:
1665
1668
1669 /// Indicate whether any user-defined mapper exists.
1670 bool HasMapper = false;
1671 /// The total number of pointers passed to the runtime library.
1672 unsigned NumberOfPtrs = 0u;
1673
1674 explicit TargetDataInfo() {}
1675 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1676 bool SeparateBeginEndCalls)
1677 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1678 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1679 /// Clear information about the data arrays.
1682 HasMapper = false;
1683 NumberOfPtrs = 0u;
1684 }
1685 /// Return true if the current target data information has valid arrays.
1686 bool isValid() {
1690 }
1691 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
1692 bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1693 };
1694
1702
1703 /// This structure contains combined information generated for mappable
1704 /// clauses, including base pointers, pointers, sizes, map types, user-defined
1705 /// mappers, and non-contiguous information.
1706 struct MapInfosTy {
1708 bool IsNonContiguous = false;
1713 };
1721
1722 /// Append arrays in \a CurInfo.
1723 void append(MapInfosTy &CurInfo) {
1725 CurInfo.BasePointers.end());
1726 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1728 CurInfo.DevicePointers.end());
1729 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1730 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1731 Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1733 CurInfo.NonContigInfo.Dims.end());
1735 CurInfo.NonContigInfo.Offsets.end());
1737 CurInfo.NonContigInfo.Counts.end());
1739 CurInfo.NonContigInfo.Strides.end());
1740 }
1741 };
1742
1743 /// Callback function type for functions emitting the host fallback code that
1744 /// is executed when the kernel launch fails. It takes an insertion point as
1745 /// parameter where the code should be emitted. It returns an insertion point
1746 /// that points right after the emitted code.
1748
1749 /// Generate a target region entry call and host fallback call.
1750 ///
1751 /// \param Loc The location at which the request originated and is fulfilled.
1752 /// \param OutlinedFn The outlined kernel function.
1753 /// \param OutlinedFnID The outlined function ID.
1754 /// \param EmitTargetCallFallbackCB Call back function to generate host
1755 /// fallback code.
1756 /// \param Args Data structure holding information about the kernel arguments.
1757 /// \param DeviceID Identifier for the device via the 'device' clause.
1758 /// \param RTLoc Source location identifier
1759 /// \param AllocaIP The insertion point to be used for alloca instructions.
1761 const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
1762 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1763 Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1764
1765 /// Emit the arguments to be passed to the runtime library based on the
1766 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
1767 /// ForEndCall, emit map types to be passed for the end of the region instead
1768 /// of the beginning.
1772 bool EmitDebug = false,
1773 bool ForEndCall = false);
1774
1775 /// Emit an array of struct descriptors to be assigned to the offload args.
1777 InsertPointTy CodeGenIP,
1778 MapInfosTy &CombinedInfo,
1780
1781 /// Emit the arrays used to pass the captures and map information to the
1782 /// offloading runtime library. If there is no map or capture information,
1783 /// return nullptr by reference.
1785 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
1786 TargetDataInfo &Info, bool IsNonContiguous = false,
1787 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
1788 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1789
1790 /// Creates offloading entry for the provided entry ID \a ID, address \a
1791 /// Addr, size \a Size, and flags \a Flags.
1793 int32_t Flags, GlobalValue::LinkageTypes,
1794 StringRef Name = "");
1795
1796 /// The kind of errors that can occur when emitting the offload entries and
1797 /// metadata.
1803
1804 /// Callback function type
1806 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1807
1808 // Emit the offloading entries and metadata so that the device codegen side
1809 // can easily figure out what to emit. The produced metadata looks like
1810 // this:
1811 //
1812 // !omp_offload.info = !{!1, ...}
1813 //
1814 // We only generate metadata for functions that contain target regions.
1816 EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1817
1818public:
1819 /// Generator for __kmpc_copyprivate
1820 ///
1821 /// \param Loc The source location description.
1822 /// \param BufSize Number of elements in the buffer.
1823 /// \param CpyBuf List of pointers to data to be copied.
1824 /// \param CpyFn function to call for copying data.
1825 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1826 ///
1827 /// \return The insertion position *after* the CopyPrivate call.
1828
1830 llvm::Value *BufSize, llvm::Value *CpyBuf,
1831 llvm::Value *CpyFn, llvm::Value *DidIt);
1832
1833 /// Generator for '#omp single'
1834 ///
1835 /// \param Loc The source location description.
1836 /// \param BodyGenCB Callback that will generate the region code.
1837 /// \param FiniCB Callback to finalize variable copies.
1838 /// \param IsNowait If false, a barrier is emitted.
1839 /// \param CPVars copyprivate variables.
1840 /// \param CPFuncs copy functions to use for each copyprivate variable.
1841 ///
1842 /// \returns The insertion position *after* the single call.
1844 BodyGenCallbackTy BodyGenCB,
1845 FinalizeCallbackTy FiniCB, bool IsNowait,
1846 ArrayRef<llvm::Value *> CPVars = {},
1847 ArrayRef<llvm::Function *> CPFuncs = {});
1848
1849 /// Generator for '#omp master'
1850 ///
1851 /// \param Loc The insert and source location description.
1852 /// \param BodyGenCB Callback that will generate the region code.
1853 /// \param FiniCB Callback to finalize variable copies.
1854 ///
1855 /// \returns The insertion position *after* the master.
1856 InsertPointTy createMaster(const LocationDescription &Loc,
1857 BodyGenCallbackTy BodyGenCB,
1858 FinalizeCallbackTy FiniCB);
1859
1860 /// Generator for '#omp masked'
1861 ///
1862 /// \param Loc The insert and source location description.
1863 /// \param BodyGenCB Callback that will generate the region code.
1864 /// \param FiniCB Callback to finalize variable copies.
1865 ///
1866 /// \returns The insertion position *after* the masked.
1867 InsertPointTy createMasked(const LocationDescription &Loc,
1868 BodyGenCallbackTy BodyGenCB,
1869 FinalizeCallbackTy FiniCB, Value *Filter);
1870
1871 /// Generator for '#omp critical'
1872 ///
1873 /// \param Loc The insert and source location description.
1874 /// \param BodyGenCB Callback that will generate the region body code.
1875 /// \param FiniCB Callback to finalize variable copies.
1876 /// \param CriticalName name of the lock used by the critical directive
1877 /// \param HintInst Hint Instruction for hint clause associated with critical
1878 ///
1879 /// \returns The insertion position *after* the critical.
1880 InsertPointTy createCritical(const LocationDescription &Loc,
1881 BodyGenCallbackTy BodyGenCB,
1882 FinalizeCallbackTy FiniCB,
1883 StringRef CriticalName, Value *HintInst);
1884
1885 /// Generator for '#omp ordered depend (source | sink)'
1886 ///
1887 /// \param Loc The insert and source location description.
1888 /// \param AllocaIP The insertion point to be used for alloca instructions.
1889 /// \param NumLoops The number of loops in depend clause.
1890 /// \param StoreValues The values that will be stored in the vector address.
1891 /// \param Name The name of alloca instruction.
1892 /// \param IsDependSource If true, depend source; otherwise, depend sink.
1893 ///
1894 /// \return The insertion position *after* the ordered.
1895 InsertPointTy createOrderedDepend(const LocationDescription &Loc,
1896 InsertPointTy AllocaIP, unsigned NumLoops,
1897 ArrayRef<llvm::Value *> StoreValues,
1898 const Twine &Name, bool IsDependSource);
1899
1900 /// Generator for '#omp ordered [threads | simd]'
1901 ///
1902 /// \param Loc The insert and source location description.
1903 /// \param BodyGenCB Callback that will generate the region code.
1904 /// \param FiniCB Callback to finalize variable copies.
1905 /// \param IsThreads If true, with threads clause or without clause;
1906 /// otherwise, with simd clause;
1907 ///
1908 /// \returns The insertion position *after* the ordered.
1909 InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc,
1910 BodyGenCallbackTy BodyGenCB,
1911 FinalizeCallbackTy FiniCB,
1912 bool IsThreads);
1913
1914 /// Generator for '#omp sections'
1915 ///
1916 /// \param Loc The insert and source location description.
1917 /// \param AllocaIP The insertion points to be used for alloca instructions.
1918 /// \param SectionCBs Callbacks that will generate body of each section.
1919 /// \param PrivCB Callback to copy a given variable (think copy constructor).
1920 /// \param FiniCB Callback to finalize variable copies.
1921 /// \param IsCancellable Flag to indicate a cancellable parallel region.
1922 /// \param IsNowait If true, the barrier that ensures all sections are executed
1923 /// before moving forward will not be generated.
1924 /// \returns The insertion position *after* the sections.
1925 InsertPointTy createSections(const LocationDescription &Loc,
1926 InsertPointTy AllocaIP,
1927 ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
1928 PrivatizeCallbackTy PrivCB,
1929 FinalizeCallbackTy FiniCB, bool IsCancellable,
1930 bool IsNowait);
1931
1932 /// Generator for '#omp section'
1933 ///
1934 /// \param Loc The insert and source location description.
1935 /// \param BodyGenCB Callback that will generate the region body code.
1936 /// \param FiniCB Callback to finalize variable copies.
1937 /// \returns The insertion position *after* the section.
1938 InsertPointTy createSection(const LocationDescription &Loc,
1939 BodyGenCallbackTy BodyGenCB,
1940 FinalizeCallbackTy FiniCB);
1941
1942 /// Generator for `#omp teams`
1943 ///
1944 /// \param Loc The location where the teams construct was encountered.
1945 /// \param BodyGenCB Callback that will generate the region code.
1946 /// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
1947 /// it is as if lower bound is specified as equal to upperbound. If
1948 /// this is non-null, then upperbound must also be non-null.
1949 /// \param NumTeamsUpper Upper bound on the number of teams.
1950 /// \param ThreadLimit on the number of threads that may participate in a
1951 /// contention group created by each team.
1952 /// \param IfExpr is the integer argument value of the if condition on the
1953 /// teams clause.
1955 createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
1956 Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
1957 Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
1958
1959 /// Generate conditional branch and relevant BasicBlocks through which private
1960 /// threads copy the 'copyin' variables from Master copy to threadprivate
1961 /// copies.
1962 ///
1963 /// \param IP insertion block for copyin conditional
1964 /// \param MasterVarPtr a pointer to the master variable
1965 /// \param PrivateAddr a pointer to the threadprivate variable
1966 /// \param IntPtrTy Pointer size type
1967 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1968 /// and copy.in.end block
1969 ///
1970 /// \returns The insertion point where copying operation to be emitted.
1972 Value *PrivateAddr,
1973 llvm::IntegerType *IntPtrTy,
1974 bool BranchtoEnd = true);
1975
1976 /// Create a runtime call for kmpc_Alloc
1977 ///
1978 /// \param Loc The insert and source location description.
1979 /// \param Size Size of allocated memory space
1980 /// \param Allocator Allocator information instruction
1981 /// \param Name Name of call Instruction for OMP_alloc
1982 ///
1983 /// \returns CallInst to the OMP_Alloc call
1984 CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
1985 Value *Allocator, std::string Name = "");
1986
1987 /// Create a runtime call for kmpc_free
1988 ///
1989 /// \param Loc The insert and source location description.
1990 /// \param Addr Address of memory space to be freed
1991 /// \param Allocator Allocator information instruction
1992 /// \param Name Name of call Instruction for OMP_Free
1993 ///
1994 /// \returns CallInst to the OMP_Free call
1995 CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
1996 Value *Allocator, std::string Name = "");
1997
1998 /// Create a runtime call for kmpc_threadprivate_cached
1999 ///
2000 /// \param Loc The insert and source location description.
2001 /// \param Pointer pointer to data to be cached
2002 /// \param Size size of data to be cached
2003 /// \param Name Name of call Instruction for callinst
2004 ///
2005 /// \returns CallInst to the thread private cache call.
2006 CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
2009 const llvm::Twine &Name = Twine(""));
2010
2011 /// Create a runtime call for __tgt_interop_init
2012 ///
2013 /// \param Loc The insert and source location description.
2014 /// \param InteropVar variable to be allocated
2015 /// \param InteropType type of interop operation
2016 /// \param Device device to which offloading will occur
2017 /// \param NumDependences number of dependence variables
2018 /// \param DependenceAddress pointer to dependence variables
2019 /// \param HaveNowaitClause does nowait clause exist
2020 ///
2021 /// \returns CallInst to the __tgt_interop_init call
2022 CallInst *createOMPInteropInit(const LocationDescription &Loc,
2023 Value *InteropVar,
2024 omp::OMPInteropType InteropType, Value *Device,
2025 Value *NumDependences,
2026 Value *DependenceAddress,
2027 bool HaveNowaitClause);
2028
2029 /// Create a runtime call for __tgt_interop_destroy
2030 ///
2031 /// \param Loc The insert and source location description.
2032 /// \param InteropVar variable to be allocated
2033 /// \param Device device to which offloading will occur
2034 /// \param NumDependences number of dependence variables
2035 /// \param DependenceAddress pointer to dependence variables
2036 /// \param HaveNowaitClause does nowait clause exist
2037 ///
2038 /// \returns CallInst to the __tgt_interop_destroy call
2039 CallInst *createOMPInteropDestroy(const LocationDescription &Loc,
2040 Value *InteropVar, Value *Device,
2041 Value *NumDependences,
2042 Value *DependenceAddress,
2043 bool HaveNowaitClause);
2044
2045 /// Create a runtime call for __tgt_interop_use
2046 ///
2047 /// \param Loc The insert and source location description.
2048 /// \param InteropVar variable to be allocated
2049 /// \param Device device to which offloading will occur
2050 /// \param NumDependences number of dependence variables
2051 /// \param DependenceAddress pointer to dependence variables
2052 /// \param HaveNowaitClause does nowait clause exist
2053 ///
2054 /// \returns CallInst to the __tgt_interop_use call
2055 CallInst *createOMPInteropUse(const LocationDescription &Loc,
2056 Value *InteropVar, Value *Device,
2057 Value *NumDependences, Value *DependenceAddress,
2058 bool HaveNowaitClause);
2059
2060 /// The `omp target` interface
2061 ///
2062 /// For more information about the usage of this interface,
2063 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
2064 ///
2065 ///{
2066
2067 /// Create a runtime call for kmpc_target_init
2068 ///
2069 /// \param Loc The insert and source location description.
2070 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
2071 /// \param MinThreadsVal Minimal number of threads, or 0.
2072 /// \param MaxThreadsVal Maximal number of threads, or 0.
2073 /// \param MinTeamsVal Minimal number of teams, or 0.
2074 /// \param MaxTeamsVal Maximal number of teams, or 0.
2075 InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
2076 int32_t MinThreadsVal = 0,
2077 int32_t MaxThreadsVal = 0,
2078 int32_t MinTeamsVal = 0,
2079 int32_t MaxTeamsVal = 0);
2080
2081 /// Create a runtime call for kmpc_target_deinit
2082 ///
2083 /// \param Loc The insert and source location description.
2084 /// \param TeamsReductionDataSize The maximal size of all the reduction data
2085 /// for teams reduction.
2086 /// \param TeamsReductionBufferLength The number of elements (each of up to
2087 /// \p TeamsReductionDataSize size), in the teams reduction buffer.
2088 void createTargetDeinit(const LocationDescription &Loc,
2089 int32_t TeamsReductionDataSize = 0,
2090 int32_t TeamsReductionBufferLength = 1024);
2091
2092 ///}
2093
2094 /// Helpers to read/write kernel annotations from the IR.
2095 ///
2096 ///{
2097
2098 /// Read/write the bounds on threads for \p Kernel. Read will return 0 if none
2099 /// is set.
2100 static std::pair<int32_t, int32_t>
2101 readThreadBoundsForKernel(const Triple &T, Function &Kernel);
2102 static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
2103 int32_t LB, int32_t UB);
2104
2105 /// Read/write the bounds on teams for \p Kernel. Read will return 0 if none
2106 /// is set.
2107 static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
2108 Function &Kernel);
2109 static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
2110 int32_t UB);
2111 ///}
2112
2113private:
2114 // Sets the function attributes expected for the outlined function
2115 void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
2116
2117 // Creates the function ID/Address for the given outlined function.
2118 // In the case of an embedded device function the address of the function is
2119 // used, in the case of a non-offload function a constant is created.
2120 Constant *createOutlinedFunctionID(Function *OutlinedFn,
2121 StringRef EntryFnIDName);
2122
2123 // Creates the region entry address for the outlined function
2124 Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
2125 StringRef EntryFnName);
2126
2127public:
2128 /// Functions used to generate a function with the given name.
2129 using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2130
2131 /// Create a unique name for the entry function using the source location
2132 /// information of the current target region. The name will be something like:
2133 ///
2134 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
2135 ///
2136 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
2137 /// mangled name of the function that encloses the target region and BB is the
2138 /// line number of the target region. CC is a count added when more than one
2139 /// region is located at the same location.
2140 ///
2141 /// If this target outline function is not an offload entry, we don't need to
2142 /// register it. This may happen if it is guarded by an if clause that is
2143 /// false at compile time, or no target archs have been specified.
2144 ///
2145 /// The created target region ID is used by the runtime library to identify
2146 /// the current target region, so it only has to be unique and not
2147 /// necessarily point to anything. It could be the pointer to the outlined
2148 /// function that implements the target region, but we aren't using that so
2149 /// that the compiler doesn't need to keep that, and could therefore inline
2150 /// the host function if proven worthwhile during optimization. On the other
2151 /// hand, if emitting code for the device, the ID has to be the function
2152 /// address so that it can be retrieved from the offloading entry and launched
2153 /// by the runtime library. We also mark the outlined function to have
2154 /// external linkage in case we are emitting code for the device, because
2155 /// these functions will be entry points to the device.
2156 ///
2157 /// \param InfoManager The info manager keeping track of the offload entries
2158 /// \param EntryInfo The entry information about the function
2159 /// \param GenerateFunctionCallback The callback function to generate the code
2160 /// \param OutlinedFn Pointer to the outlined function
2161 /// \param EntryFnIDName Name of the ID to be created
2163 FunctionGenCallback &GenerateFunctionCallback,
2164 bool IsOffloadEntry, Function *&OutlinedFn,
2165 Constant *&OutlinedFnID);
2166
2167 /// Registers the given function and sets up the attributes of the function.
2168 /// Returns the FunctionID.
2169 ///
2170 /// \param InfoManager The info manager keeping track of the offload entries
2171 /// \param EntryInfo The entry information about the function
2172 /// \param OutlinedFunction Pointer to the outlined function
2173 /// \param EntryFnName Name of the outlined function
2174 /// \param EntryFnIDName Name of the ID to be created
2176 Function *OutlinedFunction,
2177 StringRef EntryFnName,
2178 StringRef EntryFnIDName);
2179
2180 /// Type of BodyGen to use for region codegen
2181 ///
2182 /// Priv: If device pointer privatization is required, emit the body of the
2183 /// region here. It will have to be duplicated: with and without
2184 /// privatization.
2185 /// DupNoPriv: If we need device pointer privatization, we need
2186 /// to emit the body of the region with no privatization in the 'else' branch
2187 /// of the conditional.
2188 /// NoPriv: If we don't require privatization of device
2189 /// pointers, we emit the body in between the runtime calls. This avoids
2190 /// duplicating the body code.
2192
2193 /// Callback type for creating the map infos for the kernel parameters.
2194 /// \param CodeGenIP is the insertion point where code should be generated,
2195 /// if any.
2198
2199 /// Generator for '#omp target data'
2200 ///
2201 /// \param Loc The location where the target data construct was encountered.
2202 /// \param AllocaIP The insertion points to be used for alloca instructions.
2203 /// \param CodeGenIP The insertion point at which the target directive code
2204 /// should be placed.
2205 /// \param IsBegin If true then emits begin mapper call otherwise emits
2206 /// end mapper call.
2207 /// \param DeviceID Stores the DeviceID from the device clause.
2208 /// \param IfCond Value which corresponds to the if clause condition.
2209 /// \param Info Stores all information related to the Target Data directive.
2210 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2211 /// \param BodyGenCB Optional Callback to generate the region code.
2212 /// \param DeviceAddrCB Optional callback to generate code related to
2213 /// use_device_ptr and use_device_addr.
2214 /// \param CustomMapperCB Optional callback to generate code related to
2215 /// custom mappers.
2217 const LocationDescription &Loc, InsertPointTy AllocaIP,
2218 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
2220 omp::RuntimeFunction *MapperFunc = nullptr,
2222 BodyGenTy BodyGenType)>
2223 BodyGenCB = nullptr,
2224 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
2225 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
2226 Value *SrcLocInfo = nullptr);
2227
2229 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
2230
2232 Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
2233 InsertPointTy CodeGenIP)>;
2234
2235 /// Generator for '#omp target'
2236 ///
2237 /// \param Loc where the target construct was encountered.
2238 /// \param CodeGenIP The insertion point where the call to the outlined
2239 /// function should be emitted.
2240 /// \param EntryInfo The entry information about the function.
2241 /// \param NumTeams Number of teams specified in the num_teams clause.
2242 /// \param NumThreads Number of threads specified in the thread_limit clause.
2243 /// \param Inputs The input values to the region that will be passed
2244 /// as arguments to the outlined function.
2245 /// \param BodyGenCB Callback that will generate the region code.
2246 /// \param ArgAccessorFuncCB Callback that will generate accessors
2247 /// instructions for passed in target arguments where necessary
2251 TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
2252 int32_t NumThreads,
2254 GenMapInfoCallbackTy GenMapInfoCB,
2255 TargetBodyGenCallbackTy BodyGenCB,
2256 TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB);
2257
2258 /// Returns __kmpc_for_static_init_* runtime function for the specified
2259 /// size \a IVSize and sign \a IVSigned. Will create a distribute call
2260 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
2261 FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
2262 bool IsGPUDistribute);
2263
2264 /// Returns __kmpc_dispatch_init_* runtime function for the specified
2265 /// size \a IVSize and sign \a IVSigned.
2266 FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);
2267
2268 /// Returns __kmpc_dispatch_next_* runtime function for the specified
2269 /// size \a IVSize and sign \a IVSigned.
2270 FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);
2271
2272 /// Returns __kmpc_dispatch_fini_* runtime function for the specified
2273 /// size \a IVSize and sign \a IVSigned.
2274 FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
2275
2276 /// Declarations for LLVM-IR types (simple, array, function and structure) are
2277 /// generated below. Their names are defined and used in OpenMPKinds.def. Here
2278 /// we provide the declarations, the initializeTypes function will provide the
2279 /// values.
2280 ///
2281 ///{
2282#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
2283#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
2284 ArrayType *VarName##Ty = nullptr; \
2285 PointerType *VarName##PtrTy = nullptr;
2286#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
2287 FunctionType *VarName = nullptr; \
2288 PointerType *VarName##Ptr = nullptr;
2289#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
2290 StructType *VarName = nullptr; \
2291 PointerType *VarName##Ptr = nullptr;
2292#include "llvm/Frontend/OpenMP/OMPKinds.def"
2293
2294 ///}
2295
2296private:
2297 /// Create all simple and struct types exposed by the runtime and remember
2298 /// the llvm::PointerTypes of them for easy access later.
2299 void initializeTypes(Module &M);
2300
2301 /// Common interface for generating entry calls for OMP Directives.
2302 /// If the directive has a region/body, it will set the insertion
2303 /// point to the body.
2304 ///
2305 /// \param OMPD Directive to generate entry blocks for
2306 /// \param EntryCall Call to the entry OMP Runtime Function
2307 /// \param ExitBB block where the region ends.
2308 /// \param Conditional indicate if the entry call result will be used
2309 /// to evaluate a conditional of whether a thread will execute
2310 /// body code or not.
2311 ///
2312 /// \return The insertion position in exit block
2313 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
2314 BasicBlock *ExitBB,
2315 bool Conditional = false);
2316
2317 /// Common interface to finalize the region
2318 ///
2319 /// \param OMPD Directive to generate exiting code for
2320 /// \param FinIP Insertion point for emitting Finalization code and exit call
2321 /// \param ExitCall Call to the ending OMP Runtime Function
2322 /// \param HasFinalize indicate if the directive will require finalization
2323 /// and has a finalization callback in the stack that
2324 /// should be called.
2325 ///
2326 /// \return The insertion position in exit block
2327 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
2328 InsertPointTy FinIP,
2329 Instruction *ExitCall,
2330 bool HasFinalize = true);
2331
2332 /// Common Interface to generate OMP inlined regions
2333 ///
2334 /// \param OMPD Directive to generate inlined region for
2335 /// \param EntryCall Call to the entry OMP Runtime Function
2336 /// \param ExitCall Call to the ending OMP Runtime Function
2337 /// \param BodyGenCB Body code generation callback.
2338 /// \param FiniCB Finalization Callback. Will be called when finalizing region
2339 /// \param Conditional indicate if the entry call result will be used
2340 /// to evaluate a conditional of whether a thread will execute
2341 /// body code or not.
2342 /// \param HasFinalize indicate if the directive will require finalization
2343 /// and has a finalization callback in the stack that
2344 /// should be called.
2345 /// \param IsCancellable if HasFinalize is set to true, indicate if the
2346 /// directive should be cancellable.
2347 /// \return The insertion point after the region
2348
2350 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
2351 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
2352 FinalizeCallbackTy FiniCB, bool Conditional = false,
2353 bool HasFinalize = true, bool IsCancellable = false);
2354
2355 /// Get the platform-specific name separator.
2356 /// \param Parts different parts of the final name that needs separation
2357 /// \param FirstSeparator First separator used between the initial two
2358 /// parts of the name.
2359 /// \param Separator separator used between all of the rest consecutive
2360 /// parts of the name
2361 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
2362 StringRef FirstSeparator,
2363 StringRef Separator);
2364
2365 /// Returns corresponding lock object for the specified critical region
2366 /// name. If the lock object does not exist it is created, otherwise the
2367 /// reference to the existing copy is returned.
2368 /// \param CriticalName Name of the critical region.
2369 ///
2370 Value *getOMPCriticalRegionLock(StringRef CriticalName);
2371
2372 /// Callback type for Atomic Expression update
2373 /// ex:
2374 /// \code{.cpp}
2375 /// unsigned x = 0;
2376 /// #pragma omp atomic update
2377 /// x = Expr(x_old); //Expr() is any legal operation
2378 /// \endcode
2379 ///
2380 /// \param XOld the value of the atomic memory address to use for update
2381 /// \param IRB reference to the IRBuilder to use
2382 ///
2383 /// \returns Value to update X to.
2384 using AtomicUpdateCallbackTy =
2385 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2386
2387private:
2388 enum AtomicKind { Read, Write, Update, Capture, Compare };
2389
2390 /// Determine whether to emit flush or not
2391 ///
2392 /// \param Loc The insert and source location description.
2393 /// \param AO The required atomic ordering
2394 /// \param AK The OpenMP atomic operation kind used.
2395 ///
2396 /// \returns whether a flush was emitted or not
2397 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2398 AtomicOrdering AO, AtomicKind AK);
2399
2400 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2401 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2402 /// Only Scalar data types.
2403 ///
2404 /// \param AllocaIP The insertion point to be used for alloca
2405 /// instructions.
2406 /// \param X The target atomic pointer to be updated
2407 /// \param XElemTy The element type of the atomic pointer.
2408 /// \param Expr The value to update X with.
2409 /// \param AO Atomic ordering of the generated atomic
2410 /// instructions.
2411 /// \param RMWOp The binary operation used for update. If
2412 /// operation is not supported by atomicRMW,
2413 /// or belong to {FADD, FSUB, BAD_BINOP}.
2414 /// Then a `cmpExch` based atomic will be generated.
2415 /// \param UpdateOp Code generator for complex expressions that cannot be
2416 /// expressed through atomicrmw instruction.
2417 /// \param VolatileX True if \a X is volatile.
2418 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2419 /// update expression, false otherwise.
2420 /// (e.g. true for X = X BinOp Expr)
2421 ///
2422 /// \returns A pair of the old value of X before the update, and the value
2423 /// used for the update.
2424 std::pair<Value *, Value *>
2425 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2427 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2428 bool IsXBinopExpr);
2429
2430 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
2431 ///
2432 /// \return The instruction.
2433 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2434 AtomicRMWInst::BinOp RMWOp);
2435
2436public:
2437 /// a struct to pack relevant information while generating atomic Ops
2439 Value *Var = nullptr;
2440 Type *ElemTy = nullptr;
2441 bool IsSigned = false;
2442 bool IsVolatile = false;
2443 };
2444
2445 /// Emit atomic Read for : V = X --- Only Scalar data types.
2446 ///
2447 /// \param Loc The insert and source location description.
2448 /// \param X The target pointer to be atomically read
2449 /// \param V Memory address where to store atomically read
2450 /// value
2451 /// \param AO Atomic ordering of the generated atomic
2452 /// instructions.
2453 ///
2454 /// \return Insertion point after generated atomic read IR.
2457 AtomicOrdering AO);
2458
2459 /// Emit atomic write for : X = Expr --- Only Scalar data types.
2460 ///
2461 /// \param Loc The insert and source location description.
2462 /// \param X The target pointer to be atomically written to
2463 /// \param Expr The value to store.
2464 /// \param AO Atomic ordering of the generated atomic
2465 /// instructions.
2466 ///
2467 /// \return Insertion point after generated atomic Write IR.
2469 AtomicOpValue &X, Value *Expr,
2470 AtomicOrdering AO);
2471
2472 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2473 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2474 /// Only Scalar data types.
2475 ///
2476 /// \param Loc The insert and source location description.
2477 /// \param AllocaIP The insertion point to be used for alloca instructions.
2478 /// \param X The target atomic pointer to be updated
2479 /// \param Expr The value to update X with.
2480 /// \param AO Atomic ordering of the generated atomic instructions.
2481 /// \param RMWOp The binary operation used for update. If operation
2482 /// is not supported by atomicRMW, or belong to
2483 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
2484 /// atomic will be generated.
2485 /// \param UpdateOp Code generator for complex expressions that cannot be
2486 /// expressed through atomicrmw instruction.
2487 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2488 /// update expression, false otherwise.
2489 /// (e.g. true for X = X BinOp Expr)
2490 ///
2491 /// \return Insertion point after generated atomic update IR.
2493 InsertPointTy AllocaIP, AtomicOpValue &X,
2494 Value *Expr, AtomicOrdering AO,
2496 AtomicUpdateCallbackTy &UpdateOp,
2497 bool IsXBinopExpr);
2498
2499 /// Emit atomic update for constructs: --- Only Scalar data types
2500 /// V = X; X = X BinOp Expr ,
2501 /// X = X BinOp Expr; V = X,
2502 /// V = X; X = Expr BinOp X,
2503 /// X = Expr BinOp X; V = X,
2504 /// V = X; X = UpdateOp(X),
2505 /// X = UpdateOp(X); V = X,
2506 ///
2507 /// \param Loc The insert and source location description.
2508 /// \param AllocaIP The insertion point to be used for alloca instructions.
2509 /// \param X The target atomic pointer to be updated
2510 /// \param V Memory address where to store captured value
2511 /// \param Expr The value to update X with.
2512 /// \param AO Atomic ordering of the generated atomic instructions
2513 /// \param RMWOp The binary operation used for update. If
2514 /// operation is not supported by atomicRMW, or belong to
2515 /// {FADD, FSUB, BAD_BINOP}. Then a cmpExch based
2516 /// atomic will be generated.
2517 /// \param UpdateOp Code generator for complex expressions that cannot be
2518 /// expressed through atomicrmw instruction.
2519 /// \param UpdateExpr true if X is an in place update of the form
2520 /// X = X BinOp Expr or X = Expr BinOp X
2521 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2522 /// update expression, false otherwise.
2523 /// (e.g. true for X = X BinOp Expr)
2524 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2525 /// 'v', not an updated one.
2526 ///
2527 /// \return Insertion point after generated atomic capture IR.
2530 AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2532 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2533 bool IsPostfixUpdate, bool IsXBinopExpr);
2534
2535 /// Emit atomic compare for constructs: --- Only scalar data types
2536 /// cond-expr-stmt:
2537 /// x = x ordop expr ? expr : x;
2538 /// x = expr ordop x ? expr : x;
2539 /// x = x == e ? d : x;
2540 /// x = e == x ? d : x; (this one is not in the spec)
2541 /// cond-update-stmt:
2542 /// if (x ordop expr) { x = expr; }
2543 /// if (expr ordop x) { x = expr; }
2544 /// if (x == e) { x = d; }
2545 /// if (e == x) { x = d; } (this one is not in the spec)
2546 /// conditional-update-capture-atomic:
2547 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2548 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2549 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2550 /// IsFailOnly=true)
2551 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2552 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2553 /// IsFailOnly=true)
2554 ///
2555 /// \param Loc The insert and source location description.
2556 /// \param X The target atomic pointer to be updated.
2557 /// \param V Memory address where to store captured value (for
2558 /// compare capture only).
2559 /// \param R Memory address where to store comparison result
2560 /// (for compare capture with '==' only).
2561 /// \param E The expected value ('e') for forms that use an
2562 /// equality comparison or an expression ('expr') for
2563 /// forms that use 'ordop' (logically an atomic maximum or
2564 /// minimum).
2565 /// \param D The desired value for forms that use an equality
2566 /// comparison. For forms that use 'ordop', it should be
2567 /// \p nullptr.
2568 /// \param AO Atomic ordering of the generated atomic instructions.
2569 /// \param Op Atomic compare operation. It can only be ==, <, or >.
2570 /// \param IsXBinopExpr True if the conditional statement is in the form where
2571 /// x is on LHS. It only matters for < or >.
2572 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
2573 /// 'v', not an updated one (for compare capture
2574 /// only).
2575 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
2576 /// only when the comparison fails. This is only valid for
2577 /// the case the comparison is '=='.
2578 ///
2579 /// \return Insertion point after generated atomic capture IR.
2584 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2587 AtomicOpValue &R, Value *E, Value *D,
2588 AtomicOrdering AO,
2590 bool IsXBinopExpr, bool IsPostfixUpdate,
2591 bool IsFailOnly, AtomicOrdering Failure);
2592
2593 /// Create the control flow structure of a canonical OpenMP loop.
2594 ///
2595 /// The emitted loop will be disconnected, i.e. no edge to the loop's
2596 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2597 /// IRBuilder location is not preserved.
2598 ///
2599 /// \param DL DebugLoc used for the instructions in the skeleton.
2600 /// \param TripCount Value to be used for the trip count.
2601 /// \param F Function in which to insert the BasicBlocks.
2602 /// \param PreInsertBefore Where to insert BBs that execute before the body,
2603 /// typically the body itself.
2604 /// \param PostInsertBefore Where to insert BBs that execute after the body.
2605 /// \param Name Base name used to derive BB
2606 /// and instruction names.
2607 ///
2608 /// \returns The CanonicalLoopInfo that represents the emitted loop.
2610 Function *F,
2611 BasicBlock *PreInsertBefore,
2612 BasicBlock *PostInsertBefore,
2613 const Twine &Name = {});
2614 /// OMP Offload Info Metadata name string
2615 const std::string ompOffloadInfoName = "omp_offload.info";
2616
2617 /// Loads all the offload entries information from the host IR
2618 /// metadata. This function is only meant to be used with device code
2619 /// generation.
2620 ///
2621 /// \param M Module to load Metadata info from. The module passed may be
2622 /// loaded from a bitcode file, i.e., different from OpenMPIRBuilder::M module.
2624
2625 /// Loads all the offload entries information from the host IR
2626 /// metadata read from the file passed in as the HostFilePath argument. This
2627 /// function is only meant to be used with device code generation.
2628 ///
2629 /// \param HostFilePath The path to the host IR file,
2630 /// used to load in offload metadata for the device, allowing host and device
2631 /// to maintain the same metadata mapping.
2632 void loadOffloadInfoMetadata(StringRef HostFilePath);
2633
2634 /// Gets (if a variable with the given name already exists) or creates an
2635 /// internal global variable with the specified Name. The created variable has
2636 /// linkage CommonLinkage by default and is initialized with a null value.
2637 /// \param Ty Type of the global variable. If the variable already exists, its
2638 /// type must be the same.
2639 /// \param Name Name of the variable.
2641 unsigned AddressSpace = 0);
2642};
2643
2644/// Class to represent the control flow structure of an OpenMP canonical loop.
2645///
2646/// The control-flow structure is standardized for easy consumption by
2647/// directives associated with loops. For instance, the worksharing-loop
2648/// construct may change this control flow such that each loop iteration is
2649/// executed on only one thread. The constraints of a canonical loop in brief
2650/// are:
2651///
2652/// * The number of loop iterations must have been computed before entering the
2653/// loop.
2654///
2655/// * Has an (unsigned) logical induction variable that starts at zero and
2656/// increments by one.
2657///
2658/// * The loop's CFG itself has no side-effects. The OpenMP specification
2659/// itself allows side-effects, but the order in which they happen, including
2660/// how often or whether at all, is unspecified. We expect that the frontend
2661/// will emit those side-effect instructions somewhere (e.g. before the loop)
2662/// such that the CanonicalLoopInfo itself can be side-effect free.
2663///
2664/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2665/// execution of a loop body that satisfies these constraints. It does NOT
2666/// represent arbitrary SESE regions that happen to contain a loop. Do not use
2667/// CanonicalLoopInfo for such purposes.
2668///
2669/// The control flow can be described as follows:
2670///
2671/// Preheader
2672/// |
2673/// /-> Header
2674/// | |
2675/// | Cond---\
2676/// | | |
2677/// | Body |
2678/// | | | |
2679/// | <...> |
2680/// | | | |
2681/// \--Latch |
2682/// |
2683/// Exit
2684/// |
2685/// After
2686///
2687/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2688/// including) and end at AfterIP (at the After's first instruction, excluding).
2689/// That is, instructions in the Preheader and After blocks (except the
2690/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2691/// side-effects. Typically, the Preheader is used to compute the loop's trip
2692/// count. The instructions from BodyIP (at the Body block's first instruction,
2693/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2694/// control and thus can have side-effects. The body block is the single entry
2695/// point into the loop body, which may contain arbitrary control flow as long
2696/// as all control paths eventually branch to the Latch block.
2697///
2698/// TODO: Consider adding another standardized BasicBlock between Body CFG and
2699/// Latch to guarantee that there is only a single edge to the latch. It would
2700/// make loop transformations easier by not needing to consider multiple
2701/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2702/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2703/// executes after each body iteration.
2704///
2705/// There must be no loop-carried dependencies through llvm::Values. This is
2706/// equivalent to requiring that the Latch has no PHINode and the Header's only
2707/// PHINode is for the induction variable.
2708///
2709/// All code in Header, Cond, Latch and Exit (plus the terminator of the
2710/// Preheader) is CanonicalLoopInfo's responsibility and its build-up is checked
2711/// by assertOK(). It is expected not to be modified unless explicitly
2712/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2713/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2714/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2715/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2716/// anymore as its underlying control flow may not exist anymore.
2717/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2718/// may also return a new CanonicalLoopInfo that can be passed to other
2719/// loop-associated construct implementing methods. These loop-transforming
2720/// methods may either create a new CanonicalLoopInfo usually using
2721/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2722/// modify one of the input CanonicalLoopInfo and return it as representing the
2723/// modified loop. What is done is an implementation detail of
2724/// transformation-implementing method and callers should always assume that the
2725/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2726/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2727/// created by createCanonicalLoop, such that transforming methods do not have
2728/// to special case where the CanonicalLoopInfo originated from.
2729///
2730/// Generally, methods consuming CanonicalLoopInfo do not need an
2731/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2732/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2733/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2734/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2735/// any InsertPoint in the Preheader, After or Block can still be used after
2736/// calling such a method.
2737///
2738/// TODO: Provide mechanisms for exception handling and cancellation points.
2739///
2740/// Defined outside OpenMPIRBuilder because nested classes cannot be
2741/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2743 friend class OpenMPIRBuilder;
2744
2745private:
2746 BasicBlock *Header = nullptr;
2747 BasicBlock *Cond = nullptr;
2748 BasicBlock *Latch = nullptr;
2749 BasicBlock *Exit = nullptr;
2750
2751 /// Add the control blocks of this loop to \p BBs.
2752 ///
2753 /// This does not include any block from the body, including the one returned
2754 /// by getBody().
2755 ///
2756 /// FIXME: This currently includes the Preheader and After blocks even though
2757 /// their content is (mostly) not under CanonicalLoopInfo's control.
2758 /// Re-evaluate whether this makes sense.
2759 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2760
2761 /// Sets the number of loop iterations to the given value. This value must be
2762 /// valid in the condition block (i.e., defined in the preheader) and is
2763 /// interpreted as an unsigned integer.
2764 void setTripCount(Value *TripCount);
2765
2766 /// Replace all uses of the canonical induction variable in the loop body with
2767 /// a new one.
2768 ///
2769 /// The intended use case is to update the induction variable for an updated
2770 /// iteration space such that it can stay normalized in the 0...tripcount-1
2771 /// range.
2772 ///
2773 /// The \p Updater is called with the (presumably updated) current normalized
2774 /// induction variable and is expected to return the value that uses of the
2775 /// pre-updated induction values should use instead, typically dependent on
2776 /// the new induction variable. This is a lambda (instead of e.g. just passing
2777 /// the new value) to be able to distinguish the uses of the pre-updated
2778 /// induction variable and uses of the induction variable to compute the
2779 /// updated induction variable value.
2780 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2781
2782public:
2783 /// Returns whether this object currently represents the IR of a loop. If
2784 /// returning false, it may have been consumed by a loop transformation or not
2785 /// been initialized. Do not use in this case.
2786 bool isValid() const { return Header; }
2787
2788 /// The preheader ensures that there is only a single edge entering the loop.
2789 /// Code that must be executed before any loop iteration can be emitted here,
2790 /// such as computing the loop trip count and begin lifetime markers. Code in
2791 /// the preheader is not considered part of the canonical loop.
2792 BasicBlock *getPreheader() const;
2793
2794 /// The header is the entry for each iteration. In the canonical control flow,
2795 /// it only contains the PHINode for the induction variable.
2797 assert(isValid() && "Requires a valid canonical loop");
2798 return Header;
2799 }
2800
2801 /// The condition block computes whether there is another loop iteration. If
2802 /// yes, branches to the body; otherwise to the exit block.
2804 assert(isValid() && "Requires a valid canonical loop");
2805 return Cond;
2806 }
2807
2808 /// The body block is the single entry for a loop iteration and not controlled
2809 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2810 /// eventually branch to the \p Latch block.
2812 assert(isValid() && "Requires a valid canonical loop");
2813 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2814 }
2815
2816 /// Reaching the latch indicates the end of the loop body code. In the
2817 /// canonical control flow, it only contains the increment of the induction
2818 /// variable.
2820 assert(isValid() && "Requires a valid canonical loop");
2821 return Latch;
2822 }
2823
2824 /// Reaching the exit indicates no more iterations are being executed.
2826 assert(isValid() && "Requires a valid canonical loop");
2827 return Exit;
2828 }
2829
2830 /// The after block is intended for clean-up code such as lifetime end
2831 /// markers. It is separate from the exit block to ensure, analogous to the
2832 /// preheader, it having just a single entry edge and being free from PHI
2833 /// nodes should there be multiple loop exits (such as from break
2834 /// statements/cancellations).
2836 assert(isValid() && "Requires a valid canonical loop");
2837 return Exit->getSingleSuccessor();
2838 }
2839
2840 /// Returns the llvm::Value containing the number of loop iterations. It must
2841 /// be valid in the preheader and always interpreted as an unsigned integer of
2842 /// any bit-width.
2844 assert(isValid() && "Requires a valid canonical loop");
2845 Instruction *CmpI = &Cond->front();
2846 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2847 return CmpI->getOperand(1);
2848 }
2849
2850 /// Returns the instruction representing the current logical induction
2851 /// variable. Always unsigned, always starting at 0 with an increment of one.
2853 assert(isValid() && "Requires a valid canonical loop");
2854 Instruction *IndVarPHI = &Header->front();
2855 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2856 return IndVarPHI;
2857 }
2858
2859 /// Return the type of the induction variable (and the trip count).
2861 assert(isValid() && "Requires a valid canonical loop");
2862 return getIndVar()->getType();
2863 }
2864
2865 /// Return the insertion point for user code before the loop.
2867 assert(isValid() && "Requires a valid canonical loop");
2868 BasicBlock *Preheader = getPreheader();
2869 return {Preheader, std::prev(Preheader->end())};
2870 };
2871
2872 /// Return the insertion point for user code in the body.
2874 assert(isValid() && "Requires a valid canonical loop");
2875 BasicBlock *Body = getBody();
2876 return {Body, Body->begin()};
2877 };
2878
2879 /// Return the insertion point for user code after the loop.
2881 assert(isValid() && "Requires a valid canonical loop");
2883 return {After, After->begin()};
2884 };
2885
2887 assert(isValid() && "Requires a valid canonical loop");
2888 return Header->getParent();
2889 }
2890
2891 /// Consistency self-check.
2892 void assertOK() const;
2893
2894 /// Invalidate this loop. That is, the underlying IR does not fulfill the
2895 /// requirements of an OpenMP canonical loop anymore.
2896 void invalidate();
2897};
2898
2899} // end namespace llvm
2900
2901#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
arc branch finalize
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
This file defines constans and helpers used when dealing with OpenMP.
const SmallVectorImpl< MachineOperand > & Cond
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
an instruction to allocate memory on the stack
Definition: Instructions.h:60
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:707
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:445
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:432
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:208
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:50
InsertPoint - A saved insertion point.
Definition: IRBuilder.h:255
BasicBlock * getBlock() const
Definition: IRBuilder.h:270
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:92
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:218
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:275
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:287
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2664
Class to represent integer types.
Definition: DerivedTypes.h:40
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OffloadEntryInfoDeviceGlobalVar(unsigned Order, OMPTargetGlobalVarEntryKind Flags)
Definition: OMPIRBuilder.h:375
OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage, const std::string &VarName)
Definition: OMPIRBuilder.h:378
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:393
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:300
OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Definition: OMPIRBuilder.h:287
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
Definition: OMPIRBuilder.h:221
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
Definition: OMPIRBuilder.h:223
OffloadingEntryInfoKinds getKind() const
Definition: OMPIRBuilder.h:239
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)
Definition: OMPIRBuilder.h:230
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:247
OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, uint32_t Flags)
Definition: OMPIRBuilder.h:231
Class that manages information about offload code regions and data.
Definition: OMPIRBuilder.h:209
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:415
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
Definition: OMPIRBuilder.h:354
@ OMPTargetDeviceClauseNoHost
The target is marked for non-host devices.
Definition: OMPIRBuilder.h:358
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
Definition: OMPIRBuilder.h:356
@ OMPTargetDeviceClauseNone
The target is marked as having no clause.
Definition: OMPIRBuilder.h:362
@ OMPTargetDeviceClauseHost
The target is marked for host devices.
Definition: OMPIRBuilder.h:360
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
Definition: OMPIRBuilder.h:274
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
Definition: OMPIRBuilder.h:276
OffloadEntriesInfoManager(OpenMPIRBuilder *builder)
Definition: OMPIRBuilder.h:267
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
Definition: OMPIRBuilder.h:265
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry..
Definition: OMPIRBuilder.h:334
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
Definition: OMPIRBuilder.h:340
@ OMPTargetGlobalVarEntryNone
Mark the entry as having no declare target entry kind.
Definition: OMPIRBuilder.h:342
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
Definition: OMPIRBuilder.h:346
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
Definition: OMPIRBuilder.h:344
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
Definition: OMPIRBuilder.h:338
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
Definition: OMPIRBuilder.h:336
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
brief Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:325
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
Definition: OMPIRBuilder.h:410
bool empty() const
Return true if a there are no entries defined.
Captures attributes that affect generating LLVM-IR using the OpenMPIRBuilder and related classes.
Definition: OMPIRBuilder.h:84
void setIsGPU(bool Value)
Definition: OMPIRBuilder.h:166
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
Definition: OMPIRBuilder.h:90
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
Definition: OMPIRBuilder.h:100
std::optional< StringRef > FirstSeparator
First separator used between the initial two parts of a name.
Definition: OMPIRBuilder.h:106
StringRef separator() const
Definition: OMPIRBuilder.h:157
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
void setFirstSeparator(StringRef FS)
Definition: OMPIRBuilder.h:168
StringRef firstSeparator() const
Definition: OMPIRBuilder.h:147
std::optional< bool > OpenMPOffloadMandatory
Definition: OMPIRBuilder.h:103
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
std::optional< StringRef > Separator
Separator used between all of the remaining consecutive parts of a name.
Definition: OMPIRBuilder.h:108
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
Definition: OMPIRBuilder.h:129
void setHasRequiresUnifiedAddress(bool Value)
void setOpenMPOffloadMandatory(bool Value)
Definition: OMPIRBuilder.h:167
void setIsTargetDevice(bool Value)
Definition: OMPIRBuilder.h:165
void setSeparator(StringRef S)
Definition: OMPIRBuilder.h:169
void setHasRequiresDynamicAllocators(bool Value)
bool hasRequiresReverseOffload() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls)
SmallMapVector< const Value *, std::pair< Value *, Value * >, 4 > DevicePtrInfoMap
void clearArrayInfo()
Clear information about the data arrays.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
bool isValid()
Return true if the current target data information has valid arrays.
bool HasMapper
Indicate whether any user-defined mapper exists.
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:451
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:497
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, int32_t MinThreadsVal=0, int32_t MaxThreadsVal=0, int32_t MinTeamsVal=0, int32_t MaxTeamsVal=0)
The omp target interface.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy. Here is the logic: if (Cond) { Th...
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
function_ref< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
Definition: OMPIRBuilder.h:549
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
std::function< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> StorableBodyGenCallbackTy
Definition: OMPIRBuilder.h:556
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< InsertPointTy(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
Definition: OMPIRBuilder.h:589
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X For complex Operations: X = ...
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
void setConfig(OpenMPIRBuilderConfig C)
Definition: OMPIRBuilder.h:466
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for '#omp task'.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
void pushFinalizationCB(const FinalizationInfo &FI)
Push a finalization callback on the finalization stack.
Definition: OMPIRBuilder.h:515
InsertPointTy getInsertionPoint()
}
IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if variable with the given name already exists) or creates internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:477
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB)
Generator for '#omp target'.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder<> Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
OpenMPIRBuilder(Module &M)
Create a new OpenMPIRBuilder operating on the given module M.
Definition: OMPIRBuilder.h:455
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for '#omp teams'.
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void popFinalizationCB()
Pop the last finalization callback from the finalization stack.
Definition: OMPIRBuilder.h:522
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false)
Generator for '#omp reduction'.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:128
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
An efficient, type-erasing, non-owning reference to a callable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
Definition: OMPConstants.h:195
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:65
RTLDependenceKindTy
Dependence kind for RTL.
Definition: OMPConstants.h:273
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:45
WorksharingLoopType
A type of worksharing loop construct.
Definition: OMPConstants.h:283
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:267
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
AddressSpace
Definition: NVPTXBaseInfo.h:21
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
AtomicOrdering
Atomic ordering for LLVM's memory model.
A struct to pack relevant information while generating atomic Ops.
A struct to pack the relevant information for an OpenMP depend clause.
DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, Value *DepVal)
omp::RTLDependenceKindTy DepKind
bool IsCancellable
Flag to indicate if the directive is cancellable.
Definition: OMPIRBuilder.h:509
FinalizeCallbackTy FiniCB
The finalization callback provided by the last in-flight invocation of createXXXX for the directive o...
Definition: OMPIRBuilder.h:502
omp::Directive DK
The directive kind of the innermost directive that has an associated region which might require final...
Definition: OMPIRBuilder.h:506
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:593
LocationDescription(const InsertPointTy &IP)
Definition: OMPIRBuilder.h:596
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
Definition: OMPIRBuilder.h:597
LocationDescription(const IRBuilderBase &IRB)
Definition: OMPIRBuilder.h:594
This structure contains combined information generated for mappable clauses, including base pointers,...
void append(MapInfosTy &CurInfo)
Append arrays in CurInfo.
MapDeviceInfoArrayTy DevicePointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Function * getFunction() const
Return the function that contains the region to be outlined.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
std::function< void(Function &)> PostOutlineCBTy
Information about an OpenMP reduction.
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, ReductionGenTy ReductionGen, AtomicReductionGenTy AtomicReductionGen)
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, Value *SizesArray, Value *MapTypesArray, Value *MapTypesArrayEnd, Value *MappersArray, Value *MapNamesArray)
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * NumTeams
The number of teams.
TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, Value *NumIterations, Value *NumTeams, Value *NumThreads, Value *DynCGGroupMem, bool HasNoWait)
Constructor for TargetKernelArgs.
Value * DynCGGroupMem
The size of the dynamic shared memory.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
Value * NumThreads
The number of threads.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254
Data structure to contain the information needed to uniquely identify a target entry.
Definition: OMPIRBuilder.h:183
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
bool operator<(const TargetRegionEntryInfo &RHS) const
Definition: OMPIRBuilder.h:201
TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count=0)
Definition: OMPIRBuilder.h:191