LLVM 18.0.0git
OMPIRBuilder.h
Go to the documentation of this file.
1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
19#include "llvm/IR/DebugLoc.h"
20#include "llvm/IR/IRBuilder.h"
23#include <forward_list>
24#include <map>
25#include <optional>
26
27namespace llvm {
28class CanonicalLoopInfo;
29struct TargetRegionEntryInfo;
30class OffloadEntriesInfoManager;
31class OpenMPIRBuilder;
32
33/// Move the instructions after an InsertPoint to the beginning of another
34/// BasicBlock.
35///
36/// The instructions after \p IP are moved to the beginning of \p New which must
37/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
38/// \p New will be added such that there is no semantic change. Otherwise, the
39/// \p IP insert block remains degenerate and it is up to the caller to insert a
40/// terminator.
41void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
42 bool CreateBranch);
43
44/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
45/// insert location will stick to after the instruction before the insertion
46/// point (instead of moving with the instruction the InsertPoint stores
47/// internally).
48void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
49
50/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
51/// (missing the terminator).
52///
53/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
54/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
55/// is true, a branch to the new successor will be created such that
56/// semantically there is no change; otherwise the block of the insertion point
57/// remains degenerate and it is the caller's responsibility to insert a
58/// terminator. Returns the new successor block.
59BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
60 llvm::Twine Name = {});
61
62/// Split a BasicBlock at \p Builder's insertion point, even if the block is
63/// degenerate (missing the terminator). Its new insert location will stick to
64/// after the instruction before the insertion point (instead of moving with the
65/// instruction the InsertPoint stores internally).
66BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
67 llvm::Twine Name = {});
68
69/// Split a BasicBlock at \p Builder's insertion point, even if the block is
70/// degenerate (missing the terminator). Its new insert location will stick to
71/// after the instruction before the insertion point (instead of moving with the
72/// instruction the InsertPoint stores internally).
73BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
74
75/// Like splitBB, but reuses the current block's name for the new name.
76BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
77 llvm::Twine Suffix = ".split");
78
79/// Captures attributes that affect generating LLVM-IR using the
80/// OpenMPIRBuilder and related classes. Note that not all attributes are
81/// required for all classes or functions. In some use cases the configuration
82/// is not necessary at all, because the only functions that are called
83/// are ones that are not dependent on the configuration.
85public:
86 /// Flag for specifying if the compilation is done for embedded device code
87 /// or host code.
88 std::optional<bool> IsTargetDevice;
89
90 /// Flag for specifying if the compilation is done for an accelerator.
91 std::optional<bool> IsGPU;
92
93 /// Flag for specifying if offloading is mandatory.
94 std::optional<bool> OpenMPOffloadMandatory;
95
96 /// First separator used between the initial two parts of a name.
97 std::optional<StringRef> FirstSeparator;
98 /// Separator used between all remaining consecutive parts of a name.
99 std::optional<StringRef> Separator;
100
104 bool HasRequiresReverseOffload,
105 bool HasRequiresUnifiedAddress,
106 bool HasRequiresUnifiedSharedMemory,
107 bool HasRequiresDynamicAllocators);
108
109 // Getter functions that assert if the required values are not present.
110 bool isTargetDevice() const {
111 assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
112 return *IsTargetDevice;
113 }
114
115 bool isGPU() const {
116 assert(IsGPU.has_value() && "IsGPU is not set");
117 return *IsGPU;
118 }
119
121 assert(OpenMPOffloadMandatory.has_value() &&
122 "OpenMPOffloadMandatory is not set");
124 }
125
126 bool hasRequiresFlags() const { return RequiresFlags; }
127 bool hasRequiresReverseOffload() const;
128 bool hasRequiresUnifiedAddress() const;
130 bool hasRequiresDynamicAllocators() const;
131
132 /// Returns requires directive clauses as flags compatible with those expected
133 /// by libomptarget.
134 int64_t getRequiresFlags() const;
135
136 // Returns the FirstSeparator if set, otherwise use the default separator
137 // depending on isGPU
139 if (FirstSeparator.has_value())
140 return *FirstSeparator;
141 if (isGPU())
142 return "_";
143 return ".";
144 }
145
146 // Returns the Separator if set, otherwise use the default separator depending
147 // on isGPU
149 if (Separator.has_value())
150 return *Separator;
151 if (isGPU())
152 return "$";
153 return ".";
154 }
155
157 void setIsGPU(bool Value) { IsGPU = Value; }
161
166
167private:
168 /// Flags for specifying which requires directive clauses are present.
169 int64_t RequiresFlags;
170};
171
172/// Data structure to contain the information needed to uniquely identify
173/// a target entry.
175 std::string ParentName;
176 unsigned DeviceID;
177 unsigned FileID;
178 unsigned Line;
179 unsigned Count;
180
183 unsigned FileID, unsigned Line, unsigned Count = 0)
185 Count(Count) {}
186
189 unsigned DeviceID, unsigned FileID,
190 unsigned Line, unsigned Count);
191
193 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
194 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
195 RHS.Count);
196 }
197};
198
199/// Class that manages information about offload code regions and data
201 /// Associated OpenMPIRBuilder, then the number of entries registered so far.
202 OpenMPIRBuilder *OMPBuilder;
203 unsigned OffloadingEntriesNum = 0;
204
205public:
206 /// Base class of the entries info.
208 public:
209 /// Kind of a given entry.
210 enum OffloadingEntryInfoKinds : unsigned {
211 /// Entry is a target region.
213 /// Entry is a declare target variable.
215 /// Invalid entry info.
217 };
218
219 protected:
221 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
222 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
223 uint32_t Flags)
224 : Flags(Flags), Order(Order), Kind(Kind) {}
225 ~OffloadEntryInfo() = default;
226
227 public:
228 bool isValid() const { return Order != ~0u; }
229 unsigned getOrder() const { return Order; }
230 OffloadingEntryInfoKinds getKind() const { return Kind; }
231 uint32_t getFlags() const { return Flags; }
232 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
233 Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
235 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
236 Addr = V;
237 }
238 static bool classof(const OffloadEntryInfo *Info) { return true; }
239
240 private:
241 /// Address of the entity that has to be mapped for offloading.
242 WeakTrackingVH Addr;
243
244 /// Flags associated with the device global.
245 uint32_t Flags = 0u;
246
247 /// Order this entry was emitted.
248 unsigned Order = ~0u;
249
251 };
252
253 /// Return true if there are no entries defined.
254 bool empty() const;
255 /// Return number of entries defined so far.
256 unsigned size() const { return OffloadingEntriesNum; }
257
258 OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
259
260 //
261 // Target region entries related.
262 //
263
264 /// Kind of the target registry entry.
266 /// Mark the entry as target region.
268 };
269
270 /// Target region entries info.
272 /// Address that can be used as the ID of the entry.
273 Constant *ID = nullptr;
274
275 public:
278 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
279 Constant *ID,
282 ID(ID) {
284 }
285
286 Constant *getID() const { return ID; }
287 void setID(Constant *V) {
288 assert(!ID && "ID has been set before!");
289 ID = V;
290 }
291 static bool classof(const OffloadEntryInfo *Info) {
292 return Info->getKind() == OffloadingEntryInfoTargetRegion;
293 }
294 };
295
296 /// Initialize target region entry.
297 /// This is ONLY needed for DEVICE compilation.
299 unsigned Order);
300 /// Register target region entry.
304 /// Return true if a target region entry with the provided information
305 /// exists.
307 bool IgnoreAddressId = false) const;
308
309 // Return the Name based on \a EntryInfo using the next available Count.
311 const TargetRegionEntryInfo &EntryInfo);
312
313 /// \brief Applies action \a Action on all registered entries.
314 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
315 const OffloadEntryInfoTargetRegion &)>
317 void
319
320 //
321 // Device global variable entries related.
322 //
323
324 /// Kind of the global variable entry.
326 /// Mark the entry as a 'to' declare target.
328 /// Mark the entry as a 'link' declare target.
330 /// Mark the entry as a declare target enter.
332 /// Mark the entry as having no declare target entry kind.
334 /// Mark the entry as a declare target indirect global.
336 };
337
338 /// Kind of device clause for declare target variables
339 /// and functions.
340 /// NOTE: Currently not used as a part of a variable entry;
341 /// used for Flang and Clang to interface with the variable
342 /// related registration functions.
344 /// The target is marked for all devices
346 /// The target is marked for non-host devices
348 /// The target is marked for host devices
350 /// The target is marked as having no clause
352 };
353
354 /// Device global variable entries info.
356 /// Size of the global variable.
357 int64_t VarSize;
359 const std::string VarName;
360
361 public:
364 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
367 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
368 int64_t VarSize,
371 const std::string &VarName)
373 VarSize(VarSize), Linkage(Linkage), VarName(VarName) {
375 }
376
377 int64_t getVarSize() const { return VarSize; }
378 StringRef getVarName() const { return VarName; }
379 void setVarSize(int64_t Size) { VarSize = Size; }
380 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
381 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
382 static bool classof(const OffloadEntryInfo *Info) {
383 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
384 }
385 };
386
387 /// Initialize device global variable entry.
388 /// This is ONLY used for DEVICE compilation.
391 unsigned Order);
392
393 /// Register device global variable entry.
395 int64_t VarSize,
398 /// Checks if the variable with the given name has been registered already.
400 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
401 }
402 /// Applies action \a Action on all registered entries.
403 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
407
408private:
409 /// Return the count of entries at a particular source location.
410 unsigned
411 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
412
413 /// Update the count of entries at a particular source location.
414 void
415 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
416
418 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
419 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
420 EntryInfo.FileID, EntryInfo.Line, 0);
421 }
422
423 // Count of entries at a location.
424 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
425
426 // Storage for target region entries kind.
427 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
428 OffloadEntriesTargetRegionTy;
429 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
430 /// Storage for device global variable entries kind. The storage is to be
431 /// indexed by mangled name.
433 OffloadEntriesDeviceGlobalVarTy;
434 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
435};
436
437/// An interface to create LLVM-IR for OpenMP directives.
438///
439/// Each OpenMP directive has a corresponding public generator method.
441public:
442 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
443 /// not have an effect on \p M (see initialize)
445 : M(M), Builder(M.getContext()), OffloadInfoManager(this),
446 T(Triple(M.getTargetTriple())) {}
448
449 /// Initialize the internal state, this will put structure types and
450 /// potentially other helpers into the underlying module. Must be called
451 /// before any other method and only once! This internal state includes types
452 /// used in the OpenMPIRBuilder generated from OMPKinds.def.
453 void initialize();
454
456
457 /// Finalize the underlying module, e.g., by outlining regions.
458 /// \param Fn The function to be finalized. If not used,
459 /// all functions are finalized.
460 void finalize(Function *Fn = nullptr);
461
462 /// Add attributes known for \p FnID to \p Fn.
464
465 /// Type used throughout for insertion points.
467
468 /// Create a name using the platform specific separators.
469 /// \param Parts parts of the final name that needs separation
470 /// The created name has a first separator between the first and second part
471 /// and a second separator between all other parts.
472 /// E.g. with FirstSeparator "$" and Separator "." and
473 /// parts: "p1", "p2", "p3", "p4"
474 /// The resulting name is "p1$p2.p3.p4"
475 /// The separators are retrieved from the OpenMPIRBuilderConfig.
476 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
477
478 /// Callback type for variable finalization (think destructors).
479 ///
480 /// \param CodeGenIP is the insertion point at which the finalization code
481 /// should be placed.
482 ///
483 /// A finalize callback knows about all objects that need finalization, e.g.
484 /// destruction, when the scope of the currently generated construct is left
485 /// at the time, and location, the callback is invoked.
486 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
487
489 /// The finalization callback provided by the last in-flight invocation of
490 /// createXXXX for the directive of kind DK.
492
493 /// The directive kind of the innermost directive that has an associated
494 /// region which might require finalization when it is left.
495 omp::Directive DK;
496
497 /// Flag to indicate if the directive is cancellable.
499 };
500
501 /// Push a finalization callback on the finalization stack.
502 ///
503 /// NOTE: Temporary solution until Clang CG is gone.
505 FinalizationStack.push_back(FI);
506 }
507
508 /// Pop the last finalization callback from the finalization stack.
509 ///
510 /// NOTE: Temporary solution until Clang CG is gone.
512
513 /// Callback type for body (=inner region) code generation
514 ///
515 /// The callback takes code locations as arguments, each describing a
516 /// location where additional instructions can be inserted.
517 ///
518 /// The CodeGenIP may be in the middle of a basic block or point to the end of
519 /// it. The basic block may have a terminator or be degenerate. The callback
520 /// function may just insert instructions at that position, but also split the
521 /// block (without the Before argument of BasicBlock::splitBasicBlock such
522 /// that the identity of the split predecessor block is preserved) and insert
523 /// additional control flow, including branches that do not lead back to what
524 /// follows the CodeGenIP. Note that since the callback is allowed to split
525 /// the block, callers must assume that InsertPoints to positions in the
526 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
527 /// such InsertPoints need to be preserved, the caller can split the block
528 /// itself before calling the callback.
529 ///
530 /// AllocaIP and CodeGenIP must not point to the same position.
531 ///
532 /// \param AllocaIP is the insertion point at which new alloca instructions
533 /// should be placed. The BasicBlock it is pointing to must
534 /// not be split.
535 /// \param CodeGenIP is the insertion point at which the body code should be
536 /// placed.
538 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
539
540 // This is created primarily for sections construct as llvm::function_ref
541 // (BodyGenCallbackTy) is not storable (as described in the comments of
542 // function_ref class - function_ref contains a non-owning reference
543 // to the callable).
545 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
546
547 /// Callback type for loop body code generation.
548 ///
549 /// \param CodeGenIP is the insertion point where the loop's body code must be
550 /// placed. This will be a dedicated BasicBlock with a
551 /// conditional branch from the loop condition check and
552 /// terminated with an unconditional branch to the loop
553 /// latch.
554 /// \param IndVar is the induction variable usable at the insertion point.
556 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
557
558 /// Callback type for variable privatization (think copy & default
559 /// constructor).
560 ///
561 /// \param AllocaIP is the insertion point at which new alloca instructions
562 /// should be placed.
563 /// \param CodeGenIP is the insertion point at which the privatization code
564 /// should be placed.
565 /// \param Original The value being copied/created, should not be used in the
566 /// generated IR.
567 /// \param Inner The equivalent of \p Original that should be used in the
568 /// generated IR; this is equal to \p Original if the value is
569 /// a pointer and can thus be passed directly, otherwise it is
570 /// an equivalent but different value.
571 /// \param ReplVal The replacement value, thus a copy or new created version
572 /// of \p Inner.
573 ///
574 /// \returns The new insertion point where code generation continues and
575 /// \p ReplVal the replacement value.
577 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
578 Value &Inner, Value *&ReplVal)>;
579
580 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
581 /// (filename, line, column, ...).
584 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
587 : IP(IP), DL(DL) {}
590 };
591
592 /// Emitter methods for OpenMP directives.
593 ///
594 ///{
595
596 /// Generator for '#omp barrier'
597 ///
598 /// \param Loc The location where the barrier directive was encountered.
599 /// \param DK The kind of directive that caused the barrier.
600 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
601 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
602 /// should be checked and acted upon.
603 ///
604 /// \returns The insertion point after the barrier.
605 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
606 bool ForceSimpleCall = false,
607 bool CheckCancelFlag = true);
608
609 /// Generator for '#omp cancel'
610 ///
611 /// \param Loc The location where the directive was encountered.
612 /// \param IfCondition The evaluated 'if' clause expression, if any.
613 /// \param CanceledDirective The kind of directive that is canceled.
614 ///
615 /// \returns The insertion point after the barrier.
616 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
617 omp::Directive CanceledDirective);
618
619 /// Generator for '#omp parallel'
620 ///
621 /// \param Loc The insert and source location description.
622 /// \param AllocaIP The insertion points to be used for alloca instructions.
623 /// \param BodyGenCB Callback that will generate the region code.
624 /// \param PrivCB Callback to copy a given variable (think copy constructor).
625 /// \param FiniCB Callback to finalize variable copies.
626 /// \param IfCondition The evaluated 'if' clause expression, if any.
627 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
628 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
629 /// \param IsCancellable Flag to indicate a cancellable parallel region.
630 ///
631 /// \returns The insertion position *after* the parallel.
634 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
635 FinalizeCallbackTy FiniCB, Value *IfCondition,
636 Value *NumThreads, omp::ProcBindKind ProcBind,
637 bool IsCancellable);
638
639 /// Generator for the control flow structure of an OpenMP canonical loop.
640 ///
641 /// This generator operates on the logical iteration space of the loop, i.e.
642 /// the caller only has to provide a loop trip count of the loop as defined by
643 /// base language semantics. The trip count is interpreted as an unsigned
644 /// integer. The induction variable passed to \p BodyGenCB will be of the same
645 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
646 /// convert the logical iteration variable to the loop counter variable in the
647 /// loop body.
648 ///
649 /// \param Loc The insert and source location description. The insert
650 /// location can be between two instructions or the end of a
651 /// degenerate block (e.g. a BB under construction).
652 /// \param BodyGenCB Callback that will generate the loop body code.
653 /// \param TripCount Number of iterations the loop body is executed.
654 /// \param Name Base name used to derive BB and instruction names.
655 ///
656 /// \returns An object representing the created control flow structure which
657 /// can be used for loop-associated directives.
659 LoopBodyGenCallbackTy BodyGenCB,
660 Value *TripCount,
661 const Twine &Name = "loop");
662
663 /// Generator for the control flow structure of an OpenMP canonical loop.
664 ///
665 /// Instead of a logical iteration space, this allows specifying user-defined
666 /// loop counter values using increment, upper- and lower bounds. To
667 /// disambiguate the terminology when counting downwards, instead of lower
668 /// bounds we use \p Start for the loop counter value in the first body
669 /// iteration.
670 ///
671 /// Consider the following limitations:
672 ///
673 /// * A loop counter space over all integer values of its bit-width cannot be
674 /// represented. E.g using uint8_t, its loop trip count of 256 cannot be
675 /// stored into an 8 bit integer):
676 ///
677 /// DO I = 0, 255, 1
678 ///
679 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
680 /// effectively counting downwards:
681 ///
682 /// for (uint8_t i = 100u; i > 0; i += 127u)
683 ///
684 ///
685 /// TODO: May need to add additional parameters to represent:
686 ///
687 /// * Allow representing downcounting with unsigned integers.
688 ///
689 /// * Sign of the step and the comparison operator might disagree:
690 ///
691 /// for (int i = 0; i < 42; i -= 1u)
692 ///
693 ///
694 /// \param Loc The insert and source location description.
695 /// \param BodyGenCB Callback that will generate the loop body code.
696 /// \param Start Value of the loop counter for the first iteration.
697 /// \param Stop Loop counter values past this will stop the loop.
698 /// \param Step Loop counter increment after each iteration; negative
699 /// means counting down.
700 /// \param IsSigned Whether Start, Stop and Step are signed integers.
701 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
702 /// counter.
703 /// \param ComputeIP Insertion point for instructions computing the trip
704 /// count. Can be used to ensure the trip count is available
705 /// at the outermost loop of a loop nest. If not set,
706 /// defaults to the preheader of the generated loop.
707 /// \param Name Base name used to derive BB and instruction names.
708 ///
709 /// \returns An object representing the created control flow structure which
710 /// can be used for loop-associated directives.
712 LoopBodyGenCallbackTy BodyGenCB,
713 Value *Start, Value *Stop, Value *Step,
714 bool IsSigned, bool InclusiveStop,
715 InsertPointTy ComputeIP = {},
716 const Twine &Name = "loop");
717
718 /// Collapse a loop nest into a single loop.
719 ///
720 /// Merges loops of a loop nest into a single CanonicalLoopNest representation
721 /// that has the same number of innermost loop iterations as the origin loop
722 /// nest. The induction variables of the input loops are derived from the
723 /// collapsed loop's induction variable. This is intended to be used to
724 /// implement OpenMP's collapse clause. Before applying a directive,
725 /// collapseLoops normalizes a loop nest to contain only a single loop and the
726 /// directive's implementation does not need to handle multiple loops itself.
727 /// This does not remove the need to handle all loop nest handling by
728 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
729 /// modifier of the worksharing-loop directive.
730 ///
731 /// Example:
732 /// \code
733 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
734 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
735 /// body(i, j);
736 /// \endcode
737 ///
738 /// After collapsing with Loops={i,j}, the loop is changed to
739 /// \code
740 /// for (int ij = 0; ij < 63; ++ij) {
741 /// int i = ij / 9;
742 /// int j = ij % 9;
743 /// body(i, j);
744 /// }
745 /// \endcode
746 ///
747 /// In the current implementation, the following limitations apply:
748 ///
749 /// * All input loops have an induction variable of the same type.
750 ///
751 /// * The collapsed loop will have the same trip count integer type as the
752 /// input loops. Therefore it is possible that the collapsed loop cannot
753 /// represent all iterations of the input loops. For instance, assuming a
754 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
755 /// theoretical trip count of the collapsed loop would be 2^32 iterations,
756 /// which cannot be represented in a 32-bit integer. Behavior is undefined
757 /// in this case.
758 ///
759 /// * The trip counts of every input loop must be available at \p ComputeIP.
760 /// Non-rectangular loops are not yet supported.
761 ///
762 /// * At each nest level, code between a surrounding loop and its nested loop
763 /// is hoisted into the loop body, and such code will be executed more
764 /// often than before collapsing (or not at all if any inner loop iteration
765 /// has a trip count of 0). This is permitted by the OpenMP specification.
766 ///
767 /// \param DL Debug location for instructions added for collapsing,
768 /// such as instructions to compute/derive the input loop's
769 /// induction variables.
770 /// \param Loops Loops in the loop nest to collapse. Loops are specified
771 /// from outermost-to-innermost and every control flow of a
772 /// loop's body must pass through its directly nested loop.
773 /// \param ComputeIP Where additional instructions that compute the collapsed
774 /// trip count are inserted. If not set, defaults to before the
775 /// generated loop.
776 ///
777 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
780 InsertPointTy ComputeIP);
781
782 /// Get the default alignment value for given target
783 ///
784 /// \param TargetTriple Target triple
785 /// \param Features StringMap which describes extra CPU features
786 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
787 const StringMap<bool> &Features);
788
789 /// Retrieve (or create if non-existent) the address of a declare
790 /// target variable, used in conjunction with registerTargetGlobalVariable
791 /// to create declare target global variables.
792 ///
793 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
794 /// clause used in conjunction with the variable being registered (link,
795 /// to, enter).
796 /// \param DeviceClause - enumerator corresponding to the OpenMP device
797 /// clause used in conjunction with the variable being registered (nohost,
798 /// host, any)
799 /// \param IsDeclaration - boolean stating if the variable being registered
800 /// is a declaration-only and not a definition
801 /// \param IsExternallyVisible - boolean stating if the variable is externally
802 /// visible
803 /// \param EntryInfo - Unique entry information for the value generated
804 /// using getTargetEntryUniqueInfo, used to name generated pointer references
805 /// to the declare target variable
806 /// \param MangledName - the mangled name of the variable being registered
807 /// \param GeneratedRefs - references generated by invocations of
808 /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar,
809 /// these are required by Clang for book keeping.
810 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
811 /// \param TargetTriple - The OpenMP device target triple we are compiling
812 /// for
813 /// \param LlvmPtrTy - The type of the variable we are generating or
814 /// retrieving an address for
815 /// \param GlobalInitializer - a lambda function which creates a constant
816 /// used for initializing a pointer reference to the variable in certain
817 /// cases. If a nullptr is passed, it will default to utilising the original
818 /// variable to initialize the pointer reference.
819 /// \param VariableLinkage - a lambda function which returns the variables
820 /// linkage type, if unspecified and a nullptr is given, it will instead
821 /// utilise the linkage stored on the existing global variable in the
822 /// LLVMModule.
826 bool IsDeclaration, bool IsExternallyVisible,
827 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
828 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
829 std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
830 std::function<Constant *()> GlobalInitializer,
831 std::function<GlobalValue::LinkageTypes()> VariableLinkage);
832
833 /// Registers a target variable for device or host.
834 ///
835 /// \param CaptureClause - enumerator corresponding to the OpenMP capture
836 /// clause used in conjunction with the variable being registered (link,
837 /// to, enter).
838 /// \param DeviceClause - enumerator corresponding to the OpenMP device
839 /// clause used in conjunction with the variable being registered (nohost,
840 /// host, any)
841 /// \param IsDeclaration - boolean stating if the variable being registered
842 /// is a declaration-only and not a definition
843 /// \param IsExternallyVisible - boolean stating if the variable is externally
844 /// visible
845 /// \param EntryInfo - Unique entry information for the value generated
846 /// using getTargetEntryUniqueInfo, used to name generated pointer references
847 /// to the declare target variable
848 /// \param MangledName - the mangled name of the variable being registered
849 /// \param GeneratedRefs - references generated by invocations of
850 /// registerTargetGlobalVariable these are required by Clang for book
851 /// keeping.
852 /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
853 /// \param TargetTriple - The OpenMP device target triple we are compiling
854 /// for
855 /// \param GlobalInitializer - a lambda function which creates a constant
856 /// used for initializing a pointer reference to the variable in certain
857 /// cases. If a nullptr is passed, it will default to utilising the original
858 /// variable to initialize the pointer reference.
859 /// \param VariableLinkage - a lambda function which returns the variable's
860 /// linkage type. If unspecified (a nullptr is given), it will instead
861 /// utilise the linkage stored on the existing global variable in the
862 /// LLVMModule.
863 /// \param LlvmPtrTy - The type of the variable we are generating or
864 /// retrieving an address for
865 /// \param Addr - the original llvm value (addr) of the variable to be
866 /// registered
870 bool IsDeclaration, bool IsExternallyVisible,
871 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
872 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
873 std::vector<Triple> TargetTriple,
874 std::function<Constant *()> GlobalInitializer,
875 std::function<GlobalValue::LinkageTypes()> VariableLinkage,
876 Type *LlvmPtrTy, Constant *Addr);
877
878 /// Get the offset of the OMP_MAP_MEMBER_OF field.
879 unsigned getFlagMemberOffset();
880
881 /// Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on
882 /// the position given.
883 /// \param Position - A value indicating the position of the parent
884 /// of the member in the kernel argument structure, often retrieved
885 /// by the parents position in the combined information vectors used
886 /// to generate the structure itself. Multiple children (members of)
887 /// with the same parent will use the same returned member flag.
889
890 /// Given an initial flag set, this function modifies it to contain
891 /// the passed in MemberOfFlag generated from the getMemberOfFlag
892 /// function. The results are dependent on the existing flag bits
893 /// set in the original flag set.
894 /// \param Flags - The original set of flags to be modified with the
895 /// passed in MemberOfFlag.
896 /// \param MemberOfFlag - A modified OMP_MAP_MEMBER_OF flag, adjusted
897 /// slightly based on the getMemberOfFlag which adjusts the flag bits
898 /// based on the member's position in its parent.
900 omp::OpenMPOffloadMappingFlags MemberOfFlag);
901
902private:
903 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
904 ///
905 /// This takes \p CLI, which represents a canonical loop such as the one
906 /// created by createCanonicalLoop, and emits additional instructions to
907 /// turn it into a workshare loop. In particular, it calls an OpenMP
908 /// runtime function in the preheader to obtain the loop bounds to be used in
909 /// the current thread, updates the relevant instructions in the canonical
910 /// loop and calls an OpenMP runtime finalization function after the loop.
911 ///
912 /// \param DL Debug location for instructions added for the
913 /// workshare-loop construct itself.
914 /// \param CLI A descriptor of the canonical loop to workshare.
915 /// \param AllocaIP An insertion point for Alloca instructions usable in the
916 /// preheader of the loop.
917 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
918 /// the loop.
919 ///
920 /// \returns Point where to insert code after the workshare construct.
921 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
922 InsertPointTy AllocaIP,
923 bool NeedsBarrier);
924
925 /// Modifies the canonical loop to be a statically-scheduled workshare loop
926 /// with a user-specified chunk size.
927 ///
928 /// \param DL Debug location for instructions added for the
929 /// workshare-loop construct itself.
930 /// \param CLI A descriptor of the canonical loop to workshare.
931 /// \param AllocaIP An insertion point for Alloca instructions usable in
932 /// the preheader of the loop.
933 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
934 /// loop.
935 /// \param ChunkSize The user-specified chunk size.
936 ///
937 /// \returns Point where to insert code after the workshare construct.
938 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
940 InsertPointTy AllocaIP,
941 bool NeedsBarrier,
942 Value *ChunkSize);
943
944 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
945 ///
946 /// This takes \p CLI, which represents a canonical loop such as the one
947 /// created by createCanonicalLoop, and emits additional instructions to
948 /// turn it into a workshare loop. In particular, it calls an OpenMP
949 /// runtime function in the preheader to obtain, and then in each iteration
950 /// to update, the loop counter.
951 ///
952 /// \param DL Debug location for instructions added for the
953 /// workshare-loop construct itself.
954 /// \param CLI A descriptor of the canonical loop to workshare.
955 /// \param AllocaIP An insertion point for Alloca instructions usable in the
956 /// preheader of the loop.
957 /// \param SchedType Type of scheduling to be passed to the init function.
958 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
959 /// the loop.
960 /// \param Chunk The size of loop chunk considered as a unit when
961 /// scheduling. If nullptr, defaults to 1.
962 ///
963 /// \returns Point where to insert code after the workshare construct.
964 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
965 InsertPointTy AllocaIP,
966 omp::OMPScheduleType SchedType,
967 bool NeedsBarrier,
968 Value *Chunk = nullptr);
969
970 /// Create an alternative version of the loop to support the if clause.
971 ///
972 /// An OpenMP if clause can require generating a second loop. This loop
973 /// is executed when the if clause condition is not met. createIfVersion
974 /// adds a branch instruction to the copied loop if \p IfCond is not met.
975 ///
976 /// \param Loop Original loop which should be versioned.
977 /// \param IfCond Value which corresponds to the if clause condition.
978 /// \param VMap Value to value map to define the relation between
979 /// original and copied loop values and loop blocks.
980 /// \param NamePrefix Optional name prefix for the if.then and if.else blocks.
981 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
982 ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
983
984public:
985 /// Modifies the canonical loop to be a workshare loop.
986 ///
987 /// This takes a \p CLI representing a canonical loop, such as the one
988 /// created by \p createCanonicalLoop and emits additional instructions to
989 /// turn it into a workshare loop. In particular, it calls to an OpenMP
990 /// runtime function in the preheader to obtain the loop bounds to be used in
991 /// the current thread, updates the relevant instructions in the canonical
992 /// loop and calls to an OpenMP runtime finalization function after the loop.
993 ///
994 /// The concrete transformation is done by applyStaticWorkshareLoop,
995 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
996 /// on the value of \p SchedKind and \p ChunkSize.
997 ///
998 /// \param DL Debug location for instructions added for the
999 /// workshare-loop construct itself.
1000 /// \param CLI A descriptor of the canonical loop to workshare.
1001 /// \param AllocaIP An insertion point for Alloca instructions usable in the
1002 /// preheader of the loop.
1003 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
1004 /// the loop.
1005 /// \param SchedKind Scheduling algorithm to use.
1006 /// \param ChunkSize The chunk size for the inner loop.
1007 /// \param HasSimdModifier Whether the simd modifier is present in the
1008 /// schedule clause.
1009 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
1010 /// the schedule clause.
1011 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
1012 /// present in the schedule clause.
1013 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
1014 /// present.
1015 ///
1016 /// \returns Point where to insert code after the workshare construct.
1019 bool NeedsBarrier,
1020 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
1021 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
1022 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
1023 bool HasOrderedClause = false);
1024
1025 /// Tile a loop nest.
1026 ///
1027 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
1028 /// \p Loops must be perfectly nested, from outermost to innermost loop
1029 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
1030 /// of every loop and every tile size must be usable in the outermost
1031 /// loop's preheader. This implies that the loop nest is rectangular.
1032 ///
1033 /// Example:
1034 /// \code
1035 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
1036 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
1037 /// body(i, j);
1038 /// \endcode
1039 ///
1040 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
1041 /// \code
1042 /// for (int i1 = 0; i1 < 3; ++i1)
1043 /// for (int j1 = 0; j1 < 2; ++j1)
1044 /// for (int i2 = 0; i2 < 5; ++i2)
1045 /// for (int j2 = 0; j2 < 7; ++j2)
1046 /// body(i1*5+i2, j1*7+j2);
1047 /// \endcode
1048 ///
1049 /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
1050 /// are referred to as the floor, and the loops i2 and j2 are the tiles. Tiling also
1051 /// handles non-constant trip counts, non-constant tile sizes and trip counts
1052 /// that are not multiples of the tile size. In the latter case the tile loop
1053 /// of the last floor-loop iteration will have fewer iterations than specified
1054 /// as its tile size.
1055 ///
1056 ///
1057 /// @param DL Debug location for instructions added by tiling, for
1058 /// instance the floor- and tile trip count computation.
1059 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
1060 /// invalidated by this method, i.e. should not be used after
1061 /// tiling.
1062 /// @param TileSizes For each loop in \p Loops, the tile size for that
1063 /// dimension.
1064 ///
1065 /// \returns A list of generated loops. Contains twice as many loops as the
1066 /// input loop nest; the first half are the floor loops and the
1067 /// second half are the tile loops.
1068 std::vector<CanonicalLoopInfo *>
1070 ArrayRef<Value *> TileSizes);
1071
1072 /// Fully unroll a loop.
1073 ///
1074 /// Instead of unrolling the loop immediately (and duplicating its body
1075 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
1076 /// metadata.
1077 ///
1078 /// \param DL Debug location for instructions added by unrolling.
1079 /// \param Loop The loop to unroll. The loop will be invalidated.
1081
1082 /// Fully or partially unroll a loop. How the loop is unrolled is determined
1083 /// using LLVM's LoopUnrollPass.
1084 ///
1085 /// \param DL Debug location for instructions added by unrolling.
1086 /// \param Loop The loop to unroll. The loop will be invalidated.
1088
1089 /// Partially unroll a loop.
1090 ///
1091 /// The CanonicalLoopInfo of the unrolled loop for use with chained
1092 /// loop-associated directives can be requested using \p UnrolledCLI. Not
1093 /// needing the CanonicalLoopInfo allows more efficient code generation by
1094 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
1095 /// A loop-associated directive applied to the unrolled loop needs to know the
1096 /// new trip count which means that if using a heuristically determined unroll
1097 /// factor (\p Factor == 0), that factor must be computed immediately. We are
1098 /// using the same logic as the LoopUnrollPass to derive the unroll factor,
1099 /// but which assumes that some canonicalization has taken place (e.g.
1100 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
1101 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
1102 ///
1103 /// \param DL Debug location for instructions added by unrolling.
1104 /// \param Loop The loop to unroll. The loop will be invalidated.
1105 /// \param Factor The factor to unroll the loop by. A factor of 0
1106 /// indicates that a heuristic should be used to determine
1107 /// the unroll-factor.
1108 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
1109 /// partially unrolled loop. Otherwise, uses loop metadata
1110 /// to defer unrolling to the LoopUnrollPass.
1111 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
1112 CanonicalLoopInfo **UnrolledCLI);
1113
1114 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
1115 /// is cloned. The metadata which prevents vectorization is added to
1116 /// the cloned loop. The cloned loop is executed when IfCond is evaluated
1117 /// to false.
1118 ///
1119 /// \param Loop The loop to simd-ize.
1120 /// \param AlignedVars The map which contains pairs of the pointer
1121 /// and its corresponding alignment.
1122 /// \param IfCond The value which corresponds to the if clause
1123 /// condition.
1124 /// \param Order The enum to map order clause.
1125 /// \param Simdlen The Simdlen length to apply to the simd loop.
1126 /// \param Safelen The Safelen length to apply to the simd loop.
1128 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
1129 omp::OrderKind Order, ConstantInt *Simdlen,
1130 ConstantInt *Safelen);
1131
1132 /// Generator for '#omp flush'
1133 ///
1134 /// \param Loc The location where the flush directive was encountered
1135 void createFlush(const LocationDescription &Loc);
1136
1137 /// Generator for '#omp taskwait'
1138 ///
1139 /// \param Loc The location where the taskwait directive was encountered.
1140 void createTaskwait(const LocationDescription &Loc);
1141
1142 /// Generator for '#omp taskyield'
1143 ///
1144 /// \param Loc The location where the taskyield directive was encountered.
1145 void createTaskyield(const LocationDescription &Loc);
1146
1147 /// A struct to pack the relevant information for an OpenMP depend clause.
1148 struct DependData {
1152 explicit DependData() = default;
1154 Value *DepVal)
1156 };
1157
1158 /// Generator for `#omp task`
1159 ///
1160 /// \param Loc The location where the task construct was encountered.
1161 /// \param AllocaIP The insertion point to be used for alloca instructions.
1162 /// \param BodyGenCB Callback that will generate the region code.
1163 /// \param Tied True if the task is tied, false if the task is untied.
1164 /// \param Final i1 value which is `true` if the task is final, `false` if the
1165 /// task is not final.
1166 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1167 /// task is generated, and the encountering thread must
1168 /// suspend the current task region until execution of the
1169 /// structured block associated with the generated task is
1170 /// completed.
1171 /// \param Dependencies DependData entries for the task's depend clauses.
1172 InsertPointTy createTask(const LocationDescription &Loc,
1173 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
1174 bool Tied = true, Value *Final = nullptr,
1175 Value *IfCondition = nullptr,
1176 SmallVector<DependData> Dependencies = {});
1177
1178 /// Generator for the taskgroup construct
1179 ///
1180 /// \param Loc The location where the taskgroup construct was encountered.
1181 /// \param AllocaIP The insertion point to be used for alloca instructions.
1182 /// \param BodyGenCB Callback that will generate the region code.
1183 InsertPointTy createTaskgroup(const LocationDescription &Loc,
1184 InsertPointTy AllocaIP,
1185 BodyGenCallbackTy BodyGenCB);
1186
1188 std::function<std::tuple<std::string, uint64_t>()>;
1189
1190 /// Creates a unique info for a target entry when provided a filename and
1191 /// line number.
1192 ///
1193 /// \param CallBack A callback function which should return the filename the entry
1194 /// resides in as well as the line number for the target entry
1195 /// \param ParentName The name of the parent the target entry resides in, if
1196 /// any.
1199 StringRef ParentName = "");
1200
1201 /// Functions used to generate reductions. Such functions take two Values
1202 /// representing LHS and RHS of the reduction, respectively, and a reference
1203 /// to the value that is updated to refer to the reduction result.
1206
1207 /// Functions used to generate atomic reductions. Such functions take two
1208 /// Values representing pointers to LHS and RHS of the reduction, as well as
1209 /// the element type of these pointers. They are expected to atomically
1210 /// update the LHS to the reduced value.
1213
1214 /// Information about an OpenMP reduction.
1222
1223 /// Reduction element type, must match pointee type of variable.
1225
1226 /// Reduction variable of pointer type.
1228
1229 /// Thread-private partial reduction variable.
1231
1232 /// Callback for generating the reduction body. The IR produced by this will
1233 /// be used to combine two values in a thread-safe context, e.g., under
1234 /// lock or within the same thread, and therefore need not be atomic.
1236
1237 /// Callback for generating the atomic reduction body, may be null. The IR
1238 /// produced by this will be used to atomically combine two values during
1239 /// reduction. If null, the implementation will use the non-atomic version
1240 /// along with the appropriate synchronization mechanisms.
1242 };
1243
1244 // TODO: provide atomic and non-atomic reduction generators for reduction
1245 // operators defined by the OpenMP specification.
1246
1247 /// Generator for '#omp reduction'.
1248 ///
1249 /// Emits the IR instructing the runtime to perform the specific kind of
1250 /// reductions. Expects reduction variables to have been privatized and
1251 /// initialized to reduction-neutral values separately. Emits the calls to
1252 /// runtime functions as well as the reduction function and the basic blocks
1253 /// performing the reduction atomically and non-atomically.
1254 ///
1255 /// The code emitted for the following:
1256 ///
1257 /// \code
1258 /// type var_1;
1259 /// type var_2;
1260 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1261 /// /* body */;
1262 /// \endcode
1263 ///
1264 /// corresponds to the following sketch.
1265 ///
1266 /// \code
1267 /// void _outlined_par() {
1268 /// // N is the number of different reductions.
1269 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1270 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1271 /// _omp_reduction_func,
1272 /// _gomp_critical_user.reduction.var)) {
1273 /// case 1: {
1274 /// var_1 = var_1 <reduction-op> privatized_var_1;
1275 /// var_2 = var_2 <reduction-op> privatized_var_2;
1276 /// // ...
1277 /// __kmpc_end_reduce(...);
1278 /// break;
1279 /// }
1280 /// case 2: {
1281 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
1282 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
1283 /// // ...
1284 /// break;
1285 /// }
1286 /// default: break;
1287 /// }
1288 /// }
1289 ///
1290 /// void _omp_reduction_func(void **lhs, void **rhs) {
1291 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1292 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1293 /// // ...
1294 /// }
1295 /// \endcode
1296 ///
1297 /// \param Loc The location where the reduction was
1298 /// encountered. Must be within the associated
1299 /// directive and after the last local access to the
1300 /// reduction variables.
1301 /// \param AllocaIP An insertion point suitable for allocas usable
1302 /// in reductions.
1303 /// \param ReductionInfos A list of info on each reduction variable.
1304 /// \param IsNoWait A flag set if the reduction is marked as nowait.
1306 InsertPointTy AllocaIP,
1307 ArrayRef<ReductionInfo> ReductionInfos,
1308 bool IsNoWait = false);
1309
1310 ///}
1311
1312 /// Return the insertion point used by the underlying IRBuilder.
1314
1315 /// Update the internal location to \p Loc.
1317 Builder.restoreIP(Loc.IP);
1319 return Loc.IP.getBlock() != nullptr;
1320 }
1321
1322 /// Return the function declaration for the runtime function with \p FnID.
1325
1327
1328 /// Return the (LLVM-IR) string describing the source location \p LocStr.
1329 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1330
1331 /// Return the (LLVM-IR) string describing the default source location.
1333
1334 /// Return the (LLVM-IR) string describing the source location identified by
1335 /// the arguments.
1336 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
1337 unsigned Line, unsigned Column,
1338 uint32_t &SrcLocStrSize);
1339
1340 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
1341 /// fallback if \p DL does not specify the function name.
1343 Function *F = nullptr);
1344
1345 /// Return the (LLVM-IR) string describing the source location \p Loc.
1346 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
1347 uint32_t &SrcLocStrSize);
1348
1349 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
1350 /// TODO: Create an enum class for the Reserve2Flags
1351 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
1352 omp::IdentFlag Flags = omp::IdentFlag(0),
1353 unsigned Reserve2Flags = 0);
1354
1355 /// Create a hidden global flag \p Name in the module with initial value \p
1356 /// Value.
1358
1359 /// Generate control flow and cleanup for cancellation.
1360 ///
1361 /// \param CancelFlag Flag indicating if the cancellation is performed.
1362 /// \param CanceledDirective The kind of directive that is canceled.
1363 /// \param ExitCB Extra code to be generated in the exit block.
1364 void emitCancelationCheckImpl(Value *CancelFlag,
1365 omp::Directive CanceledDirective,
1366 FinalizeCallbackTy ExitCB = {});
1367
1368 /// Generate a target region entry call.
1369 ///
1370 /// \param Loc The location at which the request originated and is fulfilled.
1371 /// \param AllocaIP The insertion point to be used for alloca instructions.
1372 /// \param Return Return value of the created function returned by reference.
1373 /// \param DeviceID Identifier for the device via the 'device' clause.
1374 /// \param NumTeams Number of teams for the region via the 'num_teams' clause
1375 /// or 0 if unspecified and -1 if there is no 'teams' clause.
1376 /// \param NumThreads Number of threads via the 'thread_limit' clause.
1377 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
1378 /// \param KernelArgs Array of arguments to the kernel.
1379 InsertPointTy emitTargetKernel(const LocationDescription &Loc,
1380 InsertPointTy AllocaIP, Value *&Return,
1381 Value *Ident, Value *DeviceID, Value *NumTeams,
1382 Value *NumThreads, Value *HostPtr,
1383 ArrayRef<Value *> KernelArgs);
1384
1385 /// Generate a barrier runtime call.
1386 ///
1387 /// \param Loc The location at which the request originated and is fulfilled.
1388 /// \param DK The directive which caused the barrier
1389 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1390 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1391 /// should be checked and acted upon.
1392 ///
1393 /// \returns The insertion point after the barrier.
1394 InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1395 omp::Directive DK, bool ForceSimpleCall,
1396 bool CheckCancelFlag);
1397
1398 /// Generate a flush runtime call.
1399 ///
1400 /// \param Loc The location at which the request originated and is fulfilled.
1401 void emitFlush(const LocationDescription &Loc);
1402
1403 /// The finalization stack made up of finalize callbacks currently in-flight,
1404 /// wrapped into FinalizationInfo objects that reference also the finalization
1405 /// target block and the kind of cancellable directive.
1407
1408 /// Check whether the finalization stack is non-empty and its last entry is
1409 /// both cancellable and associated with a directive of kind \p DK.
1410 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1411 const bool HasEntry = !FinalizationStack.empty();
1412 return HasEntry && FinalizationStack.back().IsCancellable &&
1413 FinalizationStack.back().DK == DK;
1414 }
1415
1416 /// Generate a taskwait runtime call.
1417 ///
1418 /// \param Loc The location at which the request originated and is fulfilled.
1419 void emitTaskwaitImpl(const LocationDescription &Loc);
1420
1421 /// Generate a taskyield runtime call.
1422 ///
1423 /// \param Loc The location at which the request originated and is fulfilled.
1424 void emitTaskyieldImpl(const LocationDescription &Loc);
1425
1426 /// Return the current thread ID.
1427 ///
1428 /// \param Ident The ident (ident_t*) describing the query origin.
1430
1431 /// The OpenMPIRBuilder Configuration
1433
1434 /// The underlying LLVM-IR module
1436
1437 /// The LLVM-IR Builder used to create IR.
1439
1440 /// Map to remember source location strings
1442
1443 /// Map to remember existing ident_t*.
1445
1446 /// Info manager to keep track of target regions.
1448
1449 /// The target triple of the underlying module.
1450 const Triple T;
1451
1452 /// Helper that contains information about regions we need to outline
1453 /// during finalization.
1455 using PostOutlineCBTy = std::function<void(Function &)>;
1459
1460 /// Collect all blocks in between EntryBB and ExitBB in both the given
1461 /// vector and set.
1463 SmallVectorImpl<BasicBlock *> &BlockVector);
1464
1465 /// Return the function that contains the region to be outlined.
1466 Function *getFunction() const { return EntryBB->getParent(); }
1467 };
1468
1469 /// Collection of regions that need to be outlined during finalization.
1471
1472 /// Collection of owned canonical loop objects that eventually need to be
1473 /// free'd.
1474 std::forward_list<CanonicalLoopInfo> LoopInfos;
1475
1476 /// Add a new region that will be outlined later; \p OI is moved, not copied.
1477 void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(std::move(OI)); }
1478
1479 /// An ordered map of auto-generated variables to their unique names.
1480 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1481 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1482 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1483 /// variables.
1485
1486 /// Computes the size of type in bytes.
1487 Value *getSizeInBytes(Value *BasePtr);
1488
1489 // Emit a branch from the current block to the Target block only if
1490 // the current block has a terminator.
1492
1493 // If BB has no use then delete it and return. Else place BB after the current
1494 // block, if possible, or else at the end of the function. Also add a branch
1495 // from current block to BB if current block does not have a terminator.
1496 void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
1497
1498 /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
1499 /// Here is the logic:
1500 /// if (Cond) {
1501 /// ThenGen();
1502 /// } else {
1503 /// ElseGen();
1504 /// }
1506 BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
1507
1508 /// Create the global variable holding the offload mappings information.
1510 std::string VarName);
1511
1512 /// Create the global variable holding the offload names information.
1515 std::string VarName);
1516
1519 AllocaInst *Args = nullptr;
1521 };
1522
1523 /// Create the allocas instruction used in call to mapper functions.
1525 InsertPointTy AllocaIP, unsigned NumOperands,
1527
1528 /// Create the call for the target mapper function.
1529 /// \param Loc The source location description.
1530 /// \param MapperFunc Function to be called.
1531 /// \param SrcLocInfo Source location information global.
1532 /// \param MaptypesArg The argument types.
1533 /// \param MapnamesArg The argument names.
1534 /// \param MapperAllocas The AllocaInst used for the call.
1535 /// \param DeviceID Device ID for the call.
1536 /// \param NumOperands Number of operands in the call.
1537 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1538 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1539 struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1540 unsigned NumOperands);
1541
1542 /// Container for the arguments used to pass data to the runtime library.
1544 /// The array of base pointer passed to the runtime library.
1546 /// The array of section pointers passed to the runtime library.
1548 /// The array of sizes passed to the runtime library.
1549 Value *SizesArray = nullptr;
1550 /// The array of map types passed to the runtime library for the beginning
1551 /// of the region or for the entire region if there are no separate map
1552 /// types for the region end.
1554 /// The array of map types passed to the runtime library for the end of the
1555 /// region, or nullptr if there are no separate map types for the region
1556 /// end.
1558 /// The array of user-defined mappers passed to the runtime library.
1560 /// The array of original declaration names of mapped pointers sent to the
1561 /// runtime library for debugging
1563
1564 explicit TargetDataRTArgs() {}
1573 };
1574
1575 /// Data structure that contains the needed information to construct the
1576 /// kernel args vector.
1578 /// Number of arguments passed to the runtime library.
1580 /// Arguments passed to the runtime library
1582 /// The number of iterations
1584 /// The number of teams.
1586 /// The number of threads.
1588 /// The size of the dynamic shared memory.
1590 /// True if the kernel has 'no wait' clause.
1592
1593 /// Constructor for TargetKernelArgs
1601 };
1602
1603 /// Create the kernel args vector used by emitTargetKernel. This function
1604 /// creates various constant values that are used in the resulting args
1605 /// vector.
1606 static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
1608 SmallVector<Value *> &ArgsVector);
1609
1610 /// Struct that keeps the information that should be kept throughout
1611 /// a 'target data' region.
1613 /// Set to true if device pointer information has to be obtained.
1614 bool RequiresDevicePointerInfo = false;
1615 /// Set to true if Clang emits separate runtime calls for the beginning and
1616 /// end of the region. These calls might have separate map type arrays.
1617 bool SeparateBeginEndCalls = false;
1618
1619 public:
1621
1624
1625 /// Indicate whether any user-defined mapper exists.
1626 bool HasMapper = false;
1627 /// The total number of pointers passed to the runtime library.
1628 unsigned NumberOfPtrs = 0u;
1629
1630 explicit TargetDataInfo() {}
1631 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1632 bool SeparateBeginEndCalls)
1633 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1634 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1635 /// Clear information about the data arrays.
1638 HasMapper = false;
1639 NumberOfPtrs = 0u;
1640 }
1641 /// Return true if the current target data information has valid arrays.
1642 bool isValid() {
1646 }
1647 bool requiresDevicePointerInfo() { return RequiresDevicePointerInfo; }
1648 bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
1649 };
1650
1658
1659 /// This structure contains combined information generated for mappable
1660 /// clauses, including base pointers, pointers, sizes, map types, user-defined
1661 /// mappers, and non-contiguous information.
1662 struct MapInfosTy {
1664 bool IsNonContiguous = false;
1669 };
1677
1678 /// Append arrays in \a CurInfo.
1679 void append(MapInfosTy &CurInfo) {
1681 CurInfo.BasePointers.end());
1682 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1684 CurInfo.DevicePointers.end());
1685 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1686 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1687 Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1689 CurInfo.NonContigInfo.Dims.end());
1691 CurInfo.NonContigInfo.Offsets.end());
1693 CurInfo.NonContigInfo.Counts.end());
1695 CurInfo.NonContigInfo.Strides.end());
1696 }
1697 };
1698
1699 /// Callback function type for functions emitting the host fallback code that
1700 /// is executed when the kernel launch fails. It takes an insertion point as
1701 /// parameter where the code should be emitted. It returns an insertion point
1702 /// that points right after the emitted code.
1704
1705 /// Generate a target region entry call and host fallback call.
1706 ///
1707 /// \param Loc The location at which the request originated and is fulfilled.
1708 /// \param OutlinedFn The outlined kernel function.
1709 /// \param OutlinedFnID The outlined function ID.
1710 /// \param EmitTargetCallFallbackCB Call back function to generate host
1711 /// fallback code.
1712 /// \param Args Data structure holding information about the kernel arguments.
1713 /// \param DeviceID Identifier for the device via the 'device' clause.
1714 /// \param RTLoc Source location identifier
1715 /// \param AllocaIP The insertion point to be used for alloca instructions.
1717 const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
1718 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1719 Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
1720
1721 /// Emit the arguments to be passed to the runtime library based on the
1722 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
1723 /// ForEndCall, emit map types to be passed for the end of the region instead
1724 /// of the beginning.
1728 bool EmitDebug = false,
1729 bool ForEndCall = false);
1730
1731 /// Emit an array of struct descriptors to be assigned to the offload args.
1733 InsertPointTy CodeGenIP,
1734 MapInfosTy &CombinedInfo,
1736
1737 /// Emit the arrays used to pass the captures and map information to the
1738 /// offloading runtime library. If there is no map or capture information,
1739 /// return nullptr by reference.
1741 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
1742 TargetDataInfo &Info, bool IsNonContiguous = false,
1743 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
1744 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
1745
1746 /// Creates offloading entry for the provided entry ID \a ID, address \a
1747 /// Addr, size \a Size, and flags \a Flags.
1749 int32_t Flags, GlobalValue::LinkageTypes,
1750 StringRef Name = "");
1751
1752 /// The kind of errors that can occur when emitting the offload entries and
1753 /// metadata.
1759
1760 /// Callback function type
1762 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1763
1764 // Emit the offloading entries and metadata so that the device codegen side
1765 // can easily figure out what to emit. The produced metadata looks like
1766 // this:
1767 //
1768 // !omp_offload.info = !{!1, ...}
1769 //
1770 // We only generate metadata for functions that contain target regions.
1772 EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1773
1774public:
1775 /// Generator for __kmpc_copyprivate
1776 ///
1777 /// \param Loc The source location description.
1778 /// \param BufSize Number of elements in the buffer.
1779 /// \param CpyBuf List of pointers to data to be copied.
1780 /// \param CpyFn function to call for copying data.
1781 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1782 ///
1783 /// \return The insertion position *after* the CopyPrivate call.
1784
1786 llvm::Value *BufSize, llvm::Value *CpyBuf,
1787 llvm::Value *CpyFn, llvm::Value *DidIt);
1788
1789 /// Generator for '#omp single'
1790 ///
1791 /// \param Loc The source location description.
1792 /// \param BodyGenCB Callback that will generate the region code.
1793 /// \param FiniCB Callback to finalize variable copies.
1794 /// \param IsNowait If false, a barrier is emitted.
1795 /// \param DidIt Local variable used as a flag to indicate 'single' thread
1796 ///
1797 /// \returns The insertion position *after* the single call.
1799 BodyGenCallbackTy BodyGenCB,
1800 FinalizeCallbackTy FiniCB, bool IsNowait,
1801 llvm::Value *DidIt);
1802
1803 /// Generator for '#omp master'
1804 ///
1805 /// \param Loc The insert and source location description.
1806 /// \param BodyGenCB Callback that will generate the region code.
1807 /// \param FiniCB Callback to finalize variable copies.
1808 ///
1809 /// \returns The insertion position *after* the master.
1811 BodyGenCallbackTy BodyGenCB,
1812 FinalizeCallbackTy FiniCB);
1813
1814 /// Generator for '#omp masked'
1815 ///
1816 /// \param Loc The insert and source location description.
1817 /// \param BodyGenCB Callback that will generate the region code.
1818 /// \param FiniCB Callback to finalize variable copies.
1819 ///
1820 /// \returns The insertion position *after* the masked.
1822 BodyGenCallbackTy BodyGenCB,
1824
1825 /// Generator for '#omp critical'
1826 ///
1827 /// \param Loc The insert and source location description.
1828 /// \param BodyGenCB Callback that will generate the region body code.
1829 /// \param FiniCB Callback to finalize variable copies.
1830 /// \param CriticalName name of the lock used by the critical directive
1831 /// \param HintInst Hint Instruction for hint clause associated with critical
1832 ///
1833 /// \returns The insertion position *after* the critical.
1835 BodyGenCallbackTy BodyGenCB,
1836 FinalizeCallbackTy FiniCB,
1837 StringRef CriticalName, Value *HintInst);
1838
1839 /// Generator for '#omp ordered depend (source | sink)'
1840 ///
1841 /// \param Loc The insert and source location description.
1842 /// \param AllocaIP The insertion point to be used for alloca instructions.
1843 /// \param NumLoops The number of loops in depend clause.
1844 /// \param StoreValues The value will be stored in vector address.
1845 /// \param Name The name of alloca instruction.
1846 /// \param IsDependSource If true, depend source; otherwise, depend sink.
1847 ///
1848 /// \return The insertion position *after* the ordered.
1850 InsertPointTy AllocaIP, unsigned NumLoops,
1851 ArrayRef<llvm::Value *> StoreValues,
1852 const Twine &Name, bool IsDependSource);
1853
1854 /// Generator for '#omp ordered [threads | simd]'
1855 ///
1856 /// \param Loc The insert and source location description.
1857 /// \param BodyGenCB Callback that will generate the region code.
1858 /// \param FiniCB Callback to finalize variable copies.
1859 /// \param IsThreads If true, with threads clause or without clause;
1860 /// otherwise, with simd clause;
1861 ///
1862 /// \returns The insertion position *after* the ordered.
1864 BodyGenCallbackTy BodyGenCB,
1865 FinalizeCallbackTy FiniCB,
1866 bool IsThreads);
1867
1868 /// Generator for '#omp sections'
1869 ///
1870 /// \param Loc The insert and source location description.
1871 /// \param AllocaIP The insertion points to be used for alloca instructions.
1872 /// \param SectionCBs Callbacks that will generate body of each section.
1873 /// \param PrivCB Callback to copy a given variable (think copy constructor).
1874 /// \param FiniCB Callback to finalize variable copies.
1875 /// \param IsCancellable Flag to indicate a cancellable parallel region.
1876 /// \param IsNowait If true, barrier - to ensure all sections are executed
1877 /// before moving forward will not be generated.
1878 /// \returns The insertion position *after* the sections.
1880 InsertPointTy AllocaIP,
1882 PrivatizeCallbackTy PrivCB,
1883 FinalizeCallbackTy FiniCB, bool IsCancellable,
1884 bool IsNowait);
1885
1886 /// Generator for '#omp section'
1887 ///
1888 /// \param Loc The insert and source location description.
1889 /// \param BodyGenCB Callback that will generate the region body code.
1890 /// \param FiniCB Callback to finalize variable copies.
1891 /// \returns The insertion position *after* the section.
1893 BodyGenCallbackTy BodyGenCB,
1894 FinalizeCallbackTy FiniCB);
1895
1896 /// Generator for `#omp teams`
1897 ///
1898 /// \param Loc The location where the teams construct was encountered.
1899 /// \param BodyGenCB Callback that will generate the region code.
1900 /// \param NumTeamsLower Lower bound on number of teams. If this is nullptr,
1901 /// it is as if lower bound is specified as equal to upperbound. If
1902 /// this is non-null, then upperbound must also be non-null.
1903 /// \param NumTeamsUpper Upper bound on the number of teams.
1904 /// \param ThreadLimit Limit on the number of threads that may participate in
1905 /// a contention group created by each team.
1906 /// \param IfExpr is the integer argument value of the if condition on the
1907 /// teams clause.
1910 Value *NumTeamsLower = nullptr, Value *NumTeamsUpper = nullptr,
1911 Value *ThreadLimit = nullptr, Value *IfExpr = nullptr);
1912
1913 /// Generate conditional branch and relevant BasicBlocks through which private
1914 /// threads copy the 'copyin' variables from Master copy to threadprivate
1915 /// copies.
1916 ///
1917 /// \param IP insertion block for copyin conditional
1918 /// \param MasterVarPtr a pointer to the master variable
1919 /// \param PrivateAddr a pointer to the threadprivate variable
1920 /// \param IntPtrTy Pointer size type
1921 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1922 /// and copy.in.end block
1923 ///
1924 /// \returns The insertion point where copying operation to be emitted.
1926 Value *PrivateAddr,
1927 llvm::IntegerType *IntPtrTy,
1928 bool BranchtoEnd = true);
1929
1930 /// Create a runtime call for kmpc_Alloc
1931 ///
1932 /// \param Loc The insert and source location description.
1933 /// \param Size Size of allocated memory space
1934 /// \param Allocator Allocator information instruction
1935 /// \param Name Name of call Instruction for OMP_alloc
1936 ///
1937 /// \returns CallInst to the OMP_Alloc call
1939 Value *Allocator, std::string Name = "");
1940
1941 /// Create a runtime call for kmpc_free
1942 ///
1943 /// \param Loc The insert and source location description.
1944 /// \param Addr Address of memory space to be freed
1945 /// \param Allocator Allocator information instruction
1946 /// \param Name Name of call Instruction for OMP_Free
1947 ///
1948 /// \returns CallInst to the OMP_Free call
1950 Value *Allocator, std::string Name = "");
1951
1952 /// Create a runtime call for kmpc_threadprivate_cached
1953 ///
1954 /// \param Loc The insert and source location description.
1955 /// \param Pointer pointer to data to be cached
1956 /// \param Size size of data to be cached
1957 /// \param Name Name of call Instruction for callinst
1958 ///
1959 /// \returns CallInst to the thread private cache call.
1963 const llvm::Twine &Name = Twine(""));
1964
1965 /// Create a runtime call for __tgt_interop_init
1966 ///
1967 /// \param Loc The insert and source location description.
1968 /// \param InteropVar variable to be allocated
1969 /// \param InteropType type of interop operation
1970 /// \param Device device to which offloading will occur
1971 /// \param NumDependences number of dependence variables
1972 /// \param DependenceAddress pointer to dependence variables
1973 /// \param HaveNowaitClause does nowait clause exist
1974 ///
1975 /// \returns CallInst to the __tgt_interop_init call
1977 Value *InteropVar,
1978 omp::OMPInteropType InteropType, Value *Device,
1979 Value *NumDependences,
1980 Value *DependenceAddress,
1981 bool HaveNowaitClause);
1982
1983 /// Create a runtime call for __tgt_interop_destroy
1984 ///
1985 /// \param Loc The insert and source location description.
1986 /// \param InteropVar variable to be allocated
1987 /// \param Device device to which offloading will occur
1988 /// \param NumDependences number of dependence variables
1989 /// \param DependenceAddress pointer to dependence variables
1990 /// \param HaveNowaitClause does nowait clause exist
1991 ///
1992 /// \returns CallInst to the __tgt_interop_destroy call
1994 Value *InteropVar, Value *Device,
1995 Value *NumDependences,
1996 Value *DependenceAddress,
1997 bool HaveNowaitClause);
1998
1999 /// Create a runtime call for __tgt_interop_use
2000 ///
2001 /// \param Loc The insert and source location description.
2002 /// \param InteropVar variable to be allocated
2003 /// \param Device device to which offloading will occur
2004 /// \param NumDependences number of dependence variables
2005 /// \param DependenceAddress pointer to dependence variables
2006 /// \param HaveNowaitClause does nowait clause exist
2007 ///
2008 /// \returns CallInst to the __tgt_interop_use call
2010 Value *InteropVar, Value *Device,
2011 Value *NumDependences, Value *DependenceAddress,
2012 bool HaveNowaitClause);
2013
2014 /// The `omp target` interface
2015 ///
2016 /// For more information about the usage of this interface,
2017 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
2018 ///
2019 ///{
2020
2021 /// Create a runtime call for kmpc_target_init
2022 ///
2023 /// \param Loc The insert and source location description.
2024 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
2025 /// \param MinThreadsVal Minimal number of threads, or 0.
2026 /// \param MaxThreadsVal Maximal number of threads, or 0.
2027 /// \param MinTeamsVal Minimal number of teams, or 0.
2028 /// \param MaxTeamsVal Maximal number of teams, or 0.
2029 InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD,
2030 int32_t MinThreadsVal = 0,
2031 int32_t MaxThreadsVal = 0,
2032 int32_t MinTeamsVal = 0,
2033 int32_t MaxTeamsVal = 0);
2034
2035 /// Create a runtime call for kmpc_target_deinit
2036 ///
2037 /// \param Loc The insert and source location description.
2038 /// \param TeamsReductionDataSize The maximal size of all the reduction data
2039 /// for teams reduction.
2040 /// \param TeamsReductionBufferLength The number of elements (each of up to
2041 /// \p TeamsReductionDataSize size), in the teams reduction buffer.
2043 int32_t TeamsReductionDataSize = 0,
2044 int32_t TeamsReductionBufferLength = 1024);
2045
2046 ///}
2047
2048 /// Helpers to read/write kernel annotations from the IR.
2049 ///
2050 ///{
2051
2052 /// Read/write bounds on threads for \p Kernel. Read will return 0 if none
2053 /// is set.
2054 static std::pair<int32_t, int32_t>
2055 readThreadBoundsForKernel(const Triple &T, Function &Kernel);
2056 static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel,
2057 int32_t LB, int32_t UB);
2058
2059 /// Read/write bounds on teams for \p Kernel. Read will return 0 if none
2060 /// is set.
2061 static std::pair<int32_t, int32_t> readTeamBoundsForKernel(const Triple &T,
2062 Function &Kernel);
2063 static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB,
2064 int32_t UB);
2065 ///}
2066
2067private:
2068 // Sets the function attributes expected for the outlined function
2069 void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn);
2070
2071 // Creates the function ID/Address for the given outlined function.
2072 // In the case of an embedded device function the address of the function is
2073 // used, in the case of a non-offload function a constant is created.
2074 Constant *createOutlinedFunctionID(Function *OutlinedFn,
2075 StringRef EntryFnIDName);
2076
2077 // Creates the region entry address for the outlined function
2078 Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
2079 StringRef EntryFnName);
2080
2081public:
2082 /// Functions used to generate a function with the given name.
2083 using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
2084
2085 /// Create a unique name for the entry function using the source location
2086 /// information of the current target region. The name will be something like:
2087 ///
2088 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
2089 ///
2090 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
2091 /// mangled name of the function that encloses the target region and BB is the
2092 /// line number of the target region. CC is a count added when more than one
2093 /// region is located at the same location.
2094 ///
2095 /// If this target outline function is not an offload entry, we don't need to
2096 /// register it. This may happen if it is guarded by an if clause that is
2097 /// false at compile time, or no target archs have been specified.
2098 ///
2099 /// The created target region ID is used by the runtime library to identify
2100 /// the current target region, so it only has to be unique and not
2101 /// necessarily point to anything. It could be the pointer to the outlined
2102 /// function that implements the target region, but we aren't using that so
2103 /// that the compiler doesn't need to keep that, and could therefore inline
2104 /// the host function if proven worthwhile during optimization. On the other
2105 /// hand, if emitting code for the device, the ID has to be the function
2106 /// address so that it can be retrieved from the offloading entry and launched
2107 /// by the runtime library. We also mark the outlined function to have
2108 /// external linkage in case we are emitting code for the device, because
2109 /// these functions will be entry points to the device.
2110 ///
2111 /// \param InfoManager The info manager keeping track of the offload entries
2112 /// \param EntryInfo The entry information about the function
2113 /// \param GenerateFunctionCallback The callback function to generate the code
2114 /// \param OutlinedFunction Pointer to the outlined function
2115 /// \param EntryFnIDName Name of the ID to be created
2117 FunctionGenCallback &GenerateFunctionCallback,
2118 bool IsOffloadEntry, Function *&OutlinedFn,
2119 Constant *&OutlinedFnID);
2120
2121 /// Registers the given function and sets up the attributes of the function.
2122 /// Returns the FunctionID.
2123 ///
2124 /// \param InfoManager The info manager keeping track of the offload entries
2125 /// \param EntryInfo The entry information about the function
2126 /// \param OutlinedFunction Pointer to the outlined function
2127 /// \param EntryFnName Name of the outlined function
2128 /// \param EntryFnIDName Name of the ID to be created
2130 Function *OutlinedFunction,
2131 StringRef EntryFnName,
2132 StringRef EntryFnIDName);
2133
2134 /// Type of BodyGen to use for region codegen
2135 ///
2136 /// Priv: If device pointer privatization is required, emit the body of the
2137 /// region here. It will have to be duplicated: with and without
2138 /// privatization.
2139 /// DupNoPriv: If we need device pointer privatization, we need
2140 /// to emit the body of the region with no privatization in the 'else' branch
2141 /// of the conditional.
2142 /// NoPriv: If we don't require privatization of device
2143 /// pointers, we emit the body in between the runtime calls. This avoids
2144 /// duplicating the body code.
2146
2147 /// Callback type for creating the map infos for the kernel parameters.
2148 /// \param CodeGenIP is the insertion point where code should be generated,
2149 /// if any.
2152
2153 /// Generator for '#omp target data'
2154 ///
2155 /// \param Loc The location where the target data construct was encountered.
2156 /// \param AllocaIP The insertion points to be used for alloca instructions.
2157 /// \param CodeGenIP The insertion point at which the target directive code
2158 /// should be placed.
2159 /// \param IsBegin If true then emits begin mapper call otherwise emits
2160 /// end mapper call.
2161 /// \param DeviceID Stores the DeviceID from the device clause.
2162 /// \param IfCond Value which corresponds to the if clause condition.
2163 /// \param Info Stores all information related to the Target Data directive.
2164 /// \param GenMapInfoCB Callback that populates the MapInfos and returns.
2165 /// \param BodyGenCB Optional Callback to generate the region code.
2166 /// \param DeviceAddrCB Optional callback to generate code related to
2167 /// use_device_ptr and use_device_addr.
2168 /// \param CustomMapperCB Optional callback to generate code related to
2169 /// custom mappers.
2171 const LocationDescription &Loc, InsertPointTy AllocaIP,
2172 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
2174 omp::RuntimeFunction *MapperFunc = nullptr,
2176 BodyGenTy BodyGenType)>
2177 BodyGenCB = nullptr,
2178 function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
2179 function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
2180 Value *SrcLocInfo = nullptr);
2181
2183 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
2184
2186 Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP,
2187 InsertPointTy CodeGenIP)>;
2188
2189 /// Generator for '#omp target'
2190 ///
2191 /// \param Loc where the target construct was encountered.
2192 /// \param CodeGenIP The insertion point where the call to the outlined
2193 /// function should be emitted.
2194 /// \param EntryInfo The entry information about the function.
2195 /// \param NumTeams Number of teams specified in the num_teams clause.
2196 /// \param NumThreads Number of threads specified in the thread_limit clause.
2197 /// \param Inputs The input values to the region that will be passed
2198 /// as arguments to the outlined function.
2199 /// \param BodyGenCB Callback that will generate the region code.
2200 /// \param ArgAccessorFuncCB Callback that will generate accessors
2201 /// instructions for passed in target arguments where necessary
2205 TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
2206 int32_t NumThreads,
2208 GenMapInfoCallbackTy GenMapInfoCB,
2209 TargetBodyGenCallbackTy BodyGenCB,
2210 TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB);
2211
2212 /// Returns __kmpc_for_static_init_* runtime function for the specified
2213 /// size \a IVSize and sign \a IVSigned. Will create a distribute call
2214 /// __kmpc_distribute_static_init* if \a IsGPUDistribute is set.
2215 FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned,
2216 bool IsGPUDistribute);
2217
2218 /// Returns __kmpc_dispatch_init_* runtime function for the specified
2219 /// size \a IVSize and sign \a IVSigned.
2220 FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned);
2221
2222 /// Returns __kmpc_dispatch_next_* runtime function for the specified
2223 /// size \a IVSize and sign \a IVSigned.
2224 FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned);
2225
2226 /// Returns __kmpc_dispatch_fini_* runtime function for the specified
2227 /// size \a IVSize and sign \a IVSigned.
2228 FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned);
2229
2230 /// Declarations for LLVM-IR types (simple, array, function and structure) are
2231 /// generated below. Their names are defined and used in OpenMPKinds.def. Here
2232 /// we provide the declarations, the initializeTypes function will provide the
2233 /// values.
2234 ///
2235 ///{
2236#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
2237#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
2238 ArrayType *VarName##Ty = nullptr; \
2239 PointerType *VarName##PtrTy = nullptr;
2240#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
2241 FunctionType *VarName = nullptr; \
2242 PointerType *VarName##Ptr = nullptr;
2243#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
2244 StructType *VarName = nullptr; \
2245 PointerType *VarName##Ptr = nullptr;
2246#include "llvm/Frontend/OpenMP/OMPKinds.def"
2247
2248 ///}
2249
2250private:
2251 /// Create all simple and struct types exposed by the runtime and remember
2252 /// the llvm::PointerTypes of them for easy access later.
2253 void initializeTypes(Module &M);
2254
2255 /// Common interface for generating entry calls for OMP Directives.
2256 /// If the directive has a region/body, it will set the insertion
2257 /// point to the body.
2258 ///
2259 /// \param OMPD Directive to generate entry blocks for
2260 /// \param EntryCall Call to the entry OMP Runtime Function
2261 /// \param ExitBB block where the region ends.
2262 /// \param Conditional indicate if the entry call result will be used
2263 /// to evaluate a conditional of whether a thread will execute
2264 /// body code or not.
2265 ///
2266 /// \return The insertion position in exit block
2267 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
2268 BasicBlock *ExitBB,
2269 bool Conditional = false);
2270
2271 /// Common interface to finalize the region
2272 ///
2273 /// \param OMPD Directive to generate exiting code for
2274 /// \param FinIP Insertion point for emitting Finalization code and exit call
2275 /// \param ExitCall Call to the ending OMP Runtime Function
2276 /// \param HasFinalize indicate if the directive will require finalization
2277 /// and has a finalization callback in the stack that
2278 /// should be called.
2279 ///
2280 /// \return The insertion position in exit block
2281 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
2282 InsertPointTy FinIP,
2283 Instruction *ExitCall,
2284 bool HasFinalize = true);
2285
2286 /// Common Interface to generate OMP inlined regions
2287 ///
2288 /// \param OMPD Directive to generate inlined region for
2289 /// \param EntryCall Call to the entry OMP Runtime Function
2290 /// \param ExitCall Call to the ending OMP Runtime Function
2291 /// \param BodyGenCB Body code generation callback.
2292 /// \param FiniCB Finalization Callback. Will be called when finalizing region
2293 /// \param Conditional indicate if the entry call result will be used
2294 /// to evaluate a conditional of whether a thread will execute
2295 /// body code or not.
2296 /// \param HasFinalize indicate if the directive will require finalization
2297 /// and has a finalization callback in the stack that
2298 /// should be called.
2299 /// \param IsCancellable if HasFinalize is set to true, indicate if the
2300 /// directive should be cancellable.
2301 /// \return The insertion point after the region
2302
2304 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
2305 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
2306 FinalizeCallbackTy FiniCB, bool Conditional = false,
2307 bool HasFinalize = true, bool IsCancellable = false);
2308
2309 /// Get a name built from \p Parts using platform-specific separators.
2310 /// \param Parts different parts of the final name that need separation
2311 /// \param FirstSeparator First separator used between the initial two
2312 /// parts of the name.
2313 /// \param Separator separator used between all of the remaining consecutive
2314 /// parts of the name
2315 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
2316 StringRef FirstSeparator,
2317 StringRef Separator);
2318
2319 /// Returns corresponding lock object for the specified critical region
2320 /// name. If the lock object does not exist it is created, otherwise the
2321 /// reference to the existing copy is returned.
2322 /// \param CriticalName Name of the critical region.
2323 ///
2324 Value *getOMPCriticalRegionLock(StringRef CriticalName);
2325
2326 /// Callback type for Atomic Expression update
2327 /// ex:
2328 /// \code{.cpp}
2329 /// unsigned x = 0;
2330 /// #pragma omp atomic update
2331 /// x = Expr(x_old); //Expr() is any legal operation
2332 /// \endcode
2333 ///
2334 /// \param XOld the value of the atomic memory address to use for update
2335 /// \param IRB reference to the IRBuilder to use
2336 ///
2337 /// \returns Value to update X to.
2338 using AtomicUpdateCallbackTy =
2339 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2340
2341private:
2342 enum AtomicKind { Read, Write, Update, Capture, Compare };
2343
2344 /// Determine whether to emit flush or not
2345 ///
2346 /// \param Loc The insert and source location description.
2347 /// \param AO The required atomic ordering
2348 /// \param AK The OpenMP atomic operation kind used.
2349 ///
2350 /// \returns whether a flush was emitted or not
2351 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2352 AtomicOrdering AO, AtomicKind AK);
2353
2354 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2355 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2356 /// Only Scalar data types.
2357 ///
2358 /// \param AllocaIP The insertion point to be used for alloca
2359 /// instructions.
2360 /// \param X The target atomic pointer to be updated
2361 /// \param XElemTy The element type of the atomic pointer.
2362 /// \param Expr The value to update X with.
2363 /// \param AO Atomic ordering of the generated atomic
2364 /// instructions.
2365 /// \param RMWOp The binary operation used for update. If
2366 /// operation is not supported by atomicRMW,
2367 /// or belong to {FADD, FSUB, BAD_BINOP}.
2368 /// Then a `cmpExch` based atomic will be generated.
2369 /// \param UpdateOp Code generator for complex expressions that cannot be
2370 /// expressed through atomicrmw instruction.
2371 /// \param VolatileX true if \a X is volatile
2372 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2373 /// update expression, false otherwise.
2374 /// (e.g. true for X = X BinOp Expr)
2375 ///
2376 /// \returns A pair of the old value of X before the update, and the value
2377 /// used for the update.
2378 std::pair<Value *, Value *>
2379 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2381 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2382 bool IsXBinopExpr);
2383
2384 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
2385 ///
2386 /// \return The instruction
2387 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2388 AtomicRMWInst::BinOp RMWOp);
2389
2390public:
2391 /// a struct to pack relevant information while generating atomic Ops
2393 Value *Var = nullptr;
2394 Type *ElemTy = nullptr;
2395 bool IsSigned = false;
2396 bool IsVolatile = false;
2397 };
2398
2399 /// Emit atomic Read for : V = X --- Only Scalar data types.
2400 ///
2401 /// \param Loc The insert and source location description.
2402 /// \param X The target pointer to be atomically read
2403 /// \param V Memory address where to store atomically read
2404 /// value
2405 /// \param AO Atomic ordering of the generated atomic
2406 /// instructions.
2407 ///
2408 /// \return Insertion point after generated atomic read IR.
2411 AtomicOrdering AO);
2412
2413 /// Emit atomic write for : X = Expr --- Only Scalar data types.
2414 ///
2415 /// \param Loc The insert and source location description.
2416 /// \param X The target pointer to be atomically written to
2417 /// \param Expr The value to store.
2418 /// \param AO Atomic ordering of the generated atomic
2419 /// instructions.
2420 ///
2421 /// \return Insertion point after generated atomic Write IR.
2423 AtomicOpValue &X, Value *Expr,
2424 AtomicOrdering AO);
2425
2426 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2427 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2428 /// Only Scalar data types.
2429 ///
2430 /// \param Loc The insert and source location description.
2431 /// \param AllocaIP The insertion point to be used for alloca instructions.
2432 /// \param X The target atomic pointer to be updated
2433 /// \param Expr The value to update X with.
2434 /// \param AO Atomic ordering of the generated atomic instructions.
2435 /// \param RMWOp The binary operation used for update. If operation
2436 /// is not supported by atomicRMW, or belong to
2437 /// {FADD, FSUB, BAD_BINOP}. Then a `cmpExch` based
2438 /// atomic will be generated.
2439 /// \param UpdateOp Code generator for complex expressions that cannot be
2440 /// expressed through atomicrmw instruction.
2441 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2442 /// update expression, false otherwise.
2443 /// (e.g. true for X = X BinOp Expr)
2444 ///
2445 /// \return Insertion point after generated atomic update IR.
2447 InsertPointTy AllocaIP, AtomicOpValue &X,
2448 Value *Expr, AtomicOrdering AO,
2450 AtomicUpdateCallbackTy &UpdateOp,
2451 bool IsXBinopExpr);
2452
2453 /// Emit atomic update for constructs: --- Only Scalar data types
2454 /// V = X; X = X BinOp Expr ,
2455 /// X = X BinOp Expr; V = X,
2456 /// V = X; X = Expr BinOp X,
2457 /// X = Expr BinOp X; V = X,
2458 /// V = X; X = UpdateOp(X),
2459 /// X = UpdateOp(X); V = X,
2460 ///
2461 /// \param Loc The insert and source location description.
2462 /// \param AllocaIP The insertion point to be used for alloca instructions.
2463 /// \param X The target atomic pointer to be updated
2464 /// \param V Memory address where to store captured value
2465 /// \param Expr The value to update X with.
2466 /// \param AO Atomic ordering of the generated atomic instructions
2467 /// \param RMWOp The binary operation used for update. If the
2468 /// operation is not supported by atomicRMW, or belongs to
2469 /// {FADD, FSUB, BAD_BINOP}, then a cmpExch based
2470 /// atomic will be generated.
2471 /// \param UpdateOp Code generator for complex expressions that cannot be
2472 /// expressed through atomicrmw instruction.
2473 /// \param UpdateExpr true if X is an in place update of the form
2474 /// X = X BinOp Expr or X = Expr BinOp X
2475 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2476 /// update expression, false otherwise.
2477 /// (e.g. true for X = X BinOp Expr)
2478 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2479 /// 'v', not an updated one.
2480 ///
2481 /// \return Insertion point after generated atomic capture IR.
2484 AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2486 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2487 bool IsPostfixUpdate, bool IsXBinopExpr);
2488
2489 /// Emit atomic compare for constructs: --- Only scalar data types
2490 /// cond-expr-stmt:
2491 /// x = x ordop expr ? expr : x;
2492 /// x = expr ordop x ? expr : x;
2493 /// x = x == e ? d : x;
2494 /// x = e == x ? d : x; (this one is not in the spec)
2495 /// cond-update-stmt:
2496 /// if (x ordop expr) { x = expr; }
2497 /// if (expr ordop x) { x = expr; }
2498 /// if (x == e) { x = d; }
2499 /// if (e == x) { x = d; } (this one is not in the spec)
2500 /// conditional-update-capture-atomic:
2501 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2502 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2503 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2504 /// IsFailOnly=true)
2505 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2506 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2507 /// IsFailOnly=true)
2508 ///
2509 /// \param Loc The insert and source location description.
2510 /// \param X The target atomic pointer to be updated.
2511 /// \param V Memory address where to store captured value (for
2512 /// compare capture only).
2513 /// \param R Memory address where to store comparison result
2514 /// (for compare capture with '==' only).
2515 /// \param E The expected value ('e') for forms that use an
2516 /// equality comparison or an expression ('expr') for
2517 /// forms that use 'ordop' (logically an atomic maximum or
2518 /// minimum).
2519 /// \param D The desired value for forms that use an equality
2520 /// comparison. For forms that use 'ordop', it should be
2521 /// \p nullptr.
2522 /// \param AO Atomic ordering of the generated atomic instructions.
2523 /// \param Op Atomic compare operation. It can only be ==, <, or >.
2524 /// \param IsXBinopExpr True if the conditional statement is in the form where
2525 /// x is on LHS. It only matters for < or >.
2526 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
2527 /// 'v', not an updated one (for compare capture
2528 /// only).
2529 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
2530 /// only when the comparison fails. This is only valid for
2531 /// the case the comparison is '=='.
2532 ///
2533 /// \return Insertion point after generated atomic capture IR.
2538 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2539
2540 /// Create the control flow structure of a canonical OpenMP loop.
2541 ///
2542 /// The emitted loop will be disconnected, i.e. no edge to the loop's
2543 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2544 /// IRBuilder location is not preserved.
2545 ///
2546 /// \param DL DebugLoc used for the instructions in the skeleton.
2547 /// \param TripCount Value to be used for the trip count.
2548 /// \param F Function in which to insert the BasicBlocks.
2549 /// \param PreInsertBefore Where to insert BBs that execute before the body,
2550 /// typically the body itself.
2551 /// \param PostInsertBefore Where to insert BBs that execute after the body.
2552 /// \param Name Base name used to derive BB
2553 /// and instruction names.
2554 ///
2555 /// \returns The CanonicalLoopInfo that represents the emitted loop.
2557 Function *F,
2558 BasicBlock *PreInsertBefore,
2559 BasicBlock *PostInsertBefore,
2560 const Twine &Name = {});
2561 /// OMP Offload Info Metadata name string
2562 const std::string ompOffloadInfoName = "omp_offload.info";
2563
2564 /// Loads all the offload entries information from the host IR
2565 /// metadata. This function is only meant to be used with device code
2566 /// generation.
2567 ///
2568 /// \param M Module to load Metadata info from. The module passed may be
2569 /// loaded from a bitcode file, i.e., different from the OpenMPIRBuilder::M module.
2571
2572 /// Loads all the offload entries information from the host IR
2573 /// metadata read from the file passed in as the HostFilePath argument. This
2574 /// function is only meant to be used with device code generation.
2575 ///
2576 /// \param HostFilePath The path to the host IR file,
2577 /// used to load in offload metadata for the device, allowing host and device
2578 /// to maintain the same metadata mapping.
2579 void loadOffloadInfoMetadata(StringRef HostFilePath);
2580
2581 /// Gets (if a variable with the given name already exists) or creates an
2582 /// internal global variable with the specified Name. The created variable has
2583 /// linkage CommonLinkage by default and is initialized by a null value.
2584 /// \param Ty Type of the global variable. If it already exists, the type
2585 /// must be the same.
2586 /// \param Name Name of the variable.
2588 unsigned AddressSpace = 0);
2589
2590 /// Create a global function to register OpenMP requires flags into the
2591 /// runtime, according to the `Config`.
2592 ///
2593 /// This function should be added to the list of constructors of the
2594 /// compilation unit in order to be called before other OpenMP runtime
2595 /// functions.
2596 ///
2597 /// \param Name Name of the created function.
2599};
2600
2601/// Class to represent the control flow structure of an OpenMP canonical loop.
2602///
2603/// The control-flow structure is standardized for easy consumption by
2604/// directives associated with loops. For instance, the worksharing-loop
2605/// construct may change this control flow such that each loop iteration is
2606/// executed on only one thread. The constraints of a canonical loop in brief
2607/// are:
2608///
2609/// * The number of loop iterations must have been computed before entering the
2610/// loop.
2611///
2612/// * Has an (unsigned) logical induction variable that starts at zero and
2613/// increments by one.
2614///
2615/// * The loop's CFG itself has no side-effects. The OpenMP specification
2616/// itself allows side-effects, but the order in which they happen, including
2617/// how often or whether at all, is unspecified. We expect that the frontend
2618/// will emit those side-effect instructions somewhere (e.g. before the loop)
2619/// such that the CanonicalLoopInfo itself can be side-effect free.
2620///
2621/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2622/// execution of a loop body that satisfies these constraints. It does NOT
2623/// represent arbitrary SESE regions that happen to contain a loop. Do not use
2624/// CanonicalLoopInfo for such purposes.
2625///
2626/// The control flow can be described as follows:
2627///
2628/// Preheader
2629/// |
2630/// /-> Header
2631/// | |
2632/// | Cond---\
2633/// | | |
2634/// | Body |
2635/// | | | |
2636/// | <...> |
2637/// | | | |
2638/// \--Latch |
2639/// |
2640/// Exit
2641/// |
2642/// After
2643///
2644/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2645/// including) and end at AfterIP (at the After's first instruction, excluding).
2646/// That is, instructions in the Preheader and After blocks (except the
2647/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2648/// side-effects. Typically, the Preheader is used to compute the loop's trip
2649/// count. The instructions from BodyIP (at the Body block's first instruction,
2650/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2651/// control and thus can have side-effects. The body block is the single entry
2652/// point into the loop body, which may contain arbitrary control flow as long
2653/// as all control paths eventually branch to the Latch block.
2654///
2655/// TODO: Consider adding another standardized BasicBlock between Body CFG and
2656/// Latch to guarantee that there is only a single edge to the latch. It would
2657/// make loop transformations easier by not needing to consider multiple
2658/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2659/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2660/// executes after each body iteration.
2661///
2662/// There must be no loop-carried dependencies through llvm::Values. This is
2663/// equivalent to saying that the Latch has no PHINode and the Header's only
2664/// PHINode is for the induction variable.
2665///
2666/// All code in Header, Cond, Latch and Exit (plus the terminator of the
2667/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2668/// by assertOK(). They are expected to not be modified unless explicitly
2669/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2670/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2671/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2672/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2673/// anymore as its underlying control flow may not exist anymore.
2674/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2675/// may also return a new CanonicalLoopInfo that can be passed to other
2676/// loop-associated construct implementing methods. These loop-transforming
2677/// methods may either create a new CanonicalLoopInfo usually using
2678/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2679/// modify one of the input CanonicalLoopInfo and return it as representing the
2680/// modified loop. What is done is an implementation detail of the
2681/// transformation-implementing method and callers should always assume that the
2682/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2683/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2684/// created by createCanonicalLoop, such that transforming methods do not have
2685/// to special case where the CanonicalLoopInfo originated from.
2686///
2687/// Generally, methods consuming CanonicalLoopInfo do not need an
2688/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2689/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2690/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2691/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2692/// any InsertPoint in the Preheader, After or Block can still be used after
2693/// calling such a method.
2694///
2695/// TODO: Provide mechanisms for exception handling and cancellation points.
2696///
2697/// Defined outside OpenMPIRBuilder because nested classes cannot be
2698/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2700 friend class OpenMPIRBuilder;
2701
2702private:
2703 BasicBlock *Header = nullptr;
2704 BasicBlock *Cond = nullptr;
2705 BasicBlock *Latch = nullptr;
2706 BasicBlock *Exit = nullptr;
2707
2708 /// Add the control blocks of this loop to \p BBs.
2709 ///
2710 /// This does not include any block from the body, including the one returned
2711 /// by getBody().
2712 ///
2713 /// FIXME: This currently includes the Preheader and After blocks even though
2714 /// their content is (mostly) not under CanonicalLoopInfo's control.
2715 /// Re-evaluate whether this makes sense.
2716 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2717
2718 /// Sets the number of loop iterations to the given value. This value must be
2719 /// valid in the condition block (i.e., defined in the preheader) and is
2720 /// interpreted as an unsigned integer.
2721 void setTripCount(Value *TripCount);
2722
2723 /// Replace all uses of the canonical induction variable in the loop body with
2724 /// a new one.
2725 ///
2726 /// The intended use case is to update the induction variable for an updated
2727 /// iteration space such that it can stay normalized in the 0...tripcount-1
2728 /// range.
2729 ///
2730 /// The \p Updater is called with the (presumably updated) current normalized
2731 /// induction variable and is expected to return the value that uses of the
2732 /// pre-updated induction values should use instead, typically dependent on
2733 /// the new induction variable. This is a lambda (instead of e.g. just passing
2734 /// the new value) to be able to distinguish the uses of the pre-updated
2735 /// induction variable and uses of the induction variable to compute the
2736 /// updated induction variable value.
2737 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2738
2739public:
2740 /// Returns whether this object currently represents the IR of a loop. If
2741 /// returning false, it may have been consumed by a loop transformation or not
2742 /// been initialized. Do not use it in this case.
2743 bool isValid() const { return Header; }
2744
2745 /// The preheader ensures that there is only a single edge entering the loop.
2746 /// Code that must be executed before any loop iteration can be emitted here,
2747 /// such as computing the loop trip count and begin lifetime markers. Code in
2748 /// the preheader is not considered part of the canonical loop.
2749 BasicBlock *getPreheader() const;
2750
2751 /// The header is the entry for each iteration. In the canonical control flow,
2752 /// it only contains the PHINode for the induction variable.
2754 assert(isValid() && "Requires a valid canonical loop");
2755 return Header;
2756 }
2757
2758 /// The condition block computes whether there is another loop iteration. If
2759 /// yes, branches to the body; otherwise to the exit block.
2761 assert(isValid() && "Requires a valid canonical loop");
2762 return Cond;
2763 }
2764
2765 /// The body block is the single entry for a loop iteration and not controlled
2766 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2767 /// eventually branch to the \p Latch block.
2769 assert(isValid() && "Requires a valid canonical loop");
2770 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2771 }
2772
2773 /// Reaching the latch indicates the end of the loop body code. In the
2774 /// canonical control flow, it only contains the increment of the induction
2775 /// variable.
2777 assert(isValid() && "Requires a valid canonical loop");
2778 return Latch;
2779 }
2780
2781 /// Reaching the exit indicates no more iterations are being executed.
2783 assert(isValid() && "Requires a valid canonical loop");
2784 return Exit;
2785 }
2786
2787 /// The after block is intended for clean-up code such as lifetime end
2788 /// markers. It is separate from the exit block to ensure, analogous to the
2789 /// preheader, it having just a single entry edge and being free from PHI
2790 /// nodes should there be multiple loop exits (such as from break
2791 /// statements/cancellations).
2793 assert(isValid() && "Requires a valid canonical loop");
2794 return Exit->getSingleSuccessor();
2795 }
2796
2797 /// Returns the llvm::Value containing the number of loop iterations. It must
2798 /// be valid in the preheader and always interpreted as an unsigned integer of
2799 /// any bit-width.
2801 assert(isValid() && "Requires a valid canonical loop");
2802 Instruction *CmpI = &Cond->front();
2803 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2804 return CmpI->getOperand(1);
2805 }
2806
2807 /// Returns the instruction representing the current logical induction
2808 /// variable. Always unsigned, always starting at 0 with an increment of one.
2810 assert(isValid() && "Requires a valid canonical loop");
2811 Instruction *IndVarPHI = &Header->front();
2812 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2813 return IndVarPHI;
2814 }
2815
2816 /// Return the type of the induction variable (and the trip count).
2818 assert(isValid() && "Requires a valid canonical loop");
2819 return getIndVar()->getType();
2820 }
2821
2822 /// Return the insertion point for user code before the loop.
2824 assert(isValid() && "Requires a valid canonical loop");
2825 BasicBlock *Preheader = getPreheader();
2826 return {Preheader, std::prev(Preheader->end())};
2827 };
2828
2829 /// Return the insertion point for user code in the body.
2831 assert(isValid() && "Requires a valid canonical loop");
2832 BasicBlock *Body = getBody();
2833 return {Body, Body->begin()};
2834 };
2835
2836 /// Return the insertion point for user code after the loop.
2838 assert(isValid() && "Requires a valid canonical loop");
2840 return {After, After->begin()};
2841 };
2842
2844 assert(isValid() && "Requires a valid canonical loop");
2845 return Header->getParent();
2846 }
2847
2848 /// Consistency self-check.
2849 void assertOK() const;
2850
2851 /// Invalidate this loop. That is, the underlying IR does not fulfill the
2852 /// requirements of an OpenMP canonical loop anymore.
2853 void invalidate();
2854};
2855
2856} // end namespace llvm
2857
2858#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
arc branch finalize
This file defines the BumpPtrAllocator interface.
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
This file defines constants and helpers used when dealing with OpenMP.
const SmallVectorImpl< MachineOperand > & Cond
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Value * RHS
an instruction to allocate memory on the stack
Definition: Instructions.h:58
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:730
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:519
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:47
InsertPoint - A saved insertion point.
Definition: IRBuilder.h:243
BasicBlock * getBlock() const
Definition: IRBuilder.h:258
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:212
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:263
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:275
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2636
Class to represent integer types.
Definition: DerivedTypes.h:40
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OffloadEntryInfoDeviceGlobalVar(unsigned Order, OMPTargetGlobalVarEntryKind Flags)
Definition: OMPIRBuilder.h:364
OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage, const std::string &VarName)
Definition: OMPIRBuilder.h:367
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:382
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:291
OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Definition: OMPIRBuilder.h:278
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
Definition: OMPIRBuilder.h:212
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
Definition: OMPIRBuilder.h:214
OffloadingEntryInfoKinds getKind() const
Definition: OMPIRBuilder.h:230
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)
Definition: OMPIRBuilder.h:221
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:238
OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, uint32_t Flags)
Definition: OMPIRBuilder.h:222
Class that manages information about offload code regions and data.
Definition: OMPIRBuilder.h:200
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:404
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
Definition: OMPIRBuilder.h:343
@ OMPTargetDeviceClauseNoHost
The target is marked for non-host devices.
Definition: OMPIRBuilder.h:347
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
Definition: OMPIRBuilder.h:345
@ OMPTargetDeviceClauseNone
The target is marked as having no clause.
Definition: OMPIRBuilder.h:351
@ OMPTargetDeviceClauseHost
The target is marked for host devices.
Definition: OMPIRBuilder.h:349
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
Definition: OMPIRBuilder.h:265
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
Definition: OMPIRBuilder.h:267
OffloadEntriesInfoManager(OpenMPIRBuilder *builder)
Definition: OMPIRBuilder.h:258
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
Definition: OMPIRBuilder.h:256
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry..
Definition: OMPIRBuilder.h:325
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
Definition: OMPIRBuilder.h:331
@ OMPTargetGlobalVarEntryNone
Mark the entry as having no declare target entry kind.
Definition: OMPIRBuilder.h:333
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
Definition: OMPIRBuilder.h:335
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
Definition: OMPIRBuilder.h:329
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
Definition: OMPIRBuilder.h:327
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:316
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
Definition: OMPIRBuilder.h:399
bool empty() const
Return true if there are no entries defined.
Captures attributes that affect generating LLVM-IR using the OpenMPIRBuilder and related classes.
Definition: OMPIRBuilder.h:84
void setIsGPU(bool Value)
Definition: OMPIRBuilder.h:157
std::optional< bool > IsTargetDevice
Flag for specifying if the compilation is done for embedded device code or host code.
Definition: OMPIRBuilder.h:88
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
Definition: OMPIRBuilder.h:91
std::optional< StringRef > FirstSeparator
First separator used between the initial two parts of a name.
Definition: OMPIRBuilder.h:97
StringRef separator() const
Definition: OMPIRBuilder.h:148
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
void setFirstSeparator(StringRef FS)
Definition: OMPIRBuilder.h:159
StringRef firstSeparator() const
Definition: OMPIRBuilder.h:138
std::optional< bool > OpenMPOffloadMandatory
Definition: OMPIRBuilder.h:94
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
std::optional< StringRef > Separator
Separator used between all of the rest consecutive parts of a name.
Definition: OMPIRBuilder.h:99
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
Definition: OMPIRBuilder.h:120
void setHasRequiresUnifiedAddress(bool Value)
void setOpenMPOffloadMandatory(bool Value)
Definition: OMPIRBuilder.h:158
void setIsTargetDevice(bool Value)
Definition: OMPIRBuilder.h:156
void setSeparator(StringRef S)
Definition: OMPIRBuilder.h:160
void setHasRequiresDynamicAllocators(bool Value)
bool hasRequiresReverseOffload() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls)
SmallMapVector< const Value *, std::pair< Value *, Value * >, 4 > DevicePtrInfoMap
void clearArrayInfo()
Clear information about the data arrays.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
bool isValid()
Return true if the current target data information has valid arrays.
bool HasMapper
Indicate whether any user-defined mapper exists.
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:440
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:486
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, int32_t MinThreadsVal=0, int32_t MaxThreadsVal=0, int32_t MinTeamsVal=0, int32_t MaxTeamsVal=0)
The omp target interface.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
function_ref< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
Definition: OMPIRBuilder.h:538
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
std::function< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> StorableBodyGenCallbackTy
Definition: OMPIRBuilder.h:545
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X. For complex Operations: X = ...
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
void setConfig(OpenMPIRBuilderConfig C)
Definition: OMPIRBuilder.h:455
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for '#omp task'.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt)
Generator for '#omp single'.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false)
Modifies the canonical loop to be a workshare loop.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
Function * createRegisterRequires(StringRef Name)
Create a global function to register OpenMP requires flags into the runtime, according to the Config.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
void pushFinalizationCB(const FinalizationInfo &FI)
Push a finalization callback on the finalization stack.
Definition: OMPIRBuilder.h:504
InsertPointTy getInsertionPoint()
}
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:466
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB)
Generator for '#omp target'.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
OpenMPIRBuilder(Module &M)
Create a new OpenMPIRBuilder operating on the given module M.
Definition: OMPIRBuilder.h:444
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for '#omp teams'.
BodyGenTy
Type of BodyGen to use for region codegen.
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void popFinalizationCB()
Pop the last finalization callback from the finalization stack.
Definition: OMPIRBuilder.h:511
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:345
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:112
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:257
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
An efficient, type-erasing, non-owning reference to a callable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
Definition: OMPConstants.h:193
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:66
RTLDependenceKindTy
Dependence kind for RTL.
Definition: OMPConstants.h:271
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:46
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:265
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
AddressSpace
Definition: NVPTXBaseInfo.h:21
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
AtomicOrdering
Atomic ordering for LLVM's memory model.
A struct to pack relevant information while generating atomic Ops.
A struct to pack the relevant information for an OpenMP depend clause.
DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, Value *DepVal)
omp::RTLDependenceKindTy DepKind
bool IsCancellable
Flag to indicate if the directive is cancellable.
Definition: OMPIRBuilder.h:498
FinalizeCallbackTy FiniCB
The finalization callback provided by the last in-flight invocation of createXXXX for the directive o...
Definition: OMPIRBuilder.h:491
omp::Directive DK
The directive kind of the innermost directive that has an associated region which might require final...
Definition: OMPIRBuilder.h:495
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:582
LocationDescription(const InsertPointTy &IP)
Definition: OMPIRBuilder.h:585
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
Definition: OMPIRBuilder.h:586
LocationDescription(const IRBuilderBase &IRB)
Definition: OMPIRBuilder.h:583
This structure contains combined information generated for mappable clauses, including base pointers,...
void append(MapInfosTy &CurInfo)
Append arrays in CurInfo.
MapDeviceInfoArrayTy DevicePointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Function * getFunction() const
Return the function that contains the region to be outlined.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
std::function< void(Function &)> PostOutlineCBTy
Information about an OpenMP reduction.
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, ReductionGenTy ReductionGen, AtomicReductionGenTy AtomicReductionGen)
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray, Value *SizesArray, Value *MapTypesArray, Value *MapTypesArrayEnd, Value *MappersArray, Value *MapNamesArray)
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * NumTeams
The number of teams.
TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs, Value *NumIterations, Value *NumTeams, Value *NumThreads, Value *DynCGGroupMem, bool HasNoWait)
Constructor for TargetKernelArgs.
Value * DynCGGroupMem
The size of the dynamic shared memory.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
Value * NumThreads
The number of threads.
A MapVector that performs no allocations if smaller than a certain size.
Definition: MapVector.h:254
Data structure to contain the information needed to uniquely identify a target entry.
Definition: OMPIRBuilder.h:174
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count=0)
Definition: OMPIRBuilder.h:182
bool operator<(const TargetRegionEntryInfo RHS) const
Definition: OMPIRBuilder.h:192