LLVM 17.0.0git
OMPIRBuilder.h
Go to the documentation of this file.
1//===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the OpenMPIRBuilder class and helpers used as a convenient
10// way to create LLVM instructions for OpenMP directives.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15#define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16
19#include "llvm/IR/DebugLoc.h"
20#include "llvm/IR/IRBuilder.h"
22#include <forward_list>
23#include <map>
24#include <optional>
25
26namespace llvm {
27class CanonicalLoopInfo;
28struct TargetRegionEntryInfo;
29class OffloadEntriesInfoManager;
30class OpenMPIRBuilder;
31
32/// Move the instructions after an InsertPoint to the beginning of another
33/// BasicBlock.
34///
35/// The instructions after \p IP are moved to the beginning of \p New which must
36/// not have any PHINodes. If \p CreateBranch is true, a branch instruction to
37/// \p New will be added such that there is no semantic change. Otherwise, the
38/// \p IP insert block remains degenerate and it is up to the caller to insert a
39/// terminator.
40void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
41 bool CreateBranch);
42
43/// Splice a BasicBlock at an IRBuilder's current insertion point. Its new
44/// insert location will stick to after the instruction before the insertion
45/// point (instead of moving with the instruction the InsertPoint stores
46/// internally).
47void spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch);
48
49/// Split a BasicBlock at an InsertPoint, even if the block is degenerate
50/// (missing the terminator).
51///
52/// llvm::SplitBasicBlock and BasicBlock::splitBasicBlock require a well-formed
53/// BasicBlock. \p Name is used for the new successor block. If \p CreateBranch
54/// is true, a branch to the new successor will be created such that
55/// semantically there is no change; otherwise the block of the insertion point
56/// remains degenerate and it is the caller's responsibility to insert a
57/// terminator. Returns the new successor block.
58BasicBlock *splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
59 llvm::Twine Name = {});
60
61/// Split a BasicBlock at \p Builder's insertion point, even if the block is
62/// degenerate (missing the terminator). Its new insert location will stick to
63/// after the instruction before the insertion point (instead of moving with the
64/// instruction the InsertPoint stores internally).
65BasicBlock *splitBB(IRBuilderBase &Builder, bool CreateBranch,
66 llvm::Twine Name = {});
67
68/// Split a BasicBlock at \p Builder's insertion point, even if the block is
69/// degenerate (missing the terminator). Its new insert location will stick to
70/// after the instruction before the insertion point (instead of moving with the
71/// instruction the InsertPoint stores internally).
72BasicBlock *splitBB(IRBuilder<> &Builder, bool CreateBranch, llvm::Twine Name);
73
74/// Like splitBB, but reuses the current block's name for the new name.
75BasicBlock *splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
76 llvm::Twine Suffix = ".split");
77
78/// Captures attributes that affect generating LLVM-IR using the
79/// OpenMPIRBuilder and related classes. Note that not all attributes are
80/// required for all classes or functions. In some use cases the configuration
81/// is not necessary at all, because the only functions that are called
82/// are ones that are not dependent on the configuration.
84public:
85 /// Flag for specifying if the compilation is done for embedded device code
86 /// or host code.
87 std::optional<bool> IsEmbedded;
88
89 /// Flag for specifying if the compilation is done for an offloading target,
90 /// like GPU.
91 std::optional<bool> IsTargetCodegen;
92
93 /// Flag for specifying whether a requires unified_shared_memory
94 /// directive is present or not.
95 std::optional<bool> HasRequiresUnifiedSharedMemory;
96
97 // Flag for specifying if offloading is mandatory.
98 std::optional<bool> OpenMPOffloadMandatory;
99
100 /// First separator used between the initial two parts of a name.
101 std::optional<StringRef> FirstSeparator;
102 /// Separator used between all of the remaining consecutive parts of a name.
103 std::optional<StringRef> Separator;
104
112
113 // Getter functions that assert if the required values are not present.
114 bool isEmbedded() const {
// IsEmbedded is a std::optional; querying it before it has been set is a
// usage error, which this assert reports.
115 assert(IsEmbedded.has_value() && "IsEmbedded is not set");
116 return *IsEmbedded;
117 }
118
119 bool isTargetCodegen() const {
// IsTargetCodegen is a std::optional; querying it before it has been set is
// a usage error, which this assert reports. The default-separator lookups
// further down in this class also go through this getter.
120 assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set");
121 return *IsTargetCodegen;
122 }
123
126 "HasUnifiedSharedMemory is not set");
128 }
129
131 assert(OpenMPOffloadMandatory.has_value() &&
132 "OpenMPOffloadMandatory is not set");
134 }
135 // Returns the FirstSeparator if set, otherwise use the default
136 // separator depending on isTargetCodegen
138 if (FirstSeparator.has_value())
139 return *FirstSeparator;
140 if (isTargetCodegen())
141 return "_";
142 return ".";
143 }
144
145 // Returns the Separator if set, otherwise use the default
146 // separator depending on isTargetCodegen
148 if (Separator.has_value())
149 return *Separator;
150 if (isTargetCodegen())
151 return "$";
152 return ".";
153 }
154
159 }
162};
163
164/// Data structure to contain the information needed to uniquely identify
165/// a target entry.
167 std::string ParentName;
168 unsigned DeviceID;
169 unsigned FileID;
170 unsigned Line;
171 unsigned Count;
172
175 unsigned FileID, unsigned Line, unsigned Count = 0)
177 Count(Count) {}
178
181 unsigned DeviceID, unsigned FileID,
182 unsigned Line, unsigned Count);
183
185 return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
186 std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
187 RHS.Count);
188 }
189};
190
191/// Class that manages information about offload code regions and data
193 /// Pointer to the OpenMPIRBuilder this manager is used with.
194 OpenMPIRBuilder *OMPBuilder;
/// Number of entries registered so far.
195 unsigned OffloadingEntriesNum = 0;
196
197public:
198 /// Base class of the entries info.
200 public:
201 /// Kind of a given entry.
202 enum OffloadingEntryInfoKinds : unsigned {
203 /// Entry is a target region.
205 /// Entry is a declare target variable.
207 /// Invalid entry info.
209 };
210
211 protected:
// Construct an entry of the given kind. Order and Flags keep their in-class
// defaults (~0u and 0u), so isValid() returns false for such an entry.
213 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
// Construct an entry of the given kind with an explicit order and flags.
214 explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
215 uint32_t Flags)
216 : Flags(Flags), Order(Order), Kind(Kind) {}
// Protected, non-virtual destructor.
217 ~OffloadEntryInfo() = default;
218
219 public:
// An entry is valid when its order differs from the ~0u default.
220 bool isValid() const { return Order != ~0u; }
221 unsigned getOrder() const { return Order; }
222 OffloadingEntryInfoKinds getKind() const { return Kind; }
223 uint32_t getFlags() const { return Flags; }
224 void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
// Returns the held address as a Constant, or null if no address is held.
225 Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
227 assert(!Addr.pointsToAliveValue() && "Address has been set before!");
228 Addr = V;
229 }
230 static bool classof(const OffloadEntryInfo *Info) { return true; }
231
232 private:
233 /// Address of the entity that has to be mapped for offloading.
234 WeakTrackingVH Addr;
235
236 /// Flags associated with the device global.
237 uint32_t Flags = 0u;
238
239 /// Order this entry was emitted.
240 unsigned Order = ~0u;
241
243 };
244
245 /// Return true if there are no entries defined.
246 bool empty() const;
247 /// Return number of entries defined so far.
248 unsigned size() const { return OffloadingEntriesNum; }
249
251
252 //
253 // Target region entries related.
254 //
255
256 /// Kind of the target registry entry.
258 /// Mark the entry as target region.
260 /// Mark the entry as a global constructor.
262 /// Mark the entry as a global destructor.
264 };
265
266 /// Target region entries info.
268 /// Address that can be used as the ID of the entry.
269 Constant *ID = nullptr;
270
271 public:
274 explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
275 Constant *ID,
278 ID(ID) {
280 }
281
282 Constant *getID() const { return ID; }
283 void setID(Constant *V) {
// Overwriting an ID that is already non-null is a usage error.
284 assert(!ID && "ID has been set before!");
285 ID = V;
286 }
// Type check used by isa<>/cast<>/dyn_cast<>: true for entries whose kind is
// OffloadingEntryInfoTargetRegion.
287 static bool classof(const OffloadEntryInfo *Info) {
288 return Info->getKind() == OffloadingEntryInfoTargetRegion;
289 }
290 };
291
292 /// Initialize target region entry.
293 /// This is ONLY needed for DEVICE compilation.
295 unsigned Order);
296 /// Register target region entry.
300 /// Return true if a target region entry with the provided information
301 /// exists.
303 bool IgnoreAddressId = false) const;
304
305 // Return the Name based on \a EntryInfo using the next available Count.
307 const TargetRegionEntryInfo &EntryInfo);
308
309 /// Applies action \a Action on all registered entries.
310 typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
311 const OffloadEntryInfoTargetRegion &)>
313 void
315
316 //
317 // Device global variable entries related.
318 //
319
320 /// Kind of the global variable entry.
322 /// Mark the entry as a to declare target.
324 /// Mark the entry as a to declare target link.
326 };
327
328 /// Device global variable entries info.
330 /// Size of the global variable.
331 int64_t VarSize;
333
334 public:
337 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
340 explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
341 int64_t VarSize,
345 VarSize(VarSize), Linkage(Linkage) {
347 }
348
// Plain accessors for the size and linkage recorded for the variable.
349 int64_t getVarSize() const { return VarSize; }
350 void setVarSize(int64_t Size) { VarSize = Size; }
351 GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
352 void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
// Type check used by isa<>/cast<>/dyn_cast<>: true for entries whose kind is
// OffloadingEntryInfoDeviceGlobalVar.
353 static bool classof(const OffloadEntryInfo *Info) {
354 return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
355 }
356 };
357
358 /// Initialize device global variable entry.
359 /// This is ONLY used for DEVICE compilation.
362 unsigned Order);
363
364 /// Register device global variable entry.
366 int64_t VarSize,
369 /// Checks if the variable with the given name has been registered already.
371 return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
372 }
373 /// Applies action \a Action on all registered entries.
374 typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
378
379private:
380 /// Return the count of entries at a particular source location.
381 unsigned
382 getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
383
384 /// Update the count of entries at a particular source location.
385 void
386 incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
387
389 getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
390 return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
391 EntryInfo.FileID, EntryInfo.Line, 0);
392 }
393
394 // Count of entries at a location.
395 std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
396
397 // Storage for target region entries kind.
398 typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
399 OffloadEntriesTargetRegionTy;
400 OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
401 /// Storage for device global variable entries kind. The storage is to be
402 /// indexed by mangled name.
404 OffloadEntriesDeviceGlobalVarTy;
405 OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
406};
407
408/// An interface to create LLVM-IR for OpenMP directives.
409///
410/// Each OpenMP directive has a corresponding public generator method.
412public:
413 /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
414 /// not have an effect on \p M (see initialize)
416 : M(M), Builder(M.getContext()), OffloadInfoManager(this) {}
418
419 /// Initialize the internal state. This will put structure types and
420 /// potentially other helpers into the underlying module. Must be called
421 /// before any other method and only once! This internal state includes
422 /// types used in the OpenMPIRBuilder generated from OMPKinds.def as well
423 /// as loading offload metadata for device from the OpenMP host IR file
424 /// passed in as the HostFilePath argument.
425 /// \param HostFilePath The path to the host IR file, used to load in
426 /// offload metadata for the device, allowing host and device to
427 /// maintain the same metadata mapping.
428 void initialize(StringRef HostFilePath = {});
429
431
432 /// Finalize the underlying module, e.g., by outlining regions.
433 /// \param Fn The function to be finalized. If not used,
434 /// all functions are finalized.
435 void finalize(Function *Fn = nullptr);
436
437 /// Add attributes known for \p FnID to \p Fn.
439
440 /// Type used throughout for insertion points.
442
443 /// Create a name using the platform specific separators.
444 /// \param Parts parts of the final name that need separation
445 /// The created name has a first separator between the first and second part
446 /// and a second separator between all other parts.
447 /// E.g. with FirstSeparator "$" and Separator "." and
448 /// parts: "p1", "p2", "p3", "p4"
449 /// The resulting name is "p1$p2.p3.p4"
450 /// The separators are retrieved from the OpenMPIRBuilderConfig.
451 std::string createPlatformSpecificName(ArrayRef<StringRef> Parts) const;
452
453 /// Callback type for variable finalization (think destructors).
454 ///
455 /// \param CodeGenIP is the insertion point at which the finalization code
456 /// should be placed.
457 ///
458 /// A finalize callback knows about all objects that need finalization, e.g.
459 /// destruction, when the scope of the currently generated construct is left
460 /// at the time, and location, the callback is invoked.
461 using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
462
464 /// The finalization callback provided by the last in-flight invocation of
465 /// createXXXX for the directive of kind DK.
467
468 /// The directive kind of the innermost directive that has an associated
469 /// region which might require finalization when it is left.
470 omp::Directive DK;
471
472 /// Flag to indicate if the directive is cancellable.
474 };
475
476 /// Push a finalization callback on the finalization stack.
477 ///
478 /// NOTE: Temporary solution until Clang CG is gone.
480 FinalizationStack.push_back(FI);
481 }
482
483 /// Pop the last finalization callback from the finalization stack.
484 ///
485 /// NOTE: Temporary solution until Clang CG is gone.
487
488 /// Callback type for body (=inner region) code generation
489 ///
490 /// The callback takes code locations as arguments, each describing a
491 /// location where additional instructions can be inserted.
492 ///
493 /// The CodeGenIP may be in the middle of a basic block or point to the end of
494 /// it. The basic block may have a terminator or be degenerate. The callback
495 /// function may just insert instructions at that position, but also split the
496 /// block (without the Before argument of BasicBlock::splitBasicBlock such
497 /// that the identity of the split predecessor block is preserved) and insert
498 /// additional control flow, including branches that do not lead back to what
499 /// follows the CodeGenIP. Note that since the callback is allowed to split
500 /// the block, callers must assume that InsertPoints to positions in the
501 /// BasicBlock after CodeGenIP including CodeGenIP itself are invalidated. If
502 /// such InsertPoints need to be preserved, the caller can split the block itself
503 /// before calling the callback.
504 ///
505 /// AllocaIP and CodeGenIP must not point to the same position.
506 ///
507 /// \param AllocaIP is the insertion point at which new alloca instructions
508 /// should be placed. The BasicBlock it is pointing to must
509 /// not be split.
510 /// \param CodeGenIP is the insertion point at which the body code should be
511 /// placed.
513 function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
514
515 // This is created primarily for the sections construct, as llvm::function_ref
516 // (BodyGenCallbackTy) is not storable (as described in the comments of the
517 // function_ref class, function_ref contains a non-owning reference
518 // to the callable).
520 std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
521
522 /// Callback type for loop body code generation.
523 ///
524 /// \param CodeGenIP is the insertion point where the loop's body code must be
525 /// placed. This will be a dedicated BasicBlock with a
526 /// conditional branch from the loop condition check and
527 /// terminated with an unconditional branch to the loop
528 /// latch.
529 /// \param IndVar is the induction variable usable at the insertion point.
531 function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
532
533 /// Callback type for variable privatization (think copy & default
534 /// constructor).
535 ///
536 /// \param AllocaIP is the insertion point at which new alloca instructions
537 /// should be placed.
538 /// \param CodeGenIP is the insertion point at which the privatization code
539 /// should be placed.
540 /// \param Original The value being copied/created, should not be used in the
541 /// generated IR.
542 /// \param Inner The equivalent of \p Original that should be used in the
543 /// generated IR; this is equal to \p Original if the value is
544 /// a pointer and can thus be passed directly, otherwise it is
545 /// an equivalent but different value.
546 /// \param ReplVal The replacement value, thus a copy or newly created version
547 /// of \p Inner.
548 ///
549 /// \returns The new insertion point where code generation continues and
550 /// \p ReplVal the replacement value.
552 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
553 Value &Inner, Value *&ReplVal)>;
554
555 /// Description of a LLVM-IR insertion point (IP) and a debug/source location
556 /// (filename, line, column, ...).
559 : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
562 : IP(IP), DL(DL) {}
565 };
566
567 /// Emitter methods for OpenMP directives.
568 ///
569 ///{
570
571 /// Generator for '#omp barrier'
572 ///
573 /// \param Loc The location where the barrier directive was encountered.
574 /// \param DK The kind of directive that caused the barrier.
575 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
576 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
577 /// should be checked and acted upon.
578 ///
579 /// \returns The insertion point after the barrier.
580 InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
581 bool ForceSimpleCall = false,
582 bool CheckCancelFlag = true);
583
584 /// Generator for '#omp cancel'
585 ///
586 /// \param Loc The location where the directive was encountered.
587 /// \param IfCondition The evaluated 'if' clause expression, if any.
588 /// \param CanceledDirective The kind of directive that is canceled.
589 ///
590 /// \returns The insertion point after the barrier.
591 InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
592 omp::Directive CanceledDirective);
593
594 /// Generator for '#omp parallel'
595 ///
596 /// \param Loc The insert and source location description.
597 /// \param AllocaIP The insertion points to be used for alloca instructions.
598 /// \param BodyGenCB Callback that will generate the region code.
599 /// \param PrivCB Callback to copy a given variable (think copy constructor).
600 /// \param FiniCB Callback to finalize variable copies.
601 /// \param IfCondition The evaluated 'if' clause expression, if any.
602 /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
603 /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
604 /// \param IsCancellable Flag to indicate a cancellable parallel region.
605 ///
606 /// \returns The insertion position *after* the parallel.
609 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
610 FinalizeCallbackTy FiniCB, Value *IfCondition,
611 Value *NumThreads, omp::ProcBindKind ProcBind,
612 bool IsCancellable);
613
614 /// Generator for the control flow structure of an OpenMP canonical loop.
615 ///
616 /// This generator operates on the logical iteration space of the loop, i.e.
617 /// the caller only has to provide a loop trip count of the loop as defined by
618 /// base language semantics. The trip count is interpreted as an unsigned
619 /// integer. The induction variable passed to \p BodyGenCB will be of the same
620 /// type and run from 0 to \p TripCount - 1. It is up to the callback to
621 /// convert the logical iteration variable to the loop counter variable in the
622 /// loop body.
623 ///
624 /// \param Loc The insert and source location description. The insert
625 /// location can be between two instructions or the end of a
626 /// degenerate block (e.g. a BB under construction).
627 /// \param BodyGenCB Callback that will generate the loop body code.
628 /// \param TripCount Number of iterations the loop body is executed.
629 /// \param Name Base name used to derive BB and instruction names.
630 ///
631 /// \returns An object representing the created control flow structure which
632 /// can be used for loop-associated directives.
634 LoopBodyGenCallbackTy BodyGenCB,
635 Value *TripCount,
636 const Twine &Name = "loop");
637
638 /// Generator for the control flow structure of an OpenMP canonical loop.
639 ///
640 /// Instead of a logical iteration space, this allows specifying user-defined
641 /// loop counter values using increment, upper- and lower bounds. To
642 /// disambiguate the terminology when counting downwards, instead of lower
643 /// bounds we use \p Start for the loop counter value in the first body
644 /// iteration.
645 ///
646 /// Consider the following limitations:
647 ///
648 /// * A loop counter space over all integer values of its bit-width cannot be
649 /// represented (e.g. using uint8_t, its loop trip count of 256 cannot be
650 /// stored into an 8 bit integer):
651 ///
652 /// DO I = 0, 255, 1
653 ///
654 /// * Unsigned wrapping is only supported when wrapping only "once"; E.g.
655 /// effectively counting downwards:
656 ///
657 /// for (uint8_t i = 100u; i > 0; i += 127u)
658 ///
659 ///
660 /// TODO: May need to add additional parameters to represent:
661 ///
662 /// * Allow representing downcounting with unsigned integers.
663 ///
664 /// * Sign of the step and the comparison operator might disagree:
665 ///
666 /// for (int i = 0; i < 42; i -= 1u)
667 ///
668 ///
669 /// \param Loc The insert and source location description.
670 /// \param BodyGenCB Callback that will generate the loop body code.
671 /// \param Start Value of the loop counter for the first iteration.
672 /// \param Stop Loop counter values past this will stop the loop.
673 /// \param Step Loop counter increment after each iteration; negative
674 /// means counting down.
675 /// \param IsSigned Whether Start, Stop and Step are signed integers.
676 /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
677 /// counter.
678 /// \param ComputeIP Insertion point for instructions computing the trip
679 /// count. Can be used to ensure the trip count is available
680 /// at the outermost loop of a loop nest. If not set,
681 /// defaults to the preheader of the generated loop.
682 /// \param Name Base name used to derive BB and instruction names.
683 ///
684 /// \returns An object representing the created control flow structure which
685 /// can be used for loop-associated directives.
687 LoopBodyGenCallbackTy BodyGenCB,
688 Value *Start, Value *Stop, Value *Step,
689 bool IsSigned, bool InclusiveStop,
690 InsertPointTy ComputeIP = {},
691 const Twine &Name = "loop");
692
693 /// Collapse a loop nest into a single loop.
694 ///
695 /// Merges loops of a loop nest into a single CanonicalLoopNest representation
696 /// that has the same number of innermost loop iterations as the origin loop
697 /// nest. The induction variables of the input loops are derived from the
698 /// collapsed loop's induction variable. This is intended to be used to
699 /// implement OpenMP's collapse clause. Before applying a directive,
700 /// collapseLoops normalizes a loop nest to contain only a single loop and the
701 /// directive's implementation does not need to handle multiple loops itself.
702 /// This does not remove the need to handle all loop nest handling by
703 /// directives, such as the ordered(<n>) clause or the simd schedule-clause
704 /// modifier of the worksharing-loop directive.
705 ///
706 /// Example:
707 /// \code
708 /// for (int i = 0; i < 7; ++i) // Canonical loop "i"
709 /// for (int j = 0; j < 9; ++j) // Canonical loop "j"
710 /// body(i, j);
711 /// \endcode
712 ///
713 /// After collapsing with Loops={i,j}, the loop is changed to
714 /// \code
715 /// for (int ij = 0; ij < 63; ++ij) {
716 /// int i = ij / 9;
717 /// int j = ij % 9;
718 /// body(i, j);
719 /// }
720 /// \endcode
721 ///
722 /// In the current implementation, the following limitations apply:
723 ///
724 /// * All input loops have an induction variable of the same type.
725 ///
726 /// * The collapsed loop will have the same trip count integer type as the
727 /// input loops. Therefore it is possible that the collapsed loop cannot
728 /// represent all iterations of the input loops. For instance, assuming a
729 /// 32 bit integer type, and two input loops both iterating 2^16 times, the
730 /// theoretical trip count of the collapsed loop would be 2^32 iterations,
731 /// which cannot be represented in a 32-bit integer. Behavior is undefined
732 /// in this case.
733 ///
734 /// * The trip counts of every input loop must be available at \p ComputeIP.
735 /// Non-rectangular loops are not yet supported.
736 ///
737 /// * At each nest level, code between a surrounding loop and its nested loop
738 /// is hoisted into the loop body, and such code will be executed more
739 /// often than before collapsing (or not at all if any inner loop iteration
740 /// has a trip count of 0). This is permitted by the OpenMP specification.
741 ///
742 /// \param DL Debug location for instructions added for collapsing,
743 /// such as instructions to compute/derive the input loop's
744 /// induction variables.
745 /// \param Loops Loops in the loop nest to collapse. Loops are specified
746 /// from outermost-to-innermost and every control flow of a
747 /// loop's body must pass through its directly nested loop.
748 /// \param ComputeIP Where additional instructions that compute the collapsed
749 /// trip count are inserted. If not set, defaults to before the generated
750 /// loop.
751 ///
752 /// \returns The CanonicalLoopInfo object representing the collapsed loop.
755 InsertPointTy ComputeIP);
756
757 /// Get the default alignment value for given target
758 ///
759 /// \param TargetTriple Target triple
760 /// \param Features StringMap which describes extra CPU features
761 static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
762 const StringMap<bool> &Features);
763
764private:
765 /// Modifies the canonical loop to be a statically-scheduled workshare loop.
766 ///
767 /// This takes a \p CLI representing a canonical loop, such as the one
768 /// created by createCanonicalLoop, and emits additional instructions to
769 /// turn it into a workshare loop. In particular, it calls an OpenMP
770 /// runtime function in the preheader to obtain the loop bounds to be used in
771 /// the current thread, updates the relevant instructions in the canonical
772 /// loop and calls an OpenMP runtime finalization function after the loop.
773 ///
774 /// \param DL Debug location for instructions added for the
775 /// workshare-loop construct itself.
776 /// \param CLI A descriptor of the canonical loop to workshare.
777 /// \param AllocaIP An insertion point for Alloca instructions usable in the
778 /// preheader of the loop.
779 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
780 /// the loop.
781 ///
782 /// \returns Point where to insert code after the workshare construct.
783 InsertPointTy applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
784 InsertPointTy AllocaIP,
785 bool NeedsBarrier);
786
787 /// Modifies the canonical loop to be a statically-scheduled workshare loop with a
788 /// user-specified chunk size.
789 ///
790 /// \param DL Debug location for instructions added for the
791 /// workshare-loop construct itself.
792 /// \param CLI A descriptor of the canonical loop to workshare.
793 /// \param AllocaIP An insertion point for Alloca instructions usable in
794 /// the preheader of the loop.
795 /// \param NeedsBarrier Indicates whether a barrier must be inserted after the
796 /// loop.
797 /// \param ChunkSize The user-specified chunk size.
798 ///
799 /// \returns Point where to insert code after the workshare construct.
800 InsertPointTy applyStaticChunkedWorkshareLoop(DebugLoc DL,
802 InsertPointTy AllocaIP,
803 bool NeedsBarrier,
804 Value *ChunkSize);
805
806 /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
807 ///
808 /// This takes a \p CLI representing a canonical loop, such as the one
809 /// created by createCanonicalLoop, and emits additional instructions to
810 /// turn it into a workshare loop. In particular, it calls an OpenMP
811 /// runtime function in the preheader to obtain, and then in each iteration
812 /// to update, the loop counter.
813 ///
814 /// \param DL Debug location for instructions added for the
815 /// workshare-loop construct itself.
816 /// \param CLI A descriptor of the canonical loop to workshare.
817 /// \param AllocaIP An insertion point for Alloca instructions usable in the
818 /// preheader of the loop.
819 /// \param SchedType Type of scheduling to be passed to the init function.
820 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
821 /// the loop.
822 /// \param Chunk The size of loop chunk considered as a unit when
823 /// scheduling. If \p nullptr, defaults to 1.
824 ///
825 /// \returns Point where to insert code after the workshare construct.
826 InsertPointTy applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
827 InsertPointTy AllocaIP,
828 omp::OMPScheduleType SchedType,
829 bool NeedsBarrier,
830 Value *Chunk = nullptr);
831
832 /// Create an alternative version of the loop to support the if clause.
833 ///
834 /// An OpenMP if clause can require generating a second loop. This loop
835 /// will be executed when the if clause condition is not met. createIfVersion
836 /// adds a branch instruction to the copied loop if \p IfCond is not met.
837 ///
838 /// \param Loop Original loop which should be versioned.
839 /// \param IfCond Value which corresponds to the if clause condition.
840 /// \param VMap Value to value map to define relation between
841 /// original and copied loop values and loop blocks.
842 /// \param NamePrefix Optional name prefix for if.then if.else blocks.
843 void createIfVersion(CanonicalLoopInfo *Loop, Value *IfCond,
844 ValueToValueMapTy &VMap, const Twine &NamePrefix = "");
845
846public:
847 /// Modifies the canonical loop to be a workshare loop.
848 ///
849 /// This takes a \p CLI representing a canonical loop, such as the one
850 /// created by \p createCanonicalLoop and emits additional instructions to
851 /// turn it into a workshare loop. In particular, it calls to an OpenMP
852 /// runtime function in the preheader to obtain the loop bounds to be used in
853 /// the current thread, updates the relevant instructions in the canonical
854 /// loop and calls to an OpenMP runtime finalization function after the loop.
855 ///
856 /// The concrete transformation is done by applyStaticWorkshareLoop,
857 /// applyStaticChunkedWorkshareLoop, or applyDynamicWorkshareLoop, depending
858 /// on the value of \p SchedKind and \p ChunkSize.
859 ///
860 /// \param DL Debug location for instructions added for the
861 /// workshare-loop construct itself.
862 /// \param CLI A descriptor of the canonical loop to workshare.
863 /// \param AllocaIP An insertion point for Alloca instructions usable in the
864 /// preheader of the loop.
865 /// \param NeedsBarrier Indicates whether a barrier must be inserted after
866 /// the loop.
867 /// \param SchedKind Scheduling algorithm to use.
868 /// \param ChunkSize The chunk size for the inner loop.
869 /// \param HasSimdModifier Whether the simd modifier is present in the
870 /// schedule clause.
871 /// \param HasMonotonicModifier Whether the monotonic modifier is present in
872 /// the schedule clause.
873 /// \param HasNonmonotonicModifier Whether the nonmonotonic modifier is
874 /// present in the schedule clause.
875 /// \param HasOrderedClause Whether the (parameterless) ordered clause is
876 /// present.
877 ///
878 /// \returns Point where to insert code after the workshare construct.
881 bool NeedsBarrier,
882 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default,
883 Value *ChunkSize = nullptr, bool HasSimdModifier = false,
884 bool HasMonotonicModifier = false, bool HasNonmonotonicModifier = false,
885 bool HasOrderedClause = false);
886
887 /// Tile a loop nest.
888 ///
889 /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
890 /// \p Loops must be perfectly nested, from outermost to innermost loop
891 /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
892 /// of every loop and every tile size must be usable in the outermost
893 /// loop's preheader. This implies that the loop nest is rectangular.
894 ///
895 /// Example:
896 /// \code
897 /// for (int i = 0; i < 15; ++i) // Canonical loop "i"
898 /// for (int j = 0; j < 14; ++j) // Canonical loop "j"
899 /// body(i, j);
900 /// \endcode
901 ///
902 /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
903 /// \code
904 /// for (int i1 = 0; i1 < 3; ++i1)
905 /// for (int j1 = 0; j1 < 2; ++j1)
906 /// for (int i2 = 0; i2 < 5; ++i2)
907 /// for (int j2 = 0; j2 < 7; ++j2)
908 /// body(i1*5+i2, j1*7+j2);
909 /// \endcode
910 ///
911 /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1 are
912 /// referred to as the floor, and the loops i2 and j2 are the tiles. Tiling also
913 /// handles non-constant trip counts, non-constant tile sizes and trip counts
914 /// that are not multiples of the tile size. In the latter case the tile loop
915 /// of the last floor-loop iteration will have fewer iterations than specified
916 /// as its tile size.
917 ///
918 ///
919 /// @param DL Debug location for instructions added by tiling, for
920 /// instance the floor- and tile trip count computation.
921 /// @param Loops Loops to tile. The CanonicalLoopInfo objects are
922 /// invalidated by this method, i.e. should not be used after
923 /// tiling.
924 /// @param TileSizes For each loop in \p Loops, the tile size for that
925 /// dimension.
926 ///
927 /// \returns A list of generated loops. Contains twice as many loops as the
928 /// input loop nest; the first half are the floor loops and the
929 /// second half are the tile loops.
930 std::vector<CanonicalLoopInfo *>
932 ArrayRef<Value *> TileSizes);
933
934 /// Fully unroll a loop.
935 ///
936 /// Instead of unrolling the loop immediately (and duplicating its body
937 /// instructions), it is deferred to LLVM's LoopUnrollPass by adding loop
938 /// metadata.
939 ///
940 /// \param DL Debug location for instructions added by unrolling.
941 /// \param Loop The loop to unroll. The loop will be invalidated.
943
944 /// Fully or partially unroll a loop. How the loop is unrolled is determined
945 /// using LLVM's LoopUnrollPass.
946 ///
947 /// \param DL Debug location for instructions added by unrolling.
948 /// \param Loop The loop to unroll. The loop will be invalidated.
950
951 /// Partially unroll a loop.
952 ///
953 /// The CanonicalLoopInfo of the unrolled loop for use with chained
954 /// loop-associated directive can be requested using \p UnrolledCLI. Not
955 /// needing the CanonicalLoopInfo allows more efficient code generation by
956 /// deferring the actual unrolling to the LoopUnrollPass using loop metadata.
957 /// A loop-associated directive applied to the unrolled loop needs to know the
958 /// new trip count which means that if using a heuristically determined unroll
959 /// factor (\p Factor == 0), that factor must be computed immediately. We are
960 /// using the same logic as the LoopUnrollPass to derive the unroll factor,
961 /// but which assumes that some canonicalization has taken place (e.g.
962 /// Mem2Reg, LICM, GVN, Inlining, etc.). That is, the heuristic will perform
963 /// better when the unrolled loop's CanonicalLoopInfo is not needed.
964 ///
965 /// \param DL Debug location for instructions added by unrolling.
966 /// \param Loop The loop to unroll. The loop will be invalidated.
967 /// \param Factor The factor to unroll the loop by. A factor of 0
968 /// indicates that a heuristic should be used to determine
969 /// the unroll-factor.
970 /// \param UnrolledCLI If non-null, receives the CanonicalLoopInfo of the
971 /// partially unrolled loop. Otherwise, uses loop metadata
972 /// to defer unrolling to the LoopUnrollPass.
973 void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor,
974 CanonicalLoopInfo **UnrolledCLI);
975
976 /// Add metadata to simd-ize a loop. If IfCond is not nullptr, the loop
977 /// is cloned. The metadata which prevents vectorization is added to
978 /// the cloned loop. The cloned loop is executed when IfCond is evaluated
979 /// to false.
980 ///
981 /// \param Loop The loop to simd-ize.
982 /// \param AlignedVars The map which contains pairs of the pointer
983 /// and its corresponding alignment.
984 /// \param IfCond The value which corresponds to the if clause
985 /// condition.
986 /// \param Order The enum to map order clause.
987 /// \param Simdlen The Simdlen length to apply to the simd loop.
988 /// \param Safelen The Safelen length to apply to the simd loop.
990 MapVector<Value *, Value *> AlignedVars, Value *IfCond,
991 omp::OrderKind Order, ConstantInt *Simdlen,
992 ConstantInt *Safelen);
993
994 /// Generator for '#omp flush'
995 ///
996 /// \param Loc The location where the flush directive was encountered
997 void createFlush(const LocationDescription &Loc);
998
999 /// Generator for '#omp taskwait'
1000 ///
1001 /// \param Loc The location where the taskwait directive was encountered.
1002 void createTaskwait(const LocationDescription &Loc);
1003
1004 /// Generator for '#omp taskyield'
1005 ///
1006 /// \param Loc The location where the taskyield directive was encountered.
1007 void createTaskyield(const LocationDescription &Loc);
1008
1009 /// A struct to pack the relevant information for an OpenMP depend clause.
1010 struct DependData {
1014 explicit DependData() = default;
1016 Value *DepVal)
1018 };
1019
1020 /// Generator for `#omp task`
1021 ///
1022 /// \param Loc The location where the task construct was encountered.
1023 /// \param AllocaIP The insertion point to be used for alloca instructions.
1024 /// \param BodyGenCB Callback that will generate the region code.
1025 /// \param Tied True if the task is tied, false if the task is untied.
1026 /// \param Final i1 value which is `true` if the task is final, `false` if the
1027 /// task is not final.
1028 /// \param IfCondition i1 value. If it evaluates to `false`, an undeferred
1029 /// task is generated, and the encountering thread must
1030 /// suspend the current task region, for which execution
1031 /// cannot be resumed until execution of the structured
1032 /// block that is associated with the generated task is
1033 /// completed.
/// \param Dependencies List of DependData entries carrying the depend clause
///                     information for the task. Defaults to an empty list.
1034 InsertPointTy createTask(const LocationDescription &Loc,
1035 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
1036 bool Tied = true, Value *Final = nullptr,
1037 Value *IfCondition = nullptr,
1038 SmallVector<DependData> Dependencies = {});
1039
1040 /// Generator for the taskgroup construct
1041 ///
1042 /// \param Loc The location where the taskgroup construct was encountered.
1043 /// \param AllocaIP The insertion point to be used for alloca instructions.
1044 /// \param BodyGenCB Callback that will generate the region code.
1045 InsertPointTy createTaskgroup(const LocationDescription &Loc,
1046 InsertPointTy AllocaIP,
1047 BodyGenCallbackTy BodyGenCB);
1048
1049 /// Functions used to generate reductions. Such functions take two Values
1050 /// representing LHS and RHS of the reduction, respectively, and a reference
1051 /// to the value that is updated to refer to the reduction result.
1054
1055 /// Functions used to generate atomic reductions. Such functions take two
1056 /// Values representing pointers to LHS and RHS of the reduction, as well as
1057 /// the element type of these pointers. They are expected to atomically
1058 /// update the LHS to the reduced value.
1061
1062 /// Information about an OpenMP reduction.
1070 assert(cast<PointerType>(Variable->getType())
1071 ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
1072 }
1073
1074 /// Reduction element type, must match pointee type of variable.
1076
1077 /// Reduction variable of pointer type.
1079
1080 /// Thread-private partial reduction variable.
1082
1083 /// Callback for generating the reduction body. The IR produced by this will
1084 /// be used to combine two values in a thread-safe context, e.g., under
1085 /// lock or within the same thread, and therefore need not be atomic.
1087
1088 /// Callback for generating the atomic reduction body, may be null. The IR
1089 /// produced by this will be used to atomically combine two values during
1090 /// reduction. If null, the implementation will use the non-atomic version
1091 /// along with the appropriate synchronization mechanisms.
1093 };
1094
1095 // TODO: provide atomic and non-atomic reduction generators for reduction
1096 // operators defined by the OpenMP specification.
1097
1098 /// Generator for '#omp reduction'.
1099 ///
1100 /// Emits the IR instructing the runtime to perform the specific kind of
1101 /// reductions. Expects reduction variables to have been privatized and
1102 /// initialized to reduction-neutral values separately. Emits the calls to
1103 /// runtime functions as well as the reduction function and the basic blocks
1104 /// performing the reduction atomically and non-atomically.
1105 ///
1106 /// The code emitted for the following:
1107 ///
1108 /// \code
1109 /// type var_1;
1110 /// type var_2;
1111 /// #pragma omp <directive> reduction(reduction-op:var_1,var_2)
1112 /// /* body */;
1113 /// \endcode
1114 ///
1115 /// corresponds to the following sketch.
1116 ///
1117 /// \code
1118 /// void _outlined_par() {
1119 /// // N is the number of different reductions.
1120 /// void *red_array[] = {privatized_var_1, privatized_var_2, ...};
1121 /// switch(__kmpc_reduce(..., N, /*size of data in red array*/, red_array,
1122 /// _omp_reduction_func,
1123 /// _gomp_critical_user.reduction.var)) {
1124 /// case 1: {
1125 /// var_1 = var_1 <reduction-op> privatized_var_1;
1126 /// var_2 = var_2 <reduction-op> privatized_var_2;
1127 /// // ...
1128 /// __kmpc_end_reduce(...);
1129 /// break;
1130 /// }
1131 /// case 2: {
1132 /// _Atomic<ReductionOp>(var_1, privatized_var_1);
1133 /// _Atomic<ReductionOp>(var_2, privatized_var_2);
1134 /// // ...
1135 /// break;
1136 /// }
1137 /// default: break;
1138 /// }
1139 /// }
1140 ///
1141 /// void _omp_reduction_func(void **lhs, void **rhs) {
1142 /// *(type *)lhs[0] = *(type *)lhs[0] <reduction-op> *(type *)rhs[0];
1143 /// *(type *)lhs[1] = *(type *)lhs[1] <reduction-op> *(type *)rhs[1];
1144 /// // ...
1145 /// }
1146 /// \endcode
1147 ///
1148 /// \param Loc The location where the reduction was
1149 /// encountered. Must be within the associated
1150 /// directive and after the last local access to the
1151 /// reduction variables.
1152 /// \param AllocaIP An insertion point suitable for allocas usable
1153 /// in reductions.
1154 /// \param ReductionInfos A list of info on each reduction variable.
1155 /// \param IsNoWait A flag set if the reduction is marked as nowait.
1157 InsertPointTy AllocaIP,
1158 ArrayRef<ReductionInfo> ReductionInfos,
1159 bool IsNoWait = false);
1160
1161 ///}
1162
1163 /// Return the insertion point used by the underlying IRBuilder.
1165
1166 /// Update the internal location to \p Loc.
1168 Builder.restoreIP(Loc.IP);
1170 return Loc.IP.getBlock() != nullptr;
1171 }
1172
1173 /// Return the function declaration for the runtime function with \p FnID.
1176
1178
1179 /// Return the (LLVM-IR) string describing the source location \p LocStr.
1180 Constant *getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize);
1181
1182 /// Return the (LLVM-IR) string describing the default source location.
1184
1185 /// Return the (LLVM-IR) string describing the source location identified by
1186 /// the arguments.
1187 Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
1188 unsigned Line, unsigned Column,
1189 uint32_t &SrcLocStrSize);
1190
1191 /// Return the (LLVM-IR) string describing the DebugLoc \p DL. Use \p F as
1192 /// fallback if \p DL does not specify the function name.
1194 Function *F = nullptr);
1195
1196 /// Return the (LLVM-IR) string describing the source location \p Loc.
1197 Constant *getOrCreateSrcLocStr(const LocationDescription &Loc,
1198 uint32_t &SrcLocStrSize);
1199
1200 /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
1201 /// TODO: Create an enum class for the Reserve2Flags
1202 Constant *getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize,
1204 unsigned Reserve2Flags = 0);
1205
1206 /// Create a hidden global flag \p Name in the module with initial value \p
1207 /// Value.
1209
1210 /// Create an offloading section struct used to register this global at
1211 /// runtime.
1212 ///
1213 /// Type struct __tgt_offload_entry{
1214 /// void *addr; // Pointer to the offload entry info.
1215 /// // (function or global)
1216 /// char *name; // Name of the function or global.
1217 /// size_t size; // Size of the entry info (0 if it is a function).
1218 /// int32_t flags;
1219 /// int32_t reserved;
1220 /// };
1221 ///
1222 /// \param Addr The pointer to the global being registered.
1223 /// \param Name The symbol name associated with the global.
1224 /// \param Size The size in bytes of the global (0 for functions).
1225 /// \param Flags Flags associated with the entry.
1226 /// \param SectionName The section this entry will be placed at.
1228 int32_t Flags,
1229 StringRef SectionName = "omp_offloading_entries");
1230
1231 /// Generate control flow and cleanup for cancellation.
1232 ///
1233 /// \param CancelFlag Flag indicating if the cancellation is performed.
1234 /// \param CanceledDirective The kind of directive that is canceled.
1235 /// \param ExitCB Extra code to be generated in the exit block.
1236 void emitCancelationCheckImpl(Value *CancelFlag,
1237 omp::Directive CanceledDirective,
1238 FinalizeCallbackTy ExitCB = {});
1239
1240 /// Generate a target region entry call.
1241 ///
1242 /// \param Loc The location at which the request originated and is fulfilled.
1243 /// \param AllocaIP The insertion point to be used for alloca instructions.
1244 /// \param Return Return value of the created function returned by reference.
/// \param Ident Source location identifier for the call. NOTE(review):
///        presumably the ident_t* produced by getOrCreateIdent - confirm.
1245 /// \param DeviceID Identifier for the device via the 'device' clause.
1246 /// \param NumTeams Number of teams for the region via the 'num_teams' clause
1247 /// or 0 if unspecified and -1 if there is no 'teams' clause.
1248 /// \param NumThreads Number of threads via the 'thread_limit' clause.
1249 /// \param HostPtr Pointer to the host-side pointer of the target kernel.
1250 /// \param KernelArgs Array of arguments to the kernel.
1251 InsertPointTy emitTargetKernel(const LocationDescription &Loc,
1252 InsertPointTy AllocaIP, Value *&Return,
1253 Value *Ident, Value *DeviceID, Value *NumTeams,
1254 Value *NumThreads, Value *HostPtr,
1255 ArrayRef<Value *> KernelArgs);
1256
1257 /// Generate a barrier runtime call.
1258 ///
1259 /// \param Loc The location at which the request originated and is fulfilled.
1260 /// \param DK The directive which caused the barrier
1261 /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
1262 /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
1263 /// should be checked and acted upon.
1264 ///
1265 /// \returns The insertion point after the barrier.
1266 InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
1267 omp::Directive DK, bool ForceSimpleCall,
1268 bool CheckCancelFlag);
1269
1270 /// Generate a flush runtime call.
1271 ///
1272 /// \param Loc The location at which the request originated and is fulfilled.
1273 void emitFlush(const LocationDescription &Loc);
1274
1275 /// The finalization stack made up of finalize callbacks currently in-flight,
1276 /// wrapped into FinalizationInfo objects that reference also the finalization
1277 /// target block and the kind of cancellable directive.
1279
1280 /// Check whether the last entry of the finalization stack belongs to a
1281 /// directive of kind \p DK and is cancellable. An empty stack yields false.
1282 bool isLastFinalizationInfoCancellable(omp::Directive DK) {
1283 if (FinalizationStack.empty()) return false;
1284 const auto &LastInfo = FinalizationStack.back();
1285 return LastInfo.IsCancellable && LastInfo.DK == DK;
1286 }
1287
1288 /// Generate a taskwait runtime call.
1289 ///
1290 /// \param Loc The location at which the request originated and is fulfilled.
1291 void emitTaskwaitImpl(const LocationDescription &Loc);
1292
1293 /// Generate a taskyield runtime call.
1294 ///
1295 /// \param Loc The location at which the request originated and is fulfilled.
1296 void emitTaskyieldImpl(const LocationDescription &Loc);
1297
1298 /// Return the current thread ID.
1299 ///
1300 /// \param Ident The ident (ident_t*) describing the query origin.
1302
1303 /// The OpenMPIRBuilder Configuration
1305
1306 /// The underlying LLVM-IR module
1308
1309 /// The LLVM-IR Builder used to create IR.
1311
1312 /// Map to remember source location strings
1314
1315 /// Map to remember existing ident_t*.
1317
1318 /// Info manager to keep track of target regions.
1320
1321 /// Helper that contains information about regions we need to outline
1322 /// during finalization.
1324 using PostOutlineCBTy = std::function<void(Function &)>;
1328
1329 /// Collect all blocks in between EntryBB and ExitBB in both the given
1330 /// vector and set.
1332 SmallVectorImpl<BasicBlock *> &BlockVector);
1333
1334 /// Return the function that contains the region to be outlined.
/// This is the parent function of EntryBB; EntryBB is dereferenced
/// unconditionally, so it must be non-null when this is called.
1335 Function *getFunction() const { return EntryBB->getParent(); }
1336 };
1337
1338 /// Collection of regions that need to be outlined during finalization.
1340
1341 /// Collection of owned canonical loop objects that eventually need to be
1342 /// free'd.
1343 std::forward_list<CanonicalLoopInfo> LoopInfos;
1344
1345 /// Add a new region that will be outlined later. \p OI is moved into the
/// collection of regions that need to be outlined during finalization.
1346 void addOutlineInfo(OutlineInfo &&OI) {
  // OI is a named rvalue reference, i.e. an lvalue in this scope; without
  // std::move the new element would be copy-constructed instead of moved.
  OutlineInfos.emplace_back(std::move(OI));
}
1347
1348 /// An ordered map of auto-generated variables to their unique names.
1349 /// It stores variables with the following names: 1) ".gomp_critical_user_" +
1350 /// <critical_section_name> + ".var" for "omp critical" directives; 2)
1351 /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
1352 /// variables.
1354
1355 /// Computes the size of type in bytes.
1356 Value *getSizeInBytes(Value *BasePtr);
1357
1358 /// Create the global variable holding the offload mappings information.
1360 std::string VarName);
1361
1362 /// Create the global variable holding the offload names information.
1365 std::string VarName);
1366
1369 AllocaInst *Args = nullptr;
1371 };
1372
1373 /// Create the allocas instruction used in call to mapper functions.
1375 InsertPointTy AllocaIP, unsigned NumOperands,
1377
1378 /// Create the call for the target mapper function.
1379 /// \param Loc The source location description.
1380 /// \param MapperFunc Function to be called.
1381 /// \param SrcLocInfo Source location information global.
1382 /// \param MaptypesArg The argument types.
1383 /// \param MapnamesArg The argument names.
1384 /// \param MapperAllocas The AllocaInst used for the call.
1385 /// \param DeviceID Device ID for the call.
1386 /// \param NumOperands Number of operands in the call.
1387 void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
1388 Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
1389 struct MapperAllocas &MapperAllocas, int64_t DeviceID,
1390 unsigned NumOperands);
1391
1392 /// Container for the arguments used to pass data to the runtime library.
1394 explicit TargetDataRTArgs() {}
1395 /// The array of base pointer passed to the runtime library.
1397 /// The array of section pointers passed to the runtime library.
1399 /// The array of sizes passed to the runtime library.
1400 Value *SizesArray = nullptr;
1401 /// The array of map types passed to the runtime library for the beginning
1402 /// of the region or for the entire region if there are no separate map
1403 /// types for the region end.
1405 /// The array of map types passed to the runtime library for the end of the
1406 /// region, or nullptr if there are no separate map types for the region
1407 /// end.
1409 /// The array of user-defined mappers passed to the runtime library.
1411 /// The array of original declaration names of mapped pointers sent to the
1412 /// runtime library for debugging
1414 };
1415
1416 /// Struct that keeps the information that should be kept throughout
1417 /// a 'target data' region.
1419 /// Set to true if device pointer information has to be obtained.
1420 bool RequiresDevicePointerInfo = false;
1421 /// Set to true if Clang emits separate runtime calls for the beginning and
1422 /// end of the region. These calls might have separate map type arrays.
1423 bool SeparateBeginEndCalls = false;
1424
1425 public:
1427
1428 /// Indicate whether any user-defined mapper exists.
1429 bool HasMapper = false;
1430 /// The total number of pointers passed to the runtime library.
1431 unsigned NumberOfPtrs = 0u;
1432
1433 explicit TargetDataInfo() {}
/// Construct the info with explicit values for the two region-wide flags.
/// Members not listed in the initializer list keep their default member
/// initializers.
1434 explicit TargetDataInfo(bool RequiresDevicePointerInfo,
1435 bool SeparateBeginEndCalls)
1436 : RequiresDevicePointerInfo(RequiresDevicePointerInfo),
1437 SeparateBeginEndCalls(SeparateBeginEndCalls) {}
1438 /// Clear information about the data arrays.
1441 HasMapper = false;
1442 NumberOfPtrs = 0u;
1443 }
1444 /// Return true if the current target data information has valid arrays.
1445 bool isValid() {
1449 }
/// Return true if device pointer information has to be obtained. Marked
/// const because it only reads the flag, so it is usable on const objects.
1450 bool requiresDevicePointerInfo() const { return RequiresDevicePointerInfo; }
/// Return true if separate runtime calls are emitted for the beginning and
/// end of the region. Marked const because it only reads the flag, so it is
/// usable on const objects.
1451 bool separateBeginEndCalls() const { return SeparateBeginEndCalls; }
1452 };
1453
1459
1460 /// This structure contains combined information generated for mappable
1461 /// clauses, including base pointers, pointers, sizes, map types, user-defined
1462 /// mappers, and non-contiguous information.
1463 struct MapInfosTy {
1465 bool IsNonContiguous = false;
1470 };
1477
1478 /// Append arrays in \a CurInfo.
1479 void append(MapInfosTy &CurInfo) {
1481 CurInfo.BasePointers.end());
1482 Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
1483 Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
1484 Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
1485 Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
1487 CurInfo.NonContigInfo.Dims.end());
1489 CurInfo.NonContigInfo.Offsets.end());
1491 CurInfo.NonContigInfo.Counts.end());
1493 CurInfo.NonContigInfo.Strides.end());
1494 }
1495 };
1496
1497 /// Emit the arguments to be passed to the runtime library based on the
1498 /// arrays of base pointers, pointers, sizes, map types, and mappers. If
1499 /// ForEndCall, emit map types to be passed for the end of the region instead
1500 /// of the beginning.
1504 bool EmitDebug = false,
1505 bool ForEndCall = false);
1506
1507 /// Creates offloading entry for the provided entry ID \a ID, address \a
1508 /// Addr, size \a Size, and flags \a Flags.
1511
1512 /// The kind of errors that can occur when emitting the offload entries and
1513 /// metadata.
1519
1520 /// Callback function type
1522 std::function<void(EmitMetadataErrorKind, TargetRegionEntryInfo)>;
1523
1524 // Emit the offloading entries and metadata so that the device codegen side
1525 // can easily figure out what to emit. The produced metadata looks like
1526 // this:
1527 //
1528 // !omp_offload.info = !{!1, ...}
1529 //
1530 // We only generate metadata for function that contain target regions.
1532 EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
1533
1534public:
1535 /// Generator for __kmpc_copyprivate
1536 ///
1537 /// \param Loc The source location description.
1538 /// \param BufSize Number of elements in the buffer.
1539 /// \param CpyBuf List of pointers to data to be copied.
1540 /// \param CpyFn function to call for copying data.
1541 /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
1542 ///
1543 /// \return The insertion position *after* the CopyPrivate call.
1544
1546 llvm::Value *BufSize, llvm::Value *CpyBuf,
1547 llvm::Value *CpyFn, llvm::Value *DidIt);
1548
1549 /// Generator for '#omp single'
1550 ///
1551 /// \param Loc The source location description.
1552 /// \param BodyGenCB Callback that will generate the region code.
1553 /// \param FiniCB Callback to finalize variable copies.
1554 /// \param IsNowait If false, a barrier is emitted.
1555 /// \param DidIt Local variable used as a flag to indicate 'single' thread
1556 ///
1557 /// \returns The insertion position *after* the single call.
1559 BodyGenCallbackTy BodyGenCB,
1560 FinalizeCallbackTy FiniCB, bool IsNowait,
1561 llvm::Value *DidIt);
1562
1563 /// Generator for '#omp master'
1564 ///
1565 /// \param Loc The insert and source location description.
1566 /// \param BodyGenCB Callback that will generate the region code.
1567 /// \param FiniCB Callback to finalize variable copies.
1568 ///
1569 /// \returns The insertion position *after* the master.
1571 BodyGenCallbackTy BodyGenCB,
1572 FinalizeCallbackTy FiniCB);
1573
1574 /// Generator for '#omp masked'
1575 ///
1576 /// \param Loc The insert and source location description.
1577 /// \param BodyGenCB Callback that will generate the region code.
1578 /// \param FiniCB Callback to finalize variable copies.
1579 ///
1580 /// \returns The insertion position *after* the masked.
1582 BodyGenCallbackTy BodyGenCB,
1584
1585 /// Generator for '#omp critical'
1586 ///
1587 /// \param Loc The insert and source location description.
1588 /// \param BodyGenCB Callback that will generate the region body code.
1589 /// \param FiniCB Callback to finalize variable copies.
1590 /// \param CriticalName name of the lock used by the critical directive
1591 /// \param HintInst Hint Instruction for hint clause associated with critical
1592 ///
1593 /// \returns The insertion position *after* the critical.
1595 BodyGenCallbackTy BodyGenCB,
1596 FinalizeCallbackTy FiniCB,
1597 StringRef CriticalName, Value *HintInst);
1598
1599 /// Generator for '#omp ordered depend (source | sink)'
1600 ///
1601 /// \param Loc The insert and source location description.
1602 /// \param AllocaIP The insertion point to be used for alloca instructions.
1603 /// \param NumLoops The number of loops in depend clause.
1604 /// \param StoreValues The value will be stored in vector address.
1605 /// \param Name The name of alloca instruction.
1606 /// \param IsDependSource If true, depend source; otherwise, depend sink.
1607 ///
1608 /// \return The insertion position *after* the ordered.
1610 InsertPointTy AllocaIP, unsigned NumLoops,
1611 ArrayRef<llvm::Value *> StoreValues,
1612 const Twine &Name, bool IsDependSource);
1613
1614 /// Generator for '#omp ordered [threads | simd]'
1615 ///
1616 /// \param Loc The insert and source location description.
1617 /// \param BodyGenCB Callback that will generate the region code.
1618 /// \param FiniCB Callback to finalize variable copies.
1619 /// \param IsThreads If true, with threads clause or without clause;
1620 /// otherwise, with simd clause;
1621 ///
1622 /// \returns The insertion position *after* the ordered.
1624 BodyGenCallbackTy BodyGenCB,
1625 FinalizeCallbackTy FiniCB,
1626 bool IsThreads);
1627
1628 /// Generator for '#omp sections'
1629 ///
1630 /// \param Loc The insert and source location description.
1631 /// \param AllocaIP The insertion points to be used for alloca instructions.
1632 /// \param SectionCBs Callbacks that will generate body of each section.
1633 /// \param PrivCB Callback to copy a given variable (think copy constructor).
1634 /// \param FiniCB Callback to finalize variable copies.
1635 /// \param IsCancellable Flag to indicate a cancellable parallel region.
1636 /// \param IsNowait If true, the barrier that ensures all sections are
1637 /// executed before moving forward will not be generated.
1638 /// \returns The insertion position *after* the sections.
1640 InsertPointTy AllocaIP,
1642 PrivatizeCallbackTy PrivCB,
1643 FinalizeCallbackTy FiniCB, bool IsCancellable,
1644 bool IsNowait);
1645
1646 /// Generator for '#omp section'
1647 ///
1648 /// \param Loc The insert and source location description.
1649 /// \param BodyGenCB Callback that will generate the region body code.
1650 /// \param FiniCB Callback to finalize variable copies.
1651 /// \returns The insertion position *after* the section.
1653 BodyGenCallbackTy BodyGenCB,
1654 FinalizeCallbackTy FiniCB);
1655
1656 /// Generate conditional branch and relevant BasicBlocks through which private
1657 /// threads copy the 'copyin' variables from Master copy to threadprivate
1658 /// copies.
1659 ///
1660 /// \param IP insertion block for copyin conditional
1661 /// \param MasterVarPtr a pointer to the master variable
1662 /// \param PrivateVarPtr a pointer to the threadprivate variable
1663 /// \param IntPtrTy Pointer size type
1664 /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
1665 /// and copy.in.end block
1666 ///
1667 /// \returns The insertion point where copying operation to be emitted.
1669 Value *PrivateAddr,
1670 llvm::IntegerType *IntPtrTy,
1671 bool BranchtoEnd = true);
1672
1673 /// Create a runtime call for kmpc_Alloc
1674 ///
1675 /// \param Loc The insert and source location description.
1676 /// \param Size Size of allocated memory space
1677 /// \param Allocator Allocator information instruction
1678 /// \param Name Name of call Instruction for OMP_alloc
1679 ///
1680 /// \returns CallInst to the OMP_Alloc call
1682 Value *Allocator, std::string Name = "");
1683
1684 /// Create a runtime call for kmpc_free
1685 ///
1686 /// \param Loc The insert and source location description.
1687 /// \param Addr Address of memory space to be freed
1688 /// \param Allocator Allocator information instruction
1689 /// \param Name Name of call Instruction for OMP_Free
1690 ///
1691 /// \returns CallInst to the OMP_Free call
1693 Value *Allocator, std::string Name = "");
1694
1695 /// Create a runtime call for kmpc_threadprivate_cached
1696 ///
1697 /// \param Loc The insert and source location description.
1698 /// \param Pointer pointer to data to be cached
1699 /// \param Size size of data to be cached
1700 /// \param Name Name of call Instruction for callinst
1701 ///
1702 /// \returns CallInst to the thread private cache call.
1704 llvm::Value *Pointer,
1706 const llvm::Twine &Name = Twine(""));
1707
1708 /// Create a runtime call for __tgt_interop_init
1709 ///
1710 /// \param Loc The insert and source location description.
1711 /// \param InteropVar variable to be allocated
1712 /// \param InteropType type of interop operation
1713 /// \param Device device to which offloading will occur
1714 /// \param NumDependences number of dependence variables
1715 /// \param DependenceAddress pointer to dependence variables
1716 /// \param HaveNowaitClause does nowait clause exist
1717 ///
1718 /// \returns CallInst to the __tgt_interop_init call
1720 Value *InteropVar,
1721 omp::OMPInteropType InteropType, Value *Device,
1722 Value *NumDependences,
1723 Value *DependenceAddress,
1724 bool HaveNowaitClause);
1725
1726 /// Create a runtime call for __tgt_interop_destroy
1727 ///
1728 /// \param Loc The insert and source location description.
1729 /// \param InteropVar variable to be allocated
1730 /// \param Device device to which offloading will occur
1731 /// \param NumDependences number of dependence variables
1732 /// \param DependenceAddress pointer to dependence variables
1733 /// \param HaveNowaitClause does nowait clause exist
1734 ///
1735 /// \returns CallInst to the __tgt_interop_destroy call
1737 Value *InteropVar, Value *Device,
1738 Value *NumDependences,
1739 Value *DependenceAddress,
1740 bool HaveNowaitClause);
1741
1742 /// Create a runtime call for __tgt_interop_use
1743 ///
1744 /// \param Loc The insert and source location description.
1745 /// \param InteropVar variable to be allocated
1746 /// \param Device device to which offloading will occur
1747 /// \param NumDependences number of dependence variables
1748 /// \param DependenceAddress pointer to dependence variables
1749 /// \param HaveNowaitClause does nowait clause exist
1750 ///
1751 /// \returns CallInst to the __tgt_interop_use call
1753 Value *InteropVar, Value *Device,
1754 Value *NumDependences, Value *DependenceAddress,
1755 bool HaveNowaitClause);
1756
1757 /// The `omp target` interface
1758 ///
1759 /// For more information about the usage of this interface,
1760 /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
1761 ///
1762 ///{
1763
1764 /// Create a runtime call for kmpc_target_init
1765 ///
1766 /// \param Loc The insert and source location description.
1767 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1768 InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD);
1769
1770 /// Create a runtime call for kmpc_target_deinit
1771 ///
1772 /// \param Loc The insert and source location description.
1773 /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
1774 void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD);
1775
1776 ///}
1777
1778private:
1779 // Sets the function attributes expected for the outlined function
1780 void setOutlinedTargetRegionFunctionAttributes(Function *OutlinedFn,
1781 int32_t NumTeams,
1782 int32_t NumThreads);
1783
1784 // Creates the function ID/Address for the given outlined function.
1785 // In the case of an embedded device function the address of the function is
1786 // used, in the case of a non-offload function a constant is created.
1787 Constant *createOutlinedFunctionID(Function *OutlinedFn,
1788 StringRef EntryFnIDName);
1789
1790 // Creates the region entry address for the outlined function
1791 Constant *createTargetRegionEntryAddr(Function *OutlinedFunction,
1792 StringRef EntryFnName);
1793
1794public:
1795 /// Functions used to generate a function with the given name.
1796 using FunctionGenCallback = std::function<Function *(StringRef FunctionName)>;
1797
1798 /// Create a unique name for the entry function using the source location
1799 /// information of the current target region. The name will be something like:
1800 ///
1801 /// __omp_offloading_DD_FFFF_PP_lBB[_CC]
1802 ///
1803 /// where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
1804 /// mangled name of the function that encloses the target region and BB is the
1805 /// line number of the target region. CC is a count added when more than one
1806 /// region is located at the same location.
1807 ///
1808 /// If this target outline function is not an offload entry, we don't need to
1809 /// register it. This may happen if it is guarded by an if clause that is
1810 /// false at compile time, or no target archs have been specified.
1811 ///
1812 /// The created target region ID is used by the runtime library to identify
1813 /// the current target region, so it only has to be unique and not
1814 /// necessarily point to anything. It could be the pointer to the outlined
1815 /// function that implements the target region, but we aren't using that so
1816 /// that the compiler doesn't need to keep that, and could therefore inline
1817 /// the host function if proven worthwhile during optimization. On the other
1818 /// hand, if emitting code for the device, the ID has to be the function
1819 /// address so that it can be retrieved from the offloading entry and launched
1820 /// by the runtime library. We also mark the outlined function to have
1821 /// external linkage in case we are emitting code for the device, because
1822 /// these functions will be entry points to the device.
1823 ///
1824 /// \param InfoManager The info manager keeping track of the offload entries
1825 /// \param EntryInfo The entry information about the function
1826 /// \param GenerateFunctionCallback The callback function to generate the code
1827 /// \param NumTeams Default number of teams
1828 /// \param NumThreads Default number of threads
1829 /// \param OutlinedFn Pointer to the outlined function
1830 /// \param OutlinedFnID The ID to be created
1832 FunctionGenCallback &GenerateFunctionCallback,
1833 int32_t NumTeams, int32_t NumThreads,
1834 bool IsOffloadEntry, Function *&OutlinedFn,
1835 Constant *&OutlinedFnID);
1836
1837 /// Registers the given function and sets up the attributes of the function.
1838 /// Returns the FunctionID.
1839 ///
1840 /// \param InfoManager The info manager keeping track of the offload entries
1841 /// \param EntryInfo The entry information about the function
1842 /// \param OutlinedFunction Pointer to the outlined function
1843 /// \param EntryFnName Name of the outlined function
1844 /// \param EntryFnIDName Name of the ID to be created
1845 /// \param NumTeams Default number of teams
1846 /// \param NumThreads Default number of threads
1848 Function *OutlinedFunction,
1849 StringRef EntryFnName,
1850 StringRef EntryFnIDName,
1851 int32_t NumTeams, int32_t NumThreads);
1852
1853 /// Generator for '#omp target data'
1854 ///
1855 /// \param Loc The location where the target data construct was encountered.
1856 /// \param CodeGenIP The insertion point at which the target directive code
1857 /// should be placed.
1858 /// \param MapTypeFlags BitVector storing the mapType flags for the
1859 /// mapOperands.
1860 /// \param MapNames Names for the mapOperands.
1861 /// \param MapperAllocas Pointers to the AllocInsts for the map clause.
1862 /// \param IsBegin If true then emits begin mapper call otherwise emits
1863 /// end mapper call.
1864 /// \param DeviceID Stores the DeviceID from the device clause.
1865 /// \param IfCond Value which corresponds to the if clause condition.
1866 /// \param ProcessMapOpCB Callback that generates code for the map clause.
1867 /// \param BodyGenCB Callback that will generate the region code.
1870 SmallVectorImpl<uint64_t> &MapTypeFlags,
1872 struct MapperAllocas &MapperAllocas, bool IsBegin, int64_t DeviceID,
1873 Value *IfCond, BodyGenCallbackTy ProcessMapOpCB,
1874 BodyGenCallbackTy BodyGenCB = {});
1875
1877 InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
1878
1879 /// Generator for '#omp target'
1880 ///
1881 /// \param Loc where the target construct was encountered.
1882 /// \param CodeGenIP The insertion point where the call to the outlined
1883 /// function should be emitted.
1884 /// \param EntryInfo The entry information about the function.
1885 /// \param NumTeams Number of teams specified in the num_teams clause.
1886 /// \param NumThreads Number of threads specified in the thread_limit clause.
1887 /// \param Inputs The input values to the region that will be passed
1888 /// as arguments to the outlined function.
1889 /// \param BodyGenCB Callback that will generate the region code.
1892 TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
1893 int32_t NumThreads,
1895 TargetBodyGenCallbackTy BodyGenCB);
1896
1897 /// Declarations for LLVM-IR types (simple, array, function and structure) are
1898 /// generated below. Their names are defined and used in OpenMPKinds.def. Here
1899 /// we provide the declarations, the initializeTypes function will provide the
1900 /// values.
1901 ///
1902 ///{
1903#define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
1904#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
1905 ArrayType *VarName##Ty = nullptr; \
1906 PointerType *VarName##PtrTy = nullptr;
1907#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
1908 FunctionType *VarName = nullptr; \
1909 PointerType *VarName##Ptr = nullptr;
1910#define OMP_STRUCT_TYPE(VarName, StrName, ...) \
1911 StructType *VarName = nullptr; \
1912 PointerType *VarName##Ptr = nullptr;
1913#include "llvm/Frontend/OpenMP/OMPKinds.def"
1914
1915 ///}
1916
1917private:
1918 /// Create all simple and struct types exposed by the runtime and remember
1919 /// the llvm::PointerTypes of them for easy access later.
1920 void initializeTypes(Module &M);
1921
1922 /// Common interface for generating entry calls for OMP Directives.
1923 /// If the directive has a region/body, it will set the insertion
1924 /// point to the body.
1925 ///
1926 /// \param OMPD Directive to generate entry blocks for
1927 /// \param EntryCall Call to the entry OMP Runtime Function
1928 /// \param ExitBB block where the region ends.
1929 /// \param Conditional indicate if the entry call result will be used
1930 /// to evaluate a conditional of whether a thread will execute
1931 /// body code or not.
1932 ///
1933 /// \return The insertion position in exit block
1934 InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
1935 BasicBlock *ExitBB,
1936 bool Conditional = false);
1937
1938 /// Common interface to finalize the region
1939 ///
1940 /// \param OMPD Directive to generate exiting code for
1941 /// \param FinIP Insertion point for emitting Finalization code and exit call
1942 /// \param ExitCall Call to the ending OMP Runtime Function
1943 /// \param HasFinalize indicate if the directive will require finalization
1944 /// and has a finalization callback in the stack that
1945 /// should be called.
1946 ///
1947 /// \return The insertion position in exit block
1948 InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
1949 InsertPointTy FinIP,
1950 Instruction *ExitCall,
1951 bool HasFinalize = true);
1952
1953 /// Common Interface to generate OMP inlined regions
1954 ///
1955 /// \param OMPD Directive to generate inlined region for
1956 /// \param EntryCall Call to the entry OMP Runtime Function
1957 /// \param ExitCall Call to the ending OMP Runtime Function
1958 /// \param BodyGenCB Body code generation callback.
1959 /// \param FiniCB Finalization Callback. Will be called when finalizing region
1960 /// \param Conditional indicate if the entry call result will be used
1961 /// to evaluate a conditional of whether a thread will execute
1962 /// body code or not.
1963 /// \param HasFinalize indicate if the directive will require finalization
1964 /// and has a finalization callback in the stack that
1965 /// should be called.
1966 /// \param IsCancellable if HasFinalize is set to true, indicate if the
1967 /// directive should be cancellable.
1968 /// \return The insertion point after the region
1969
1971 EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
1972 Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
1973 FinalizeCallbackTy FiniCB, bool Conditional = false,
1974 bool HasFinalize = true, bool IsCancellable = false);
1975
1976 /// Get the final name built from \p Parts joined by the given
1977 /// (platform-specific) separators.
1978 /// \param Parts different parts of the final name that need separation
1979 /// \param FirstSeparator First separator used between the initial two
1980 /// parts of the name.
1981 /// \param Separator separator used between all of the remaining parts
1982 static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
1983 StringRef FirstSeparator,
1984 StringRef Separator);
1985
1986 /// Returns corresponding lock object for the specified critical region
1987 /// name. If the lock object does not exist it is created, otherwise the
1988 /// reference to the existing copy is returned.
1989 /// \param CriticalName Name of the critical region.
1990 ///
1991 Value *getOMPCriticalRegionLock(StringRef CriticalName);
1992
1993 /// Callback type for Atomic Expression update
1994 /// ex:
1995 /// \code{.cpp}
1996 /// unsigned x = 0;
1997 /// #pragma omp atomic update
1998 /// x = Expr(x_old); //Expr() is any legal operation
1999 /// \endcode
2000 ///
2001 /// \param XOld the value of the atomic memory address to use for update
2002 /// \param IRB reference to the IRBuilder to use
2003 ///
2004 /// \returns Value to update X to.
2005 using AtomicUpdateCallbackTy =
2006 const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
2007
2008private:
2009 enum AtomicKind { Read, Write, Update, Capture, Compare };
2010
2011 /// Determine whether to emit a flush after an atomic operation or not
2012 ///
2013 /// \param Loc The insert and source location description.
2014 /// \param AO The required atomic ordering
2015 /// \param AK The OpenMP atomic operation kind used.
2016 ///
2017 /// \returns whether a flush was emitted or not
2018 bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
2019 AtomicOrdering AO, AtomicKind AK);
2020
2021 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2022 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2023 /// Only Scalar data types.
2024 ///
2025 /// \param AllocaIP The insertion point to be used for alloca
2026 /// instructions.
2027 /// \param X The target atomic pointer to be updated
2028 /// \param XElemTy The element type of the atomic pointer.
2029 /// \param Expr The value to update X with.
2030 /// \param AO Atomic ordering of the generated atomic
2031 /// instructions.
2032 /// \param RMWOp The binary operation used for update. If the
2033 /// operation is not supported by atomicRMW,
2034 /// or belongs to {FADD, FSUB, BAD_BINOP},
2035 /// then a `cmpExch` based atomic will be generated.
2036 /// \param UpdateOp Code generator for complex expressions that cannot be
2037 /// expressed through atomicrmw instruction.
2038 /// \param VolatileX true if \a X is volatile
2039 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2040 /// update expression, false otherwise.
2041 /// (e.g. true for X = X BinOp Expr)
2042 ///
2043 /// \returns A pair of the old value of X before the update, and the value
2044 /// used for the update.
2045 std::pair<Value *, Value *>
2046 emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
2048 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
2049 bool IsXBinopExpr);
2050
2051 /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
2052 ///
2053 /// \returns The instruction
2054 Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
2055 AtomicRMWInst::BinOp RMWOp);
2056
2057public:
2058 /// a struct to pack relevant information while generating atomic Ops
2060 Value *Var = nullptr;
2061 Type *ElemTy = nullptr;
2062 bool IsSigned = false;
2063 bool IsVolatile = false;
2064 };
2065
2066 /// Emit atomic Read for : V = X --- Only Scalar data types.
2067 ///
2068 /// \param Loc The insert and source location description.
2069 /// \param X The target pointer to be atomically read
2070 /// \param V Memory address where to store atomically read
2071 /// value
2072 /// \param AO Atomic ordering of the generated atomic
2073 /// instructions.
2074 ///
2075 /// \return Insertion point after generated atomic read IR.
2078 AtomicOrdering AO);
2079
2080 /// Emit atomic write for : X = Expr --- Only Scalar data types.
2081 ///
2082 /// \param Loc The insert and source location description.
2083 /// \param X The target pointer to be atomically written to
2084 /// \param Expr The value to store.
2085 /// \param AO Atomic ordering of the generated atomic
2086 /// instructions.
2087 ///
2088 /// \return Insertion point after generated atomic Write IR.
2090 AtomicOpValue &X, Value *Expr,
2091 AtomicOrdering AO);
2092
2093 /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
2094 /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
2095 /// Only Scalar data types.
2096 ///
2097 /// \param Loc The insert and source location description.
2098 /// \param AllocaIP The insertion point to be used for alloca instructions.
2099 /// \param X The target atomic pointer to be updated
2100 /// \param Expr The value to update X with.
2101 /// \param AO Atomic ordering of the generated atomic instructions.
2102 /// \param RMWOp The binary operation used for update. If the operation
2103 /// is not supported by atomicRMW, or belongs to
2104 /// {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
2105 /// atomic will be generated.
2106 /// \param UpdateOp Code generator for complex expressions that cannot be
2107 /// expressed through atomicrmw instruction.
2108 /// \param IsXBinopExpr true if \a X is Left H.S. in Right H.S. part of the
2109 /// update expression, false otherwise.
2110 /// (e.g. true for X = X BinOp Expr)
2111 ///
2112 /// \return Insertion point after generated atomic update IR.
2114 InsertPointTy AllocaIP, AtomicOpValue &X,
2115 Value *Expr, AtomicOrdering AO,
2117 AtomicUpdateCallbackTy &UpdateOp,
2118 bool IsXBinopExpr);
2119
2120 /// Emit atomic update for constructs: --- Only Scalar data types
2121 /// V = X; X = X BinOp Expr ,
2122 /// X = X BinOp Expr; V = X,
2123 /// V = X; X = Expr BinOp X,
2124 /// X = Expr BinOp X; V = X,
2125 /// V = X; X = UpdateOp(X),
2126 /// X = UpdateOp(X); V = X,
2127 ///
2128 /// \param Loc The insert and source location description.
2129 /// \param AllocaIP The insertion point to be used for alloca instructions.
2130 /// \param X The target atomic pointer to be updated
2131 /// \param V Memory address where to store captured value
2132 /// \param Expr The value to update X with.
2133 /// \param AO Atomic ordering of the generated atomic instructions
2134 /// \param RMWOp The binary operation used for update. If the
2135 /// operation is not supported by atomicRMW, or belongs to
2136 /// {FADD, FSUB, BAD_BINOP}, then a cmpExch based
2137 /// atomic will be generated.
2138 /// \param UpdateOp Code generator for complex expressions that cannot be
2139 /// expressed through atomicrmw instruction.
2140 /// \param UpdateExpr true if X is an in place update of the form
2141 /// X = X BinOp Expr or X = Expr BinOp X
2142 /// \param IsXBinopExpr true if X is Left H.S. in Right H.S. part of the
2143 /// update expression, false otherwise.
2144 /// (e.g. true for X = X BinOp Expr)
2145 /// \param IsPostfixUpdate true if original value of 'x' must be stored in
2146 /// 'v', not an updated one.
2147 ///
2148 /// \return Insertion point after generated atomic capture IR.
2151 AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
2153 AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
2154 bool IsPostfixUpdate, bool IsXBinopExpr);
2155
2156 /// Emit atomic compare for constructs: --- Only scalar data types
2157 /// cond-expr-stmt:
2158 /// x = x ordop expr ? expr : x;
2159 /// x = expr ordop x ? expr : x;
2160 /// x = x == e ? d : x;
2161 /// x = e == x ? d : x; (this one is not in the spec)
2162 /// cond-update-stmt:
2163 /// if (x ordop expr) { x = expr; }
2164 /// if (expr ordop x) { x = expr; }
2165 /// if (x == e) { x = d; }
2166 /// if (e == x) { x = d; } (this one is not in the spec)
2167 /// conditional-update-capture-atomic:
2168 /// v = x; cond-update-stmt; (IsPostfixUpdate=true, IsFailOnly=false)
2169 /// cond-update-stmt; v = x; (IsPostfixUpdate=false, IsFailOnly=false)
2170 /// if (x == e) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2171 /// IsFailOnly=true)
2172 /// r = x == e; if (r) { x = d; } (IsPostfixUpdate=false, IsFailOnly=false)
2173 /// r = x == e; if (r) { x = d; } else { v = x; } (IsPostfixUpdate=false,
2174 /// IsFailOnly=true)
2175 ///
2176 /// \param Loc The insert and source location description.
2177 /// \param X The target atomic pointer to be updated.
2178 /// \param V Memory address where to store captured value (for
2179 /// compare capture only).
2180 /// \param R Memory address where to store comparison result
2181 /// (for compare capture with '==' only).
2182 /// \param E The expected value ('e') for forms that use an
2183 /// equality comparison or an expression ('expr') for
2184 /// forms that use 'ordop' (logically an atomic maximum or
2185 /// minimum).
2186 /// \param D The desired value for forms that use an equality
2187 /// comparison. For forms that use 'ordop', it should be
2188 /// \p nullptr.
2189 /// \param AO Atomic ordering of the generated atomic instructions.
2190 /// \param Op Atomic compare operation. It can only be ==, <, or >.
2191 /// \param IsXBinopExpr True if the conditional statement is in the form where
2192 /// x is on LHS. It only matters for < or >.
2193 /// \param IsPostfixUpdate True if original value of 'x' must be stored in
2194 /// 'v', not an updated one (for compare capture
2195 /// only).
2196 /// \param IsFailOnly True if the original value of 'x' is stored to 'v'
2197 /// only when the comparison fails. This is only valid for
2198 /// the case the comparison is '=='.
2199 ///
2200 /// \return Insertion point after generated atomic capture IR.
2205 bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly);
2206
2207 /// Create the control flow structure of a canonical OpenMP loop.
2208 ///
2209 /// The emitted loop will be disconnected, i.e. no edge to the loop's
2210 /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
2211 /// IRBuilder location is not preserved.
2212 ///
2213 /// \param DL DebugLoc used for the instructions in the skeleton.
2214 /// \param TripCount Value to be used for the trip count.
2215 /// \param F Function in which to insert the BasicBlocks.
2216 /// \param PreInsertBefore Where to insert BBs that execute before the body,
2217 /// typically the body itself.
2218 /// \param PostInsertBefore Where to insert BBs that execute after the body.
2219 /// \param Name Base name used to derive BB
2220 /// and instruction names.
2221 ///
2222 /// \returns The CanonicalLoopInfo that represents the emitted loop.
2224 Function *F,
2225 BasicBlock *PreInsertBefore,
2226 BasicBlock *PostInsertBefore,
2227 const Twine &Name = {});
2228 /// OMP Offload Info Metadata name string
2229 const std::string ompOffloadInfoName = "omp_offload.info";
2230
2231 /// Loads all the offload entries information from the host IR
2232 /// metadata. This function is only meant to be used with device code
2233 /// generation.
2234 ///
2235 /// \param M Module to load Metadata info from. Module passed may be
2236 /// loaded from bitcode file, i.e., different from OpenMPIRBuilder::M module.
2238
2239 /// Gets (if a variable with the given name already exists) or creates an
2240 /// internal global variable with the specified Name. The created variable has
2241 /// linkage CommonLinkage by default and is initialized by null value.
2242 /// \param Ty Type of the global variable. If it already exists, the type
2243 /// must be the same.
2244 /// \param Name Name of the variable.
2246 unsigned AddressSpace = 0);
2247};
2248
2249/// Class to represent the control flow structure of an OpenMP canonical loop.
2250///
2251/// The control-flow structure is standardized for easy consumption by
2252/// directives associated with loops. For instance, the worksharing-loop
2253/// construct may change this control flow such that each loop iteration is
2254/// executed on only one thread. The constraints of a canonical loop in brief
2255/// are:
2256///
2257/// * The number of loop iterations must have been computed before entering the
2258/// loop.
2259///
2260/// * Has an (unsigned) logical induction variable that starts at zero and
2261/// increments by one.
2262///
2263/// * The loop's CFG itself has no side-effects. The OpenMP specification
2264/// itself allows side-effects, but the order in which they happen, including
2265/// how often or whether at all, is unspecified. We expect that the frontend
2266/// will emit those side-effect instructions somewhere (e.g. before the loop)
2267/// such that the CanonicalLoopInfo itself can be side-effect free.
2268///
2269/// Keep in mind that CanonicalLoopInfo is meant to only describe a repeated
2270/// execution of a loop body that satisfies these constraints. It does NOT
2271/// represent arbitrary SESE regions that happen to contain a loop. Do not use
2272/// CanonicalLoopInfo for such purposes.
2273///
2274/// The control flow can be described as follows:
2275///
2276/// Preheader
2277/// |
2278/// /-> Header
2279/// | |
2280/// | Cond---\
2281/// | | |
2282/// | Body |
2283/// | | | |
2284/// | <...> |
2285/// | | | |
2286/// \--Latch |
2287/// |
2288/// Exit
2289/// |
2290/// After
2291///
2292/// The loop is thought to start at PreheaderIP (at the Preheader's terminator,
2293/// including) and end at AfterIP (at the After's first instruction, excluding).
2294/// That is, instructions in the Preheader and After blocks (except the
2295/// Preheader's terminator) are out of CanonicalLoopInfo's control and may have
2296/// side-effects. Typically, the Preheader is used to compute the loop's trip
2297/// count. The instructions from BodyIP (at the Body block's first instruction,
2298/// excluding) until the Latch are also considered outside CanonicalLoopInfo's
2299/// control and thus can have side-effects. The body block is the single entry
2300/// point into the loop body, which may contain arbitrary control flow as long
2301/// as all control paths eventually branch to the Latch block.
2302///
2303/// TODO: Consider adding another standardized BasicBlock between Body CFG and
2304/// Latch to guarantee that there is only a single edge to the latch. It would
2305/// make loop transformations easier by not needing to consider multiple
2306/// predecessors of the latch (See redirectAllPredecessorsTo) and would give us
2307/// an equivalent to PreheaderIP, AfterIP and BodyIP for inserting code that
2308/// executes after each body iteration.
2309///
2310/// There must be no loop-carried dependencies through llvm::Values. This is
2311/// equivalent to requiring that the Latch has no PHINode and the Header's only
2312/// PHINode is for the induction variable.
2313///
2314/// All code in Header, Cond, Latch and Exit (plus the terminator of the
2315/// Preheader) are CanonicalLoopInfo's responsibility and their build-up checked
2316/// by assertOK(). They are expected to not be modified unless explicitly
2317/// modifying the CanonicalLoopInfo through a method that applies an OpenMP
2318/// loop-associated construct such as applyWorkshareLoop, tileLoops, unrollLoop,
2319/// etc. These methods usually invalidate the CanonicalLoopInfo and re-use its
2320/// basic blocks. After invalidation, the CanonicalLoopInfo must not be used
2321/// anymore as its underlying control flow may not exist anymore.
2322/// Loop-transformation methods such as tileLoops, collapseLoops and unrollLoop
2323/// may also return a new CanonicalLoopInfo that can be passed to other
2324/// loop-associated construct implementing methods. These loop-transforming
2325/// methods may either create a new CanonicalLoopInfo usually using
2326/// createLoopSkeleton and invalidate the input CanonicalLoopInfo, or reuse and
2327/// modify one of the input CanonicalLoopInfo and return it as representing the
2328/// modified loop. What is done is an implementation detail of
2329/// transformation-implementing method and callers should always assume that the
2330/// CanonicalLoopInfo passed to it is invalidated and a new object is returned.
2331/// Returned CanonicalLoopInfo have the same structure and guarantees as the one
2332/// created by createCanonicalLoop, such that transforming methods do not have
2333/// to special case where the CanonicalLoopInfo originated from.
2334///
2335/// Generally, methods consuming CanonicalLoopInfo do not need an
2336/// OpenMPIRBuilder::InsertPointTy as argument, but use the locations of the
2337/// CanonicalLoopInfo to insert new or modify existing instructions. Unless
2338/// documented otherwise, methods consuming CanonicalLoopInfo do not invalidate
2339/// any InsertPoint that is outside CanonicalLoopInfo's control. Specifically,
2340/// any InsertPoint in the Preheader, After or Block can still be used after
2341/// calling such a method.
2342///
2343/// TODO: Provide mechanisms for exception handling and cancellation points.
2344///
2345/// Defined outside OpenMPIRBuilder because nested classes cannot be
2346/// forward-declared, e.g. to avoid having to include the entire OMPIRBuilder.h.
2348 friend class OpenMPIRBuilder;
2349
2350private:
2351 BasicBlock *Header = nullptr;
2352 BasicBlock *Cond = nullptr;
2353 BasicBlock *Latch = nullptr;
2354 BasicBlock *Exit = nullptr;
2355
2356 /// Add the control blocks of this loop to \p BBs.
2357 ///
2358 /// This does not include any block from the body, including the one returned
2359 /// by getBody().
2360 ///
2361 /// FIXME: This currently includes the Preheader and After blocks even though
2362 /// their content is (mostly) not under CanonicalLoopInfo's control.
2363 /// Re-evaluate whether this makes sense.
2364 void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
2365
2366 /// Sets the number of loop iterations to the given value. This value must be
2367 /// valid in the condition block (i.e., defined in the preheader) and is
2368 /// interpreted as an unsigned integer.
2369 void setTripCount(Value *TripCount);
2370
2371 /// Replace all uses of the canonical induction variable in the loop body with
2372 /// a new one.
2373 ///
2374 /// The intended use case is to update the induction variable for an updated
2375 /// iteration space such that it can stay normalized in the 0...tripcount-1
2376 /// range.
2377 ///
2378 /// The \p Updater is called with the (presumably updated) current normalized
2379 /// induction variable and is expected to return the value that uses of the
2380 /// pre-updated induction values should use instead, typically dependent on
2381 /// the new induction variable. This is a lambda (instead of e.g. just passing
2382 /// the new value) to be able to distinguish the uses of the pre-updated
2383 /// induction variable and uses of the induction variable to compute the
2384 /// updated induction variable value.
2385 void mapIndVar(llvm::function_ref<Value *(Instruction *)> Updater);
2386
2387public:
2388 /// Tells whether this object currently describes the IR of a loop. A false
2389 /// result means it was either consumed by a loop transformation or never
2390 /// initialized; the object must not be used in that case.
2391 bool isValid() const { return Header != nullptr; }
2392
2393 /// The preheader ensures that there is only a single edge entering the loop.
2394 /// Code that must be executed before any loop iteration can be emitted here,
2395 /// such as computing the loop trip count and begin lifetime markers. Code in
2396 /// the preheader is not considered part of the canonical loop.
2397 BasicBlock *getPreheader() const;
2398
2399 /// The header is the entry for each iteration. In the canonical control flow,
2400 /// it only contains the PHINode for the induction variable.
2402 assert(isValid() && "Requires a valid canonical loop");
2403 return Header;
2404 }
2405
2406 /// The condition block computes whether there is another loop iteration. If
2407 /// yes, branches to the body; otherwise to the exit block.
2409 assert(isValid() && "Requires a valid canonical loop");
2410 return Cond;
2411 }
2412
2413 /// The body block is the single entry for a loop iteration and not controlled
2414 /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
2415 /// eventually branch to the \p Latch block.
2417 assert(isValid() && "Requires a valid canonical loop");
2418 return cast<BranchInst>(Cond->getTerminator())->getSuccessor(0);
2419 }
2420
2421 /// Reaching the latch indicates the end of the loop body code. In the
2422 /// canonical control flow, it only contains the increment of the induction
2423 /// variable.
2425 assert(isValid() && "Requires a valid canonical loop");
2426 return Latch;
2427 }
2428
2429 /// Reaching the exit indicates no more iterations are being executed.
2431 assert(isValid() && "Requires a valid canonical loop");
2432 return Exit;
2433 }
2434
2435 /// The after block is intended for clean-up code such as lifetime end
2436 /// markers. It is separate from the exit block to ensure, analogous to the
2437 /// preheader, it having just a single entry edge and being free from PHI
2438 /// nodes should there be multiple loop exits (such as from break
2439 /// statements/cancellations).
2441 assert(isValid() && "Requires a valid canonical loop");
2442 return Exit->getSingleSuccessor();
2443 }
2444
2445 /// Returns the llvm::Value containing the number of loop iterations. It must
2446 /// be valid in the preheader and always interpreted as an unsigned integer of
2447 /// any bit-width.
2449 assert(isValid() && "Requires a valid canonical loop");
2450 Instruction *CmpI = &Cond->front();
2451 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
2452 return CmpI->getOperand(1);
2453 }
2454
2455 /// Returns the instruction representing the current logical induction
2456 /// variable. Always unsigned, always starting at 0 with an increment of one.
2458 assert(isValid() && "Requires a valid canonical loop");
2459 Instruction *IndVarPHI = &Header->front();
2460 assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
2461 return IndVarPHI;
2462 }
2463
2464 /// Return the type of the induction variable (and the trip count).
2466 assert(isValid() && "Requires a valid canonical loop");
2467 return getIndVar()->getType();
2468 }
2469
2470 /// Return the insertion point for user code before the loop.
2472 assert(isValid() && "Requires a valid canonical loop");
2473 BasicBlock *Preheader = getPreheader();
2474 return {Preheader, std::prev(Preheader->end())};
2475 };
2476
2477 /// Return the insertion point for user code in the body.
2479 assert(isValid() && "Requires a valid canonical loop");
2480 BasicBlock *Body = getBody();
2481 return {Body, Body->begin()};
2482 };
2483
2484 /// Return the insertion point for user code after the loop.
2486 assert(isValid() && "Requires a valid canonical loop");
2487 BasicBlock *After = getAfter();
2488 return {After, After->begin()};
2489 };
2490
2492 assert(isValid() && "Requires a valid canonical loop");
2493 return Header->getParent();
2494 }
2495
2496 /// Consistency self-check.
2497 void assertOK() const;
2498
2499 /// Invalidate this loop. That is, the underlying IR does not fulfill the
2500 /// requirements of an OpenMP canonical loop anymore.
2501 void invalidate();
2502};
2503
2504} // end namespace llvm
2505
2506#endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
arc branch finalize
This file defines the BumpPtrAllocator interface.
assume builder
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Hardware Loops
#define F(x, y, z)
Definition: MD5.cpp:55
This file defines constants and helpers used when dealing with OpenMP.
Basic Register Allocator
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
@ Flags
Definition: TextStubV5.cpp:93
@ Names
Definition: TextStubV5.cpp:106
Value * RHS
an instruction to allocate memory on the stack
Definition: Instructions.h:58
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:730
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:325
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:323
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:323
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
This is an important base class in LLVM.
Definition: Constant.h:41
A debug info location.
Definition: DebugLoc.h:33
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:47
InsertPoint - A saved insertion point.
Definition: IRBuilder.h:243
BasicBlock * getBlock() const
Definition: IRBuilder.h:258
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:212
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:263
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:275
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
Class to represent integer types.
Definition: DerivedTypes.h:40
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:47
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
OffloadEntryInfoDeviceGlobalVar(unsigned Order, OMPTargetGlobalVarEntryKind Flags)
Definition: OMPIRBuilder.h:337
OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Definition: OMPIRBuilder.h:340
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:353
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:287
OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Definition: OMPIRBuilder.h:274
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
Definition: OMPIRBuilder.h:204
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
Definition: OMPIRBuilder.h:206
OffloadingEntryInfoKinds getKind() const
Definition: OMPIRBuilder.h:222
OffloadEntryInfo(OffloadingEntryInfoKinds Kind)
Definition: OMPIRBuilder.h:213
static bool classof(const OffloadEntryInfo *Info)
Definition: OMPIRBuilder.h:230
OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, uint32_t Flags)
Definition: OMPIRBuilder.h:214
Class that manages information about offload code regions and data.
Definition: OMPIRBuilder.h:192
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:375
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
Definition: OMPIRBuilder.h:257
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
Definition: OMPIRBuilder.h:259
@ OMPTargetRegionEntryDtor
Mark the entry as a global destructor.
Definition: OMPIRBuilder.h:263
@ OMPTargetRegionEntryCtor
Mark the entry as a global constructor.
Definition: OMPIRBuilder.h:261
OffloadEntriesInfoManager(OpenMPIRBuilder *builder)
Definition: OMPIRBuilder.h:250
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
Definition: OMPIRBuilder.h:248
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
Definition: OMPIRBuilder.h:321
@ OMPTargetGlobalVarEntryLink
Mark the entry as a declare target link.
Definition: OMPIRBuilder.h:325
@ OMPTargetGlobalVarEntryTo
Mark the entry as a 'to' declare target.
Definition: OMPIRBuilder.h:323
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
Definition: OMPIRBuilder.h:312
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
Definition: OMPIRBuilder.h:370
bool empty() const
Return true if there are no entries defined.
Captures attributes that affect generating LLVM-IR using the OpenMPIRBuilder and related classes.
Definition: OMPIRBuilder.h:83
std::optional< StringRef > FirstSeparator
First separator used between the initial two parts of a name.
Definition: OMPIRBuilder.h:101
StringRef separator() const
Definition: OMPIRBuilder.h:147
void setFirstSeparator(StringRef FS)
Definition: OMPIRBuilder.h:160
StringRef firstSeparator() const
Definition: OMPIRBuilder.h:137
std::optional< bool > IsTargetCodegen
Flag for specifying if the compilation is done for an offloading target, like GPU.
Definition: OMPIRBuilder.h:91
std::optional< bool > OpenMPOffloadMandatory
Definition: OMPIRBuilder.h:98
bool hasRequiresUnifiedSharedMemory() const
Definition: OMPIRBuilder.h:124
OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen, bool HasRequiresUnifiedSharedMemory, bool OpenMPOffloadMandatory)
Definition: OMPIRBuilder.h:106
std::optional< bool > HasRequiresUnifiedSharedMemory
Flag for specifying whether a requires unified_shared_memory directive is present or not.
Definition: OMPIRBuilder.h:95
void setIsEmbedded(bool Value)
Definition: OMPIRBuilder.h:155
void setHasRequiresUnifiedSharedMemory(bool Value)
Definition: OMPIRBuilder.h:157
std::optional< StringRef > Separator
Separator used between all of the remaining consecutive parts of a name.
Definition: OMPIRBuilder.h:103
bool openMPOffloadMandatory() const
Definition: OMPIRBuilder.h:130
std::optional< bool > IsEmbedded
Flag for specifying if the compilation is done for embedded device code or host code.
Definition: OMPIRBuilder.h:87
void setIsTargetCodegen(bool Value)
Definition: OMPIRBuilder.h:156
void setSeparator(StringRef S)
Definition: OMPIRBuilder.h:161
Struct that keeps the information that should be kept throughout a 'target data' region.
TargetDataInfo(bool RequiresDevicePointerInfo, bool SeparateBeginEndCalls)
void clearArrayInfo()
Clear information about the data arrays.
unsigned NumberOfPtrs
The total number of pointers passed to the runtime library.
bool isValid()
Return true if the current target data information has valid arrays.
bool HasMapper
Indicate whether any user-defined mapper exists.
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:411
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:461
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
function_ref< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
Definition: OMPIRBuilder.h:513
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false)
Generator for '#omp reduction'.
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for : X = Expr — Only Scalar data types.
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
std::function< void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> StorableBodyGenCallbackTy
Definition: OMPIRBuilder.h:520
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic Read for : V = X — Only Scalar data types.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
void setConfig(OpenMPIRBuilderConfig C)
Definition: OMPIRBuilder.h:430
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes)
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD)
Create a runtime call for kmpc_target_deinit.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for '#omp task'.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingEntry(Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, StringRef SectionName="omp_offloading_entries")
Create an offloading section struct used to register this global at runtime.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, llvm::Value *DidIt)
Generator for '#omp single'.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
StringMap< Constant *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false)
Modifies the canonical loop to be a workshare loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams, int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD)
The omp target interface.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy CodeGenIP, SmallVectorImpl< uint64_t > &MapTypeFlags, SmallVectorImpl< Constant * > &MapNames, struct MapperAllocas &MapperAllocas, bool IsBegin, int64_t DeviceID, Value *IfCond, BodyGenCallbackTy ProcessMapOpCB, BodyGenCallbackTy BodyGenCB={})
Generator for '#omp target data'.
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
void pushFinalizationCB(const FinalizationInfo &FI)
Push a finalization callback on the finalization stack.
Definition: OMPIRBuilder.h:479
InsertPointTy getInsertionPoint()
}
IRBuilder<>::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:441
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName, int32_t NumTeams, int32_t NumThreads)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, TargetBodyGenCallbackTy BodyGenCB)
Generator for '#omp target'.
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder<> Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
void initialize(StringRef HostFilePath={})
Initialize the internal state, this will put structures types and potentially other helpers into the ...
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
OpenMPIRBuilder(Module &M)
Create a new OpenMPIRBuilder operating on the given module M.
Definition: OMPIRBuilder.h:415
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
void popFinalizationCB()
Pop the last finalization callback from the finalization stack.
Definition: OMPIRBuilder.h:486
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:111
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:256
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
An efficient, type-erasing, non-owning reference to a callable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition: ISDOpcodes.h:71
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:66
RTLDependenceKindTy
Dependence kind for RTL.
Definition: OMPConstants.h:268
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:46
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:262
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
AddressSpace
Definition: NVPTXBaseInfo.h:21
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
AtomicOrdering
Atomic ordering for LLVM's memory model.
A struct to pack relevant information while generating atomic Ops.
A struct to pack the relevant information for an OpenMP depend clause.
DependData(omp::RTLDependenceKindTy DepKind, Type *DepValueType, Value *DepVal)
omp::RTLDependenceKindTy DepKind
bool IsCancellable
Flag to indicate if the directive is cancellable.
Definition: OMPIRBuilder.h:473
FinalizeCallbackTy FiniCB
The finalization callback provided by the last in-flight invocation of createXXXX for the directive o...
Definition: OMPIRBuilder.h:466
omp::Directive DK
The directive kind of the innermost directive that has an associated region which might require final...
Definition: OMPIRBuilder.h:470
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
Definition: OMPIRBuilder.h:557
LocationDescription(const InsertPointTy &IP)
Definition: OMPIRBuilder.h:560
LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
Definition: OMPIRBuilder.h:561
LocationDescription(const IRBuilderBase &IRB)
Definition: OMPIRBuilder.h:558
This structure contains combined information generated for mappable clauses, including base pointers,...
void append(MapInfosTy &CurInfo)
Append arrays in CurInfo.
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
Function * getFunction() const
Return the function that contains the region to be outlined.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
std::function< void(Function &)> PostOutlineCBTy
Information about an OpenMP reduction.
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionInfo(Type *ElementType, Value *Variable, Value *PrivateVariable, ReductionGenTy ReductionGen, AtomicReductionGenTy AtomicReductionGen)
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure to contain the information needed to uniquely identify a target entry.
Definition: OMPIRBuilder.h:166
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count=0)
Definition: OMPIRBuilder.h:174
bool operator<(const TargetRegionEntryInfo RHS) const
Definition: OMPIRBuilder.h:184