//===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the OpenMPIRBuilder class, which is used as a
/// convenient way to create LLVM instructions for OpenMP directives.
///
//===----------------------------------------------------------------------===//

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"

#include <cstdint>
#include <optional>
#include <stack>

#define DEBUG_TYPE "openmp-ir-builder"

using namespace llvm;
using namespace omp;

static cl::opt<bool>
    OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden,
                         cl::desc("Use optimistic attributes describing "
                                  "'as-if' properties of runtime calls."),
                         cl::init(false));

static cl::opt<double> UnrollThresholdFactor(
    "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
    cl::desc("Factor for the unroll threshold to account for code "
             "simplifications still taking place"),
    cl::init(1.5));

#ifndef NDEBUG
/// Return whether IP1 and IP2 are ambiguous, i.e. that inserting instructions
/// at position IP1 may change the meaning of IP2 or vice-versa. This is
/// because an InsertPoint stores the instruction before something is
/// inserted. For instance, if both point to the same instruction, two
/// IRBuilders alternately creating instructions will cause the instructions
/// to be interleaved.
static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
                         IRBuilder<>::InsertPoint IP2) {
  if (!IP1.isSet() || !IP2.isSet())
    return false;
  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
}

static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType) {
  // Valid ordered/unordered and base algorithm combinations.
  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
  case OMPScheduleType::UnorderedStaticChunked:
  case OMPScheduleType::UnorderedStatic:
  case OMPScheduleType::UnorderedDynamicChunked:
  case OMPScheduleType::UnorderedGuidedChunked:
  case OMPScheduleType::UnorderedRuntime:
  case OMPScheduleType::UnorderedAuto:
  case OMPScheduleType::UnorderedTrapezoidal:
  case OMPScheduleType::UnorderedGreedy:
  case OMPScheduleType::UnorderedBalanced:
  case OMPScheduleType::UnorderedGuidedIterativeChunked:
  case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::UnorderedSteal:
  case OMPScheduleType::UnorderedStaticBalancedChunked:
  case OMPScheduleType::UnorderedGuidedSimd:
  case OMPScheduleType::UnorderedRuntimeSimd:
  case OMPScheduleType::OrderedStaticChunked:
  case OMPScheduleType::OrderedStatic:
  case OMPScheduleType::OrderedDynamicChunked:
  case OMPScheduleType::OrderedGuidedChunked:
  case OMPScheduleType::OrderedRuntime:
  case OMPScheduleType::OrderedAuto:
  case OMPScheduleType::OrderdTrapezoidal:
  case OMPScheduleType::NomergeUnorderedStaticChunked:
  case OMPScheduleType::NomergeUnorderedStatic:
  case OMPScheduleType::NomergeUnorderedDynamicChunked:
  case OMPScheduleType::NomergeUnorderedGuidedChunked:
  case OMPScheduleType::NomergeUnorderedRuntime:
  case OMPScheduleType::NomergeUnorderedAuto:
  case OMPScheduleType::NomergeUnorderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedGreedy:
  case OMPScheduleType::NomergeUnorderedBalanced:
  case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
  case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::NomergeUnorderedSteal:
  case OMPScheduleType::NomergeOrderedStaticChunked:
  case OMPScheduleType::NomergeOrderedStatic:
  case OMPScheduleType::NomergeOrderedDynamicChunked:
  case OMPScheduleType::NomergeOrderedGuidedChunked:
  case OMPScheduleType::NomergeOrderedRuntime:
  case OMPScheduleType::NomergeOrderedAuto:
  case OMPScheduleType::NomergeOrderedTrapezoidal:
    break;
  default:
    return false;
  }

  // Must not set both monotonicity modifiers at the same time.
  OMPScheduleType MonotonicityFlags =
      SchedType & OMPScheduleType::MonotonicityMask;
  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
    return false;

  return true;
}
#endif

static const omp::GV &getGridValue(const Triple &T, Function *Kernel) {
  if (T.isAMDGPU()) {
    StringRef Features =
        Kernel->getFnAttribute("target-features").getValueAsString();
    if (Features.count("+wavefrontsize64"))
      return omp::getAMDGPUGridValues<64>();
    return omp::getAMDGPUGridValues<32>();
  }
  if (T.isNVPTX())
    return omp::NVPTXGridValues;
  llvm_unreachable("No grid value available for this architecture!");
}

/// Determine which scheduling algorithm to use from the schedule clause
/// arguments.
static OMPScheduleType
getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
                          bool HasSimdModifier) {
  // Currently, the default schedule is static.
  switch (ClauseKind) {
  case OMP_SCHEDULE_Default:
  case OMP_SCHEDULE_Static:
    return HasChunks ? OMPScheduleType::BaseStaticChunked
                     : OMPScheduleType::BaseStatic;
  case OMP_SCHEDULE_Dynamic:
    return OMPScheduleType::BaseDynamicChunked;
  case OMP_SCHEDULE_Guided:
    return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
                           : OMPScheduleType::BaseGuidedChunked;
  case OMP_SCHEDULE_Auto:
    return OMPScheduleType::BaseAuto;
  case OMP_SCHEDULE_Runtime:
    return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
                           : OMPScheduleType::BaseRuntime;
  }
  llvm_unreachable("unhandled schedule clause argument");
}

/// Adds ordering modifier flags to schedule type.
static OMPScheduleType
getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType,
                              bool HasOrderedClause) {
  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
             OMPScheduleType::None &&
         "Must not have ordering nor monotonicity flags already set");

  OMPScheduleType OrderingModifier = HasOrderedClause
                                         ? OMPScheduleType::ModifierOrdered
                                         : OMPScheduleType::ModifierUnordered;
  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;

  // Unsupported combinations.
  if (OrderingScheduleType ==
      (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedGuidedChunked;
  else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
                                    OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedRuntime;

  return OrderingScheduleType;
}

/// Adds monotonicity modifier flags to schedule type.
static OMPScheduleType
getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType,
                                  bool HasSimdModifier, bool HasMonotonic,
                                  bool HasNonmonotonic, bool HasOrderedClause) {
  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
             OMPScheduleType::None &&
         "Must not have monotonicity flags already set");
  assert((!HasMonotonic || !HasNonmonotonic) &&
         "Monotonic and Nonmonotonic are contradicting each other");

  if (HasMonotonic) {
    return ScheduleType | OMPScheduleType::ModifierMonotonic;
  } else if (HasNonmonotonic) {
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
  } else {
    // OpenMP 5.1, 2.11.4 Worksharing-Loop Construct, Description.
    // If the static schedule kind is specified or if the ordered clause is
    // specified, and if the nonmonotonic modifier is not specified, the
    // effect is as if the monotonic modifier is specified. Otherwise, unless
    // the monotonic modifier is specified, the effect is as if the
    // nonmonotonic modifier is specified.
    OMPScheduleType BaseScheduleType =
        ScheduleType & ~OMPScheduleType::ModifierMask;
    if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
        (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
        HasOrderedClause) {
      // The monotonic modifier is the default in the OpenMP runtime library,
      // so there is no need to set it.
      return ScheduleType;
    } else {
      return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
    }
  }
}

/// Determine the schedule type using schedule and ordering clause arguments.
static OMPScheduleType
computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
                          bool HasSimdModifier, bool HasMonotonicModifier,
                          bool HasNonmonotonicModifier, bool HasOrderedClause) {
  OMPScheduleType BaseSchedule =
      getOpenMPBaseScheduleType(ClauseKind, HasChunks, HasSimdModifier);
  OMPScheduleType OrderedSchedule =
      getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
  OMPScheduleType Result = getOpenMPMonotonicityScheduleType(
      OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);

  assert(isValidWorkshareLoopScheduleType(Result));
  return Result;
}
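
// Illustrative example (editorial note, not part of the original source):
// tracing computeOpenMPScheduleType for `schedule(dynamic, 4)` with no
// ordered clause and no monotonicity modifier:
//   getOpenMPBaseScheduleType         -> BaseDynamicChunked
//   getOpenMPOrderingScheduleType     -> | ModifierUnordered
//   getOpenMPMonotonicityScheduleType -> | ModifierNonmonotonic
//     (neither static nor ordered, so nonmonotonic is implied)
// For `schedule(static)` the monotonicity step leaves UnorderedStatic
// untouched, since monotonic is already the runtime's default for static
// schedules.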

/// Make \p Source branch to \p Target.
///
/// Handles two situations:
/// * \p Source already has an unconditional branch.
/// * \p Source is a degenerate block (no terminator because the BB is
///       the current head of the IR construction).
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL) {
  if (Instruction *Term = Source->getTerminator()) {
    auto *Br = cast<BranchInst>(Term);
    assert(!Br->isConditional() &&
           "BB's terminator must be an unconditional branch (or degenerate)");
    BasicBlock *Succ = Br->getSuccessor(0);
    Succ->removePredecessor(Source, /*KeepOneInputPHIs=*/true);
    Br->setSuccessor(0, Target);
    return;
  }

  auto *NewBr = BranchInst::Create(Target, Source);
  NewBr->setDebugLoc(DL);
}

void llvm::spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New,
                    bool CreateBranch) {
  assert(New->getFirstInsertionPt() == New->begin() &&
         "Target BB must not have PHI nodes");

  // Move instructions to the new block.
  BasicBlock *Old = IP.getBlock();
  New->splice(New->begin(), Old, IP.getPoint(), Old->end());

  if (CreateBranch)
    BranchInst::Create(New, Old);
}

void llvm::spliceBB(IRBuilder<> &Builder, BasicBlock *New, bool CreateBranch) {
  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
  BasicBlock *Old = Builder.GetInsertBlock();

  spliceBB(Builder.saveIP(), New, CreateBranch);
  if (CreateBranch)
    Builder.SetInsertPoint(Old->getTerminator());
  else
    Builder.SetInsertPoint(Old);

  // SetInsertPoint also updates the Builder's debug location, but we want to
  // keep the one the Builder was configured to use.
  Builder.SetCurrentDebugLocation(DebugLoc);
}

BasicBlock *llvm::splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch,
                          llvm::Twine Name) {
  BasicBlock *Old = IP.getBlock();
  BasicBlock *New = BasicBlock::Create(
      Old->getContext(), Name.isTriviallyEmpty() ? Old->getName() : Name,
      Old->getParent(), Old->getNextNode());
  spliceBB(IP, New, CreateBranch);
  New->replaceSuccessorsPhiUsesWith(Old, New);
  return New;
}

BasicBlock *llvm::splitBB(IRBuilderBase &Builder, bool CreateBranch,
                          llvm::Twine Name) {
  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
  BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  // SetInsertPoint also updates the Builder's debug location, but we want to
  // keep the one the Builder was configured to use.
  Builder.SetCurrentDebugLocation(DebugLoc);
  return New;
}

BasicBlock *llvm::splitBB(IRBuilder<> &Builder, bool CreateBranch,
                          llvm::Twine Name) {
  DebugLoc DebugLoc = Builder.getCurrentDebugLocation();
  BasicBlock *New = splitBB(Builder.saveIP(), CreateBranch, Name);
  if (CreateBranch)
    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  else
    Builder.SetInsertPoint(Builder.GetInsertBlock());
  // SetInsertPoint also updates the Builder's debug location, but we want to
  // keep the one the Builder was configured to use.
  Builder.SetCurrentDebugLocation(DebugLoc);
  return New;
}

BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
                                    llvm::Twine Suffix) {
  BasicBlock *Old = Builder.GetInsertBlock();
  return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
}
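
// Illustrative usage sketch (editorial note, not part of the original
// source), assuming `Builder` points into a function under construction:
//   BasicBlock *ContBB =
//       splitBBWithSuffix(Builder, /*CreateBranch=*/true, ".cont");
//   // Instructions after the old insertion point have moved into ContBB;
//   // Builder now inserts before the new unconditional branch to ContBB.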

// This function creates a fake integer value and a fake use for the integer
// value. It returns the fake value created. This is useful in modeling the
// extra arguments to the outlined functions.
static Value *createFakeIntVal(IRBuilder<> &Builder,
                               OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
                               std::stack<Instruction *> &ToBeDeleted,
                               OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
                               const Twine &Name = "", bool AsPtr = true) {
  Builder.restoreIP(OuterAllocaIP);
  Instruction *FakeVal;
  AllocaInst *FakeValAddr =
      Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, Name + ".addr");
  ToBeDeleted.push(FakeValAddr);

  if (AsPtr) {
    FakeVal = FakeValAddr;
  } else {
    FakeVal =
        Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name + ".val");
    ToBeDeleted.push(FakeVal);
  }

  // Generate a fake use of this value.
  Builder.restoreIP(InnerAllocaIP);
  Instruction *UseFakeVal;
  if (AsPtr) {
    UseFakeVal =
        Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name + ".use");
  } else {
    UseFakeVal =
        cast<BinaryOperator>(Builder.CreateAdd(FakeVal, Builder.getInt32(10)));
  }
  ToBeDeleted.push(UseFakeVal);
  return FakeVal;
}

//===----------------------------------------------------------------------===//
// OpenMPIRBuilderConfig
//===----------------------------------------------------------------------===//

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires directive present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

} // anonymous namespace
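
// Illustrative example (editorial note, not part of the original source):
// a translation unit containing
//   #pragma omp requires reverse_offload unified_shared_memory
// is modeled as RequiresFlags = OMP_REQ_REVERSE_OFFLOAD |
// OMP_REQ_UNIFIED_SHARED_MEMORY (0x002 | 0x008 = 0x00A), while a module with
// no requires directive at all reports OMP_REQ_NONE (0x001) via
// getRequiresFlags().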

OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
    : RequiresFlags(OMP_REQ_UNDEFINED) {}

OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
    bool IsTargetDevice, bool IsGPU, bool OpenMPOffloadMandatory,
    bool HasRequiresReverseOffload, bool HasRequiresUnifiedAddress,
    bool HasRequiresUnifiedSharedMemory, bool HasRequiresDynamicAllocators)
    : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
      OpenMPOffloadMandatory(OpenMPOffloadMandatory),
      RequiresFlags(OMP_REQ_UNDEFINED) {
  if (HasRequiresReverseOffload)
    RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
  if (HasRequiresUnifiedAddress)
    RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
  if (HasRequiresUnifiedSharedMemory)
    RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
  if (HasRequiresDynamicAllocators)
    RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
}

bool OpenMPIRBuilderConfig::hasRequiresReverseOffload() const {
  return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
}

bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress() const {
  return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
}

bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory() const {
  return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
}

bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators() const {
  return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
}

int64_t OpenMPIRBuilderConfig::getRequiresFlags() const {
  return hasRequiresFlags() ? RequiresFlags
                            : static_cast<int64_t>(OMP_REQ_NONE);
}

void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
  else
    RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
}

void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
  else
    RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
}

void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
  else
    RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
}

void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(bool Value) {
  if (Value)
    RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
  else
    RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
}

//===----------------------------------------------------------------------===//
// OpenMPIRBuilder
//===----------------------------------------------------------------------===//

void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
                                          IRBuilderBase &Builder,
                                          SmallVector<Value *> &ArgsVector) {
  Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION);
  Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
  auto Int32Ty = Type::getInt32Ty(Builder.getContext());
  Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, 3));
  Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);

  Value *NumTeams3D =
      Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams, {0});
  Value *NumThreads3D =
      Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads, {0});

  ArgsVector = {Version,
                PointerNum,
                KernelArgs.RTArgs.BasePointersArray,
                KernelArgs.RTArgs.PointersArray,
                KernelArgs.RTArgs.SizesArray,
                KernelArgs.RTArgs.MapTypesArray,
                KernelArgs.RTArgs.MapNamesArray,
                KernelArgs.RTArgs.MappersArray,
                KernelArgs.NumIterations,
                Flags,
                NumTeams3D,
                NumThreads3D,
                KernelArgs.DynCGGroupMem};
}

void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  LLVMContext &Ctx = Fn.getContext();

  // Get the function's current attributes.
  auto Attrs = Fn.getAttributes();
  auto FnAttrs = Attrs.getFnAttrs();
  auto RetAttrs = Attrs.getRetAttrs();
  SmallVector<AttributeSet, 4> ArgAttrs;
  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
    ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));

  // Add AS to FnAS while taking special care with integer extensions.
  auto addAttrSet = [&](AttributeSet &FnAS, const AttributeSet &AS,
                        bool Param = true) -> void {
    bool HasSignExt = AS.hasAttribute(Attribute::SExt);
    bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
    if (HasSignExt || HasZeroExt) {
      assert(AS.getNumAttributes() == 1 &&
             "Currently not handling extension attr combined with others.");
      if (Param) {
        if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt))
          FnAS = FnAS.addAttribute(Ctx, AK);
      } else if (auto AK =
                     TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))
        FnAS = FnAS.addAttribute(Ctx, AK);
    } else {
      FnAS = FnAS.addAttributes(Ctx, AS);
    }
  };

#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Add attributes to the function declaration.
  switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
  case Enum:                                                                   \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                           \
    addAttrSet(RetAttrs, RetAttrSet, /*Param*/ false);                         \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)                \
      addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]);                         \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));    \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    // Attributes are optional.
    break;
  }
}

FunctionCallee
OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
  FunctionType *FnTy = nullptr;
  Function *Fn = nullptr;

  // Try to find the declaration in the module first.
  switch (FnID) {
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
  case Enum:                                                                   \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
                             IsVarArg);                                        \
    Fn = M.getFunction(Str);                                                   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  if (!Fn) {
    // Create a new declaration if we need one.
    switch (FnID) {
#define OMP_RTL(Enum, Str, ...)                                                \
  case Enum:                                                                   \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
    }

    // Add information if the runtime function takes a callback function.
    if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
      if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
        LLVMContext &Ctx = Fn->getContext();
        MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the runtime function:
        // - The callback callee is argument number 2 (microtask).
        // - The first two arguments of the callback callee are unknown (-1).
        // - All variadic arguments to the runtime function are passed to the
        //   callback callee.
        Fn->addMetadata(
            LLVMContext::MD_callback,
            *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                  2, {-1, -1}, /* VarArgsArePassed */ true)}));
      }
    }

    LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
    addAttributes(FnID, *Fn);

  } else {
    LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
  }

  assert(Fn && "Failed to create OpenMP runtime function");

  return {FnTy, Fn};
}

Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  assert(Fn && "Failed to create OpenMP runtime function pointer");
  return Fn;
}

void OpenMPIRBuilder::initialize() { initializeTypes(M); }

static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder,
                                                     Function *Function) {
  BasicBlock &EntryBlock = Function->getEntryBlock();
  Instruction *MoveLocInst = EntryBlock.getFirstNonPHI();

  // Loop over blocks looking for constant allocas, skipping the entry block
  // as any allocas there are already in the desired location.
  for (auto Block = std::next(Function->begin(), 1); Block != Function->end();
       Block++) {
    for (auto Inst = Block->getReverseIterator()->begin();
         Inst != Block->getReverseIterator()->end();) {
      if (auto *AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
        Inst++;
        if (!isa<ConstantData>(AllocaInst->getArraySize()))
          continue;
        AllocaInst->moveBeforePreserving(MoveLocInst);
      } else {
        Inst++;
      }
    }
  }
}
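
// Illustrative before/after sketch (editorial note, not part of the original
// source): a constant-sized alloca emitted in a non-entry block, e.g.
//   omp.par.region:
//     %tmp = alloca i32, align 4
// is moved to sit right after the first non-PHI instruction of the entry
// block, while an alloca whose array size is a runtime value stays put.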

void OpenMPIRBuilder::finalize(Function *Fn) {
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  SmallVector<OutlineInfo, 16> DeferredOutlines;
  for (OutlineInfo &OI : OutlineInfos) {
    // Skip functions that have not finalized yet; may happen with nested
    // function generation.
    if (Fn && OI.getFunction() != Fn) {
      DeferredOutlines.push_back(OI);
      continue;
    }

    ParallelRegionBlockSet.clear();
    Blocks.clear();
    OI.collectBlocks(ParallelRegionBlockSet, Blocks);

    Function *OuterFn = OI.getFunction();
    CodeExtractorAnalysisCache CEAC(*OuterFn);
    // If we generate code for the target device, we need to allocate the
    // struct for aggregate params in the device default alloca address space.
    // The OpenMP runtime requires that the params of the extracted functions
    // are passed as zero address space pointers. This flag ensures that
    // CodeExtractor generates correct code for extracted functions which are
    // used by the OpenMP runtime.
    bool ArgsInZeroAddressSpace = Config.isTargetDevice();
    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                            /* AggregateArgs */ true,
                            /* BlockFrequencyInfo */ nullptr,
                            /* BranchProbabilityInfo */ nullptr,
                            /* AssumptionCache */ nullptr,
                            /* AllowVarArgs */ true,
                            /* AllowAlloca */ true,
                            /* AllocaBlock*/ OI.OuterAllocaBB,
                            /* Suffix */ ".omp_par", ArgsInZeroAddressSpace);

    LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
                      << " Exit: " << OI.ExitBB->getName() << "\n");
    assert(Extractor.isEligible() &&
           "Expected OpenMP outlining to be possible!");

    for (auto *V : OI.ExcludeArgsFromAggregate)
      Extractor.excludeArgFromAggregate(V);

    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);

    // Forward target-cpu, target-features attributes to the outlined function.
    auto TargetCpuAttr = OuterFn->getFnAttribute("target-cpu");
    if (TargetCpuAttr.isStringAttribute())
      OutlinedFn->addFnAttr(TargetCpuAttr);

    auto TargetFeaturesAttr = OuterFn->getFnAttribute("target-features");
    if (TargetFeaturesAttr.isStringAttribute())
      OutlinedFn->addFnAttr(TargetFeaturesAttr);

    LLVM_DEBUG(dbgs() << "After outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
    assert(OutlinedFn->getReturnType()->isVoidTy() &&
           "OpenMP outlined functions should not return a value!");

    // For compatibility with the clang CG we move the outlined function after
    // the one with the parallel region.
    OutlinedFn->removeFromParent();
    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    // Remove the artificial entry introduced by the extractor right away; we
    // made our own entry block after all.
    {
      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
      // Move instructions from the to-be-deleted ArtificialEntry to the entry
      // basic block of the parallel region. CodeExtractor generates
      // instructions to unwrap the aggregate argument and may sink
      // allocas/bitcasts for values that are solely used in the outlined
      // region and do not escape.
      assert(!ArtificialEntry.empty() &&
             "Expected instructions to add in the outlined region entry");
      for (BasicBlock::reverse_iterator It = ArtificialEntry.rbegin(),
                                        End = ArtificialEntry.rend();
           It != End;) {
        Instruction &I = *It;
        It++;

        if (I.isTerminator())
          continue;

        I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
      }

      OI.EntryBB->moveBefore(&ArtificialEntry);
      ArtificialEntry.eraseFromParent();
    }
    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
    assert(OutlinedFn && OutlinedFn->getNumUses() == 1);

    // Run a user callback, e.g. to add attributes.
    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);
  }

  // Remove work items that have been completed.
  OutlineInfos = std::move(DeferredOutlines);

  // The createTarget functions embed user-written code into the target region,
  // which may inject allocas that need to be moved to the entry block of our
  // target, or we risk malformed optimisations by later passes. This is only
  // relevant for the device pass, which appears to be a little more delicate
  // when it comes to optimisations (however, we do not block on that here;
  // it's up to the inserter to the list to do so).
  // This notably has to occur after the OutlineInfo candidates have been
  // extracted, so we have an end product that will not be implicitly adversely
  // affected by any raises unless intentionally appended to the list.
  // NOTE: This only does so for ConstantData; it could be extended to
  // ConstantExprs with further effort, however, they should largely be folded
  // when they get here. Extending it to runtime-defined/read+writeable
  // allocation sizes would be non-trivial (we would need to factor in movement
  // of any stores to variables the allocation size depends on, as well as the
  // usual loads; otherwise it will yield the wrong result after movement) and
  // would likely be more suitable as an LLVM optimisation pass.
  for (Function *F : ConstantAllocaRaiseCandidates)
    raiseUserConstantDataAllocasToEntryBlock(Builder, F);

  EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [](EmitMetadataErrorKind Kind,
         const TargetRegionEntryInfo &EntryInfo) -> void {
    errs() << "Error of kind: " << Kind
           << " when emitting offload entries and metadata during "
              "OMPIRBuilder finalization \n";
  };

  if (!OffloadInfoManager.empty())
    createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

OpenMPIRBuilder::~OpenMPIRBuilder() {
  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}

GlobalValue *OpenMPIRBuilder::createGlobalFlag(unsigned Value, StringRef Name) {
  Type *I32Ty = Type::getInt32Ty(M.getContext());
  auto *GV =
      new GlobalVariable(M, I32Ty,
                         /* isConstant = */ true, GlobalValue::WeakODRLinkage,
                         ConstantInt::get(I32Ty, Value), Name);
  GV->setVisibility(GlobalValue::HiddenVisibility);

  return GV;
}
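
// Illustrative output (editorial note, not part of the original source):
// createGlobalFlag(1, "flag_name") emits roughly
//   @flag_name = weak_odr hidden constant i32 1
// i.e. a constant i32 global with weak_odr linkage and hidden visibility.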

Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                            uint32_t SrcLocStrSize,
                                            IdentFlag LocFlags,
                                            unsigned Reserve2Flags) {
  // Enable "C-mode".
  LocFlags |= OMP_IDENT_FLAG_KMPC;

  Constant *&Ident =
      IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  if (!Ident) {
    Constant *I32Null = ConstantInt::getNullValue(Int32);
    Constant *IdentData[] = {I32Null,
                             ConstantInt::get(Int32, uint32_t(LocFlags)),
                             ConstantInt::get(Int32, Reserve2Flags),
                             ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
    Constant *Initializer =
        ConstantStruct::get(OpenMPIRBuilder::Ident, IdentData);

    // Look for existing encoding of the location + flags, not needed but
    // minimizes the difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.globals())
      if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)
          Ident = &GV;

    if (!Ident) {
      auto *GV = new GlobalVariable(
          M, OpenMPIRBuilder::Ident,
          /* isConstant = */ true, GlobalValue::PrivateLinkage, Initializer, "",
          nullptr, GlobalValue::NotThreadLocal,
          M.getDataLayout().getDefaultGlobalsAddressSpace());
      GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
      GV->setAlignment(Align(8));
      Ident = GV;
    }
  }

  return ConstantExpr::getPointerBitCastOrAddrSpaceCast(Ident, IdentPtr);
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
                                                uint32_t &SrcLocStrSize) {
  SrcLocStrSize = LocStr.size();
  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
  if (!SrcLocStr) {
    Constant *Initializer =
        ConstantDataArray::getString(M.getContext(), LocStr);

    // Look for existing encoding of the location, not needed but minimizes the
    // difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.globals())
      if (GV.isConstant() && GV.hasInitializer() &&
          GV.getInitializer() == Initializer)
        return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
                                              /* AddressSpace */ 0, &M);
  }
  return SrcLocStr;
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                StringRef FileName,
                                                unsigned Line, unsigned Column,
                                                uint32_t &SrcLocStrSize) {
  SmallString<128> Buffer;
  Buffer.push_back(';');
  Buffer.append(FileName);
  Buffer.push_back(';');
  Buffer.append(FunctionName);
  Buffer.push_back(';');
  Buffer.append(std::to_string(Line));
  Buffer.push_back(';');
  Buffer.append(std::to_string(Column));
  Buffer.push_back(';');
  Buffer.push_back(';');
  return getOrCreateSrcLocStr(Buffer.str(), SrcLocStrSize);
}
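
// Illustrative example (editorial note, not part of the original source):
// for function "foo" in file "bar.c" at line 3, column 7, the buffer built
// above yields the string ";bar.c;foo;3;7;;", the source-location format the
// OpenMP runtime expects in an ident_t.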

Constant *
OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize) {
  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
  return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL,
                                                uint32_t &SrcLocStrSize,
                                                Function *F) {
  DILocation *DIL = DL.get();
  if (!DIL)
    return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  StringRef FileName = M.getName();
  if (DIFile *DIF = DIL->getFile())
    if (std::optional<StringRef> Source = DIF->getSource())
      FileName = *Source;
  StringRef Function = DIL->getScope()->getSubprogram()->getName();
  if (Function.empty() && F)
    Function = F->getName();
  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
                              DIL->getColumn(), SrcLocStrSize);
}

Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription &Loc,
                                                uint32_t &SrcLocStrSize) {
  return getOrCreateSrcLocStr(Loc.DL, SrcLocStrSize,
                              Loc.IP.getBlock()->getParent());
}

Value *OpenMPIRBuilder::getOrCreateThreadID(Value *Ident) {
  return Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
      "omp_global_thread_num");
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
                               bool ForceSimpleCall, bool CheckCancelFlag) {
  if (!updateToLocation(Loc))
    return Loc.IP;
  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
}

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc,
                                 omp::Directive Kind, bool ForceSimpleCall,
                                 bool CheckCancelFlag) {
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  //            __kmpc_barrier(loc, thread_id);

  IdentFlag BarrierLocFlags;
  switch (Kind) {
  case OMPD_for:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
    break;
  case OMPD_sections:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
    break;
  case OMPD_single:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
    break;
  case OMPD_barrier:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
    break;
  default:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
    break;
  }

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Args[] = {
      getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
      getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};

  // If we are in a cancellable parallel region, barriers are cancellation
  // points.
  // TODO: Check why we would force simple calls or to ignore the cancel flag.
  bool UseCancelBarrier =
      !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);

  Value *Result =
      Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                             UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
                                              : OMPRTL___kmpc_barrier),
                         Args);

  if (UseCancelBarrier && CheckCancelFlag)
    emitCancelationCheckImpl(Result, OMPD_parallel);

  return Builder.saveIP();
}
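
// Illustrative output (editorial note, not part of the original source): in
// a non-cancellable region, an explicit barrier lowers to roughly
//   %tid = call i32 @__kmpc_global_thread_num(ptr @loc)
//   call void @__kmpc_barrier(ptr @loc, i32 %tid)
// with @__kmpc_cancel_barrier substituted (and its i32 result checked via
// emitCancelationCheckImpl) when the innermost parallel region is
// cancellable.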

OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
                              Value *IfCondition,
                              omp::Directive CanceledDirective) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // LLVM utilities like blocks with terminators.
  auto *UI = Builder.CreateUnreachable();

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  Builder.SetInsertPoint(ThenTI);

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    llvm_unreachable("Unknown cancel kind!");
  }

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
  Value *Result = Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
    if (CanceledDirective == OMPD_parallel) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                    omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
                    /* CheckCancelFlag */ false);
    }
  };

  // The actual cancel logic is shared with others, e.g., cancel_barriers.
  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);

  // Update the insertion point and remove the terminator we introduced.
  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();

  return Builder.saveIP();
}

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
    const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return,
    Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads,
    Value *HostPtr, ArrayRef<Value *> KernelArgs) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Builder.restoreIP(AllocaIP);
  auto *KernelArgsPtr =
      Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args");
  Builder.restoreIP(Loc.IP);

  for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) {
    llvm::Value *Arg =
        Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I);
    Builder.CreateAlignedStore(
        KernelArgs[I], Arg,
        M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType()));
  }

  SmallVector<Value *> OffloadingArgs{Ident,      DeviceID, NumTeams,
                                      NumThreads, HostPtr,  KernelArgsPtr};

  Return = Builder.CreateCall(
      getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
      OffloadingArgs);

  return Builder.saveIP();
}
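
// Illustrative output (editorial note, not part of the original source):
// emitTargetKernel stores each kernel argument into the stack-allocated
// %kernel_args struct and then emits roughly
//   %rc = call i32 @__tgt_target_kernel(ptr @loc, i64 %device_id,
//                                       i32 %num_teams, i32 %num_threads,
//                                       ptr %host_ptr, ptr %kernel_args)
// where a non-zero return value signals that offloading failed (checked by
// emitKernelLaunch below).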

OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch(
    const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
    EmitFallbackCallbackTy emitTargetCallFallbackCB, TargetKernelArgs &Args,
    Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Builder.restoreIP(Loc.IP);
  // On top of the arrays that were filled up, the target offloading call
  // takes as arguments the device id as well as the host pointer. The host
  // pointer is used by the runtime library to identify the current target
  // region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that
  // implements the target region, but we aren't using that so that the
  // compiler doesn't need to keep that, and could therefore inline the host
  // function if proven worthwhile during optimization.

  // From this point on, we need to have an ID of the target region defined.
  assert(OutlinedFnID && "Invalid outlined function ID!");
  (void)OutlinedFnID;

  // Return value of the runtime offloading call.
  Value *Return = nullptr;

  // Arguments for the target kernel.
  SmallVector<Value *> ArgsVector;
  getKernelArgsVector(Args, Builder, ArgsVector);

  // The target region is an outlined function launched by the runtime
  // via calls to __tgt_target_kernel().
  //
  // Note that on the host and CPU targets, the runtime implementation of
  // these calls simply call the outlined function without forking threads.
  // The outlined functions themselves have runtime calls to
  // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
  // the compiler in emitTeamsCall() and emitParallelCall().
  //
  // In contrast, on the NVPTX target, the implementation of
  // __tgt_target_teams() launches a GPU kernel with the requested number
  // of teams and threads so no additional calls to the runtime are required.
  // Check the error code and execute the host version if required.
  Builder.restoreIP(emitTargetKernel(Builder, AllocaIP, Return, RTLoc, DeviceID,
                                     Args.NumTeams, Args.NumThreads,
                                     OutlinedFnID, ArgsVector));

  BasicBlock *OffloadFailedBlock =
      BasicBlock::Create(Builder.getContext(), "omp_offload.failed");
  BasicBlock *OffloadContBlock =
      BasicBlock::Create(Builder.getContext(), "omp_offload.cont");
  Value *Failed = Builder.CreateIsNotNull(Return);
  Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

  auto CurFn = Builder.GetInsertBlock()->getParent();
  emitBlock(OffloadFailedBlock, CurFn);
  Builder.restoreIP(emitTargetCallFallbackCB(Builder.saveIP()));
  emitBranch(OffloadContBlock);
  emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true);
  return Builder.saveIP();
}

void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
                                               omp::Directive CanceledDirective,
                                               FinalizeCallbackTy ExitCB) {
  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
         "Unexpected cancellation!");

  // For a cancel barrier we create two new blocks.
  BasicBlock *BB = Builder.GetInsertBlock();
  BasicBlock *NonCancellationBlock;
  if (Builder.GetInsertPoint() == BB->end()) {
    // TODO: This branch will not be needed once we have moved to the
    // OpenMPIRBuilder codegen completely.
    NonCancellationBlock = BasicBlock::Create(
        BB->getContext(), BB->getName() + ".cont", BB->getParent());
  } else {
    NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
    NonCancellationBlock->removePredecessor(BB);
    BB->getTerminator()->eraseFromParent();
  }
  BasicBlock *CancellationBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".cncl", BB->getParent());

  // Jump to them based on the return value.
  Value *Cmp = Builder.CreateIsNull(CancelFlag);
  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
                       /* TODO weight */ nullptr, nullptr);

  // From the cancellation block we finalize all variables and go to the
  // post finalization block that is known to the FiniCB callback.
  Builder.SetInsertPoint(CancellationBlock);
  if (ExitCB)
    ExitCB(Builder.saveIP());
  auto &FI = FinalizationStack.back();
  FI.FiniCB(Builder.saveIP());

  // The continuation block is where code generation continues.
  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}
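
// Illustrative control flow (editorial note, not part of the original
// source) produced by emitCancelationCheckImpl:
//   %cmp = icmp eq i32 %cancel_flag, 0
//   br i1 %cmp, label %bb.cont, label %bb.cncl
// bb.cncl:   ; finalization (ExitCB, then FiniCB) runs here
// bb.cont:   ; code generation continues here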

// Callback used to create OpenMP runtime calls to support
// omp parallel clause for the device.
// We need to use this callback to replace the call to the OutlinedFn in
// OuterFn by the call to the OpenMP DeviceRTL runtime function
// (kmpc_parallel_51).
static void targetParallelCallback(
    OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn,
    BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition,
    Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr,
    Value *ThreadID, const SmallVector<Instruction *, 4> &ToBeDeleted) {
  // Add some known attributes.
  IRBuilder<> &Builder = OMPIRBuilder->Builder;
  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
  OutlinedFn.addParamAttr(0, Attribute::NoUndef);
  OutlinedFn.addParamAttr(1, Attribute::NoUndef);
  OutlinedFn.addFnAttr(Attribute::NoUnwind);

  assert(OutlinedFn.arg_size() >= 2 &&
         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
  assert(CI && "Expected call instruction to outlined function");
  CI->getParent()->setName("omp_parallel");

  Builder.SetInsertPoint(CI);
  Type *PtrTy = OMPIRBuilder->VoidPtr;
  Value *NullPtrValue = Constant::getNullValue(PtrTy);

  // Add an alloca for the kernel args.
  OpenMPIRBuilder::InsertPointTy CurrentIP = Builder.saveIP();
  Builder.SetInsertPoint(OuterAllocaBB, OuterAllocaBB->getFirstInsertionPt());
  AllocaInst *ArgsAlloca =
      Builder.CreateAlloca(ArrayType::get(PtrTy, NumCapturedVars));
  Value *Args = ArgsAlloca;
  // Add an address space cast if the array for storing arguments is not
  // allocated in address space 0.
  if (ArgsAlloca->getAddressSpace())
    Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
  Builder.restoreIP(CurrentIP);

  // Store captured vars which are used by kmpc_parallel_51.
  for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
    Value *V = *(CI->arg_begin() + 2 + Idx);
    Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
        ArrayType::get(PtrTy, NumCapturedVars), Args, 0, Idx);
    Builder.CreateStore(V, StoreAddress);
  }

  Value *Cond =
      IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
                  : Builder.getInt32(1);

  // Build the kmpc_parallel_51 call.
  Value *Parallel51CallArgs[] = {
      /* identifier*/ Ident,
      /* global thread num*/ ThreadID,
      /* if expression */ Cond,
      /* number of threads */ NumThreads ? NumThreads : Builder.getInt32(-1),
      /* Proc bind */ Builder.getInt32(-1),
      /* outlined function */
      Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
      /* wrapper function */ NullPtrValue,
      /* arguments of the outlined function */ Args,
      /* number of arguments */ Builder.getInt64(NumCapturedVars)};

  FunctionCallee RTLFn =
      OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);

  Builder.CreateCall(RTLFn, Parallel51CallArgs);

  LLVM_DEBUG(dbgs() << "With kmpc_parallel_51 placed: "
                    << *Builder.GetInsertBlock()->getParent() << "\n");

  // Initialize the local TID stack location with the argument value.
  Builder.SetInsertPoint(PrivTID);
  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
                      PrivTIDAddr);

  // Remove the redundant call to the outlined function.
  CI->eraseFromParent();

  for (Instruction *I : ToBeDeleted) {
    I->eraseFromParent();
  }
}
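
// Illustrative output (editorial note, not part of the original source): for
// a device-side parallel region with two captured pointers, the call built
// above looks roughly like
//   call void @__kmpc_parallel_51(ptr %ident, i32 %tid, i32 1, i32 -1,
//                                 i32 -1, ptr @foo..omp_par, ptr null,
//                                 ptr %args, i64 2)
// where %args points to the stack array holding the captured variables.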

// Callback used to create OpenMP runtime calls to support
// omp parallel clause for the host.
// We need to use this callback to replace the call to the OutlinedFn in
// OuterFn by the call to the OpenMP host runtime function
// (__kmpc_fork_call[_if]).
static void
hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn,
                     Function *OuterFn, Value *Ident, Value *IfCondition,
                     Instruction *PrivTID, AllocaInst *PrivTIDAddr,
                     const SmallVector<Instruction *, 4> &ToBeDeleted) {
  IRBuilder<> &Builder = OMPIRBuilder->Builder;
  FunctionCallee RTLFn;
  if (IfCondition) {
    RTLFn =
        OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
  } else {
    RTLFn =
        OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
  }
  if (auto *F = dyn_cast<Function>(RTLFn.getCallee())) {
    if (!F->hasMetadata(LLVMContext::MD_callback)) {
      LLVMContext &Ctx = F->getContext();
      MDBuilder MDB(Ctx);
      // Annotate the callback behavior of the __kmpc_fork_call:
      // - The callback callee is argument number 2 (microtask).
      // - The first two arguments of the callback callee are unknown (-1).
      // - All variadic arguments to the __kmpc_fork_call are passed to the
      //   callback callee.
      F->addMetadata(LLVMContext::MD_callback,
                     *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                           2, {-1, -1},
                                           /* VarArgsArePassed */ true)}));
    }
  }
  // Add some known attributes.
  OutlinedFn.addParamAttr(0, Attribute::NoAlias);
  OutlinedFn.addParamAttr(1, Attribute::NoAlias);
  OutlinedFn.addFnAttr(Attribute::NoUnwind);

  assert(OutlinedFn.arg_size() >= 2 &&
         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

  CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
  CI->getParent()->setName("omp_parallel");
  Builder.SetInsertPoint(CI);

  // Build call __kmpc_fork_call[_if](Ident, n, microtask, var1, .., varn);
  Value *ForkCallArgs[] = {
      Ident, Builder.getInt32(NumCapturedVars),
      Builder.CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};

  SmallVector<Value *, 16> RealArgs;
  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
  if (IfCondition) {
    Value *Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
    RealArgs.push_back(Cond);
  }
  RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

  // __kmpc_fork_call_if always expects a void ptr as the last argument.
  // If there are no arguments, pass a null pointer.
  auto PtrTy = OMPIRBuilder->VoidPtr;
  if (IfCondition && NumCapturedVars == 0) {
    Value *NullPtrValue = Constant::getNullValue(PtrTy);
    RealArgs.push_back(NullPtrValue);
  }
  if (IfCondition && RealArgs.back()->getType() != PtrTy)
    RealArgs.back() = Builder.CreateBitCast(RealArgs.back(), PtrTy);

  Builder.CreateCall(RTLFn, RealArgs);

  LLVM_DEBUG(dbgs() << "With fork_call placed: "
                    << *Builder.GetInsertBlock()->getParent() << "\n");

  // Initialize the local TID stack location with the argument value.
  Builder.SetInsertPoint(PrivTID);
  Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
                      PrivTIDAddr);

  // Remove the redundant call to the outlined function.
  CI->eraseFromParent();

  for (Instruction *I : ToBeDeleted) {
    I->eraseFromParent();
  }
}
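
// Illustrative output (editorial note, not part of the original source): for
// a host-side parallel region capturing %a and %b, the call built above looks
// roughly like
//   call void (ptr, i32, ptr, ...)
//       @__kmpc_fork_call(ptr @ident, i32 2, ptr @foo..omp_par,
//                         ptr %a, ptr %b)
// with __kmpc_fork_call_if used instead when an if-clause condition must be
// evaluated at runtime.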

IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
    omp::ProcBindKind ProcBind, bool IsCancellable) {
  assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");

  if (!updateToLocation(Loc))
    return Loc.IP;

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadID = getOrCreateThreadID(Ident);
  // If we generate code for the target device, we need to allocate the
  // struct for aggregate params in the device default alloca address space.
  // The OpenMP runtime requires that the params of the extracted functions
  // are passed as zero address space pointers. This flag ensures that
  // extracted function arguments are declared in zero address space.
  bool ArgsInZeroAddressSpace = Config.isTargetDevice();

  // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
  // only if we compile for the host side.
  if (NumThreads && !Config.isTargetDevice()) {
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
  }

  if (ProcBind != OMP_PROC_BIND_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Save the outer alloca block because the insertion iterator may get
  // invalidated and we still need this later.
  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();

  // Vector to remember instructions we used only during the modeling but
  // which we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  // Change the location to the outer alloca insertion point to create and
  // initialize the allocas we pass into the parallel region.
  Builder.restoreIP(OuterAllocaIP);
  AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddrAlloca =
      Builder.CreateAlloca(Int32, nullptr, "zero.addr");
  Instruction *TIDAddr = TIDAddrAlloca;
  Instruction *ZeroAddr = ZeroAddrAlloca;
  if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
    // Add additional casts to enforce pointers in zero address space.
    TIDAddr = new AddrSpaceCastInst(
        TIDAddrAlloca, PointerType::get(M.getContext(), 0), "tid.addr.ascast");
    TIDAddr->insertAfter(TIDAddrAlloca);
    ToBeDeleted.push_back(TIDAddr);
    ZeroAddr = new AddrSpaceCastInst(ZeroAddrAlloca,
                                     PointerType::get(M.getContext(), 0),
                                     "zero.addr.ascast");
    ZeroAddr->insertAfter(ZeroAddrAlloca);
    ToBeDeleted.push_back(ZeroAddr);
  }

  // We only need TIDAddr and ZeroAddr for modeling purposes to get the
  // associated arguments in the outlined function, so we delete them later.
  ToBeDeleted.push_back(TIDAddrAlloca);
  ToBeDeleted.push_back(ZeroAddrAlloca);

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  BasicBlock *EntryBB = UI->getParent();
  BasicBlock *PRegEntryBB = EntryBB->splitBasicBlock(UI, "omp.par.entry");
  BasicBlock *PRegBodyBB = PRegEntryBB->splitBasicBlock(UI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(UI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB = PRegPreFiniBB->splitBasicBlock(UI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
  InsertPointTy InnerAllocaIP = Builder.saveIP();

  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
  Instruction *ZeroAddrUse =
      Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
  ToBeDeleted.push_back(ZeroAddrUse);

  // EntryBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(InnerAllocaIP, CodeGenIP);

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  OutlineInfo OI;
  if (Config.isTargetDevice()) {
    // Generate the OpenMP target specific runtime call.
    OI.PostOutlineCB = [=, ToBeDeletedVec =
                               std::move(ToBeDeleted)](Function &OutlinedFn) {
      targetParallelCallback(this, OutlinedFn, OuterFn, OuterAllocaBlock, Ident,
                             IfCondition, NumThreads, PrivTID, PrivTIDAddr,
                             ThreadID, ToBeDeletedVec);
    };
  } else {
    // Generate the OpenMP host runtime call.
    OI.PostOutlineCB = [=, ToBeDeletedVec =
                               std::move(ToBeDeleted)](Function &OutlinedFn) {
      hostParallelCallback(this, OutlinedFn, OuterFn, Ident, IfCondition,
                           PrivTID, PrivTIDAddr, ToBeDeletedVec);
    };
  }

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function a last time to finalize values between the pre-fini
  // block and the exit block if we left the parallel "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();

  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  OI.OuterAllocaBB = OuterAllocaBlock;
  OI.EntryBB = PRegEntryBB;
  OI.ExitBB = PRegExitBB;

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

  // Ensure a single exit node for the outlined region by creating one.
  // We might have multiple incoming edges to the exit now due to
  // finalizations, e.g., cancel calls that cause the control flow to leave the
  // region.
  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
  Blocks.push_back(PRegOutlinedExitBB);

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* AllocationBlock */ OuterAllocaBlock,
                          /* Suffix */ ".omp_par", ArgsInZeroAddressSpace);

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);

1554 auto PrivHelper = [&](Value &V) {
1555 if (&V == TIDAddr || &V == ZeroAddr) {
1556 OI.ExcludeArgsFromAggregate.push_back(&V);
1557 return;
1558 }
1559
1561 for (Use &U : V.uses())
1562 if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
1563 if (ParallelRegionBlockSet.count(UserI->getParent()))
1564 Uses.insert(&U);
1565
1566 // __kmpc_fork_call expects extra arguments as pointers. If the input
1567 // already has a pointer type, everything is fine. Otherwise, store the
1568 // value onto stack and load it back inside the to-be-outlined region. This
1569 // will ensure only the pointer will be passed to the function.
1570 // FIXME: if there are more than 15 trailing arguments, they must be
1571 // additionally packed in a struct.
1572 Value *Inner = &V;
1573 if (!V.getType()->isPointerTy()) {
1575 LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");
1576
1577 Builder.restoreIP(OuterAllocaIP);
1578 Value *Ptr =
1579 Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");
1580
1581 // Store to stack at end of the block that currently branches to the entry
1582 // block of the to-be-outlined region.
1583 Builder.SetInsertPoint(InsertBB,
1584 InsertBB->getTerminator()->getIterator());
1585 Builder.CreateStore(&V, Ptr);
1586
1587 // Load back next to allocations in the to-be-outlined region.
1588 Builder.restoreIP(InnerAllocaIP);
1589 Inner = Builder.CreateLoad(V.getType(), Ptr);
1590 }
1591
1592 Value *ReplacementValue = nullptr;
1593 CallInst *CI = dyn_cast<CallInst>(&V);
1594 if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
1595 ReplacementValue = PrivTID;
1596 } else {
1597 Builder.restoreIP(
1598 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
1599 assert(ReplacementValue &&
1600 "Expected copy/create callback to set replacement value!");
1601 if (ReplacementValue == &V)
1602 return;
1603 }
1604
1605 for (Use *UPtr : Uses)
1606 UPtr->set(ReplacementValue);
1607 };
1608
1609 // Reset the inner alloca insertion as it will be used for loading the values
1610 // wrapped into pointers before passing them into the to-be-outlined region.
1611 // Configure it to insert immediately after the fake use of zero address so
1612 // that they are available in the generated body and so that the
1613 // OpenMP-related values (thread ID and zero address pointers) remain leading
1614 // in the argument list.
1615 InnerAllocaIP = IRBuilder<>::InsertPoint(
1616 ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());
1617
1618 // Reset the outer alloca insertion point to the entry of the relevant block
1619 // in case it was invalidated.
1620 OuterAllocaIP = IRBuilder<>::InsertPoint(
1621 OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());
1622
1623 for (Value *Input : Inputs) {
1624 LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
1625 PrivHelper(*Input);
1626 }
1627 LLVM_DEBUG({
1628 for (Value *Output : Outputs)
1629 dbgs() << "Captured output: " << *Output << "\n";
1630 });
1631 assert(Outputs.empty() &&
1632 "OpenMP outlining should not produce live-out values!");
1633
1634 LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
1635 LLVM_DEBUG({
1636 for (auto *BB : Blocks)
1637 dbgs() << " PBR: " << BB->getName() << "\n";
1638 });
1639
1640 // Register the outlined info.
1641 addOutlineInfo(std::move(OI));
1642
1643 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1644 UI->eraseFromParent();
1645
1646 return AfterIP;
1647}
1648
1649void OpenMPIRBuilder::emitFlush(const LocationDescription &Loc) {
1650 // Build call void __kmpc_flush(ident_t *loc)
1651 uint32_t SrcLocStrSize;
1652 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1653 Value *Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1654
1655 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1656}
1657
1658void OpenMPIRBuilder::createFlush(const LocationDescription &Loc) {
1659 if (!updateToLocation(Loc))
1660 return;
1661 emitFlush(Loc);
1662}
1663
1664void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
1665 // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
1666 // global_tid);
1667 uint32_t SrcLocStrSize;
1668 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1669 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1670 Value *Args[] = {Ident, getOrCreateThreadID(Ident)};
1671
1672 // Ignore return result until untied tasks are supported.
1673 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1674 Args);
1675}
1676
1677void OpenMPIRBuilder::createTaskwait(const LocationDescription &Loc) {
1678 if (!updateToLocation(Loc))
1679 return;
1680 emitTaskwaitImpl(Loc);
1681}
1682
1683void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
1684 // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
1685 uint32_t SrcLocStrSize;
1686 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1687 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1688 Constant *I32Null = ConstantInt::getNullValue(Int32);
1689 Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1690
1691 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1692 Args);
1693}
1694
1695void OpenMPIRBuilder::createTaskyield(const LocationDescription &Loc) {
1696 if (!updateToLocation(Loc))
1697 return;
1698 emitTaskyieldImpl(Loc);
1699}
1700
1701OpenMPIRBuilder::InsertPointTy
1702OpenMPIRBuilder::createTask(const LocationDescription &Loc,
1703 InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB,
1704 bool Tied, Value *Final, Value *IfCondition,
1705 SmallVector<DependData> Dependencies) {
1706
1707 if (!updateToLocation(Loc))
1708 return InsertPointTy();
1709
1710 uint32_t SrcLocStrSize;
1711 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1712 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1713 // The current basic block is split into four basic blocks. After outlining,
1714 // they will be mapped as follows:
1715 // ```
1716 // def current_fn() {
1717 // current_basic_block:
1718 // br label %task.exit
1719 // task.exit:
1720 // ; instructions after task
1721 // }
1722 // def outlined_fn() {
1723 // task.alloca:
1724 // br label %task.body
1725 // task.body:
1726 // ret void
1727 // }
1728 // ```
1729 BasicBlock *TaskExitBB = splitBB(Builder, /*CreateBranch=*/true, "task.exit");
1730 BasicBlock *TaskBodyBB = splitBB(Builder, /*CreateBranch=*/true, "task.body");
1731 BasicBlock *TaskAllocaBB =
1732 splitBB(Builder, /*CreateBranch=*/true, "task.alloca");
1733
1734 InsertPointTy TaskAllocaIP =
1735 InsertPointTy(TaskAllocaBB, TaskAllocaBB->begin());
1736 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->begin());
1737 BodyGenCB(TaskAllocaIP, TaskBodyIP);
1738
1739 OutlineInfo OI;
1740 OI.EntryBB = TaskAllocaBB;
1741 OI.OuterAllocaBB = AllocaIP.getBlock();
1742 OI.ExitBB = TaskExitBB;
1743
1744 // Add the thread ID argument.
1745 std::stack<Instruction *> ToBeDeleted;
1746 OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
1747 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));
1748
1749 OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
1750 TaskAllocaBB, ToBeDeleted](Function &OutlinedFn) mutable {
1751 // Replace the stale CI with the appropriate RTL function call.
1752 assert(OutlinedFn.getNumUses() == 1 &&
1753 "there must be a single user for the outlined function");
1754 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1755
1756 // HasShareds is true if any variables are captured in the outlined region,
1757 // false otherwise.
1758 bool HasShareds = StaleCI->arg_size() > 1;
1759 Builder.SetInsertPoint(StaleCI);
1760
1761 // Gather the arguments for emitting the runtime call for
1762 // @__kmpc_omp_task_alloc
1763 Function *TaskAllocFn =
1764 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
1765
1766 // Arguments - `loc_ref` (Ident) and `gtid` (ThreadID) for the task
1767 // allocation call.
1768 Value *ThreadID = getOrCreateThreadID(Ident);
1769
1770 // Argument - `flags`
1771 // Task is tied iff (Flags & 1) == 1.
1772 // Task is untied iff (Flags & 1) == 0.
1773 // Task is final iff (Flags & 2) == 2.
1774 // Task is not final iff (Flags & 2) == 0.
1775 // TODO: Handle the other flags.
1776 Value *Flags = Builder.getInt32(Tied);
1777 if (Final) {
1778 Value *FinalFlag =
1779 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
1780 Flags = Builder.CreateOr(FinalFlag, Flags);
1781 }
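// For example, a tied task whose `final` clause evaluates to true ends up
// with Flags == 3 (tied bit | final bit).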
1782
1783 // Argument - `sizeof_kmp_task_t` (TaskSize)
1784 // TaskSize refers to the size in bytes of the kmp_task_t data structure
1785 // including private vars accessed in the task.
1786 // TODO: add kmp_task_t_with_privates (privates)
1787 Value *TaskSize = Builder.getInt64(
1788 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
1789
1790 // Argument - `sizeof_shareds` (SharedsSize)
1791 // SharedsSize refers to the shareds array size in the kmp_task_t data
1792 // structure.
1793 Value *SharedsSize = Builder.getInt64(0);
1794 if (HasShareds) {
1795 AllocaInst *ArgStructAlloca =
1796 dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
1797 assert(ArgStructAlloca &&
1798 "Unable to find the alloca instruction corresponding to arguments "
1799 "for extracted function");
1800 StructType *ArgStructType =
1801 dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
1802 assert(ArgStructType && "Unable to find struct type corresponding to "
1803 "arguments for extracted function");
1804 SharedsSize =
1805 M.getDataLayout().getTypeStoreSize(ArgStructType);
1806 }
1807 // Emit the @__kmpc_omp_task_alloc runtime call
1808 // The runtime call returns a pointer to an area where the task captured
1809 // variables must be copied before the task is run (TaskData)
1810 CallInst *TaskData = Builder.CreateCall(
1811 TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
1812 /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
1813 /*task_func=*/&OutlinedFn});
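// Schematically, TaskData points at a runtime-owned record of roughly the
// form (sketch, not the exact runtime definition):
//   struct kmp_task_t { void *shareds; kmp_routine_entry_t routine;
//                       kmp_int32 part_id; ... };
// with the shareds buffer for captured variables placed behind it.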
1814
1815 // Copy the arguments for outlined function
1816 if (HasShareds) {
1817 Value *Shareds = StaleCI->getArgOperand(1);
1818 Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
1819 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
1820 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
1821 SharedsSize);
1822 }
1823
1824 Value *DepArray = nullptr;
1825 if (Dependencies.size()) {
1826 InsertPointTy OldIP = Builder.saveIP();
1827 Builder.SetInsertPoint(
1828 &OldIP.getBlock()->getParent()->getEntryBlock().back());
1829
1830 Type *DepArrayTy = ArrayType::get(DependInfo, Dependencies.size());
1831 DepArray = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr");
1832
1833 unsigned P = 0;
1834 for (const DependData &Dep : Dependencies) {
1835 Value *Base =
1836 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, P);
1837 // Store the pointer to the variable
1838 Value *Addr = Builder.CreateStructGEP(
1839 DependInfo, Base,
1840 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1841 Value *DepValPtr =
1842 Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1843 Builder.CreateStore(DepValPtr, Addr);
1844 // Store the size of the variable
1845 Value *Size = Builder.CreateStructGEP(
1846 DependInfo, Base,
1847 static_cast<unsigned int>(RTLDependInfoFields::Len));
1848 Builder.CreateStore(Builder.getInt64(M.getDataLayout().getTypeStoreSize(
1849 Dep.DepValueType)),
1850 Size);
1851 // Store the dependency kind
1852 Value *Flags = Builder.CreateStructGEP(
1853 DependInfo, Base,
1854 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1855 Builder.CreateStore(
1856 ConstantInt::get(Builder.getInt8Ty(),
1857 static_cast<unsigned int>(Dep.DepKind)),
1858 Flags);
1859 ++P;
1860 }
1861
1862 Builder.restoreIP(OldIP);
1863 }
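// Each array element written above mirrors the runtime's dependence record,
// roughly struct kmp_depend_info { intptr_t base_addr; size_t len;
// uint8_t flags; } (sketch), one record per `depend` clause item.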
1864
1865 // In the presence of the `if` clause, the following IR is generated:
1866 // ...
1867 // %data = call @__kmpc_omp_task_alloc(...)
1868 // br i1 %if_condition, label %then, label %else
1869 // then:
1870 // call @__kmpc_omp_task(...)
1871 // br label %exit
1872 // else:
1873 // call @__kmpc_omp_task_begin_if0(...)
1874 // call @outlined_fn(...)
1875 // call @__kmpc_omp_task_complete_if0(...)
1876 // br label %exit
1877 // exit:
1878 // ...
1879 if (IfCondition) {
1880 // `SplitBlockAndInsertIfThenElse` requires the block to have a
1881 // terminator.
1882 splitBB(Builder, /*CreateBranch=*/true, "if.end");
1883 Instruction *IfTerminator =
1884 Builder.GetInsertPoint()->getParent()->getTerminator();
1885 Instruction *ThenTI = IfTerminator, *ElseTI = nullptr;
1886 Builder.SetInsertPoint(IfTerminator);
1887 SplitBlockAndInsertIfThenElse(IfCondition, IfTerminator, &ThenTI,
1888 &ElseTI);
1889 Builder.SetInsertPoint(ElseTI);
1890 Function *TaskBeginFn =
1891 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
1892 Function *TaskCompleteFn =
1893 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
1894 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
1895 CallInst *CI = nullptr;
1896 if (HasShareds)
1897 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
1898 else
1899 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
1900 CI->setDebugLoc(StaleCI->getDebugLoc());
1901 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
1902 Builder.SetInsertPoint(ThenTI);
1903 }
1904
1905 if (Dependencies.size()) {
1906 Function *TaskFn =
1907 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
1908 Builder.CreateCall(
1909 TaskFn,
1910 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
1911 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
1912 ConstantPointerNull::get(PointerType::getUnqual(M.getContext()))});
1913
1914 } else {
1915 // Emit the @__kmpc_omp_task runtime call to spawn the task
1916 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
1917 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
1918 }
1919
1920 StaleCI->eraseFromParent();
1921
1922 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
1923 if (HasShareds) {
1924 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
1925 OutlinedFn.getArg(1)->replaceUsesWithIf(
1926 Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
1927 }
1928
1929 while (!ToBeDeleted.empty()) {
1930 ToBeDeleted.top()->eraseFromParent();
1931 ToBeDeleted.pop();
1932 }
1933 };
1934
1935 addOutlineInfo(std::move(OI));
1936 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());
1937
1938 return Builder.saveIP();
1939}
1940
1941OpenMPIRBuilder::InsertPointTy
1942OpenMPIRBuilder::createTaskgroup(const LocationDescription &Loc,
1943 InsertPointTy AllocaIP,
1944 BodyGenCallbackTy BodyGenCB) {
1945 if (!updateToLocation(Loc))
1946 return InsertPointTy();
1947
1948 uint32_t SrcLocStrSize;
1949 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1950 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1951 Value *ThreadID = getOrCreateThreadID(Ident);
1952
1953 // Emit the @__kmpc_taskgroup runtime call to start the taskgroup
1954 Function *TaskgroupFn =
1955 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
1956 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
1957
1958 BasicBlock *TaskgroupExitBB = splitBB(Builder, true, "taskgroup.exit");
1959 BodyGenCB(AllocaIP, Builder.saveIP());
1960
1961 Builder.SetInsertPoint(TaskgroupExitBB);
1962 // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
1963 Function *EndTaskgroupFn =
1964 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
1965 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
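// The emitted pattern is, schematically:
//   call void @__kmpc_taskgroup(ptr @loc, i32 %gtid)
//   ; taskgroup body; tasks spawned here are awaited below
//   call void @__kmpc_end_taskgroup(ptr @loc, i32 %gtid)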
1966
1967 return Builder.saveIP();
1968}
1969
1970OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSections(
1971 const LocationDescription &Loc, InsertPointTy AllocaIP,
1972 ArrayRef<StorableBodyGenCallbackTy> SectionCBs, PrivatizeCallbackTy PrivCB,
1973 FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait) {
1974 assert(!isConflictIP(AllocaIP, Loc.IP) && "Dedicated IP allocas required");
1975
1976 if (!updateToLocation(Loc))
1977 return Loc.IP;
1978
1979 auto FiniCBWrapper = [&](InsertPointTy IP) {
1980 if (IP.getBlock()->end() != IP.getPoint())
1981 return FiniCB(IP);
1982 // This must be done, otherwise any nested constructs using FinalizeOMPRegion
1983 // will fail because that function requires the finalization basic block to
1984 // have a terminator, which has already been removed by EmitOMPRegionBody.
1985 // IP is currently at the cancellation block.
1986 // We need to backtrack to the condition block to fetch
1987 // the exit block and create a branch from the cancellation
1988 // block to the exit block.
1989 IRBuilder<>::InsertPointGuard IPG(Builder);
1990 Builder.restoreIP(IP);
1991 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
1992 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1993 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1994 Instruction *I = Builder.CreateBr(ExitBB);
1995 IP = InsertPointTy(I->getParent(), I->getIterator());
1996 return FiniCB(IP);
1997 };
1998
1999 FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
2000
2001 // Each section is emitted as a switch case
2002 // Each finalization callback is handled from clang.EmitOMPSectionDirective()
2003 // -> OMP.createSection() which generates the IR for each section
2004 // Iterate through all sections and emit a switch construct:
2005 // switch (IV) {
2006 // case 0:
2007 // <SectionStmt[0]>;
2008 // break;
2009 // ...
2010 // case <NumSection> - 1:
2011 // <SectionStmt[<NumSection> - 1]>;
2012 // break;
2013 // }
2014 // ...
2015 // section_loop.after:
2016 // <FiniCB>;
2017 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP, Value *IndVar) {
2018 Builder.restoreIP(CodeGenIP);
2019 BasicBlock *Continue =
2020 splitBBWithSuffix(Builder, /*CreateBranch=*/false, ".sections.after");
2021 Function *CurFn = Continue->getParent();
2022 SwitchInst *SwitchStmt = Builder.CreateSwitch(IndVar, Continue);
2023
2024 unsigned CaseNumber = 0;
2025 for (auto SectionCB : SectionCBs) {
2026 BasicBlock *CaseBB = BasicBlock::Create(
2027 M.getContext(), "omp_section_loop.body.case", CurFn, Continue);
2028 SwitchStmt->addCase(Builder.getInt32(CaseNumber), CaseBB);
2029 Builder.SetInsertPoint(CaseBB);
2030 BranchInst *CaseEndBr = Builder.CreateBr(Continue);
2031 SectionCB(InsertPointTy(),
2032 {CaseEndBr->getParent(), CaseEndBr->getIterator()});
2033 CaseNumber++;
2034 }
2035 // remove the existing terminator from body BB since there can be no
2036 // terminators after switch/case
2037 };
2038 // Loop body ends here
2039 // LowerBound, UpperBound, and Stride for createCanonicalLoop
2040 Type *I32Ty = Type::getInt32Ty(M.getContext());
2041 Value *LB = ConstantInt::get(I32Ty, 0);
2042 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2043 Value *ST = ConstantInt::get(I32Ty, 1);
2044 CanonicalLoopInfo *LoopInfo = createCanonicalLoop(
2045 Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");
2046 InsertPointTy AfterIP =
2047 applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);
2048
2049 // Apply the finalization callback in LoopAfterBB
2050 auto FiniInfo = FinalizationStack.pop_back_val();
2051 assert(FiniInfo.DK == OMPD_sections &&
2052 "Unexpected finalization stack state!");
2053 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2054 Builder.restoreIP(AfterIP);
2055 BasicBlock *FiniBB =
2056 splitBBWithSuffix(Builder, /*CreateBranch=*/true, "sections.fini");
2057 CB(Builder.saveIP());
2058 AfterIP = {FiniBB, FiniBB->begin()};
2059 }
2060
2061 return AfterIP;
2062}
2063
2064OpenMPIRBuilder::InsertPointTy
2065OpenMPIRBuilder::createSection(const LocationDescription &Loc,
2066 BodyGenCallbackTy BodyGenCB,
2067 FinalizeCallbackTy FiniCB) {
2068 if (!updateToLocation(Loc))
2069 return Loc.IP;
2070
2071 auto FiniCBWrapper = [&](InsertPointTy IP) {
2072 if (IP.getBlock()->end() != IP.getPoint())
2073 return FiniCB(IP);
2074 // This must be done, otherwise any nested constructs using FinalizeOMPRegion
2075 // will fail because that function requires the finalization basic block to
2076 // have a terminator, which has already been removed by EmitOMPRegionBody.
2077 // IP is currently at the cancellation block.
2078 // We need to backtrack to the condition block to fetch
2079 // the exit block and create a branch from the cancellation
2080 // block to the exit block.
2081 IRBuilder<>::InsertPointGuard IPG(Builder);
2082 Builder.restoreIP(IP);
2083 auto *CaseBB = Loc.IP.getBlock();
2084 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2085 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2086 Instruction *I = Builder.CreateBr(ExitBB);
2087 IP = InsertPointTy(I->getParent(), I->getIterator());
2088 return FiniCB(IP);
2089 };
2090
2091 Directive OMPD = Directive::OMPD_sections;
2092 // Since we are using Finalization Callback here, HasFinalize
2093 // and IsCancellable have to be true
2094 return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
2095 /*Conditional*/ false, /*hasFinalize*/ true,
2096 /*IsCancellable*/ true);
2097}
2098
2099/// Create a function with a unique name and a "void (i8*, i8*)" signature in
2100/// the given module and return it.
2101static Function *getFreshReductionFunc(Module &M) {
2102 Type *VoidTy = Type::getVoidTy(M.getContext());
2103 Type *Int8PtrTy = PointerType::getUnqual(M.getContext());
2104 auto *FuncTy =
2105 FunctionType::get(VoidTy, {Int8PtrTy, Int8PtrTy}, /* IsVarArg */ false);
2106 return Function::Create(FuncTy, GlobalVariable::InternalLinkage,
2107 M.getDataLayout().getDefaultGlobalsAddressSpace(),
2108 ".omp.reduction.func", &M);
2109}
2110
2111OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
2112 const LocationDescription &Loc, InsertPointTy AllocaIP,
2113 ArrayRef<ReductionInfo> ReductionInfos, bool IsNoWait, bool IsByRef) {
2114 for (const ReductionInfo &RI : ReductionInfos) {
2115 (void)RI;
2116 assert(RI.Variable && "expected non-null variable");
2117 assert(RI.PrivateVariable && "expected non-null private variable");
2118 assert(RI.ReductionGen && "expected non-null reduction generator callback");
2119 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
2120 "expected variables and their private equivalents to have the same "
2121 "type");
2122 assert(RI.Variable->getType()->isPointerTy() &&
2123 "expected variables to be pointers");
2124 }
2125
2126 if (!updateToLocation(Loc))
2127 return InsertPointTy();
2128
2129 BasicBlock *InsertBlock = Loc.IP.getBlock();
2130 BasicBlock *ContinuationBlock =
2131 InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
2132 InsertBlock->getTerminator()->eraseFromParent();
2133
2134 // Create and populate array of type-erased pointers to private reduction
2135 // values.
2136 unsigned NumReductions = ReductionInfos.size();
2137 Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions);
2138 Builder.restoreIP(AllocaIP);
2139 Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");
2140
2141 Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
2142
2143 for (auto En : enumerate(ReductionInfos)) {
2144 unsigned Index = En.index();
2145 const ReductionInfo &RI = En.value();
2146 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
2147 RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
2148 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
2149 }
2150
2151 // Emit a call to the runtime function that orchestrates the reduction.
2152 // Declare the reduction function in the process.
2153 Function *Func = Builder.GetInsertBlock()->getParent();
2154 Module *Module = Func->getParent();
2155 uint32_t SrcLocStrSize;
2156 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2157 bool CanGenerateAtomic =
2158 llvm::all_of(ReductionInfos, [](const ReductionInfo &RI) {
2159 return RI.AtomicReductionGen;
2160 });
2161 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
2162 CanGenerateAtomic
2163 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
2164 : IdentFlag(0));
2165 Value *ThreadId = getOrCreateThreadID(Ident);
2166 Constant *NumVariables = Builder.getInt32(NumReductions);
2167 const DataLayout &DL = Module->getDataLayout();
2168 unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
2169 Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
2170 Function *ReductionFunc = getFreshReductionFunc(*Module);
2171 Value *Lock = getOMPCriticalRegionLock(".reduction");
2172 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
2173 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
2174 : RuntimeFunction::OMPRTL___kmpc_reduce);
2175 CallInst *ReduceCall =
2176 Builder.CreateCall(ReduceFunc,
2177 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
2178 ReductionFunc, Lock},
2179 "reduce");
2180
2181 // Create final reduction entry blocks for the atomic and non-atomic case.
2182 // Emit IR that dispatches control flow to one of the blocks based on the
2183 // reduction supporting the atomic mode.
2184 BasicBlock *NonAtomicRedBlock =
2185 BasicBlock::Create(Module->getContext(), "reduce.switch.nonatomic", Func);
2186 BasicBlock *AtomicRedBlock =
2187 BasicBlock::Create(Module->getContext(), "reduce.switch.atomic", Func);
2188 SwitchInst *Switch =
2189 Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
2190 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
2191 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
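// Per the runtime contract, __kmpc_reduce{_nowait} returns 1 when the caller
// should perform the non-atomic reduction, 2 when it should take the atomic
// path, and 0 when there is nothing left to do, hence the two cases above.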
2192
2193 // Populate the non-atomic reduction using the elementwise reduction function.
2194 // This loads the elements from the global and private variables and reduces
2195 // them before storing back the result to the global variable.
2196 Builder.SetInsertPoint(NonAtomicRedBlock);
2197 for (auto En : enumerate(ReductionInfos)) {
2198 const ReductionInfo &RI = En.value();
2199 Type *ValueType = RI.ElementType;
2200 // We have one less load for the by-ref case because that load is now
2201 // inside the reduction region.
2202 Value *RedValue = nullptr;
2203 if (!IsByRef) {
2204 RedValue = Builder.CreateLoad(ValueType, RI.Variable,
2205 "red.value." + Twine(En.index()));
2206 }
2207 Value *PrivateRedValue =
2208 Builder.CreateLoad(ValueType, RI.PrivateVariable,
2209 "red.private.value." + Twine(En.index()));
2210 Value *Reduced;
2211 if (IsByRef) {
2212 Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RI.Variable,
2213 PrivateRedValue, Reduced));
2214 } else {
2215 Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), RedValue,
2216 PrivateRedValue, Reduced));
2217 }
2218 if (!Builder.GetInsertBlock())
2219 return InsertPointTy();
2220 // for by-ref case, the load is inside of the reduction region
2221 if (!IsByRef)
2222 Builder.CreateStore(Reduced, RI.Variable);
2223 }
2224 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
2225 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
2226 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
2227 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
2228 Builder.CreateBr(ContinuationBlock);
2229
2230 // Populate the atomic reduction using the atomic elementwise reduction
2231 // function. There are no loads/stores here because they will be happening
2232 // inside the atomic elementwise reduction.
2233 Builder.SetInsertPoint(AtomicRedBlock);
2234 if (CanGenerateAtomic && !IsByRef) {
2235 for (const ReductionInfo &RI : ReductionInfos) {
2236 Builder.restoreIP(RI.AtomicReductionGen(Builder.saveIP(), RI.ElementType,
2237 RI.Variable, RI.PrivateVariable));
2238 if (!Builder.GetInsertBlock())
2239 return InsertPointTy();
2240 }
2241 Builder.CreateBr(ContinuationBlock);
2242 } else {
2243 Builder.CreateUnreachable();
2244 }
2245
2246 // Populate the outlined reduction function using the elementwise reduction
2247 // function. Partial values are extracted from the type-erased array of
2248 // pointers to private variables.
2249 BasicBlock *ReductionFuncBlock =
2250 BasicBlock::Create(Module->getContext(), "", ReductionFunc);
2251 Builder.SetInsertPoint(ReductionFuncBlock);
2252 Value *LHSArrayPtr = ReductionFunc->getArg(0);
2253 Value *RHSArrayPtr = ReductionFunc->getArg(1);
2254
2255 for (auto En : enumerate(ReductionInfos)) {
2256 const ReductionInfo &RI = En.value();
2257 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
2258 RedArrayTy, LHSArrayPtr, 0, En.index());
2259 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
2260 Value *LHSPtr = Builder.CreateBitCast(LHSI8Ptr, RI.Variable->getType());
2261 Value *LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
2262 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
2263 RedArrayTy, RHSArrayPtr, 0, En.index());
2264 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
2265 Value *RHSPtr =
2266 Builder.CreateBitCast(RHSI8Ptr, RI.PrivateVariable->getType());
2267 Value *RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
2268 Value *Reduced;
2269 Builder.restoreIP(RI.ReductionGen(Builder.saveIP(), LHS, RHS, Reduced));
2270 if (!Builder.GetInsertBlock())
2271 return InsertPointTy();
2272 // The store is inside the reduction region when using by-ref.
2273 if (!IsByRef)
2274 Builder.CreateStore(Reduced, LHSPtr);
2275 }
2276 Builder.CreateRetVoid();
2277
2278 Builder.SetInsertPoint(ContinuationBlock);
2279 return Builder.saveIP();
2280}
2281
2282OpenMPIRBuilder::InsertPointTy
2283OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
2284 BodyGenCallbackTy BodyGenCB,
2285 FinalizeCallbackTy FiniCB) {
2286
2287 if (!updateToLocation(Loc))
2288 return Loc.IP;
2289
2290 Directive OMPD = Directive::OMPD_master;
2291 uint32_t SrcLocStrSize;
2292 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2293 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2294 Value *ThreadId = getOrCreateThreadID(Ident);
2295 Value *Args[] = {Ident, ThreadId};
2296
2297 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
2298 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2299
2300 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
2301 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
2302
2303 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2304 /*Conditional*/ true, /*hasFinalize*/ true);
2305}
2306
2307OpenMPIRBuilder::InsertPointTy
2308OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
2309 BodyGenCallbackTy BodyGenCB,
2310 FinalizeCallbackTy FiniCB, Value *Filter) {
2311 if (!updateToLocation(Loc))
2312 return Loc.IP;
2313
2314 Directive OMPD = Directive::OMPD_masked;
2315 uint32_t SrcLocStrSize;
2316 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2317 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2318 Value *ThreadId = getOrCreateThreadID(Ident);
2319 Value *Args[] = {Ident, ThreadId, Filter};
2320 Value *ArgsEnd[] = {Ident, ThreadId};
2321
2322 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
2323 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
2324
2325 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
2326 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
2327
2328 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
2329 /*Conditional*/ true, /*hasFinalize*/ true);
2330}
2331
2332CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
2333 DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore,
2334 BasicBlock *PostInsertBefore, const Twine &Name) {
2335 Module *M = F->getParent();
2336 LLVMContext &Ctx = M->getContext();
2337 Type *IndVarTy = TripCount->getType();
2338
2339 // Create the basic block structure.
2340 BasicBlock *Preheader =
2341 BasicBlock::Create(Ctx, "omp_" + Name + ".preheader", F, PreInsertBefore);
2342 BasicBlock *Header =
2343 BasicBlock::Create(Ctx, "omp_" + Name + ".header", F, PreInsertBefore);
2344 BasicBlock *Cond =
2345 BasicBlock::Create(Ctx, "omp_" + Name + ".cond", F, PreInsertBefore);
2346 BasicBlock *Body =
2347 BasicBlock::Create(Ctx, "omp_" + Name + ".body", F, PreInsertBefore);
2348 BasicBlock *Latch =
2349 BasicBlock::Create(Ctx, "omp_" + Name + ".inc", F, PostInsertBefore);
2350 BasicBlock *Exit =
2351 BasicBlock::Create(Ctx, "omp_" + Name + ".exit", F, PostInsertBefore);
2352 BasicBlock *After =
2353 BasicBlock::Create(Ctx, "omp_" + Name + ".after", F, PostInsertBefore);
2354
2355 // Use specified DebugLoc for new instructions.
2356 Builder.SetCurrentDebugLocation(DL);
2357
2358 Builder.SetInsertPoint(Preheader);
2359 Builder.CreateBr(Header);
2360
2361 Builder.SetInsertPoint(Header);
2362 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
2363 IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
2364 Builder.CreateBr(Cond);
2365
2366 Builder.SetInsertPoint(Cond);
2367 Value *Cmp =
2368 Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
2369 Builder.CreateCondBr(Cmp, Body, Exit);
2370
2371 Builder.SetInsertPoint(Body);
2372 Builder.CreateBr(Latch);
2373
2374 Builder.SetInsertPoint(Latch);
2375 Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
2376 "omp_" + Name + ".next", /*HasNUW=*/true);
2377 Builder.CreateBr(Header);
2378 IndVarPHI->addIncoming(Next, Latch);
2379
2380 Builder.SetInsertPoint(Exit);
2381 Builder.CreateBr(After);
2382
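// The control flow built above is, schematically:
//   preheader:                                      br header
//   header: %iv = phi [0, preheader], [%next, inc]; br cond
//   cond:   %cmp = icmp ult %iv, %tripcount;        br %cmp, body, exit
//   body:                                           br inc
//   inc:    %next = add nuw %iv, 1;                 br header
//   exit:                                           br after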
2383 // Remember and return the canonical control flow.
2384 LoopInfos.emplace_front();
2385 CanonicalLoopInfo *CL = &LoopInfos.front();
2386
2387 CL->Header = Header;
2388 CL->Cond = Cond;
2389 CL->Latch = Latch;
2390 CL->Exit = Exit;
2391
2392#ifndef NDEBUG
2393 CL->assertOK();
2394#endif
2395 return CL;
2396}
2397
2398CanonicalLoopInfo *
2399OpenMPIRBuilder::createCanonicalLoop(const LocationDescription &Loc,
2400 LoopBodyGenCallbackTy BodyGenCB,
2401 Value *TripCount, const Twine &Name) {
2402 BasicBlock *BB = Loc.IP.getBlock();
2403 BasicBlock *NextBB = BB->getNextNode();
2404
2405 CanonicalLoopInfo *CL = createLoopSkeleton(Loc.DL, TripCount, BB->getParent(),
2406 NextBB, NextBB, Name);
2407 BasicBlock *After = CL->getAfter();
2408
2409 // If location is not set, don't connect the loop.
2410 if (updateToLocation(Loc)) {
2411 // Split the loop at the insertion point: Branch to the preheader and move
2412 // every following instruction to after the loop (the After BB). Also, the
2413 // new successor is the loop's after block.
2414 spliceBB(Builder, After, /*CreateBranch=*/false);
2415 Builder.CreateBr(CL->getPreheader());
2416 }
2417
2418 // Emit the body content. We do it after connecting the loop to the CFG to
2419 // avoid that the callback encounters degenerate BBs.
2420 BodyGenCB(CL->getBodyIP(), CL->getIndVar());
2421
2422#ifndef NDEBUG
2423 CL->assertOK();
2424#endif
2425 return CL;
2426}
2427
2428CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
2429 const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB,
2430 Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
2431 InsertPointTy ComputeIP, const Twine &Name) {
2432
2433 // Consider the following difficulties (assuming 8-bit signed integers):
2434 // * Adding \p Step to the loop counter which passes \p Stop may overflow:
2435 // DO I = 1, 100, 50
2436 // * A \p Step of INT_MIN cannot be normalized to a positive direction:
2437 // DO I = 100, 0, -128
2438
2439 // Start, Stop and Step must be of the same integer type.
2440 auto *IndVarTy = cast<IntegerType>(Start->getType());
2441 assert(IndVarTy == Stop->getType() && "Stop type mismatch");
2442 assert(IndVarTy == Step->getType() && "Step type mismatch");
2443
2444 LocationDescription ComputeLoc =
2445 ComputeIP.isSet() ? LocationDescription(ComputeIP, Loc.DL) : Loc;
2446 updateToLocation(ComputeLoc);
2447
2448 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
2449 ConstantInt *One = ConstantInt::get(IndVarTy, 1);
2450
2451 // Like Step, but always positive.
2452 Value *Incr = Step;
2453
2454 // Distance between Start and Stop; always positive.
2455 Value *Span;
2456
2457 // Condition that is true when no iterations are executed at all, e.g.
2458 // because UB < LB.
2459 Value *ZeroCmp;
2460
2461 if (IsSigned) {
2462 // Ensure that increment is positive. If not, negate and invert LB and UB.
2463 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
2464 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
2465 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
2466 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
2467 Span = Builder.CreateSub(UB, LB, "", false, true);
2468 ZeroCmp = Builder.CreateICmp(
2469 InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
2470 } else {
2471 Span = Builder.CreateSub(Stop, Start, "", true);
2472 ZeroCmp = Builder.CreateICmp(
2473 InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
2474 }
2475
2476 Value *CountIfLooping;
2477 if (InclusiveStop) {
2478 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
2479 } else {
2480 // Avoid incrementing past stop since it could overflow.
2481 Value *CountIfTwo = Builder.CreateAdd(
2482 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
2483 Value *OneCmp = Builder.CreateICmp(CmpInst::ICMP_ULE, Span, Incr);
2484 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
2485 }
2486 Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
2487 "omp_" + Name + ".tripcount");
2488
2489 auto BodyGen = [=](InsertPointTy CodeGenIP, Value *IV) {
2490 Builder.restoreIP(CodeGenIP);
2491 Value *Span = Builder.CreateMul(IV, Step);
2492 Value *IndVar = Builder.CreateAdd(Span, Start);
2493 BodyGenCB(Builder.saveIP(), IndVar);
2494 };
2495 LocationDescription LoopLoc = ComputeIP.isSet() ? Loc.IP : Builder.saveIP();
2496 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
2497}
2498
2499// Returns an LLVM function to call for initializing loop bounds using OpenMP
2500// static scheduling depending on `type`. Only i32 and i64 are supported by the
2501// runtime. Always interpret integers as unsigned similarly to
2502// CanonicalLoopInfo.
2503static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
2504 OpenMPIRBuilder &OMPBuilder) {
2505 unsigned Bitwidth = Ty->getIntegerBitWidth();
2506 if (Bitwidth == 32)
2507 return OMPBuilder.getOrCreateRuntimeFunction(
2508 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
2509 if (Bitwidth == 64)
2510 return OMPBuilder.getOrCreateRuntimeFunction(
2511 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
2512 llvm_unreachable("unknown OpenMP loop iterator bitwidth");
2513}
2514
2515OpenMPIRBuilder::InsertPointTy
2516OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
2517 InsertPointTy AllocaIP,
2518 bool NeedsBarrier) {
2519 assert(CLI->isValid() && "Requires a valid canonical loop");
2520 assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
2521 "Require dedicated allocate IP");
2522
2523 // Set up the source location value for OpenMP runtime.
2524 Builder.restoreIP(CLI->getPreheaderIP());
2525 Builder.SetCurrentDebugLocation(DL);
2526
2527 uint32_t SrcLocStrSize;
2528 Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
2529 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2530
2531 // Declare useful OpenMP runtime functions.
2532 Value *IV = CLI->getIndVar();
2533 Type *IVTy = IV->getType();
2534 FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
2535 FunctionCallee StaticFini =
2536 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
2537
2538 // Allocate space for computed loop bounds as expected by the "init" function.
2539 Builder.restoreIP(AllocaIP);
2540 Type *I32Type = Type::getInt32Ty(M.getContext());
2541 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
2542 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
2543 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
2544 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
2545
2546 // At the end of the preheader, prepare for calling the "init" function by
2547 // storing the current loop bounds into the allocated space. A canonical loop
2548 // always iterates from 0 to trip-count with step 1. Note that "init" expects
2549 // and produces an inclusive upper bound.
2550 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2551 Constant *Zero = ConstantInt::get(IVTy, 0);
2552 Constant *One = ConstantInt::get(IVTy, 1);
2553 Builder.CreateStore(Zero, PLowerBound);
2554 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
2555 Builder.CreateStore(UpperBound, PUpperBound);
2556 Builder.CreateStore(One, PStride);
2557
2558 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
2559
2560 Constant *SchedulingType = ConstantInt::get(
2561 I32Type, static_cast<int>(OMPScheduleType::UnorderedStatic));
2562
2563 // Call the "init" function and update the trip count of the loop with the
2564 // value it produced.
2565 Builder.CreateCall(StaticInit,
2566 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
2567 PUpperBound, PStride, One, Zero});
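// After this call, [*PLowerBound, *PUpperBound] holds the calling thread's
// chunk as an inclusive range; e.g. with a trip count of 100 and 4 threads,
// thread 1 typically receives [25, 49].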
2568 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
2569 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
2570 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
2571 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
2572 CLI->setTripCount(TripCount);
2573
2574 // Update all uses of the induction variable except the one in the condition
2575 // block that compares it with the actual upper bound, and the increment in
2576 // the latch block.
2577
2578 CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
2579 Builder.SetInsertPoint(CLI->getBody(),
2580 CLI->getBody()->getFirstInsertionPt());
2581 Builder.SetCurrentDebugLocation(DL);
2582 return Builder.CreateAdd(OldIV, LowerBound);
2583 });
2584
2585 // In the "exit" block, call the "fini" function.
2586 Builder.SetInsertPoint(CLI->getExit(),
2587 CLI->getExit()->getTerminator()->getIterator());
2588 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
2589
2590 // Add the barrier if requested.
2591 if (NeedsBarrier)
2592 createBarrier(LocationDescription(Builder.saveIP(), DL),
2593 omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
2594 /* CheckCancelFlag */ false);
2595
2596 InsertPointTy AfterIP = CLI->getAfterIP();
2597 CLI->invalidate();
2598
2599 return AfterIP;
2600}
2601
2602OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
2603 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
2604 bool NeedsBarrier, Value *ChunkSize) {
2605 assert(CLI->isValid() && "Requires a valid canonical loop");
2606 assert(ChunkSize && "Chunk size is required");
2607
2608 LLVMContext &Ctx = CLI->getFunction()->getContext();
2609 Value *IV = CLI->getIndVar();
2610 Value *OrigTripCount = CLI->getTripCount();
2611 Type *IVTy = IV->getType();
2612 assert(IVTy->getIntegerBitWidth() <= 64 &&
2613 "Max supported tripcount bitwidth is 64 bits");
2614 Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32 ? Type::getInt32Ty(Ctx)
2615 : Type::getInt64Ty(Ctx);
2616 Type *I32Type = Type::getInt32Ty(M.getContext());
2617 Constant *Zero = ConstantInt::get(InternalIVTy, 0);
2618 Constant *One = ConstantInt::get(InternalIVTy, 1);
2619
2620 // Declare useful OpenMP runtime functions.
2621 FunctionCallee StaticInit =
2622 getKmpcForStaticInitForType(InternalIVTy, M, *this);
2623 FunctionCallee StaticFini =
2624 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
2625
2626 // Allocate space for computed loop bounds as expected by the "init" function.
2627 Builder.restoreIP(AllocaIP);
2628 Builder.SetCurrentDebugLocation(DL);
2629 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
2630 Value *PLowerBound =
2631 Builder.CreateAlloca(InternalIVTy, nullptr, "p.lowerbound");
2632 Value *PUpperBound =
2633 Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound");
2634 Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride");
2635
2636 // Set up the source location value for the OpenMP runtime.
2637 Builder.restoreIP(CLI->getPreheaderIP());
2638 Builder.SetCurrentDebugLocation(DL);
2639
2640 // TODO: Detect overflow in ubsan or max-out with current tripcount.
2641 Value *CastedChunkSize =
2642 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy, "chunksize");
2643 Value *CastedTripCount =
2644 Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");
2645
2646 Constant *SchedulingType = ConstantInt::get(
2647 I32Type, static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
2648 Builder.CreateStore(Zero, PLowerBound);
2649 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
2650 Builder.CreateStore(OrigUpperBound, PUpperBound);
2651 Builder.CreateStore(One, PStride);
2652
2653 // Call the "init" function and update the trip count of the loop with the
2654 // value it produced.
2655 uint32_t SrcLocStrSize;
2656 Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
2657 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2658 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
2659 Builder.CreateCall(StaticInit,
2660 {/*loc=*/SrcLoc, /*global_tid=*/ThreadNum,
2661 /*schedtype=*/SchedulingType, /*plastiter=*/PLastIter,
2662 /*plower=*/PLowerBound, /*pupper=*/PUpperBound,
2663 /*pstride=*/PStride, /*incr=*/One,
2664 /*chunk=*/CastedChunkSize});
2665
2666 // Load values written by the "init" function.
2667 Value *FirstChunkStart =
2668 Builder.CreateLoad(InternalIVTy, PLowerBound, "omp_firstchunk.lb");
2669 Value *FirstChunkStop =
2670 Builder.CreateLoad(InternalIVTy, PUpperBound, "omp_firstchunk.ub");
2671 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
2672 Value *ChunkRange =
2673 Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range");
2674 Value *NextChunkStride =
2675 Builder.CreateLoad(InternalIVTy, PStride, "omp_dispatch.stride");
2676
2677 // Create outer "dispatch" loop for enumerating the chunks.
2678 BasicBlock *DispatchEnter = splitBB(Builder, true);
2679 Value *DispatchCounter;
2680 CanonicalLoopInfo *DispatchCLI = createCanonicalLoop(
2681 {Builder.saveIP(), DL},
2682 [&](InsertPointTy BodyIP, Value *Counter) { DispatchCounter = Counter; },
2683 FirstChunkStart, CastedTripCount, NextChunkStride,
2684 /*IsSigned=*/false, /*InclusiveStop=*/false, /*ComputeIP=*/{},
2685 "dispatch");
2686
2687 // Remember the BasicBlocks of the dispatch loop we need, then invalidate to
2688 // not have to preserve the canonical invariant.
2689 BasicBlock *DispatchBody = DispatchCLI->getBody();
2690 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
2691 BasicBlock *DispatchExit = DispatchCLI->getExit();
2692 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
2693 DispatchCLI->invalidate();
2694
2695 // Rewire the original loop to become the chunk loop inside the dispatch loop.
2696 redirectTo(DispatchAfter, CLI->getAfter(), DL);
2697 redirectTo(CLI->getExit(), DispatchLatch, DL);
2698 redirectTo(DispatchBody, DispatchEnter, DL);
2699
2700 // Prepare the prolog of the chunk loop.
2701 Builder.restoreIP(CLI->getPreheaderIP());
2702 Builder.SetCurrentDebugLocation(DL);
2703
2704 // Compute the number of iterations of the chunk loop.
2705 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2706 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
2707 Value *IsLastChunk =
2708 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount, "omp_chunk.is_last");
2709 Value *CountUntilOrigTripCount =
2710 Builder.CreateSub(CastedTripCount, DispatchCounter);
2711 Value *ChunkTripCount = Builder.CreateSelect(
2712 IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount");
2713 Value *BackcastedChunkTC =
2714 Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc");
2715 CLI->setTripCount(BackcastedChunkTC);
2716
2717 // Update all uses of the induction variable except the one in the condition
2718 // block that compares it with the actual upper bound, and the increment in
2719 // the latch block.
2720 Value *BackcastedDispatchCounter =
2721 Builder.CreateTrunc(DispatchCounter, IVTy, "omp_dispatch.iv.trunc");
2722 CLI->mapIndVar([&](Instruction *) -> Value * {
2723 Builder.restoreIP(CLI->getBodyIP());
2724 return Builder.CreateAdd(IV, BackcastedDispatchCounter);
2725 });
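// Conceptually, the rewired CFG now executes:
//   for (dispatch_iv = firstchunk.lb; dispatch_iv < tripcount;
//        dispatch_iv += stride)                  // enumerate chunks
//     for (iv = 0; iv < chunk.tripcount; ++iv)   // original, rewired loop
//       body(dispatch_iv + iv);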
2726
2727 // In the "exit" block, call the "fini" function.
2728 Builder.SetInsertPoint(DispatchExit, DispatchExit->getFirstInsertionPt());
2729 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
2730
2731 // Add the barrier if requested.
2732 if (NeedsBarrier)
2733 createBarrier(LocationDescription(Builder.saveIP(), DL), OMPD_for,
2734 /*ForceSimpleCall=*/false, /*CheckCancelFlag=*/false);
2735
2736#ifndef NDEBUG
2737 // Even though we currently do not support applying additional methods to it,
2738 // the chunk loop should remain a canonical loop.
2739 CLI->assertOK();
2740#endif
2741
2742 return {DispatchAfter, DispatchAfter->getFirstInsertionPt()};
2743}
2744
2745// Returns an LLVM function to call for executing an OpenMP static worksharing
2746// for loop depending on `type`. Only i32 and i64 are supported by the runtime.
2747// Always interpret integers as unsigned similarly to CanonicalLoopInfo.
2748static FunctionCallee
2749getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder,
2750 WorksharingLoopType LoopType) {
2751 unsigned Bitwidth = Ty->getIntegerBitWidth();
2752 Module &M = OMPBuilder->M;
2753 switch (LoopType) {
2754 case WorksharingLoopType::ForStaticLoop:
2755 if (Bitwidth == 32)
2756 return OMPBuilder->getOrCreateRuntimeFunction(
2757 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
2758 if (Bitwidth == 64)
2759 return OMPBuilder->getOrCreateRuntimeFunction(
2760 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
2761 break;
2762 case WorksharingLoopType::DistributeStaticLoop:
2763 if (Bitwidth == 32)
2764 return OMPBuilder->getOrCreateRuntimeFunction(
2765 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
2766 if (Bitwidth == 64)
2767 return OMPBuilder->getOrCreateRuntimeFunction(
2768 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
2769 break;
2770 case WorksharingLoopType::DistributeForStaticLoop:
2771 if (Bitwidth == 32)
2772 return OMPBuilder->getOrCreateRuntimeFunction(
2773 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
2774 if (Bitwidth == 64)
2775 return OMPBuilder->getOrCreateRuntimeFunction(
2776 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
2777 break;
2778 }
2779 if (Bitwidth != 32 && Bitwidth != 64) {
2780 llvm_unreachable("Unknown OpenMP loop iterator bitwidth");
2781 }
2782 llvm_unreachable("Unknown type of OpenMP worksharing loop");
2783}
2784
2785// Inserts a call to the proper OpenMP device RTL function which handles
2786// loop worksharing.
2787static void createTargetLoopWorkshareCall(
2788 OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType,
2789 BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg,
2790 Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn) {
2791 Type *TripCountTy = TripCount->getType();
2792 Module &M = OMPBuilder->M;
2793 IRBuilder<> &Builder = OMPBuilder->Builder;
2794 FunctionCallee RTLFn =
2795 getKmpcForStaticLoopForType(TripCountTy, OMPBuilder, LoopType);
2796 SmallVector<Value *, 8> RealArgs;
2797 RealArgs.push_back(Ident);
2798 RealArgs.push_back(Builder.CreateBitCast(&LoopBodyFn, ParallelTaskPtr));
2799 RealArgs.push_back(LoopBodyArg);
2800 RealArgs.push_back(TripCount);
2801 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
2802 RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
2803 Builder.CreateCall(RTLFn, RealArgs);
2804 return;
2805 }
2806 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
2807 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
2808 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
2809 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
2810
2811 RealArgs.push_back(
2812 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy, "num.threads.cast"));
2813 RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
2814 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
2815 RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
2816 }
2817
2818 Builder.CreateCall(RTLFn, RealArgs);
2819}
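// For a 32-bit distribute-for static loop, for example, the emitted call is
// roughly (sketch; argument order follows RealArgs above):
//   call void @__kmpc_distribute_for_static_loop_4u(ptr %ident, ptr %body,
//       ptr %args, i32 %tripcount, i32 %num_threads, i32 0, i32 0)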
2820
2821static void
2822workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder,
2823 CanonicalLoopInfo *CLI, Value *Ident,
2824 Function &OutlinedFn, Type *ParallelTaskPtr,
2825 const SmallVector<Instruction *, 4> &ToBeDeleted,
2826 WorksharingLoopType LoopType) {
2827 IRBuilder<> &Builder = OMPIRBuilder->Builder;
2828 BasicBlock *Preheader = CLI->getPreheader();
2829 Value *TripCount = CLI->getTripCount();
2830
2831 // After loop body outlining, the loop body contains only the setup of
2832 // the loop body argument structure and the call to the outlined loop
2833 // body function. First, we need to move the setup of the loop body args
2834 // into the loop preheader.
2835 Preheader->splice(std::prev(Preheader->end()), CLI->getBody(),
2836 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
2837
2838 // The next step is to remove the whole loop: we do not need it anymore.
2839 // That's why we create an unconditional branch from the loop preheader to
2840 // the loop exit block.
2841 Builder.restoreIP({Preheader, Preheader->end()});
2842 Preheader->getTerminator()->eraseFromParent();
2843 Builder.CreateBr(CLI->getExit());
2844
2845 // Delete dead loop blocks
2846 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
2847 SmallPtrSet<BasicBlock *, 32> RegionBlockSet;
2848 SmallVector<BasicBlock *, 32> BlocksToBeRemoved;
2849 CleanUpInfo.EntryBB = CLI->getHeader();
2850 CleanUpInfo.ExitBB = CLI->getExit();
2851 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
2852 DeleteDeadBlocks(BlocksToBeRemoved);
2853
2854 // Find the instruction which sets up the loop body argument structure,
2855 // and remove the call to the loop body function.
2856 Value *LoopBodyArg;
2857 User *OutlinedFnUser = OutlinedFn.getUniqueUndroppableUser();
2858 assert(OutlinedFnUser &&
2859 "Expected unique undroppable user of outlined function");
2860 CallInst *OutlinedFnCallInstruction = dyn_cast<CallInst>(OutlinedFnUser);
2861 assert(OutlinedFnCallInstruction && "Expected outlined function call");
2862 assert((OutlinedFnCallInstruction->getParent() == Preheader) &&
2863 "Expected outlined function call to be located in loop preheader");
2864 // Check in case no argument structure has been passed.
2865 if (OutlinedFnCallInstruction->arg_size() > 1)
2866 LoopBodyArg = OutlinedFnCallInstruction->getArgOperand(1);
2867 else
2868 LoopBodyArg = Constant::getNullValue(Builder.getPtrTy());
2869 OutlinedFnCallInstruction->eraseFromParent();
2870
2871 createTargetLoopWorkshareCall(OMPIRBuilder, LoopType, Preheader, Ident,
2872 LoopBodyArg, ParallelTaskPtr, TripCount,
2873 OutlinedFn);
2874
2875 for (auto &ToBeDeletedItem : ToBeDeleted)
2876 ToBeDeletedItem->eraseFromParent();
2877 CLI->invalidate();
2878}
2879
2880OpenMPIRBuilder::InsertPointTy
2881OpenMPIRBuilder::applyWorkshareLoopTarget(DebugLoc DL, CanonicalLoopInfo *CLI,
2882 InsertPointTy AllocaIP,
2883 WorksharingLoopType LoopType) {
2884 uint32_t SrcLocStrSize;
2885 Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
2886 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2887
2888 OutlineInfo OI;
2889 OI.OuterAllocaBB = CLI->getPreheader();
2890 Function *OuterFn = CLI->getPreheader()->getParent();
2891
2892 // Instructions which need to be deleted at the end of code generation
2893 SmallVector<Instruction *, 4> ToBeDeleted;
2894
2895 OI.OuterAllocaBB = AllocaIP.getBlock();
2896
2897 // Mark the body loop as region which needs to be extracted
2898 OI.EntryBB = CLI->getBody();
2899 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
2900 "omp.prelatch", true);
2901
2902 // Prepare loop body for extraction
2903 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
2904
2905 // Insert new loop counter variable which will be used only in loop
2906 // body.
2907 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0, "");
2908 Instruction *NewLoopCntLoad =
2909 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
2910 // New loop counter instructions are redundant in the loop preheader once
2911 // code generation for the workshare loop is finished. That's why we mark
2912 // them as ready for deletion.
2913 ToBeDeleted.push_back(NewLoopCntLoad);
2914 ToBeDeleted.push_back(NewLoopCnt);
2915
2916 // Analyse loop body region. Find all input variables which are used inside
2917 // loop body region.
2918 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
2919 SmallVector<BasicBlock *, 32> Blocks;
2920 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
2921 SmallVector<BasicBlock *, 32> BlocksT(ParallelRegionBlockSet.begin(),
2922 ParallelRegionBlockSet.end());
2923
2924 CodeExtractorAnalysisCache CEAC(*OuterFn);
2925 CodeExtractor Extractor(Blocks,
2926 /* DominatorTree */ nullptr,
2927 /* AggregateArgs */ true,
2928 /* BlockFrequencyInfo */ nullptr,
2929 /* BranchProbabilityInfo */ nullptr,
2930 /* AssumptionCache */ nullptr,
2931 /* AllowVarArgs */ true,
2932 /* AllowAlloca */ true,
2933 /* AllocationBlock */ CLI->getPreheader(),
2934 /* Suffix */ ".omp_wsloop",
2935 /* AggrArgsIn0AddrSpace */ true);
2936
2937 BasicBlock *CommonExit = nullptr;
2938 SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
2939
2940 // Find allocas outside the loop body region which are used inside loop
2941 // body
2942 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
2943
2944 // We need to model the loop body region as the function f(cnt, loop_arg).
2945 // That's why we replace the loop induction variable with the new counter,
2946 // which will be one of the loop body function's arguments.
2947 SmallVector<User *> Users(CLI->getIndVar()->user_begin(),
2948 CLI->getIndVar()->user_end());
2949 for (auto Use : Users) {
2950 if (Instruction *Inst = dyn_cast<Instruction>(Use)) {
2951 if (ParallelRegionBlockSet.count(Inst->getParent())) {
2952 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
2953 }
2954 }
2955 }
2956 // Make sure that the loop counter variable is not merged into the loop
2957 // body function argument structure; it is passed as a separate variable.
2958 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
2959
2960 // The PostOutline callback is invoked once the loop body function has been
2961 // outlined and the loop body replaced by a call to the outlined function.
2962 // We need to add a call to the OpenMP device RTL inside the loop preheader;
2963 // the OpenMP device RTL function will handle the loop control logic.
2964 //
2965 OI.PostOutlineCB = [=, ToBeDeletedVec =
2966 std::move(ToBeDeleted)](Function &OutlinedFn) {
2967 workshareLoopTargetCallback(this, CLI, Ident, OutlinedFn, ParallelTaskPtr,
2968 ToBeDeletedVec, LoopType);
2969 };
2970 addOutlineInfo(std::move(OI));
2971 return CLI->getAfterIP();
2972}
2973
2974OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
2975 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
2976 bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
2977 bool HasSimdModifier, bool HasMonotonicModifier,
2978 bool HasNonmonotonicModifier, bool HasOrderedClause,
2979 WorksharingLoopType LoopType) {
2980 if (Config.isTargetDevice())
2981 return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType);
2982 OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
2983 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
2984 HasNonmonotonicModifier, HasOrderedClause);
2985
2986 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
2987 OMPScheduleType::ModifierOrdered;
2988 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
2989 case OMPScheduleType::BaseStatic:
2990 assert(!ChunkSize && "No chunk size with static-chunked schedule");
2991 if (IsOrdered)
2992 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
2993 NeedsBarrier, ChunkSize);
2994 // FIXME: Monotonicity ignored?
2995 return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);
2996
2997 case OMPScheduleType::BaseStaticChunked:
2998 if (IsOrdered)
2999 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
3000 NeedsBarrier, ChunkSize);
3001 // FIXME: Monotonicity ignored?
3002 return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,
3003 ChunkSize);
3004
3005 case OMPScheduleType::BaseRuntime:
3006 case OMPScheduleType::BaseAuto:
3007 case OMPScheduleType::BaseGreedy:
3008 case OMPScheduleType::BaseBalanced:
3009 case OMPScheduleType::BaseSteal:
3010 case OMPScheduleType::BaseGuidedSimd:
3011 case OMPScheduleType::BaseRuntimeSimd:
3012 assert(!ChunkSize &&
3013 "schedule type does not support user-defined chunk sizes");
3014 [[fallthrough]];
3015 case OMPScheduleType::BaseDynamicChunked:
3016 case OMPScheduleType::BaseGuidedChunked:
3017 case OMPScheduleType::BaseGuidedIterativeChunked:
3018 case OMPScheduleType::BaseGuidedAnalyticalChunked:
3019 case OMPScheduleType::BaseStaticBalancedChunked:
3020 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
3021 NeedsBarrier, ChunkSize);
3022
3023 default:
3024 llvm_unreachable("Unknown/unimplemented schedule kind");
3025 }
3026}
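// For example, schedule(static) takes the BaseStatic path above, while
// schedule(dynamic, 4) reaches applyDynamicWorkshareLoop through
// BaseDynamicChunked with a ChunkSize of 4.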
3027
3028/// Returns an LLVM function to call for initializing loop bounds using OpenMP
3029/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
3030/// the runtime. Always interpret integers as unsigned similarly to
3031/// CanonicalLoopInfo.
3032static FunctionCallee
3033getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
3034 unsigned Bitwidth = Ty->getIntegerBitWidth();
3035 if (Bitwidth == 32)
3036 return OMPBuilder.getOrCreateRuntimeFunction(
3037 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
3038 if (Bitwidth == 64)
3039 return OMPBuilder.getOrCreateRuntimeFunction(
3040 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
3041 llvm_unreachable("unknown OpenMP loop iterator bitwidth");
3042}
3043
3044/// Returns an LLVM function to call for updating the next loop using OpenMP
3045/// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
3046/// the runtime. Always interpret integers as unsigned similarly to
3047/// CanonicalLoopInfo.
3048static FunctionCallee
3049getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
3050 unsigned Bitwidth = Ty->getIntegerBitWidth();
3051 if (Bitwidth == 32)
3052 return OMPBuilder.getOrCreateRuntimeFunction(
3053 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
3054 if (Bitwidth == 64)
3055 return OMPBuilder.getOrCreateRuntimeFunction(
3056 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
3057 llvm_unreachable("unknown OpenMP loop iterator bitwidth");
3058}
3059
3060/// Returns an LLVM function to call for finalizing the dynamic loop,
3061/// depending on `type`. Only i32 and i64 are supported by the runtime. Always
3062/// interpret integers as unsigned similarly to CanonicalLoopInfo.
3063static FunctionCallee
3064 getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
3065 unsigned Bitwidth = Ty->getIntegerBitWidth();
3066 if (Bitwidth == 32)
3067 return OMPBuilder.getOrCreateRuntimeFunction(
3068 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
3069 if (Bitwidth == 64)
3070 return OMPBuilder.getOrCreateRuntimeFunction(
3071 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
3072 llvm_unreachable("unknown OpenMP loop iterator bitwidth");
3073}
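// All three lookup helpers above follow the same convention: the runtime only
// exposes unsigned 32-bit and 64-bit entry points, selected by the bit width
// of the loop's induction-variable type. For example, an i32 IV maps to
// __kmpc_dispatch_init_4u/__kmpc_dispatch_next_4u/__kmpc_dispatch_fini_4u and
// an i64 IV to the _8u variants; any other width is a builder bug.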
3074
3075OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
3076 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
3077 OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
3078 assert(CLI->isValid() && "Requires a valid canonical loop");
3079 assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
3080 "Require dedicated allocate IP");
3082 "Require valid schedule type");
3083
3084 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
3085 OMPScheduleType::ModifierOrdered;
3086
3087 // Set up the source location value for OpenMP runtime.
3088 Builder.SetCurrentDebugLocation(DL);
3089
3090 uint32_t SrcLocStrSize;
3091 Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
3092 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3093
3094 // Declare useful OpenMP runtime functions.
3095 Value *IV = CLI->getIndVar();
3096 Type *IVTy = IV->getType();
3097 FunctionCallee DynamicInit = getKmpcForDynamicInitForType(IVTy, M, *this);
3098 FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);
3099
3100 // Allocate space for computed loop bounds as expected by the "init" function.
3101 Builder.restoreIP(AllocaIP);
3102 Type *I32Type = Type::getInt32Ty(M.getContext());
3103 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
3104 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
3105 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
3106 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
3107
3108 // At the end of the preheader, prepare for calling the "init" function by
3109 // storing the current loop bounds into the allocated space. A canonical loop
3110 // always iterates from 0 to trip-count with step 1. Note that "init" expects
3111 // and produces an inclusive upper bound.
3112 BasicBlock *PreHeader = CLI->getPreheader();
3113 Builder.SetInsertPoint(PreHeader->getTerminator());
3114 Constant *One = ConstantInt::get(IVTy, 1);
3115 Builder.CreateStore(One, PLowerBound);
3116 Value *UpperBound = CLI->getTripCount();
3117 Builder.CreateStore(UpperBound, PUpperBound);
3118 Builder.CreateStore(One, PStride);
3119
3120 BasicBlock *Header = CLI->getHeader();
3121 BasicBlock *Exit = CLI->getExit();
3122 BasicBlock *Cond = CLI->getCond();
3123 BasicBlock *Latch = CLI->getLatch();
3124 InsertPointTy AfterIP = CLI->getAfterIP();
3125
3126 // The CLI will be "broken" in the code below, as the loop is no longer
3127 // a valid canonical loop.
3128
3129 if (!Chunk)
3130 Chunk = One;
3131
3132 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
3133
3134 Constant *SchedulingType =
3135 ConstantInt::get(I32Type, static_cast<int>(SchedType));
3136
3137 // Call the "init" function.
3138 Builder.CreateCall(DynamicInit,
3139 {SrcLoc, ThreadNum, SchedulingType, /* LowerBound */ One,
3140 UpperBound, /* step */ One, Chunk});
3141
3142 // An outer loop around the existing one.
3143 BasicBlock *OuterCond = BasicBlock::Create(
3144 PreHeader->getContext(), Twine(PreHeader->getName()) + ".outer.cond",
3145 PreHeader->getParent());
3146 // This needs to be 32-bit always, so can't use the IVTy Zero above.
3147 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
3148 Value *Res =
3149 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
3150 PLowerBound, PUpperBound, PStride});
3151 Constant *Zero32 = ConstantInt::get(I32Type, 0);
3152 Value *MoreWork = Builder.CreateCmp(CmpInst::ICMP_NE, Res, Zero32);
3153 Value *LowerBound =
3154 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
3155 Builder.CreateCondBr(MoreWork, Header, Exit);
3156
3157 // Change PHI-node in loop header to use outer cond rather than preheader,
3158 // and set IV to the LowerBound.
3159 Instruction *Phi = &Header->front();
3160 auto *PI = cast<PHINode>(Phi);
3161 PI->setIncomingBlock(0, OuterCond);
3162 PI->setIncomingValue(0, LowerBound);
3163
3164 // Then set the pre-header to jump to the OuterCond
3165 Instruction *Term = PreHeader->getTerminator();
3166 auto *Br = cast<BranchInst>(Term);
3167 Br->setSuccessor(0, OuterCond);
3168
3169 // Modify the inner condition:
3170 // * Use the UpperBound returned from the DynamicNext call.
3171 // * Jump to the outer loop when done with one of the inner loops.
3172 Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
3173 UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
3174 Instruction *Comp = &*Builder.GetInsertPoint();
3175 auto *CI = cast<CmpInst>(Comp);
3176 CI->setOperand(1, UpperBound);
3177 // Redirect the inner exit to branch to outer condition.
3178 Instruction *Branch = &Cond->back();
3179 auto *BI = cast<BranchInst>(Branch);
3180 assert(BI->getSuccessor(1) == Exit);
3181 BI->setSuccessor(1, OuterCond);
3182
3183 // Call the "fini" function if "ordered" is present in wsloop directive.
3184 if (Ordered) {
3185 Builder.SetInsertPoint(&Latch->back());
3186 FunctionCallee DynamicFini = getKmpcForDynamicFiniForType(IVTy, M, *this);
3187 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
3188 }
3189
3190 // Add the barrier if requested.
3191 if (NeedsBarrier) {
3192 Builder.SetInsertPoint(&Exit->back());
3193 createBarrier(LocationDescription(Builder.saveIP(), DL),
3194 omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
3195 /* CheckCancelFlag */ false);
3196 }
3197
3198 CLI->invalidate();
3199 return AfterIP;
3200}
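// A rough sketch of the control flow built by applyDynamicWorkshareLoop
// (value names follow the ones created above; the original
// header/cond/body/latch blocks are reused in place):
//
//   preheader:   __kmpc_dispatch_init_*u(loc, tid, sched, 1, tripcount, 1, chunk)
//                br %outer.cond
//   outer.cond:  %res = __kmpc_dispatch_next_*u(loc, tid, %p.lastiter,
//                           %p.lowerbound, %p.upperbound, %p.stride)
//                %lb = sub(load %p.lowerbound, 1)
//                br (%res != 0) ? %header : %exit
//   header:      %iv = phi [%lb, %outer.cond], [%iv.next, %latch]
//   cond:        br (%iv < load %p.upperbound) ? %body : %outer.cond
//   body..latch: original loop body, branching back to %header, with a
//                __kmpc_dispatch_fini_*u call in the latch when "ordered".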
3201
3202/// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
3203/// after this \p OldTarget will be orphaned.
3204 static void redirectAllPredecessorsTo(BasicBlock *OldTarget,
3205 BasicBlock *NewTarget, DebugLoc DL) {
3206 for (BasicBlock *Pred : make_early_inc_range(predecessors(OldTarget)))
3207 redirectTo(Pred, NewTarget, DL);
3208}
3209
3210/// Determine which blocks in \p BBs are reachable from outside and remove the
3211/// ones that are not reachable from the function.
3212 static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
3213 SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
3214 auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
3215 for (Use &U : BB->uses()) {
3216 auto *UseInst = dyn_cast<Instruction>(U.getUser());
3217 if (!UseInst)
3218 continue;
3219 if (BBsToErase.count(UseInst->getParent()))
3220 continue;
3221 return true;
3222 }
3223 return false;
3224 };
3225
3226 while (true) {
3227 bool Changed = false;
3228 for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
3229 if (HasRemainingUses(BB)) {
3230 BBsToErase.erase(BB);
3231 Changed = true;
3232 }
3233 }
3234 if (!Changed)
3235 break;
3236 }
3237
3238 SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
3239 DeleteDeadBlocks(BBVec);
3240}
3241
3242 CanonicalLoopInfo *
3243 OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
3244 InsertPointTy ComputeIP) {
3245 assert(Loops.size() >= 1 && "At least one loop required");
3246 size_t NumLoops = Loops.size();
3247
3248 // Nothing to do if there is already just one loop.
3249 if (NumLoops == 1)
3250 return Loops.front();
3251
3252 CanonicalLoopInfo *Outermost = Loops.front();
3253 CanonicalLoopInfo *Innermost = Loops.back();
3254 BasicBlock *OrigPreheader = Outermost->getPreheader();
3255 BasicBlock *OrigAfter = Outermost->getAfter();
3256 Function *F = OrigPreheader->getParent();
3257
3258 // Loop control blocks that may become orphaned later.
3259 SmallVector<BasicBlock *, 12> OldControlBBs;
3260 OldControlBBs.reserve(6 * Loops.size());
3261 for (CanonicalLoopInfo *Loop : Loops)
3262 Loop->collectControlBlocks(OldControlBBs);
3263
3264 // Setup the IRBuilder for inserting the trip count computation.
3265 Builder.SetCurrentDebugLocation(DL);
3266 if (ComputeIP.isSet())
3267 Builder.restoreIP(ComputeIP);
3268 else
3269 Builder.restoreIP(Outermost->getPreheaderIP());
3270
3271 // Derive the collapsed loop's trip count.
3272 // TODO: Find common/largest indvar type.
3273 Value *CollapsedTripCount = nullptr;
3274 for (CanonicalLoopInfo *L : Loops) {
3275 assert(L->isValid() &&
3276 "All loops to collapse must be valid canonical loops");
3277 Value *OrigTripCount = L->getTripCount();
3278 if (!CollapsedTripCount) {
3279 CollapsedTripCount = OrigTripCount;
3280 continue;
3281 }
3282
3283 // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
3284 CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
3285 {}, /*HasNUW=*/true);
3286 }
3287
3288 // Create the collapsed loop control flow.
3289 CanonicalLoopInfo *Result =
3290 createLoopSkeleton(DL, CollapsedTripCount, F,
3291 OrigPreheader->getNextNode(), OrigAfter, "collapsed");
3292
3293 // Build the collapsed loop body code.
3294 // Start with deriving the input loop induction variables from the collapsed
3295 // one, using a divmod scheme. To preserve the original loops' order, the
3296 // innermost loop uses the least significant bits.
3297 Builder.restoreIP(Result->getBodyIP());
3298
3299 Value *Leftover = Result->getIndVar();
3300 SmallVector<Value *> NewIndVars;
3301 NewIndVars.resize(NumLoops);
3302 for (int i = NumLoops - 1; i >= 1; --i) {
3303 Value *OrigTripCount = Loops[i]->getTripCount();
3304
3305 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
3306 NewIndVars[i] = NewIndVar;
3307
3308 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
3309 }
3310 // Outermost loop gets all the remaining bits.
3311 NewIndVars[0] = Leftover;
3312
3313 // Construct the loop body control flow.
3314 // We progressively construct the branch structure following the direction of
3315 // the control flow: first the leading in-between code, then the loop nest body,
3316 // then the trailing in-between code, finally rejoining the collapsed loop's latch.
3317 // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
3318 // the ContinueBlock is set, continue with that block. If ContinuePred, use
3319 // its predecessors as sources.
3320 BasicBlock *ContinueBlock = Result->getBody();
3321 BasicBlock *ContinuePred = nullptr;
3322 auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
3323 BasicBlock *NextSrc) {
3324 if (ContinueBlock)
3325 redirectTo(ContinueBlock, Dest, DL);
3326 else
3327 redirectAllPredecessorsTo(ContinuePred, Dest, DL);
3328
3329 ContinueBlock = nullptr;
3330 ContinuePred = NextSrc;
3331 };
3332
3333 // The code before the nested loop of each level.
3334 // Because we are sinking it into the nest, it will be executed more often
3335 // than in the original loop. More sophisticated schemes could keep track of what
3336 // the in-between code is and instantiate it only once per thread.
3337 for (size_t i = 0; i < NumLoops - 1; ++i)
3338 ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
3339
3340 // Connect the loop nest body.
3341 ContinueWith(Innermost->getBody(), Innermost->getLatch());
3342
3343 // The code after the nested loop at each level.
3344 for (size_t i = NumLoops - 1; i > 0; --i)
3345 ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
3346
3347 // Connect the finished loop to the collapsed loop latch.
3348 ContinueWith(Result->getLatch(), nullptr);
3349
3350 // Replace the input loops with the new collapsed loop.
3351 redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
3352 redirectTo(Result->getAfter(), Outermost->getAfter(), DL);
3353
3354 // Replace the input loop indvars with the derived ones.
3355 for (size_t i = 0; i < NumLoops; ++i)
3356 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
3357
3358 // Remove unused parts of the input loops.
3359 removeUnusedBlocksFromParent(OldControlBBs);
3360
3361 for (CanonicalLoopInfo *L : Loops)
3362 L->invalidate();
3363
3364#ifndef NDEBUG
3365 Result->assertOK();
3366#endif
3367 return Result;
3368}
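// Example of the divmod scheme for a two-loop nest with trip counts %tc0
// (outer) and %tc1 (inner): the collapsed loop runs %tc0 * %tc1 iterations,
// and inside its body the original induction variables are recovered as
//   %iv1 = urem %iv, %tc1   ; innermost, least significant part
//   %iv0 = udiv %iv, %tc1   ; outermost, the remaining bits
// before every use of the old induction variables is replaced.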
3369
3370std::vector<CanonicalLoopInfo *>
3371 OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
3372 ArrayRef<Value *> TileSizes) {
3373 assert(TileSizes.size() == Loops.size() &&
3374 "Must pass as many tile sizes as there are loops");
3375 int NumLoops = Loops.size();
3376 assert(NumLoops >= 1 && "At least one loop to tile required");
3377
3378 CanonicalLoopInfo *OutermostLoop = Loops.front();
3379 CanonicalLoopInfo *InnermostLoop = Loops.back();
3380 Function *F = OutermostLoop->getBody()->getParent();
3381 BasicBlock *InnerEnter = InnermostLoop->getBody();
3382 BasicBlock *InnerLatch = InnermostLoop->getLatch();
3383
3384 // Loop control blocks that may become orphaned later.
3385 SmallVector<BasicBlock *, 12> OldControlBBs;
3386 OldControlBBs.reserve(6 * Loops.size());
3387 for (CanonicalLoopInfo *Loop : Loops)
3388 Loop->collectControlBlocks(OldControlBBs);
3389
3390 // Collect original trip counts and induction variables to be accessible by
3391 // index. Also, the structure of the original loops is not preserved during
3392 // the construction of the tiled loops, so do it before we scavenge the BBs of
3393 // any original CanonicalLoopInfo.
3394 SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
3395 for (CanonicalLoopInfo *L : Loops) {
3396 assert(L->isValid() && "All input loops must be valid canonical loops");
3397 OrigTripCounts.push_back(L->getTripCount());
3398 OrigIndVars.push_back(L->getIndVar());
3399 }
3400
3401 // Collect the code between loop headers. These may contain SSA definitions
3402 // that are used in the loop nest body. To be usable within the innermost
3403 // body, these BasicBlocks will be sunk into the loop nest body. That is,
3404 // these instructions may be executed more often than before the tiling.
3405 // TODO: It would be sufficient to only sink them into body of the
3406 // corresponding tile loop.
3407 SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
3408 for (int i = 0; i < NumLoops - 1; ++i) {
3409 CanonicalLoopInfo *Surrounding = Loops[i];
3410 CanonicalLoopInfo *Nested = Loops[i + 1];
3411
3412 BasicBlock *EnterBB = Surrounding->getBody();
3413 BasicBlock *ExitBB = Nested->getHeader();
3414 InbetweenCode.emplace_back(EnterBB, ExitBB);
3415 }
3416
3417 // Compute the trip counts of the floor loops.
3418 Builder.SetCurrentDebugLocation(DL);
3419 Builder.restoreIP(OutermostLoop->getPreheaderIP());
3420 SmallVector<Value *, 4> FloorCount, FloorRems;
3421 for (int i = 0; i < NumLoops; ++i) {
3422 Value *TileSize = TileSizes[i];
3423 Value *OrigTripCount = OrigTripCounts[i];
3424 Type *IVType = OrigTripCount->getType();
3425
3426 Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
3427 Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);
3428
3429 // 0 if the tilesize divides the tripcount, 1 otherwise.
3430 // 1 means we need an additional iteration for a partial tile.
3431 //
3432 // Unfortunately we cannot just use the roundup-formula
3433 // (tripcount + tilesize - 1)/tilesize
3434 // because the summation might overflow. We do not want to introduce undefined
3435 // behavior when the untiled loop nest did not.
3436 Value *FloorTripOverflow =
3437 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
3438
3439 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
3440 FloorTripCount =
3441 Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
3442 "omp_floor" + Twine(i) + ".tripcount", true);
3443
3444 // Remember some values for later use.
3445 FloorCount.push_back(FloorTripCount);
3446 FloorRems.push_back(FloorTripRem);
3447 }
3448
3449 // Generate the new loop nest, from the outermost to the innermost.
3450 std::vector<CanonicalLoopInfo *> Result;
3451 Result.reserve(NumLoops * 2);
3452
3453 // The basic block of the surrounding loop that enters the newly generated
3454 // loop nest.
3455 BasicBlock *Enter = OutermostLoop->getPreheader();
3456
3457 // The basic block of the surrounding loop where the inner code should
3458 // continue.
3459 BasicBlock *Continue = OutermostLoop->getAfter();
3460
3461 // Where the next loop basic block should be inserted.
3462 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
3463
3464 auto EmbeddNewLoop =
3465 [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
3466 Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
3467 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
3468 DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
3469 redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
3470 redirectTo(EmbeddedLoop->getAfter(), Continue, DL);
3471
3472 // Setup the position where the next embedded loop connects to this loop.
3473 Enter = EmbeddedLoop->getBody();
3474 Continue = EmbeddedLoop->getLatch();
3475 OutroInsertBefore = EmbeddedLoop->getLatch();
3476 return EmbeddedLoop;
3477 };
3478
3479 auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
3480 const Twine &NameBase) {
3481 for (auto P : enumerate(TripCounts)) {
3482 CanonicalLoopInfo *EmbeddedLoop =
3483 EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
3484 Result.push_back(EmbeddedLoop);
3485 }
3486 };
3487
3488 EmbeddNewLoops(FloorCount, "floor");
3489
3490 // Within the innermost floor loop, emit the code that computes the tile
3491 // sizes.
3492 Builder.SetInsertPoint(Enter->getTerminator());
3493 SmallVector<Value *, 4> TileCounts;
3494 for (int i = 0; i < NumLoops; ++i) {
3495 CanonicalLoopInfo *FloorLoop = Result[i];
3496 Value *TileSize = TileSizes[i];
3497
3498 Value *FloorIsEpilogue =
3499 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
3500 Value *TileTripCount =
3501 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
3502
3503 TileCounts.push_back(TileTripCount);
3504 }
3505
3506 // Create the tile loops.
3507 EmbeddNewLoops(TileCounts, "tile");
3508
3509 // Insert the inbetween code into the body.
3510 BasicBlock *BodyEnter = Enter;
3511 BasicBlock *BodyEntered = nullptr;
3512 for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
3513 BasicBlock *EnterBB = P.first;
3514 BasicBlock *ExitBB = P.second;
3515
3516 if (BodyEnter)
3517 redirectTo(BodyEnter, EnterBB, DL);
3518 else
3519 redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);
3520
3521 BodyEnter = nullptr;
3522 BodyEntered = ExitBB;
3523 }
3524
3525 // Append the original loop nest body into the generated loop nest body.
3526 if (BodyEnter)
3527 redirectTo(BodyEnter, InnerEnter, DL);
3528 else
3529 redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
3530 redirectTo(InnerLatch, Continue, DL);
3531
3532 // Replace the original induction variable with an induction variable computed
3533 // from the tile and floor induction variables.
3534 Builder.restoreIP(Result.back()->getBodyIP());
3535 for (int i = 0; i < NumLoops; ++i) {
3536 CanonicalLoopInfo *FloorLoop = Result[i];
3537 CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
3538 Value *OrigIndVar = OrigIndVars[i];
3539 Value *Size = TileSizes[i];
3540
3541 Value *Scale =
3542 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
3543 Value *Shift =
3544 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
3545 OrigIndVar->replaceAllUsesWith(Shift);
3546 }
3547
3548 // Remove unused parts of the original loops.
3549 removeUnusedBlocksFromParent(OldControlBBs);
3550
3551 for (CanonicalLoopInfo *L : Loops)
3552 L->invalidate();
3553
3554#ifndef NDEBUG
3555 for (CanonicalLoopInfo *GenL : Result)
3556 GenL->assertOK();
3557#endif
3558 return Result;
3559}
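// Example: tiling one canonical loop of trip count %tc with tile size %ts
// yields a two-loop nest
//   floor loop: trip count %tc udiv %ts, plus 1 if %tc urem %ts != 0
//   tile loop:  trip count %ts for full tiles, %tc urem %ts for the final
//               partial tile (selected per floor iteration above)
// and the original induction variable is rebuilt in the innermost body as
// %ts * %floor.iv + %tile.iv, with both operations known not to wrap.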
3560
3561/// Attach metadata \p Properties to the basic block described by \p BB. If the
3562/// basic block already has metadata, the basic block properties are appended.
3563 static void addBasicBlockMetadata(BasicBlock *BB,
3564 ArrayRef<Metadata *> Properties) {
3565 // Nothing to do if no property to attach.
3566 if (Properties.empty())
3567 return;
3568
3569 LLVMContext &Ctx = BB->getContext();
3570 SmallVector<Metadata *> NewProperties;
3571 NewProperties.push_back(nullptr);
3572
3573 // If the basic block already has metadata, prepend it to the new metadata.
3574 MDNode *Existing = BB->getTerminator()->getMetadata(LLVMContext::MD_loop);
3575 if (Existing)
3576 append_range(NewProperties, drop_begin(Existing->operands(), 1));
3577
3578 append_range(NewProperties, Properties);
3579 MDNode *BasicBlockID = MDNode::getDistinct(Ctx, NewProperties);
3580 BasicBlockID->replaceOperandWith(0, BasicBlockID);
3581
3582 BB->getTerminator()->setMetadata(LLVMContext::MD_loop, BasicBlockID);
3583}
3584
3585/// Attach loop metadata \p Properties to the loop described by \p Loop. If the
3586/// loop already has metadata, the loop properties are appended.
3587 static void addLoopMetadata(CanonicalLoopInfo *Loop,
3588 ArrayRef<Metadata *> Properties) {
3589 assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
3590
3591 // Attach metadata to the loop's latch
3592 BasicBlock *Latch = Loop->getLatch();
3593 assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
3594 addBasicBlockMetadata(Latch, Properties);
3595}
3596
3597/// Attach llvm.access.group metadata to the memref instructions of \p Block
3598static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup,
3599 LoopInfo &LI) {
3600 for (Instruction &I : *Block) {
3601 if (I.mayReadOrWriteMemory()) {
3602 // TODO: This instruction may already have an access group from
3603 // other pragmas e.g. #pragma clang loop vectorize. Append
3604 // so that the existing metadata is not overwritten.
3605 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
3606 }
3607 }
3608}
3609
3610 void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
3611 LLVMContext &Ctx = Builder.getContext();
3612 addLoopMetadata(
3613 Loop, {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
3614 MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full"))});
3615}
3616
3617 void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
3618 LLVMContext &Ctx = Builder.getContext();
3619 addLoopMetadata(
3620 Loop, {
3621 MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
3622 });
3623}
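// Note that unrollLoopFull and unrollLoopHeuristic only annotate the loop;
// the actual transformation is left to the LoopUnrollPass. After
// unrollLoopFull, the latch terminator carries metadata of roughly this shape
// (the !N numbering is illustrative):
//   br i1 %cond, label %body, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.unroll.enable"}
//   !2 = !{!"llvm.loop.unroll.full"}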
3624
3625void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
3626 Value *IfCond, ValueToValueMapTy &VMap,
3627 const Twine &NamePrefix) {
3628 Function *F = CanonicalLoop->getFunction();
3629
3630 // Define where if branch should be inserted
3631 Instruction *SplitBefore;
3632 if (Instruction::classof(IfCond)) {
3633 SplitBefore = dyn_cast<Instruction>(IfCond);
3634 } else {
3635 SplitBefore = CanonicalLoop->getPreheader()->getTerminator();
3636 }
3637
3638 // TODO: We should not rely on pass manager. Currently we use pass manager
3639 // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo
3640 // object. We should have a method which returns all blocks between
3641 // CanonicalLoopInfo::getHeader() and CanonicalLoopInfo::getAfter()
3642 FunctionAnalysisManager FAM;
3643 FAM.registerPass([]() { return DominatorTreeAnalysis(); });
3644 FAM.registerPass([]() { return LoopAnalysis(); });
3645 FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
3646
3647 // Get the loop which needs to be cloned
3648 LoopAnalysis LIA;
3649 LoopInfo &&LI = LIA.run(*F, FAM);
3650 Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
3651
3652 // Create additional blocks for the if statement
3653 BasicBlock *Head = SplitBefore->getParent();
3654 Instruction *HeadOldTerm = Head->getTerminator();
3655 llvm::LLVMContext &C = Head->getContext();
3656 BasicBlock *ThenBlock = BasicBlock::Create(
3657 C, NamePrefix + ".if.then", Head->getParent(), Head->getNextNode());
3658 BasicBlock *ElseBlock = BasicBlock::Create(
3659 C, NamePrefix + ".if.else", Head->getParent(), CanonicalLoop->getExit());
3660
3661 // Create if condition branch.
3662 Builder.SetInsertPoint(HeadOldTerm);
3663 Instruction *BrInstr =
3664 Builder.CreateCondBr(IfCond, ThenBlock, /*ifFalse*/ ElseBlock);
3665 InsertPointTy IP{BrInstr->getParent(), ++BrInstr->getIterator()};
3666 // Then block contains branch to omp loop which needs to be vectorized
3667 spliceBB(IP, ThenBlock, false);
3668 ThenBlock->replaceSuccessorsPhiUsesWith(Head, ThenBlock);
3669
3670 Builder.SetInsertPoint(ElseBlock);
3671
3672 // Clone loop for the else branch
3673 SmallVector<BasicBlock *, 8> NewBlocks;
3674
3675 VMap[CanonicalLoop->getPreheader()] = ElseBlock;
3676 for (BasicBlock *Block : L->getBlocks()) {
3677 BasicBlock *NewBB = CloneBasicBlock(Block, VMap, "", F);
3678 NewBB->moveBefore(CanonicalLoop->getExit());
3679 VMap[Block] = NewBB;
3680 NewBlocks.push_back(NewBB);
3681 }
3682 remapInstructionsInBlocks(NewBlocks, VMap);
3683 Builder.CreateBr(NewBlocks.front());
3684}
3685
3686 unsigned
3687 OpenMPIRBuilder::getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
3688 const StringMap<bool> &Features) {
3689 if (TargetTriple.isX86()) {
3690 if (Features.lookup("avx512f"))
3691 return 512;
3692 else if (Features.lookup("avx"))
3693 return 256;
3694 return 128;
3695 }
3696 if (TargetTriple.isPPC())
3697 return 128;
3698 if (TargetTriple.isWasm())
3699 return 128;
3700 return 0;
3701}
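// The returned value is a vector-register bit width, e.g. 256 for an x86
// target whose "target-features" include avx but not avx512f; 0 means no
// target-specific default is known. Frontends are expected to translate this
// into the default alignment (in bytes: width / 8) for OpenMP aligned/simd
// variables.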
3702
3703 void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
3704 MapVector<Value *, Value *> AlignedVars,
3705 Value *IfCond, OrderKind Order,
3706 ConstantInt *Simdlen, ConstantInt *Safelen) {
3707 LLVMContext &Ctx = Builder.getContext();
3708
3709 Function *F = CanonicalLoop->getFunction();
3710
3711 // TODO: We should not rely on pass manager. Currently we use pass manager
3712 // only for getting llvm::Loop which corresponds to given CanonicalLoopInfo
3713 // object. We should have a method which returns all blocks between
3714 // CanonicalLoopInfo::getHeader() and CanonicalLoopInfo::getAfter()
3715 FunctionAnalysisManager FAM;
3716 FAM.registerPass([]() { return DominatorTreeAnalysis(); });
3717 FAM.registerPass([]() { return LoopAnalysis(); });
3718 FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
3719
3720 LoopAnalysis LIA;
3721 LoopInfo &&LI = LIA.run(*F, FAM);
3722
3723 Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
3724 if (AlignedVars.size()) {
3725 InsertPointTy IP = Builder.saveIP();
3726 Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator());
3727 for (auto &AlignedItem : AlignedVars) {
3728 Value *AlignedPtr = AlignedItem.first;
3729 Value *Alignment = AlignedItem.second;
3730 Builder.CreateAlignmentAssumption(F->getParent()->getDataLayout(),
3731 AlignedPtr, Alignment);
3732 }
3733 Builder.restoreIP(IP);
3734 }
3735
3736 if (IfCond) {
3737 ValueToValueMapTy VMap;
3738 createIfVersion(CanonicalLoop, IfCond, VMap, "simd");
3739 // Add metadata to the cloned loop which disables vectorization
3740 Value *MappedLatch = VMap.lookup(CanonicalLoop->getLatch());
3741 assert(MappedLatch &&
3742 "Cannot find value which corresponds to original loop latch");
3743 assert(isa<BasicBlock>(MappedLatch) &&
3744 "Cannot cast mapped latch block value to BasicBlock");
3745 BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);
3747 ConstantAsMetadata *BoolConst =
3748 ConstantAsMetadata::get(ConstantInt::getFalse(Type::getInt1Ty(Ctx)));
3749 addBasicBlockMetadata(NewLatchBlock,
3750 {MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"),
3751 BoolConst})});
3752 }
3753
3754 SmallSet<BasicBlock *, 8> Reachable;
3755
3756 // Get the basic blocks from the loop in which memref instructions
3757 // can be found.
3758 // TODO: Generalize getting all blocks inside a CanonicalizeLoopInfo,
3759 // preferably without running any passes.
3760 for (BasicBlock *Block : L->getBlocks()) {
3761 if (Block == CanonicalLoop->getCond() ||
3762 Block == CanonicalLoop->getHeader())
3763 continue;
3764 Reachable.insert(Block);
3765 }
3766
3767 SmallVector<Metadata *> LoopMDList;
3768
3769 // In the presence of a finite 'safelen', it may be unsafe to mark all
3770 // the memory instructions parallel, because loop-carried
3771 // dependences of 'safelen' iterations are possible.
3772 // If clause order(concurrent) is specified then the memory instructions
3773 // are marked parallel even if 'safelen' is finite.
3774 if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
3775 // Add access group metadata to memory-access instructions.
3776 MDNode *AccessGroup = MDNode::getDistinct(Ctx, {});
3777 for (BasicBlock *BB : Reachable)
3778 addSimdMetadata(BB, AccessGroup, LI);
3779 // TODO: If the loop has existing parallel access metadata, have
3780 // to combine two lists.
3781 LoopMDList.push_back(MDNode::get(
3782 Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup}));
3783 }
3784
3785 // Use the above access group metadata to create loop level
3786 // metadata, which should be distinct for each loop.
3787 ConstantAsMetadata *BoolConst =
3788 ConstantAsMetadata::get(ConstantInt::getTrue(Type::getInt1Ty(Ctx)));
3789 LoopMDList.push_back(MDNode::get(
3790 Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst}));
3791
3792 if (Simdlen || Safelen) {
3793 // If both simdlen and safelen clauses are specified, the value of the
3794 // simdlen parameter must be less than or equal to the value of the safelen
3795 // parameter. Therefore, use safelen only in the absence of simdlen.
3796 ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen;
3797 LoopMDList.push_back(
3798 MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.width"),
3799 ConstantAsMetadata::get(VectorizeWidth)}));
3800 }
3801
3802 addLoopMetadata(CanonicalLoop, LoopMDList);
3803}
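// Taken together, applySimd tags the loop with metadata of roughly this shape,
// assuming order(concurrent) or no finite safelen, plus simdlen(8); the !N
// numbering is illustrative:
//   !0 = distinct !{!0, !1, !2, !3}   ; attached to the latch as !llvm.loop
//   !1 = !{!"llvm.loop.parallel_accesses", !4}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}
//   !3 = !{!"llvm.loop.vectorize.width", i32 8}
//   !4 = distinct !{}                 ; access group on the memory accesses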
3804
3805/// Create the TargetMachine object to query the backend for optimization
3806/// preferences.
3807///
3808/// Ideally, this would be passed from the front-end to the OpenMPBuilder, but
3809/// e.g. Clang does not pass it to its CodeGen layer and creates it only when
3810 /// needed for the LLVM pass pipeline. We use some default options to avoid
3811/// having to pass too many settings from the frontend that probably do not
3812/// matter.
3813///
3814/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial
3815/// method. If we are going to use TargetMachine for more purposes, especially
3816/// those that are sensitive to TargetOptions, RelocModel and CodeModel, it
3817 /// might become worth requiring front-ends to pass on their TargetMachine,
3818 /// or at least cache it between methods. Note that while frontends such as Clang
3819/// have just a single main TargetMachine per translation unit, "target-cpu" and
3820/// "target-features" that determine the TargetMachine are per-function and can
3821 /// be overridden using __attribute__((target("OPTIONS"))).
3822static std::unique_ptr<TargetMachine>
3823 createTargetMachine(Function *F, CodeGenOptLevel OptLevel) {
3824 Module *M = F->getParent();
3825
3826 StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
3827 StringRef Features = F->getFnAttribute("target-features").getValueAsString();
3828 const std::string &Triple = M->getTargetTriple();
3829
3830 std::string Error;
3831 const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
3832 if (!TheTarget)
3833 return {};
3834
3835 llvm::TargetOptions Options;
3836 return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
3837 Triple, CPU, Features, Options, /*RelocModel=*/std::nullopt,
3838 /*CodeModel=*/std::nullopt, OptLevel));
3839}
3840
3841 /// Heuristically determine the best-performing unroll factor for \p CLI. This
3842/// depends on the target processor. We are re-using the same heuristics as the
3843/// LoopUnrollPass.
3844 static unsigned computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI) {
3845 Function *F = CLI->getFunction();
3846
3847 // Assume the user requests the most aggressive unrolling, even if the rest of
3848 // the code is optimized using a lower setting.
3849 CodeGenOptLevel OptLevel = CodeGenOptLevel::Aggressive;
3850 std::unique_ptr<TargetMachine> TM = createTargetMachine(F, OptLevel);
3851
3852 FunctionAnalysisManager FAM;
3853 FAM.registerPass([]() { return TargetLibraryAnalysis(); });
3854 FAM.registerPass([]() { return AssumptionAnalysis(); });
3855 FAM.registerPass([]() { return DominatorTreeAnalysis(); });
3856 FAM.registerPass([]() { return LoopAnalysis(); });
3857 FAM.registerPass([]() { return ScalarEvolutionAnalysis(); });
3858 FAM.registerPass([]() { return PassInstrumentationAnalysis(); });
3859 TargetIRAnalysis TIRA;
3860 if (TM)
3861 TIRA = TargetIRAnalysis(
3862 [&](const Function &F) { return TM->getTargetTransformInfo(F); });
3863 FAM.registerPass([&]() { return TIRA; });
3864
3865 TargetIRAnalysis::Result &&TTI = TIRA.run(*F, FAM);
3866 ScalarEvolutionAnalysis SEA;
3867 ScalarEvolution &&SE = SEA.run(*F, FAM);
3868 DominatorTreeAnalysis DTA;
3869 DominatorTree &&DT = DTA.run(*F, FAM);
3870 LoopAnalysis LIA;
3871 LoopInfo &&LI = LIA.run(*F, FAM);
3872 AssumptionAnalysis ACT;
3873 AssumptionCache &&AC = ACT.run(*F, FAM);
3874 OptimizationRemarkEmitter ORE{F};
3875
3876 Loop *L = LI.getLoopFor(CLI->getHeader());
3877 assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
3878
3879 TargetTransformInfo::UnrollingPreferences UP =
3880 gatherUnrollingPreferences(L, SE, TTI,
3881 /*BlockFrequencyInfo=*/nullptr,
3882 /*ProfileSummaryInfo=*/nullptr, ORE, static_cast<int>(OptLevel),
3883 /*UserThreshold=*/std::nullopt,
3884 /*UserCount=*/std::nullopt,
3885 /*UserAllowPartial=*/true,
3886 /*UserAllowRuntime=*/true,
3887 /*UserUpperBound=*/std::nullopt,
3888 /*UserFullUnrollMaxCount=*/std::nullopt);
3889
3890 UP.Force = true;
3891
3892 // Account for additional optimizations taking place before the LoopUnrollPass
3893 // would unroll the loop.
3894 UP.Threshold *= UnrollThresholdFactor;
3895 UP.PartialThreshold *= UnrollThresholdFactor;
3896
3897 // Use normal unroll factors even if the rest of the code is optimized for
3898 // size.
3899 UP.OptSizeThreshold = UP.Threshold;
3900 UP.PartialOptSizeThreshold = UP.PartialThreshold;
3901
3902 LLVM_DEBUG(dbgs() << "Unroll heuristic thresholds:\n"
3903 << " Threshold=" << UP.Threshold << "\n"
3904 << " PartialThreshold=" << UP.PartialThreshold << "\n"
3905 << " OptSizeThreshold=" << UP.OptSizeThreshold << "\n"
3906 << " PartialOptSizeThreshold="
3907 << UP.PartialOptSizeThreshold << "\n");
3908
3909 // Disable peeling.
3910 TargetTransformInfo::PeelingPreferences PP =
3911 gatherPeelingPreferences(L, SE, TTI,
3912 /*UserAllowPeeling=*/false,
3913 /*UserAllowProfileBasedPeeling=*/false,
3914 /*UnrollingSpecficValues=*/false);
3915
3916 SmallPtrSet<const Value *, 32> EphValues;
3917 CodeMetrics::collectEphemeralValues(L, &AC, EphValues);
3918
3919 // Assume that reads and writes to stack variables can be eliminated by
3920 // Mem2Reg, SROA or LICM. That is, don't count them towards the loop body's
3921 // size.
3922 for (BasicBlock *BB : L->blocks()) {
3923 for (Instruction &I : *BB) {
3924 Value *Ptr;
3925 if (auto *Load = dyn_cast<LoadInst>(&I)) {
3926 Ptr = Load->getPointerOperand();
3927 } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
3928 Ptr = Store->getPointerOperand();
3929 } else
3930 continue;
3931
3932 Ptr = Ptr->stripPointerCasts();
3933
3934 if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
3935 if (Alloca->getParent() == &F->getEntryBlock())
3936 EphValues.insert(&I);
3937 }
3938 }
3939 }
3940
3941 UnrollCostEstimator UCE(L, TTI, EphValues, UP.BEInsns);
3942
3943 // Loop is not unrollable if the loop contains certain instructions.
3944 if (!UCE.canUnroll() || UCE.Convergent) {
3945 LLVM_DEBUG(dbgs() << "Loop not considered unrollable\n");
3946 return 1;
3947 }
3948
3949 LLVM_DEBUG(dbgs() << "Estimated loop size is " << UCE.getRolledLoopSize()
3950 << "\n");
3951
3952 // TODO: Determine trip count of \p CLI if constant, computeUnrollCount might
3953 // be able to use it.
3954 int TripCount = 0;
3955 int MaxTripCount = 0;
3956 bool MaxOrZero = false;
3957 unsigned TripMultiple = 0;
3958
3959 bool UseUpperBound = false;
3960 computeUnrollCount(L, TTI, DT, &LI, &AC, SE, EphValues, &ORE, TripCount,
3961 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
3962 UseUpperBound);
3963 unsigned Factor = UP.Count;
3964 LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
3965
3966 // This function returns 1 to signal to not unroll a loop.
3967 if (Factor == 0)
3968 return 1;
3969 return Factor;
3970}
3971
3972 void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
3973 int32_t Factor,
3974 CanonicalLoopInfo **UnrolledCLI) {
3975 assert(Factor >= 0 && "Unroll factor must not be negative");
3976
3977 Function *F = Loop->getFunction();
3978 LLVMContext &Ctx = F->getContext();
3979
3980 // If the unrolled loop is not used for another loop-associated directive, it
3981 // is sufficient to add metadata for the LoopUnrollPass.
3982 if (!UnrolledCLI) {
3983 SmallVector<Metadata *, 2> LoopMetadata;
3984 LoopMetadata.push_back(
3985 MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")));
3986
3987 if (Factor >= 1) {
3988 ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
3989 ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
3990 LoopMetadata.push_back(MDNode::get(
3991 Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
3992 }
3993
3994 addLoopMetadata(Loop, LoopMetadata);
3995 return;
3996 }
3997
3998 // Heuristically determine the unroll factor.
3999 if (Factor == 0)
4000 Factor = computeHeuristicUnrollFactor(Loop);
4001
4002 // No change required with unroll factor 1.
4003 if (Factor == 1) {
4004 *UnrolledCLI = Loop;
4005 return;
4006 }
4007
4008 assert(Factor >= 2 &&
4009 "unrolling only makes sense with a factor of 2 or larger");
4010
4011 Type *IndVarTy = Loop->getIndVarType();
4012
4013 // Apply partial unrolling by tiling the loop by the unroll-factor, then fully
4014 // unroll the inner loop.
4015 Value *FactorVal =
4016 ConstantInt::get(IndVarTy, APInt(IndVarTy->getIntegerBitWidth(), Factor,
4017 /*isSigned=*/false));
4018 std::vector<CanonicalLoopInfo *> LoopNest =
4019 tileLoops(DL, {Loop}, {FactorVal});
4020 assert(LoopNest.size() == 2 && "Expect 2 loops after tiling");
4021 *UnrolledCLI = LoopNest[0];
4022 CanonicalLoopInfo *InnerLoop = LoopNest[1];
4023
4024 // LoopUnrollPass can only fully unroll loops with constant trip count.
4025 // Unroll by the unroll factor with a fallback epilog for the remainder
4026 // iterations if necessary.
4027 ConstantAsMetadata *FactorConst = ConstantAsMetadata::get(
4028 ConstantInt::get(Type::getInt32Ty(Ctx), APInt(32, Factor)));
4029 addLoopMetadata(
4030 InnerLoop,
4031 {MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")),
4032 MDNode::get(
4033 Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
4034
4035#ifndef NDEBUG
4036 (*UnrolledCLI)->assertOK();
4037#endif
4038}
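// Example: with Factor == 4 the loop is tiled by {4}; *UnrolledCLI becomes the
// outer (floor) loop so that further loop-associated directives can be applied
// to it, and the inner 4-iteration tile loop is tagged with
// llvm.loop.unroll.enable plus llvm.loop.unroll.count 4 for the LoopUnrollPass
// to unroll fully. Remainder iterations need no separate epilog because the
// tile loop's trip count already shrinks for the final partial tile.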
4039
4040 OpenMPIRBuilder::InsertPointTy
4041 OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
4042 llvm::Value *BufSize, llvm::Value *CpyBuf,
4043 llvm::Value *CpyFn, llvm::Value *DidIt) {
4044 if (!updateToLocation(Loc))
4045 return Loc.IP;
4046
4047 uint32_t SrcLocStrSize;
4048 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4049 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4050 Value *ThreadId = getOrCreateThreadID(Ident);
4051
4052 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
4053
4054 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
4055
4056 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
4057 Builder.CreateCall(Fn, Args);
4058
4059 return Builder.saveIP();
4060}
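// A sketch of the intended runtime interaction, following the argument roles
// used by createSingle below: the thread that executed the single region sets
// DidIt to 1 and publishes its private copy through CpyBuf; every thread then
// reaches __kmpc_copyprivate, and the runtime invokes CpyFn on the remaining
// threads to replicate the published data into their copies, finishing with
// an implied barrier. BufSize is currently unused by the caller here and is
// passed as 0.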
4061
4062 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createSingle(
4063 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
4064 FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef<llvm::Value *> CPVars,
4065 ArrayRef<llvm::Function *> CPFuncs) {
4066
4067 if (!updateToLocation(Loc))
4068 return Loc.IP;
4069
4070 // If needed allocate and initialize `DidIt` with 0.
4071 // DidIt: flag variable: 1=single thread; 0=not single thread.
4072 llvm::Value *DidIt = nullptr;
4073 if (!CPVars.empty()) {
4074 DidIt = Builder.CreateAlloca(llvm::Type::getInt32Ty(Builder.getContext()));
4075 Builder.CreateStore(Builder.getInt32(0), DidIt);
4076 }
4077
4078 Directive OMPD = Directive::OMPD_single;
4079 uint32_t SrcLocStrSize;
4080 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4081 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4082 Value *ThreadId = getOrCreateThreadID(Ident);
4083 Value *Args[] = {Ident, ThreadId};
4084
4085 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
4086 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4087
4088 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
4089 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
4090
4091 auto FiniCBWrapper = [&](InsertPointTy IP) {
4092 FiniCB(IP);
4093
4094 // The thread that executes the single region must set `DidIt` to 1.
4095 // This is used by __kmpc_copyprivate, to know if the caller is the
4096 // single thread or not.
4097 if (DidIt)
4098 Builder.CreateStore(Builder.getInt32(1), DidIt);
4099 };
4100
4101 // generates the following:
4102 // if (__kmpc_single()) {
4103 // .... single region ...
4104 // __kmpc_end_single
4105 // }
4106 // __kmpc_copyprivate
4107 // __kmpc_barrier
4108
4109 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
4110 /*Conditional*/ true,
4111 /*hasFinalize*/ true);
4112
4113 if (DidIt) {
4114 for (size_t I = 0, E = CPVars.size(); I < E; ++I)
4115 // NOTE BufSize is currently unused, so just pass 0.
4116 createCopyPrivate(LocationDescription(Builder.saveIP(), Loc.DL),
4117 /*BufSize=*/ConstantInt::get(Int64, 0), CPVars[I],
4118 CPFuncs[I], DidIt);
4119 // NOTE __kmpc_copyprivate already inserts a barrier
4120 } else if (!IsNowait)
4121 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
4122 omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
4123 /* CheckCancelFlag */ false);
4124 return Builder.saveIP();
4125}
4126
4127 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
4128 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
4129 FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
4130
4131 if (!updateToLocation(Loc))
4132 return Loc.IP;
4133
4134 Directive OMPD = Directive::OMPD_critical;
4135 uint32_t SrcLocStrSize;
4136 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4137 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4138 Value *ThreadId = getOrCreateThreadID(Ident);
4139 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
4140 Value *Args[] = {Ident, ThreadId, LockVar};
4141
4142 SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
4143 Function *RTFn = nullptr;
4144 if (HintInst) {
4145 // Add Hint to entry Args and create call
4146 EnterArgs.push_back(HintInst);
4147 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
4148 } else {
4149 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
4150 }
4151 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
4152
4153 Function *ExitRTLFn =
4154 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
4155 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
4156
4157 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4158 /*Conditional*/ false, /*hasFinalize*/ true);
4159}
4160
4161 OpenMPIRBuilder::InsertPointTy
4162 OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
4163 InsertPointTy AllocaIP, unsigned NumLoops,
4164 ArrayRef<llvm::Value *> StoreValues,
4165 const Twine &Name, bool IsDependSource) {
4166 assert(
4167 llvm::all_of(StoreValues,
4168 [](Value *SV) { return SV->getType()->isIntegerTy(64); }) &&
4169 "OpenMP runtime requires depend vec with i64 type");
4170
4171 if (!updateToLocation(Loc))
4172 return Loc.IP;
4173
4174 // Allocate space for vector and generate alloc instruction.
4175 auto *ArrI64Ty = ArrayType::get(Int64, NumLoops);
4176 Builder.restoreIP(AllocaIP);
4177 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
4178 ArgsBase->setAlignment(Align(8));
4179 Builder.restoreIP(Loc.IP);
4180
4181 // Store the index value with offset in depend vector.
4182 for (unsigned I = 0; I < NumLoops; ++I) {
4183 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
4184 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
4185 StoreInst *STInst = Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
4186 STInst->setAlignment(Align(8));
4187 }
4188
4189 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
4190 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
4191
4192 uint32_t SrcLocStrSize;
4193 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4194 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4195 Value *ThreadId = getOrCreateThreadID(Ident);
4196 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
4197
4198 Function *RTLFn = nullptr;
4199 if (IsDependSource)
4200 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
4201 else
4202 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
4203 Builder.CreateCall(RTLFn, Args);
4204
4205 return Builder.saveIP();
4206}
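// For reference: this one entry point implements both halves of a doacross
// dependence. For "ordered depend(source)" the caller passes the current
// iteration vector in StoreValues with IsDependSource == true, emitting
// __kmpc_doacross_post; for "ordered depend(sink : vec)" the sink vector is
// passed with IsDependSource == false, emitting __kmpc_doacross_wait. Either
// way each element must already be widened to i64, as asserted above.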
4207
4208 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createOrderedThreadsSimd(
4209 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
4210 FinalizeCallbackTy FiniCB, bool IsThreads) {
4211 if (!updateToLocation(Loc))
4212 return Loc.IP;
4213
4214 Directive OMPD = Directive::OMPD_ordered;
4215 Instruction *EntryCall = nullptr;
4216 Instruction *ExitCall = nullptr;
4217
4218 if (IsThreads) {
4219 uint32_t SrcLocStrSize;
4220 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4221 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4222 Value *ThreadId = getOrCreateThreadID(Ident);
4223 Value *Args[] = {Ident, ThreadId};
4224
4225 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
4226 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4227
4228 Function *ExitRTLFn =
4229 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
4230 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
4231 }
4232
4233 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4234 /*Conditional*/ false, /*hasFinalize*/ true);
4235}
4236
4237OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
4238 Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
4239 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
4240 bool HasFinalize, bool IsCancellable) {
4241
4242 if (HasFinalize)
4243 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
4244
4245 // Create inlined region's entry and body blocks, in preparation
4246 // for conditional creation
4247 BasicBlock *EntryBB = Builder.GetInsertBlock();
4248 Instruction *SplitPos = EntryBB->getTerminator();
4249 if (!isa_and_nonnull<BranchInst>(SplitPos))
4250 SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
4251 BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
4252 BasicBlock *FiniBB =
4253 EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");
4254 Builder.SetInsertPoint(EntryBB->getTerminator());
4254
4256 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
4257
4258 // generate body
4259 BodyGenCB(/* AllocaIP */ InsertPointTy(),
4260 /* CodeGenIP */ Builder.saveIP());
4261
4262 // emit exit call and do any needed finalization.
4263 auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
4264 assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
4265 FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
4266 "Unexpected control flow graph state!!");
4267 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
4268 assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
4269 "Unexpected Control Flow State!");
4271
4272 // If we are skipping the region of a non-conditional, remove the exit
4273 // block, and clear the builder's insertion point.
4274 assert(SplitPos->getParent() == ExitBB &&
4275 "Unexpected Insertion point location!");
4276 auto merged = MergeBlockIntoPredecessor(ExitBB);
4277 BasicBlock *ExitPredBB = SplitPos->getParent();
4278 auto InsertBB = merged ? ExitPredBB : ExitBB;
4279 if (!isa_and_nonnull<BranchInst>(SplitPos))
4280 SplitPos->eraseFromParent();
4281 Builder.SetInsertPoint(InsertBB);
4282
4283 return Builder.saveIP();
4284}
4285
4286OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
4287 Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
4288 // If there is nothing to do, return the current insertion point.
4289 if (!Conditional || !EntryCall)
4290 return Builder.saveIP();
4291
4292 BasicBlock *EntryBB = Builder.GetInsertBlock();
4293 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
4294 auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
4295 auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
4296
4297 // Emit thenBB and set the Builder's insertion point there for
4298 // body generation next. Place the block after the current block.
4299 Function *CurFn = EntryBB->getParent();
4300 CurFn->insert(std::next(EntryBB->getIterator()), ThenBB);
4301
4302 // Move Entry branch to end of ThenBB, and replace with conditional
4303 // branch (If-stmt)
4304 Instruction *EntryBBTI = EntryBB->getTerminator();
4305 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
4306 EntryBBTI->removeFromParent();
4307 Builder.SetInsertPoint(UI);
4308 Builder.Insert(EntryBBTI);
4309 UI->eraseFromParent();
4310 Builder.SetInsertPoint(ThenBB->getTerminator());
4311
4312 // return an insertion point to ExitBB.
4313 return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
4314}
4315
4316OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
4317 omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
4318 bool HasFinalize) {
4319
4320 Builder.restoreIP(FinIP);
4321
4322 // If there is finalization to do, emit it before the exit call
4323 if (HasFinalize) {
4324 assert(!FinalizationStack.empty() &&
4325 "Unexpected finalization stack state!");
4326
4327 FinalizationInfo Fi = FinalizationStack.pop_back_val();
4328 assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
4329
4330 Fi.FiniCB(FinIP);
4331
4332 BasicBlock *FiniBB = FinIP.getBlock();
4333 Instruction *FiniBBTI = FiniBB->getTerminator();
4334
4335 // set Builder IP for call creation
4336 Builder.SetInsertPoint(FiniBBTI);
4337 }
4338
4339 if (!ExitCall)
4340 return Builder.saveIP();
4341
4342 // Place the exit call as the last instruction before the finalization block's terminator.
4343 ExitCall->removeFromParent();
4344 Builder.Insert(ExitCall);
4345
4346 return IRBuilder<>::InsertPoint(ExitCall->getParent(),
4347 ExitCall->getIterator());
4348}
4349
4350 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
4351 InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
4352 llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
4353 if (!IP.isSet())
4354 return IP;
4355
4356 IRBuilder<>::InsertPointGuard IPG(Builder);
4357
4358 // creates the following CFG structure
4359 // OMP_Entry : (MasterAddr != PrivateAddr)?
4360 // F T
4361 // | \
4362 // | copyin.not.master
4363 // | /
4364 // v /
4365 // copyin.not.master.end
4366 // |
4367 // v
4368 // OMP.Entry.Next
4369
4370 BasicBlock *OMP_Entry = IP.getBlock();
4371 Function *CurFn = OMP_Entry->getParent();
4372 BasicBlock *CopyBegin =
4373 BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
4374 BasicBlock *CopyEnd = nullptr;
4375
4376 // If entry block is terminated, split to preserve the branch to following
4377 // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
4378 if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
4379 CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
4380 "copyin.not.master.end");
4381 OMP_Entry->getTerminator()->eraseFromParent();
4382 } else {
4383 CopyEnd =
4384 BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
4385 }
4386
4387 Builder.SetInsertPoint(OMP_Entry);
4388 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
4389 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
4390 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
4391 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
4392
4393 Builder.SetInsertPoint(CopyBegin);
4394 if (BranchtoEnd)
4395 Builder.CreateBr(CopyEnd);
4396
4397 return Builder.saveIP();
4398}
4399
4400 CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
4401 Value *Size, Value *Allocator,
4402 std::string Name) {
4403 IRBuilder<>::InsertPointGuard IPG(Builder);
4404 Builder.restoreIP(Loc.IP);
4405
4406 uint32_t SrcLocStrSize;
4407 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4408 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4409 Value *ThreadId = getOrCreateThreadID(Ident);
4410 Value *Args[] = {ThreadId, Size, Allocator};
4411
4412 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
4413
4414 return Builder.CreateCall(Fn, Args, Name);
4415}
4416
4417 CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
4418 Value *Addr, Value *Allocator,
4419 std::string Name) {
4420 IRBuilder<>::InsertPointGuard IPG(Builder);
4421 Builder.restoreIP(Loc.IP);
4422
4423 uint32_t SrcLocStrSize;
4424 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4425 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4426 Value *ThreadId = getOrCreateThreadID(Ident);
4427 Value *Args[] = {ThreadId, Addr, Allocator};
4428 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
4429 return Builder.CreateCall(Fn, Args, Name);
4430}
4431
4432 CallInst *OpenMPIRBuilder::createOMPInteropInit(
4433 const LocationDescription &Loc, Value *InteropVar,
4434 omp::OMPInteropType InteropType, Value *Device, Value *NumDependences,
4435 Value *DependenceAddress, bool HaveNowaitClause) {
4436 IRBuilder<>::InsertPointGuard IPG(Builder);
4437 Builder.restoreIP(Loc.IP);
4438
4439 uint32_t SrcLocStrSize;
4440 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4441 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4442 Value *ThreadId = getOrCreateThreadID(Ident);
4443 if (Device == nullptr)
4444 Device = ConstantInt::get(Int32, -1);
4445 Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
4446 if (NumDependences == nullptr) {
4447 NumDependences = ConstantInt::get(Int32, 0);
4448 PointerType *PointerTypeVar = PointerType::getUnqual(M.getContext());
4449 DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
4450 }
4451 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
4452 Value *Args[] = {
4453 Ident, ThreadId, InteropVar, InteropTypeVal,
4454 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
4455
4456 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
4457
4458 return Builder.CreateCall(Fn, Args);
4459}
4460
4461 CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
4462 const LocationDescription &Loc, Value *InteropVar, Value *Device,
4463 Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
4464 IRBuilder<>::InsertPointGuard IPG(Builder);
4465 Builder.restoreIP(Loc.IP);
4466
4467 uint32_t SrcLocStrSize;
4468 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4469 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4470 Value *ThreadId = getOrCreateThreadID(Ident);
4471 if (Device == nullptr)
4472 Device = ConstantInt::get(Int32, -1);
4473 if (NumDependences == nullptr) {
4474 NumDependences = ConstantInt::get(Int32, 0);
4475 PointerType *PointerTypeVar = PointerType::getUnqual(M.getContext());
4476 DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
4477 }
4478 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
4479 Value *Args[] = {
4480 Ident, ThreadId, InteropVar, Device,
4481 NumDependences, DependenceAddress, HaveNowaitClauseVal};
4482
4483 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
4484
4485 return Builder.CreateCall(Fn, Args);
4486}
4487
4488 CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
4489 Value *InteropVar, Value *Device,
4490 Value *NumDependences,
4491 Value *DependenceAddress,
4492 bool HaveNowaitClause) {
4493 IRBuilder<>::InsertPointGuard IPG(Builder);
4494 Builder.restoreIP(Loc.IP);
4495 uint32_t SrcLocStrSize;
4496 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4497 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4498 Value *ThreadId = getOrCreateThreadID(Ident);
4499 if (Device == nullptr)
4500 Device = ConstantInt::get(Int32, -1);
4501 if (NumDependences == nullptr) {
4502 NumDependences = ConstantInt::get(Int32, 0);
4503 PointerType *PointerTypeVar = PointerType::getUnqual(M.getContext());
4504 DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
4505 }
4506 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
4507 Value *Args[] = {
4508 Ident, ThreadId, InteropVar, Device,
4509 NumDependences, DependenceAddress, HaveNowaitClauseVal};
4510
4511 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
4512
4513 return Builder.CreateCall(Fn, Args);
4514}
4515
4516 CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
4517 const LocationDescription &Loc, llvm::Value *Pointer,
4518 llvm::ConstantInt *Size, const llvm::Twine &Name) {
4519 IRBuilder<>::InsertPointGuard IPG(Builder);
4520 Builder.restoreIP(Loc.IP);
4521
4522 uint32_t SrcLocStrSize;
4523 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4524 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4525 Value *ThreadId = getOrCreateThreadID(Ident);
4526 Constant *ThreadPrivateCache =
4527 getOrCreateInternalVariable(Int8PtrPtr, Name.str());
4528 llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
4529
4530 Function *Fn =
4531 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
4532
4533 return Builder.CreateCall(Fn, Args);
4534}
4535
4536 OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
4537 const LocationDescription &Loc, bool IsSPMD,
4538 int32_t MinThreadsVal, int32_t MaxThreadsVal,
4539 int32_t MinTeamsVal, int32_t MaxTeamsVal) {
4540 if (!updateToLocation(Loc))
4541 return Loc.IP;
4542
4543 uint32_t SrcLocStrSize;
4544 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4545 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4546 Constant *IsSPMDVal = ConstantInt::getSigned(
4547 Int8, IsSPMD ? OMP_TGT_EXEC_MODE_SPMD : OMP_TGT_EXEC_MODE_GENERIC);
4548 Constant *UseGenericStateMachineVal = ConstantInt::getSigned(Int8, !IsSPMD);
4549 Constant *MayUseNestedParallelismVal = ConstantInt::getSigned(Int8, true);
4550 Constant *DebugIndentionLevelVal = ConstantInt::getSigned(Int16, 0);
4551
4552 Function *Kernel = Builder.GetInsertBlock()->getParent();
4553
4554 // Manifest the launch configuration in the metadata matching the kernel
4555 // environment.
4556 if (MinTeamsVal > 1 || MaxTeamsVal > 0)
4557 writeTeamsForKernel(T, *Kernel, MinTeamsVal, MaxTeamsVal);
4558
4559 // For max values, < 0 means unset, == 0 means set but unknown.
4560 if (MaxThreadsVal < 0)
4561 MaxThreadsVal = std::max(
4562 int32_t(getGridValue(T, Kernel).GV_Default_WG_Size), MinThreadsVal);
4563
4564 if (MaxThreadsVal > 0)
4565 writeThreadBoundsForKernel(T, *Kernel, MinThreadsVal, MaxThreadsVal);
4566
4567 Constant *MinThreads = ConstantInt::getSigned(Int32, MinThreadsVal);
4568 Constant *MaxThreads = ConstantInt::getSigned(Int32, MaxThreadsVal);
4569 Constant *MinTeams = ConstantInt::getSigned(Int32, MinTeamsVal);
4570 Constant *MaxTeams = ConstantInt::getSigned(Int32, MaxTeamsVal);
4571 Constant *ReductionDataSize = ConstantInt::getSigned(Int32, 0);
4572 Constant *ReductionBufferLength = ConstantInt::getSigned(Int32, 0);
4573
4574 // We need to strip the debug prefix to get the correct kernel name.
4575 StringRef KernelName = Kernel->getName();
4576 const std::string DebugPrefix = "_debug__";
4577 if (KernelName.ends_with(DebugPrefix))
4578 KernelName = KernelName.drop_back(DebugPrefix.length());
4579
4580 Function *Fn = getOrCreateRuntimeFunctionPtr(
4581 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
4582 const DataLayout &DL = Fn->getParent()->getDataLayout();
4583
4584 Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
4585 Constant *DynamicEnvironmentInitializer =
4586 ConstantStruct::get(DynamicEnvironment, {DebugIndentionLevelVal});
4587 GlobalVariable *DynamicEnvironmentGV = new GlobalVariable(
4588 M, DynamicEnvironment, /*IsConstant=*/false, GlobalValue::WeakODRLinkage,
4589 DynamicEnvironmentInitializer, DynamicEnvironmentName,
4590 /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
4591 DL.getDefaultGlobalsAddressSpace());
4592 DynamicEnvironmentGV->setVisibility(GlobalValue::ProtectedVisibility);
4593
4594 Constant *DynamicEnvironment =
4595 DynamicEnvironmentGV->getType() == DynamicEnvironmentPtr
4596 ? DynamicEnvironmentGV
4597 : ConstantExpr::getAddrSpaceCast(DynamicEnvironmentGV,
4598 DynamicEnvironmentPtr);
4599
4600 Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get(
4601 ConfigurationEnvironment, {
4602 UseGenericStateMachineVal,
4603 MayUseNestedParallelismVal,
4604 IsSPMDVal,
4605 MinThreads,
4606 MaxThreads,
4607 MinTeams,
4608 MaxTeams,
4609 ReductionDataSize,
4610 ReductionBufferLength,
4611 });
4612 Constant *KernelEnvironmentInitializer = ConstantStruct::get(
4613 KernelEnvironment, {
4614 ConfigurationEnvironmentInitializer,
4615 Ident,
4616 DynamicEnvironment,
4617 });
4618 Twine KernelEnvironmentName = KernelName + "_kernel_environment";
4619 GlobalVariable *KernelEnvironmentGV = new GlobalVariable(
4620 M, KernelEnvironment, /*IsConstant=*/true, GlobalValue::WeakODRLinkage,
4621 KernelEnvironmentInitializer, KernelEnvironmentName,
4622 /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
4623 DL.getDefaultGlobalsAddressSpace());
4624 KernelEnvironmentGV->setVisibility(GlobalValue::ProtectedVisibility);
4625
4626 Constant *KernelEnvironment =
4627 KernelEnvironmentGV->getType() == KernelEnvironmentPtr
4628 ? KernelEnvironmentGV
4629 : ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV,
4630 KernelEnvironmentPtr);
4631 Value *KernelLaunchEnvironment = Kernel->getArg(0);
4632 CallInst *ThreadKind =
4633 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
4634
4635 Value *ExecUserCode = Builder.CreateICmpEQ(
4636 ThreadKind, ConstantInt::get(ThreadKind->getType(), -1),
4637 "exec_user_code");
4638
4639 // ThreadKind = __kmpc_target_init(...)
4640 // if (ThreadKind == -1)
4641 // user_code
4642 // else
4643 // return;
4644
4645 auto *UI = Builder.CreateUnreachable();
4646 BasicBlock *CheckBB = UI->getParent();
4647 BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");
4648
4649 BasicBlock *WorkerExitBB = BasicBlock::Create(
4650 CheckBB->getContext(), "worker.exit", CheckBB->getParent());
4651 Builder.SetInsertPoint(WorkerExitBB);
4652 Builder.CreateRetVoid();
4653
4654 auto *CheckBBTI = CheckBB->getTerminator();
4655 Builder.SetInsertPoint(CheckBBTI);
4656 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
4657
4658 CheckBBTI->eraseFromParent();
4659 UI->eraseFromParent();
4660
4661 // Continue in the "user_code" block, see diagram above and in
4662 // openmp/libomptarget/deviceRTLs/common/include/target.h .
4663 return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
4664}
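// Illustrative shape of the guard emitted above (a sketch with names
// abbreviated; the exact IR depends on the target):
//
//   %tk = call i32 @__kmpc_target_init(ptr @<kernel>_kernel_environment,
//                                      ptr %dyn)
//   %exec_user_code = icmp eq i32 %tk, -1
//   br i1 %exec_user_code, label %user_code.entry, label %worker.exit
//
// The returned insert point sits at the top of %user_code.entry, so callers
// continue emitting the kernel body there.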
4665
4666void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
4667 int32_t TeamsReductionDataSize,
4668 int32_t TeamsReductionBufferLength) {
4669 if (!updateToLocation(Loc))
4670 return;
4671
4672 Function *Fn = getOrCreateRuntimeFunctionPtr(
4673 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
4674
4675 Builder.CreateCall(Fn, {});
4676
4677 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
4678 return;
4679
4680 Function *Kernel = Builder.GetInsertBlock()->getParent();
4681 // We need to strip the debug prefix to get the correct kernel name.
4682 StringRef KernelName = Kernel->getName();
4683 const std::string DebugPrefix = "_debug__";
4684 if (KernelName.ends_with(DebugPrefix))
4685 KernelName = KernelName.drop_back(DebugPrefix.length());
4686 auto *KernelEnvironmentGV =
4687 M.getNamedGlobal((KernelName + "_kernel_environment").str());
4688 assert(KernelEnvironmentGV && "Expected kernel environment global\n");
4689 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
4690 auto *NewInitializer = ConstantFoldInsertValueInstruction(
4691 KernelEnvironmentInitializer,
4692 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
4693 NewInitializer = ConstantFoldInsertValueInstruction(
4694 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
4695 {0, 8});
4696 KernelEnvironmentGV->setInitializer(NewInitializer);
4697}
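// Usage sketch (illustrative): device kernel generation brackets the body
// with the two helpers above, mirroring the call sites later in this file:
//
//   Builder.restoreIP(OMPBuilder.createTargetInit(Builder, /*IsSPMD=*/false));
//   // ... emit kernel body ...
//   OMPBuilder.createTargetDeinit(Builder);
//
// Passing non-zero TeamsReductionDataSize/TeamsReductionBufferLength makes
// createTargetDeinit patch the reduction fields of the kernel environment.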
4698
4699static MDNode *getNVPTXMDNode(Function &Kernel, StringRef Name) {
4700 Module &M = *Kernel.getParent();
4701 NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
4702 for (auto *Op : MD->operands()) {
4703 if (Op->getNumOperands() != 3)
4704 continue;
4705 auto *KernelOp = dyn_cast<ConstantAsMetadata>(Op->getOperand(0));
4706 if (!KernelOp || KernelOp->getValue() != &Kernel)
4707 continue;
4708 auto *Prop = dyn_cast<MDString>(Op->getOperand(1));
4709 if (!Prop || Prop->getString() != Name)
4710 continue;
4711 return Op;
4712 }
4713 return nullptr;
4714}
4715
4716static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value,
4717 bool Min) {
4718 // Update the "maxntidx" metadata for NVIDIA, or add it.
4719 MDNode *ExistingOp = getNVPTXMDNode(Kernel, Name);
4720 if (ExistingOp) {
4721 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
4722 int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
4723 ExistingOp->replaceOperandWith(
4724 2, ConstantAsMetadata::get(ConstantInt::get(
4725 OldVal->getValue()->getType(),
4726 Min ? std::min(OldLimit, Value) : std::max(OldLimit, Value))));
4727 } else {
4728 LLVMContext &Ctx = Kernel.getContext();
4729 Metadata *MDVals[] = {ConstantAsMetadata::get(&Kernel),
4730 MDString::get(Ctx, Name),
4731 ConstantAsMetadata::get(
4732 ConstantInt::get(Type::getInt32Ty(Ctx), Value))};
4733 // Append metadata to nvvm.annotations
4734 Module &M = *Kernel.getParent();
4735 NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
4736 MD->addOperand(MDNode::get(Ctx, MDVals));
4737 }
4738}
4739
4740std::pair<int32_t, int32_t>
4741OpenMPIRBuilder::readThreadBoundsForKernel(const Triple &T, Function &Kernel) {
4742 int32_t ThreadLimit =
4743 Kernel.getFnAttributeAsParsedInteger("omp_target_thread_limit");
4744
4745 if (T.isAMDGPU()) {
4746 const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size");
4747 if (!Attr.isValid() || !Attr.isStringAttribute())
4748 return {0, ThreadLimit};
4749 auto [LBStr, UBStr] = Attr.getValueAsString().split(',');
4750 int32_t LB, UB;
4751 if (!llvm::to_integer(UBStr, UB, 10))
4752 return {0, ThreadLimit};
4753 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
4754 if (!llvm::to_integer(LBStr, LB, 10))
4755 return {0, UB};
4756 return {LB, UB};
4757 }
4758
4759 if (MDNode *ExistingOp = getNVPTXMDNode(Kernel, "maxntidx")) {
4760 auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
4761 int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
4762 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
4763 }
4764 return {0, ThreadLimit};
4765}
4766
4767void OpenMPIRBuilder::writeThreadBoundsForKernel(const Triple &T,
4768 Function &Kernel, int32_t LB,
4769 int32_t UB) {
4770 Kernel.addFnAttr("omp_target_thread_limit", std::to_string(UB));
4771
4772 if (T.isAMDGPU()) {
4773 Kernel.addFnAttr("amdgpu-flat-work-group-size",
4774 llvm::utostr(LB) + "," + llvm::utostr(UB));
4775 return;
4776 }
4777
4778 updateNVPTXMetadata(Kernel, "maxntidx", UB, true);
4779}
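// Worked example of the encodings written above (sketch): for LB = 1 and
// UB = 256 a kernel ends up with
//   "omp_target_thread_limit"="256"          (all targets)
//   "amdgpu-flat-work-group-size"="1,256"    (AMDGPU)
//   !{ptr @kernel, !"maxntidx", i32 256}     (NVPTX, in !nvvm.annotations)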
4780
4781std::pair<int32_t, int32_t>
4782OpenMPIRBuilder::readTeamBoundsForKernel(const Triple &, Function &Kernel) {
4783 // TODO: Read from backend annotations if available.
4784 return {0, Kernel.getFnAttributeAsParsedInteger("omp_target_num_teams")};
4785}
4786
4787void OpenMPIRBuilder::writeTeamsForKernel(const Triple &T, Function &Kernel,
4788 int32_t LB, int32_t UB) {
4789 if (T.isNVPTX())
4790 if (UB > 0)
4791 updateNVPTXMetadata(Kernel, "maxclusterrank", UB, true);
4792 if (T.isAMDGPU())
4793 Kernel.addFnAttr("amdgpu-max-num-workgroups", llvm::utostr(LB) + ",1,1");
4794
4795 Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB));
4796}
4797
4798void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
4799 Function *OutlinedFn) {
4800 if (Config.isTargetDevice()) {
4801 OutlinedFn->setLinkage(GlobalValue::WeakODRLinkage);
4802 // TODO: Determine if DSO local can be set to true.
4803 OutlinedFn->setDSOLocal(false);
4804 OutlinedFn->setVisibility(GlobalValue::ProtectedVisibility);
4805 if (T.isAMDGCN())
4806 OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL);
4807 }
4808}
4809
4810Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
4811 StringRef EntryFnIDName) {
4812 if (Config.isTargetDevice()) {
4813 assert(OutlinedFn && "The outlined function must exist if embedded");
4814 return OutlinedFn;
4815 }
4816
4817 return new GlobalVariable(
4818 M, Builder.getInt8Ty(), /*isConstant=*/true, GlobalValue::WeakAnyLinkage,
4819 Constant::getNullValue(Builder.getInt8Ty()), EntryFnIDName);
4820}
4821
4822Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
4823 StringRef EntryFnName) {
4824 if (OutlinedFn)
4825 return OutlinedFn;
4826
4827 assert(!M.getGlobalVariable(EntryFnName, true) &&
4828 "Named kernel already exists?");
4829 return new GlobalVariable(
4830 M, Builder.getInt8Ty(), /*isConstant=*/true, GlobalValue::InternalLinkage,
4831 Constant::getNullValue(Builder.getInt8Ty()), EntryFnName);
4832}
4833
4834void OpenMPIRBuilder::emitTargetRegionFunction(
4835 TargetRegionEntryInfo &EntryInfo,
4836 FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry,
4837 Function *&OutlinedFn, Constant *&OutlinedFnID) {
4838
4839 SmallString<64> EntryFnName;
4840 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
4841
4842 OutlinedFn = Config.isTargetDevice() || !Config.openMPOffloadMandatory()
4843 ? GenerateFunctionCallback(EntryFnName)
4844 : nullptr;
4845
4846 // If this target outline function is not an offload entry, we don't need to
4847 // register it. This can happen, for example, with a compile-time false if
4848 // clause, or when there are no OpenMP targets.
4849 if (!IsOffloadEntry)
4850 return;
4851
4852 std::string EntryFnIDName =
4853 Config.isTargetDevice()
4854 ? std::string(EntryFnName)
4855 : createPlatformSpecificName({EntryFnName, "region_id"});
4856
4857 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
4858 EntryFnName, EntryFnIDName);
4859}
4860
4861Constant *OpenMPIRBuilder::registerTargetRegionFunction(
4862 TargetRegionEntryInfo &EntryInfo, Function *OutlinedFn,
4863 StringRef EntryFnName, StringRef EntryFnIDName) {
4864 if (OutlinedFn)
4865 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
4866 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
4867 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
4868 OffloadInfoManager.registerTargetRegionEntryInfo(
4869 EntryInfo, EntryAddr, OutlinedFnID,
4870 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
4871 return OutlinedFnID;
4872}
4873
4874OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
4875 const LocationDescription &Loc, InsertPointTy AllocaIP,
4876 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
4877 TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
4878 omp::RuntimeFunction *MapperFunc,
4879 function_ref<InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)>
4880 BodyGenCB,
4881 function_ref<void(unsigned int, Value *)> DeviceAddrCB,
4882 function_ref<Value *(unsigned int)> CustomMapperCB, Value *SrcLocInfo) {
4883 if (!updateToLocation(Loc))
4884 return InsertPointTy();
4885
4886 // Disable TargetData CodeGen on Device pass.
4887 if (Config.IsTargetDevice.value_or(false)) {
4888 if (BodyGenCB)
4889 Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv));
4890 return Builder.saveIP();
4891 }
4892
4893 Builder.restoreIP(CodeGenIP);
4894 bool IsStandAlone = !BodyGenCB;
4895 MapInfosTy *MapInfo;
4896 // Generate the code for the opening of the data environment. Capture all the
4897 // arguments of the runtime call by reference because they are used in the
4898 // closing of the region.
4899 auto BeginThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4900 MapInfo = &GenMapInfoCB(Builder.saveIP());
4901 emitOffloadingArrays(AllocaIP, Builder.saveIP(), *MapInfo, Info,
4902 /*IsNonContiguous=*/true, DeviceAddrCB,
4903 CustomMapperCB);
4904
4905 TargetDataRTArgs RTArgs;
4906 emitOffloadingArraysArgument(Builder, RTArgs, Info,
4907 !MapInfo->Names.empty());
4908
4909 // Emit the number of elements in the offloading arrays.
4910 Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
4911
4912 // Source location for the ident struct
4913 if (!SrcLocInfo) {
4914 uint32_t SrcLocStrSize;
4915 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4916 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4917 }
4918
4919 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
4920 PointerNum, RTArgs.BasePointersArray,
4921 RTArgs.PointersArray, RTArgs.SizesArray,
4922 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
4923 RTArgs.MappersArray};
4924
4925 if (IsStandAlone) {
4926 assert(MapperFunc && "MapperFunc missing for standalone target data");
4927 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
4928 OffloadingArgs);
4929 } else {
4930 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
4931 omp::OMPRTL___tgt_target_data_begin_mapper);
4932
4933 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
4934
4935 for (auto DeviceMap : Info.DevicePtrInfoMap) {
4936 if (isa<AllocaInst>(DeviceMap.second.second)) {
4937 auto *LI =
4938 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
4939 Builder.CreateStore(LI, DeviceMap.second.second);
4940 }
4941 }
4942
4943 // If device pointer privatization is required, emit the body of the
4944 // region here. It will have to be duplicated: with and without
4945 // privatization.
4946 Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::Priv));
4947 }
4948 };
4949
4950 // If we need device pointer privatization, we need to emit the body of the
4951 // region with no privatization in the 'else' branch of the conditional.
4952 // Otherwise, we don't have to do anything.
4953 auto BeginElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4954 Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv));
4955 };
4956
4957 // Generate code for the closing of the data region.
4958 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
4959 TargetDataRTArgs RTArgs;
4960 emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
4961 /*ForEndCall=*/true);
4962
4963 // Emit the number of elements in the offloading arrays.
4964 Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
4965
4966 // Source location for the ident struct
4967 if (!SrcLocInfo) {
4968 uint32_t SrcLocStrSize;
4969 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
4970 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4971 }
4972
4973 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
4974 PointerNum, RTArgs.BasePointersArray,
4975 RTArgs.PointersArray, RTArgs.SizesArray,
4976 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
4977 RTArgs.MappersArray};
4978 Function *EndMapperFunc =
4979 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
4980
4981 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
4982 };
4983
4984 // We don't have to do anything to close the region if the if clause evaluates
4985 // to false.
4986 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
4987
4988 if (BodyGenCB) {
4989 if (IfCond) {
4990 emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
4991 } else {
4992 BeginThenGen(AllocaIP, Builder.saveIP());
4993 }
4994
4995 // If we don't require privatization of device pointers, we emit the body in
4996 // between the runtime calls. This avoids duplicating the body code.
4997 Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv));
4998
4999 if (IfCond) {
5000 emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
5001 } else {
5002 EndThenGen(AllocaIP, Builder.saveIP());
5003 }
5004 } else {
5005 if (IfCond) {
5006 emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
5007 } else {
5008 BeginThenGen(AllocaIP, Builder.saveIP());
5009 }
5010 }
5011
5012 return Builder.saveIP();
5013}
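// Illustrative call sequence emitted for a non-standalone 'target data'
// region without an if-clause (sketch):
//
//   call void @__tgt_target_data_begin_mapper(...)
//   ; body from BodyGenCB(..., BodyGenTy::NoPriv)
//   call void @__tgt_target_data_end_mapper(...)
//
// A standalone construct (no BodyGenCB), e.g. 'target update', instead emits
// a single call to the provided MapperFunc.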
5014
5015FunctionCallee
5016OpenMPIRBuilder::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
5017 bool IsGPUDistribute) {
5018 assert((IVSize == 32 || IVSize == 64) &&
5019 "IV size is not compatible with the omp runtime");
5020 RuntimeFunction Name;
5021 if (IsGPUDistribute)
5022 Name = IVSize == 32
5023 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
5024 : omp::OMPRTL___kmpc_distribute_static_init_4u)
5025 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
5026 : omp::OMPRTL___kmpc_distribute_static_init_8u);
5027 else
5028 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
5029 : omp::OMPRTL___kmpc_for_static_init_4u)
5030 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
5031 : omp::OMPRTL___kmpc_for_static_init_8u);
5032
5033 return getOrCreateRuntimeFunction(M, Name);
5034}
5035
5036FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(unsigned IVSize,
5037 bool IVSigned) {
5038 assert((IVSize == 32 || IVSize == 64) &&
5039 "IV size is not compatible with the omp runtime");
5040 RuntimeFunction Name = IVSize == 32
5041 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
5042 : omp::OMPRTL___kmpc_dispatch_init_4u)
5043 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
5044 : omp::OMPRTL___kmpc_dispatch_init_8u);
5045
5046 return getOrCreateRuntimeFunction(M, Name);
5047}
5048
5049FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(unsigned IVSize,
5050 bool IVSigned) {
5051 assert((IVSize == 32 || IVSize == 64) &&
5052 "IV size is not compatible with the omp runtime");
5053 RuntimeFunction Name = IVSize == 32
5054 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
5055 : omp::OMPRTL___kmpc_dispatch_next_4u)
5056 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
5057 : omp::OMPRTL___kmpc_dispatch_next_8u);
5058
5059 return getOrCreateRuntimeFunction(M, Name);
5060}
5061
5062FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(unsigned IVSize,
5063 bool IVSigned) {
5064 assert((IVSize == 32 || IVSize == 64) &&
5065 "IV size is not compatible with the omp runtime");
5066 RuntimeFunction Name = IVSize == 32
5067 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
5068 : omp::OMPRTL___kmpc_dispatch_fini_4u)
5069 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
5070 : omp::OMPRTL___kmpc_dispatch_fini_8u);
5071
5072 return getOrCreateRuntimeFunction(M, Name);
5073}
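// Name-selection example for the dispatch helpers above (sketch):
//
//   FunctionCallee Next = OMPBuilder.createDispatchNextFunction(
//       /*IVSize=*/32, /*IVSigned=*/true);  // -> __kmpc_dispatch_next_4
//
// IVSize = 64 with IVSigned = false would select the _8u variant instead.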
5074
5075static void replaceConstatExprUsesInFuncWithInstr(ConstantExpr *ConstExpr,
5076 Function *Func) {
5077 for (User *User : make_early_inc_range(ConstExpr->users())) {
5078 if (auto *Instr = dyn_cast<Instruction>(User)) {
5079 if (Instr->getFunction() == Func) {
5080 Instruction *ConstInst = ConstExpr->getAsInstruction();
5081 ConstInst->insertBefore(*Instr->getParent(), Instr->getIterator());
5082 Instr->replaceUsesOfWith(ConstExpr, ConstInst);
5083 }
5084 }
5085 }
5086}
5087
5088static void replaceConstantValueUsesInFuncWithInstr(llvm::Value *Input,
5089 Function *Func) {
5090 for (User *User : make_early_inc_range(Input->users()))
5091 if (auto *Const = dyn_cast<Constant>(User))
5092 if (auto *ConstExpr = dyn_cast<ConstantExpr>(Const))
5093 replaceConstatExprUsesInFuncWithInstr(ConstExpr, Func);
5094}
5095
5096static Function *createOutlinedFunction(
5097 OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName,
5098 SmallVectorImpl<Value *> &Inputs,
5099 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
5100 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
5101 SmallVector<Type *> ParameterTypes;
5102 if (OMPBuilder.Config.isTargetDevice()) {
5103 // Add the "implicit" runtime argument we use to provide launch specific
5104 // information for target devices.
5105 auto *Int8PtrTy = PointerType::getUnqual(Builder.getContext());
5106 ParameterTypes.push_back(Int8PtrTy);
5107
5108 // All parameters to target devices are passed as pointers
5109 // or i64. This assumes 64-bit address spaces/pointers.
5110 for (auto &Arg : Inputs)
5111 ParameterTypes.push_back(Arg->getType()->isPointerTy()
5112 ? Arg->getType()
5113 : Type::getInt64Ty(Builder.getContext()));
5114 } else {
5115 for (auto &Arg : Inputs)
5116 ParameterTypes.push_back(Arg->getType());
5117 }
5118
5119 auto FuncType = FunctionType::get(Builder.getVoidTy(), ParameterTypes,
5120 /*isVarArg*/ false);
5121 auto Func = Function::Create(FuncType, GlobalValue::InternalLinkage, FuncName,
5122 Builder.GetInsertBlock()->getModule());
5123
5124 // Save insert point.
5125 auto OldInsertPoint = Builder.saveIP();
5126
5127 // Generate the region into the function.
5128 BasicBlock *EntryBB = BasicBlock::Create(Builder.getContext(), "entry", Func);
5129 Builder.SetInsertPoint(EntryBB);
5130
5131 // Insert target init call in the device compilation pass.
5132 if (OMPBuilder.Config.isTargetDevice())
5133 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, /*IsSPMD*/ false));
5134
5135 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
5136
5137 // As we embed the user code in the middle of our target region after we
5138 // generate entry code, we must move what allocas we can into the entry
5139 // block to avoid possibly breaking optimisations for the device.
5140 if (OMPBuilder.Config.isTargetDevice())
5141 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
5142
5143 // Insert target deinit call in the device compilation pass.
5144 Builder.restoreIP(CBFunc(Builder.saveIP(), Builder.saveIP()));
5145 if (OMPBuilder.Config.isTargetDevice())
5146 OMPBuilder.createTargetDeinit(Builder);
5147
5148 // Insert return instruction.
5149 Builder.CreateRetVoid();
5150
5151 // New Alloca IP at entry point of created device function.
5152 Builder.SetInsertPoint(EntryBB->getFirstNonPHI());
5153 auto AllocaIP = Builder.saveIP();
5154
5155 Builder.SetInsertPoint(UserCodeEntryBB->getFirstNonPHIOrDbg());
5156
5157 // Skip the artificial dyn_ptr on the device.
5158 const auto &ArgRange =
5159 OMPBuilder.Config.isTargetDevice()
5160 ? make_range(Func->arg_begin() + 1, Func->arg_end())
5161 : Func->args();
5162
5163 // Rewrite uses of input values to parameters.
5164 for (auto InArg : zip(Inputs, ArgRange)) {
5165 Value *Input = std::get<0>(InArg);
5166 Argument &Arg = std::get<1>(InArg);
5167 Value *InputCopy = nullptr;
5168
5169 Builder.restoreIP(
5170 ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP()));
5171
5172 // Things like GEP's can come in the form of Constants. Constants and
5173 // ConstantExpr's do not have access to the knowledge of what they're
5174 // contained in, so we must dig a little to find an instruction so we can
5175 // tell if they're used inside of the function we're outlining. We also
5176 // replace the original constant expression with a new instruction
5177 // equivalent: an instruction, since that allows easy modification in the
5178 // following loop, as we then know the constant (now an instruction) is owned
5179 // by our target function and replaceUsesOfWith can be invoked on it
5180 // (cannot do this with constants it seems). A brand new one also allows us
5181 // to be cautious as it is perhaps possible the old expression was used
5182 // inside of the function but exists and is used externally (unlikely by the
5183 // nature of a Constant, but still).
5184 replaceConstantValueUsesInFuncWithInstr(Input, Func);
5185
5186 // Collect all the instructions
5187 for (User *User : make_early_inc_range(Input->users()))
5188 if (auto *Instr = dyn_cast<Instruction>(User))
5189 if (Instr->getFunction() == Func)
5190 Instr->replaceUsesOfWith(Input, InputCopy);
5191 }
5192
5193 // Restore insert point.
5194 Builder.restoreIP(OldInsertPoint);
5195
5196 return Func;
5197}
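// Sketch of the resulting device entry point for Inputs {ptr %p, i32 %n}
// (illustrative; on the device path the i32 is widened to i64 as above):
//
//   define internal void @<entry_fn_name>(ptr %dyn_ptr, ptr %p, i64 %n) {
//   entry:
//     ; target init, user code from CBFunc, target deinit
//     ret void
//   }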
5198
5199static void emitTargetOutlinedFunction(
5200 OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
5201 TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn,
5202 Constant *&OutlinedFnID, SmallVectorImpl<Value *> &Inputs,
5203 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
5204 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
5205
5206 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
5207 [&OMPBuilder, &Builder, &Inputs, &CBFunc,
5208 &ArgAccessorFuncCB](StringRef EntryFnName) {
5209 return createOutlinedFunction(OMPBuilder, Builder, EntryFnName, Inputs,
5210 CBFunc, ArgAccessorFuncCB);
5211 };
5212
5213 OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, true,
5214 OutlinedFn, OutlinedFnID);
5215}
5216
5217static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
5218 OpenMPIRBuilder::InsertPointTy AllocaIP,
5219 Function *OutlinedFn, Constant *OutlinedFnID,
5220 int32_t NumTeams, int32_t NumThreads,
5221 SmallVectorImpl<Value *> &Args,
5222 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB) {
5223
5224 OpenMPIRBuilder::TargetDataInfo Info(
5225 /*RequiresDevicePointerInfo=*/false,
5226 /*SeparateBeginEndCalls=*/true);
5227
5228 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
5229 OMPBuilder.emitOffloadingArrays(AllocaIP, Builder.saveIP(), MapInfo, Info,
5230 /*IsNonContiguous=*/true);
5231
5232 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
5233 OMPBuilder.emitOffloadingArraysArgument(Builder, RTArgs, Info,
5234 !MapInfo.Names.empty());
5235
5236 // emitKernelLaunch
5237 auto &&EmitTargetCallFallbackCB =
5238 [&](OpenMPIRBuilder::InsertPointTy IP) -> OpenMPIRBuilder::InsertPointTy {
5239 Builder.restoreIP(IP);
5240 Builder.CreateCall(OutlinedFn, Args);
5241 return Builder.saveIP();
5242 };
5243
5244 unsigned NumTargetItems = MapInfo.BasePointers.size();
5245 // TODO: Use correct device ID
5246 Value *DeviceID = Builder.getInt64(OMP_DEVICEID_UNDEF);
5247 Value *NumTeamsVal = Builder.getInt32(NumTeams);
5248 Value *NumThreadsVal = Builder.getInt32(NumThreads);
5249 uint32_t SrcLocStrSize;
5250 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
5251 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
5252 llvm::omp::IdentFlag(0), 0);
5253 // TODO: Use correct NumIterations
5254 Value *NumIterations = Builder.getInt64(0);
5255 // TODO: Use correct DynCGGroupMem
5256 Value *DynCGGroupMem = Builder.getInt32(0);
5257
5258 bool HasNoWait = false;
5259
5260 OpenMPIRBuilder::TargetKernelArgs KArgs(NumTargetItems, RTArgs, NumIterations,
5261 NumTeamsVal, NumThreadsVal,
5262 DynCGGroupMem, HasNoWait);
5263
5264 Builder.restoreIP(OMPBuilder.emitKernelLaunch(
5265 Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
5266 DeviceID, RTLoc, AllocaIP));
5267}
5268
5269OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
5270 const LocationDescription &Loc, InsertPointTy AllocaIP,
5271 InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
5272 int32_t NumThreads, SmallVectorImpl<Value *> &Args,
5273 GenMapInfoCallbackTy GenMapInfoCB,
5274 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
5275 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB) {
5276 if (!updateToLocation(Loc))
5277 return InsertPointTy();
5278
5279 Builder.restoreIP(CodeGenIP);
5280
5281 Function *OutlinedFn;
5282 Constant *OutlinedFnID;
5283 emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn,
5284 OutlinedFnID, Args, CBFunc, ArgAccessorFuncCB);
5285 if (!Config.isTargetDevice())
5286 emitTargetCall(*this, Builder, AllocaIP, OutlinedFn, OutlinedFnID, NumTeams,
5287 NumThreads, Args, GenMapInfoCB);
5288
5289 return Builder.saveIP();
5290}
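// Illustrative host-side usage (a sketch; the callbacks and EntryInfo fields
// are assumptions supplied by the frontend):
//
//   TargetRegionEntryInfo EntryInfo("parent_fn", DeviceID, FileID, Line);
//   Builder.restoreIP(OMPBuilder.createTarget(
//       Loc, AllocaIP, Builder.saveIP(), EntryInfo, /*NumTeams=*/-1,
//       /*NumThreads=*/0, Inputs, GenMapInfoCB, BodyGenCB, ArgAccessorCB));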
5291
5292std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
5293 StringRef FirstSeparator,
5294 StringRef Separator) {
5295 SmallString<128> Buffer;
5296 llvm::raw_svector_ostream OS(Buffer);
5297 StringRef Sep = FirstSeparator;
5298 for (StringRef Part : Parts) {
5299 OS << Sep << Part;
5300 Sep = Separator;
5301 }
5302 return OS.str().str();
5303}
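// Example (sketch): getNameWithSeparators({"omp", "foo"}, "_", ".") yields
// "_omp.foo"; the first separator is used only before the leading part.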
5304
5305std::string
5306OpenMPIRBuilder::createPlatformSpecificName(ArrayRef<StringRef> Parts) const {
5307 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
5308 Config.separator());
5309}
5310
5311GlobalVariable *
5312OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
5313 unsigned AddressSpace) {
5314 auto &Elem = *InternalVars.try_emplace(Name, nullptr).first;
5315 if (Elem.second) {
5316 assert(Elem.second->getValueType() == Ty &&
5317 "OMP internal variable has different type than requested");
5318 } else {
5319 // TODO: investigate the appropriate linkage type used for the global
5320 // variable for possibly changing that to internal or private, or maybe
5321 // create different versions of the function for different OMP internal
5322 // variables.
5323 auto Linkage = this->M.getTargetTriple().rfind("wasm32") == 0
5324 ? GlobalValue::InternalLinkage
5325 : GlobalValue::CommonLinkage;
5326 auto *GV = new GlobalVariable(M, Ty, /*IsConstant=*/false, Linkage,
5327 Constant::getNullValue(Ty), Elem.first(),
5328 /*InsertBefore=*/nullptr,
5329 GlobalValue::NotThreadLocal, AddressSpace);
5330 const DataLayout &DL = M.getDataLayout();
5331 const llvm::Align TypeAlign = DL.getABITypeAlign(Ty);
5332 const llvm::Align PtrAlign = DL.getPointerABIAlignment(AddressSpace);
5333 GV->setAlignment(std::max(TypeAlign, PtrAlign));
5334 Elem.second = GV;
5335 }
5336
5337 return Elem.second;
5338}
5339
5340Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
5341 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
5342 std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
5343 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
5344}
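// Example (sketch): CriticalName "foo" produces the internal variable
// ".gomp_critical_user_foo.var", typed as the kmp_critical_name lock array.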
5345
5346Value *OpenMPIRBuilder::getSizeInBytes(Value *BasePtr) {
5347 LLVMContext &Ctx = M.getContext();
5348 Value *Null =
5349 Constant::getNullValue(PointerType::getUnqual(BasePtr->getContext()));
5350 Value *SizeGep =
5351 Builder.CreateGEP(BasePtr->getType(), Null, Builder.getInt32(1));
5352 Value *SizePtrToInt = Builder.CreatePtrToInt(SizeGep, Type::getInt64Ty(Ctx));
5353 return SizePtrToInt;
5354}
5355
5356GlobalVariable *
5357OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
5358 std::string VarName) {
5359 llvm::Constant *MaptypesArrayInit =
5360 llvm::ConstantDataArray::get(M.getContext(), Mappings);
5361 auto *MaptypesArrayGlobal = new llvm::GlobalVariable(
5362 M, MaptypesArrayInit->getType(),
5363 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MaptypesArrayInit,
5364 VarName);
5365 MaptypesArrayGlobal->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
5366 return MaptypesArrayGlobal;
5367}
5368
5369void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
5370 InsertPointTy AllocaIP,
5371 unsigned NumOperands,
5372 struct MapperAllocas &MapperAllocas) {
5373 if (!updateToLocation(Loc))
5374 return;
5375
5376 auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
5377 auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
5378 Builder.restoreIP(AllocaIP);
5379 AllocaInst *ArgsBase = Builder.CreateAlloca(
5380 ArrI8PtrTy, /* ArraySize = */ nullptr, ".offload_baseptrs");
5381 AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy, /* ArraySize = */ nullptr,
5382 ".offload_ptrs");
5383 AllocaInst *ArgSizes = Builder.CreateAlloca(
5384 ArrI64Ty, /* ArraySize = */ nullptr, ".offload_sizes");
5385 Builder.restoreIP(Loc.IP);
5386 MapperAllocas.ArgsBase = ArgsBase;
5387 MapperAllocas.Args = Args;
5388 MapperAllocas.ArgSizes = ArgSizes;
5389}
5390
5391void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
5392 Function *MapperFunc, Value *SrcLocInfo,
5393 Value *MaptypesArg, Value *MapnamesArg,
5394 struct MapperAllocas &MapperAllocas,
5395 int64_t DeviceID, unsigned NumOperands) {
5396 if (!updateToLocation(Loc))
5397 return;
5398
5399 auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
5400 auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
5401 Value *ArgsBaseGEP =
5402 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
5403 {Builder.getInt32(0), Builder.getInt32(0)});
5404 Value *ArgsGEP =
5405 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
5406 {Builder.getInt32(0), Builder.getInt32(0)});
5407 Value *ArgSizesGEP =
5408 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
5409 {Builder.getInt32(0), Builder.getInt32(0)});
5410 Value *NullPtr =
5411 Constant::getNullValue(PointerType::getUnqual(Int8Ptr->getContext()));
5412 Builder.CreateCall(MapperFunc,
5413 {SrcLocInfo, Builder.getInt64(DeviceID),
5414 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
5415 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
5416}
5417
5418void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
5419 TargetDataRTArgs &RTArgs,
5420 TargetDataInfo &Info,
5421 bool EmitDebug,
5422 bool ForEndCall) {
5423 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
5424 "expected region end call to runtime only when end call is separate");
5425 auto UnqualPtrTy = PointerType::getUnqual(M.getContext());
5426 auto VoidPtrTy = UnqualPtrTy;
5427 auto VoidPtrPtrTy = UnqualPtrTy;
5428 auto Int64Ty = Type::getInt64Ty(M.getContext());
5429 auto Int64PtrTy = UnqualPtrTy;
5430
5431 if (!Info.NumberOfPtrs) {
5432 RTArgs.BasePointersArray = ConstantPointerNull::get(VoidPtrPtrTy);
5433 RTArgs.PointersArray = ConstantPointerNull::get(VoidPtrPtrTy);
5434 RTArgs.SizesArray = ConstantPointerNull::get(Int64PtrTy);
5435 RTArgs.MapTypesArray = ConstantPointerNull::get(Int64PtrTy);
5436 RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
5437 RTArgs.MappersArray = ConstantPointerNull::get(VoidPtrPtrTy);
5438 return;
5439 }
5440
5441 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
5442 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs),
5443 Info.RTArgs.BasePointersArray,
5444 /*Idx0=*/0, /*Idx1=*/0);
5445 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
5446 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
5447 /*Idx0=*/0,
5448 /*Idx1=*/0);
5449 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
5450 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
5451 /*Idx0=*/0, /*Idx1=*/0);
5452 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
5453 ArrayType::get(Int64Ty, Info.NumberOfPtrs),
5454 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
5455 : Info.RTArgs.MapTypesArray,
5456 /*Idx0=*/0,
5457 /*Idx1=*/0);
5458
5459 // Only emit the mapper information arrays if debug information is
5460 // requested.
5461 if (!EmitDebug)
5462 RTArgs.MapNamesArray = ConstantPointerNull::get(VoidPtrPtrTy);
5463 else
5464 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
5465 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
5466 /*Idx0=*/0,
5467 /*Idx1=*/0);
5468 // If there is no user-defined mapper, set the mapper array to nullptr to
5469 // avoid an unnecessary data privatization
5470 if (!Info.HasMapper)
5471 RTArgs.MappersArray = ConstantPointerNull::get(VoidPtrPtrTy);
5472 else
5473 RTArgs.MappersArray =
5474 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
5475}
5476
5477void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
5478 InsertPointTy CodeGenIP,
5479 MapInfosTy &CombinedInfo,
5480 TargetDataInfo &Info) {
5481 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
5482 CombinedInfo.NonContigInfo;
5483
5484 // Build an array of struct descriptor_dim and then assign it to
5485 // offload_args.
5486 //
5487 // struct descriptor_dim {
5488 // uint64_t offset;
5489 // uint64_t count;
5490 // uint64_t stride
5491 // };
5492 Type *Int64Ty = Builder.getInt64Ty();
5493 StructType *DimTy = StructType::create(
5494 M.getContext(), ArrayRef<Type *>({Int64Ty, Int64Ty, Int64Ty}),
5495 "struct.descriptor_dim");
5496
5497 enum { OffsetFD = 0, CountFD, StrideFD };
5498 // We need two index variables here since the size of "Dims" is the same as
5499 // the size of Components; however, the size of offset, count, and stride is
5500 // equal to the size of the base declaration that is non-contiguous.
5501 for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
5502 // Skip emitting ir if dimension size is 1 since it cannot be
5503 // non-contiguous.
5504 if (NonContigInfo.Dims[I] == 1)
5505 continue;
5506 Builder.restoreIP(AllocaIP);
5507 ArrayType *ArrayTy = ArrayType::get(DimTy, NonContigInfo.Dims[I]);
5508 AllocaInst *DimsAddr =
5509 Builder.CreateAlloca(ArrayTy, /* ArraySize = */ nullptr, "dims");
5510 Builder.restoreIP(CodeGenIP);
5511 for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
5512 unsigned RevIdx = EE - II - 1;
5513 Value *DimsLVal = Builder.CreateInBoundsGEP(
5514 DimsAddr->getAllocatedType(), DimsAddr,
5515 {Builder.getInt64(0), Builder.getInt64(II)});
5516 // Offset
5517 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
5518 Builder.CreateAlignedStore(
5519 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
5520 M.getDataLayout().getPrefTypeAlign(OffsetLVal->getType()));
5521 // Count
5522 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
5523 Builder.CreateAlignedStore(
5524 NonContigInfo.Counts[L][RevIdx], CountLVal,
5525 M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
5526 // Stride
5527 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
5528 Builder.CreateAlignedStore(
5529 NonContigInfo.Strides[L][RevIdx], StrideLVal,
5530 M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
5531 }
5532 // args[I] = &dims
5533 Builder.restoreIP(CodeGenIP);
5534 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
5535 DimsAddr, Builder.getPtrTy());
5536 Value *P = Builder.CreateConstInBoundsGEP2_32(
5537 ArrayType::get(Builder.getPtrTy(), Info.NumberOfPtrs),
5538 Info.RTArgs.PointersArray, 0, I);
5539 Builder.CreateAlignedStore(
5540 DAddr, P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
5541 ++L;
5542 }
5543}
5544
5545void OpenMPIRBuilder::emitOffloadingArrays(
5546 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
5547 TargetDataInfo &Info, bool IsNonContiguous,
5548 function_ref<void(unsigned int, Value *)> DeviceAddrCB,
5549 function_ref<Value *(unsigned int)> CustomMapperCB) {
5550
5551 // Reset the array information.
5552 Info.clearArrayInfo();
5553 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
5554
5555 if (Info.NumberOfPtrs == 0)
5556 return;
5557
5558 Builder.restoreIP(AllocaIP);
5559 // Detect if we have any capture size requiring runtime evaluation of the
5560 // size so that a constant array could be eventually used.
5561 ArrayType *PointerArrayType =
5562 ArrayType::get(Builder.getPtrTy(), Info.NumberOfPtrs);
5563
5564 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
5565 PointerArrayType, /* ArraySize = */ nullptr, ".offload_baseptrs");
5566
5567 Info.RTArgs.PointersArray = Builder.CreateAlloca(
5568 PointerArrayType, /* ArraySize = */ nullptr, ".offload_ptrs");
5569 AllocaInst *MappersArray = Builder.CreateAlloca(
5570 PointerArrayType, /* ArraySize = */ nullptr, ".offload_mappers");
5571 Info.RTArgs.MappersArray = MappersArray;
5572
5573 // If we don't have any VLA types or other types that require runtime
5574 // evaluation, we can use a constant array for the map sizes, otherwise we
5575 // need to fill up the arrays as we do for the pointers.
5576 Type *Int64Ty = Builder.getInt64Ty();
5577 SmallVector<Constant *> ConstSizes(CombinedInfo.Sizes.size(),
5578 ConstantInt::get(Int64Ty, 0));
5579 SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
5580 for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
5581 if (auto *CI = dyn_cast<Constant>(CombinedInfo.Sizes[I])) {
5582 if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
5583 if (IsNonContiguous &&
5584 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
5585 CombinedInfo.Types[I] &
5586 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
5587 ConstSizes[I] =
5588 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
5589 else
5590 ConstSizes[I] = CI;
5591 continue;
5592 }
5593 }
5594 RuntimeSizes.set(I);
5595 }
5596
5597 if (RuntimeSizes.all()) {
5598 ArrayType *SizeArrayType = ArrayType::get(Int64Ty, Info.NumberOfPtrs);
5599 Info.RTArgs.SizesArray = Builder.CreateAlloca(
5600 SizeArrayType, /* ArraySize = */ nullptr, ".offload_sizes");
5601 Builder.restoreIP(CodeGenIP);
5602 } else {
5603 auto *SizesArrayInit = ConstantArray::get(
5604 ArrayType::get(Int64Ty, ConstSizes.size()), ConstSizes);
5605 std::string Name = createPlatformSpecificName({"offload_sizes"});
5606 auto *SizesArrayGbl =
5607 new GlobalVariable(M, SizesArrayInit->getType(), /*isConstant=*/true,
5608 GlobalValue::PrivateLinkage, SizesArrayInit, Name);
5609 SizesArrayGbl->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
5610
5611 if (!RuntimeSizes.any()) {
5612 Info.RTArgs.SizesArray = SizesArrayGbl;
5613 } else {
5614 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
5615 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
5616 ArrayType *SizeArrayType = ArrayType::get(Int64Ty, Info.NumberOfPtrs);
5617 AllocaInst *Buffer = Builder.CreateAlloca(
5618 SizeArrayType, /* ArraySize = */ nullptr, ".offload_sizes");
5619 Buffer->setAlignment(OffloadSizeAlign);
5620 Builder.restoreIP(CodeGenIP);
5621 Builder.CreateMemCpy(
5622 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()),
5623 SizesArrayGbl, OffloadSizeAlign,
5624 Builder.getIntN(
5625 IndexSize,
5626 Buffer->getAllocationSize(M.getDataLayout())->getFixedValue()));
5627
5628 Info.RTArgs.SizesArray = Buffer;
5629 }
5630 Builder.restoreIP(CodeGenIP);
5631 }
5632
5633 // The map types are always constant so we don't need to generate code to
5634 // fill arrays. Instead, we create an array constant.
5635 SmallVector<uint64_t, 4> Mapping;
5636 for (auto mapFlag : CombinedInfo.Types)
5637 Mapping.push_back(
5638 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
5639 mapFlag));
5640 std::string MaptypesName = createPlatformSpecificName({"offload_maptypes"});
5641 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
5642 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
5643
5644 // The information types are only built if provided.
5645 if (!CombinedInfo.Names.empty()) {
5646 std::string MapnamesName = createPlatformSpecificName({"offload_mapnames"});
5647 auto *MapNamesArrayGbl =
5648 createOffloadMapnames(CombinedInfo.Names, MapnamesName);
5649 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
5650 } else {
5651 Info.RTArgs.MapNamesArray =
5652 Constant::getNullValue(PointerType::getUnqual(Builder.getContext()));
5653 }
5654
5655 // If there's a present map type modifier, it must not be applied to the end
5656 // of a region, so generate a separate map type array in that case.
5657 if (Info.separateBeginEndCalls()) {
5658 bool EndMapTypesDiffer = false;
5659 for (uint64_t &Type : Mapping) {
5660 if (Type & static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
5661 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
5662 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
5663 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
5664 EndMapTypesDiffer = true;
5665 }
5666 }
5667 if (EndMapTypesDiffer) {
5668 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
5669 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
5670 }
5671 }
5672
5673 PointerType *PtrTy = Builder.getPtrTy();
5674 for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
5675 Value *BPVal = CombinedInfo.BasePointers[I];
5676 Value *BP = Builder.CreateConstInBoundsGEP2_32(
5677 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
5678 0, I);
5679 Builder.CreateAlignedStore(BPVal, BP,
5680 M.getDataLayout().getPrefTypeAlign(PtrTy));
5681
5682 if (Info.requiresDevicePointerInfo()) {
5683 if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Pointer) {
5684 CodeGenIP = Builder.saveIP();
5685 Builder.restoreIP(AllocaIP);
5686 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
5687 Builder.restoreIP(CodeGenIP);
5688 if (DeviceAddrCB)
5689 DeviceAddrCB(I, Info.DevicePtrInfoMap[BPVal].second);
5690 } else if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Address) {
5691 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
5692 if (DeviceAddrCB)
5693 DeviceAddrCB(I, BP);
5694 }
5695 }
5696
5697 Value *PVal = CombinedInfo.Pointers[I];
5698 Value *P = Builder.CreateConstInBoundsGEP2_32(
5699 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
5700 I);
5701 // TODO: Check alignment correct.
5702 Builder.CreateAlignedStore(PVal, P,
5703 M.getDataLayout().getPrefTypeAlign(PtrTy));
5704
5705 if (RuntimeSizes.test(I)) {
5706 Value *S = Builder.CreateConstInBoundsGEP2_32(
5707 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
5708 /*Idx0=*/0,
5709 /*Idx1=*/I);
5710 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[I],
5711 Int64Ty,
5712 /*isSigned=*/true),
5713 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
5714 }
5715 // Fill up the mapper array.
5716 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
5717 Value *MFunc = ConstantPointerNull::get(PtrTy);
5718 if (CustomMapperCB)
5719 if (Value *CustomMFunc = CustomMapperCB(I))
5720 MFunc = Builder.CreatePointerCast(CustomMFunc, PtrTy);
5721 Value *MAddr = Builder.CreateInBoundsGEP(
5722 MappersArray->getAllocatedType(), MappersArray,
5723 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
5724 Builder.CreateAlignedStore(
5725 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->getType()));
5726 }
5727
5728 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
5729 Info.NumberOfPtrs == 0)
5730 return;
5731 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo, Info);
5732}
5733
5734void OpenMPIRBuilder::emitBranch(BasicBlock *Target) {
5735 BasicBlock *CurBB = Builder.GetInsertBlock();
5736
5737 if (!CurBB || CurBB->getTerminator()) {
5738 // If there is no insert point or the previous block is already
5739 // terminated, don't touch it.
5740 } else {
5741 // Otherwise, create a fall-through branch.
5742 Builder.CreateBr(Target);
5743 }
5744
5745 Builder.ClearInsertionPoint();
5746}
5747
5748void OpenMPIRBuilder::emitBlock(BasicBlock *BB, Function *CurFn,
5749 bool IsFinished) {
5750 BasicBlock *CurBB = Builder.GetInsertBlock();
5751
5752 // Fall out of the current block (if necessary).
5753 emitBranch(BB);
5754
5755 if (IsFinished && BB->use_empty()) {
5756 BB->eraseFromParent();
5757 return;
5758 }
5759
5760 // Place the block after the current block, if possible, or else at
5761 // the end of the function.
5762 if (CurBB && CurBB->getParent())
5763 CurFn->insert(std::next(CurBB->getIterator()), BB);
5764 else
5765 CurFn->insert(CurFn->end(), BB);
5766 Builder.SetInsertPoint(BB);
5767}
5768
5769void OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
5770 BodyGenCallbackTy ElseGen,
5771 InsertPointTy AllocaIP) {
5772 // If the condition constant folds and can be elided, try to avoid emitting
5773 // the condition and the dead arm of the if/else.
5774 if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
5775 auto CondConstant = CI->getSExtValue();
5776 if (CondConstant)
5777 ThenGen(AllocaIP, Builder.saveIP());
5778 else
5779 ElseGen(AllocaIP, Builder.saveIP());
5780 return;
5781 }
5782
5783 Function *CurFn = Builder.GetInsertBlock()->getParent();
5784
5785 // Otherwise, the condition did not fold, or we couldn't elide it. Just
5786 // emit the conditional branch.
5787 BasicBlock *ThenBlock = BasicBlock::Create(M.getContext(), "omp_if.then");
5788 BasicBlock *ElseBlock = BasicBlock::Create(M.getContext(), "omp_if.else");
5789 BasicBlock *ContBlock = BasicBlock::Create(M.getContext(), "omp_if.end");
5790 Builder.CreateCondBr(Cond, ThenBlock, ElseBlock);
5791 // Emit the 'then' code.
5792 emitBlock(ThenBlock, CurFn);
5793 ThenGen(AllocaIP, Builder.saveIP());
5794 emitBranch(ContBlock);
5795 // Emit the 'else' code if present.
5796 // There is no need to emit line number for unconditional branch.
5797 emitBlock(ElseBlock, CurFn);
5798 ElseGen(AllocaIP, Builder.saveIP());
5799 // There is no need to emit line number for unconditional branch.
5800 emitBranch(ContBlock);
5801 // Emit the continuation block for code after the if.
5802 emitBlock(ContBlock, CurFn, /*IsFinished=*/true);
5803}
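// Usage sketch (illustrative): both arms are supplied as callbacks, e.g.
//
//   auto ThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
//     // emit the 'then' arm
//   };
//   auto ElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
//   emitIfClause(IfCond, ThenGen, ElseGen, AllocaIP);
//
// As handled above, a constant IfCond elides one arm entirely.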
5804
5805bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
5806 const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
5809 "Unexpected Atomic Ordering.");
5810
5811 bool Flush = false;
5812 llvm::AtomicOrdering FlushAO = llvm::AtomicOrdering::Monotonic;
5813
5814 switch (AK) {
5815 case Read:
5816 if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
5817 AO == AtomicOrdering::SequentiallyConsistent) {
5818 FlushAO = AtomicOrdering::Acquire;
5819 Flush = true;
5820 }
5821 break;
5822 case Write:
5823 case Compare:
5824 case Update:
5825 if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
5826 AO == AtomicOrdering::SequentiallyConsistent) {
5827 FlushAO = AtomicOrdering::Release;
5828 Flush = true;
5829 }
5830 break;
5831 case Capture:
5832 switch (AO) {
5833 case AtomicOrdering::Acquire:
5834 FlushAO = AtomicOrdering::Acquire;
5835 Flush = true;
5836 break;
5837 case AtomicOrdering::Release:
5838 FlushAO = AtomicOrdering::Release;
5839 Flush = true;
5840 break;
5841 case AtomicOrdering::AcquireRelease:
5842 case AtomicOrdering::SequentiallyConsistent:
5843 FlushAO = AtomicOrdering::AcquireRelease;
5844 Flush = true;
5845 break;
5846 default:
5847 // do nothing - leave silently.
5848 break;
5849 }
5850 }
5851
5852 if (Flush) {
5853 // The flush runtime call does not yet take a memory ordering argument, so
5854 // resolve which ordering would be used once support is added, but for now
5855 // just issue the flush call itself.
5856 // TODO: pass `FlushAO` after memory ordering support is added
5857 (void)FlushAO;
5858 emitFlush(Loc);
5859 }
5860
5861 // for AO == AtomicOrdering::Monotonic and all other case combinations
5862 // do nothing
5863 return Flush;
5864}
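// Worked example (sketch): an atomic write with AO = Release reaches the
// Write case above, so Flush becomes true and a flush runtime call is
// emitted after the store; with AO = Monotonic nothing is emitted.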
5865
5866OpenMPIRBuilder::InsertPointTy
5867OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
5868 AtomicOpValue &X, AtomicOpValue &V,
5869 AtomicOrdering AO) {
5870 if (!updateToLocation(Loc))
5871 return Loc.IP;
5872
5873 assert(X.Var->getType()->isPointerTy() &&
5874 "OMP Atomic expects a pointer to target memory");
5875 Type *XElemTy = X.ElemTy;
5876 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
5877 XElemTy->isPointerTy()) &&
5878 "OMP atomic read expected a scalar type");
5879
5880 Value *XRead = nullptr;
5881
5882 if (XElemTy->isIntegerTy()) {
5883 LoadInst *XLD =
5884 Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
5885 XLD->setAtomic(AO);
5886 XRead = cast<Value>(XLD);
5887 } else {
5888 // We need to perform atomic op as integer
5889 IntegerType *IntCastTy =
5890 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
5891 LoadInst *XLoad =
5892 Builder.CreateLoad(IntCastTy, X.Var, X.IsVolatile, "omp.atomic.load");
5893 XLoad->setAtomic(AO);
5894 if (XElemTy->isFloatingPointTy()) {
5895 XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
5896 } else {
5897 XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
5898 }
5899 }
5900 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
5901 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
5902 return Builder.saveIP();
5903}
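// Illustrative usage for 'v = x' with seq_cst ordering (a sketch; the
// variable addresses and the element type are assumptions):
//
//   OpenMPIRBuilder::AtomicOpValue X{XAddr, Int32Ty, /*IsSigned=*/true,
//                                    /*IsVolatile=*/false};
//   OpenMPIRBuilder::AtomicOpValue V{VAddr, Int32Ty, /*IsSigned=*/true,
//                                    /*IsVolatile=*/false};
//   Builder.restoreIP(OMPBuilder.createAtomicRead(
//       Loc, X, V, AtomicOrdering::SequentiallyConsistent));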
5904
5905OpenMPIRBuilder::InsertPointTy
5906OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
5907 AtomicOpValue &X, Value *Expr,
5908 AtomicOrdering AO) {
5909 if (!updateToLocation(Loc))
5910 return Loc.IP;
5911
5912 assert(X.Var->getType()->isPointerTy() &&
5913 "OMP Atomic expects a pointer to target memory");
5914 Type *XElemTy = X.ElemTy;
5915 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
5916 XElemTy->isPointerTy()) &&
5917 "OMP atomic write expected a scalar type");
5918
5919 if (XElemTy->isIntegerTy()) {
5920 StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
5921 XSt->setAtomic(AO);
5922 } else {
5923 // We need to bitcast and perform atomic op as integers
5924 IntegerType *IntCastTy =
5925 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
5926 Value *ExprCast =
5927 Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
5928 StoreInst *XSt = Builder.CreateStore(ExprCast, X.Var, X.IsVolatile);
5929 XSt->setAtomic(AO);
5930 }
5931
5932 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
5933 return Builder.saveIP();
5934}
5935
5936OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
5937 const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
5938 Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
5939 AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
5940 assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
5941 if (!updateToLocation(Loc))
5942 return Loc.IP;
5943
5944 LLVM_DEBUG({
5945 Type *XTy = X.Var->getType();
5946 assert(XTy->isPointerTy() &&
5947 "OMP Atomic expects a pointer to target memory");
5948 Type *XElemTy = X.ElemTy;
5949 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
5950 XElemTy->isPointerTy()) &&
5951 "OMP atomic update expected a scalar type");
5952 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
5953 (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
5954 "OpenMP atomic does not support LT or GT operations");
5955 });
5956
5957 emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
5958 X.IsVolatile, IsXBinopExpr);
5959 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
5960 return Builder.saveIP();
5961}
5962
5963// FIXME: Duplicating AtomicExpand
5964Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
5965 AtomicRMWInst::BinOp RMWOp) {
5966 switch (RMWOp) {
5967 case AtomicRMWInst::Add:
5968 return Builder.CreateAdd(Src1, Src2);
5969 case AtomicRMWInst::Sub:
5970 return Builder.CreateSub(Src1, Src2);
5971 case AtomicRMWInst::And:
5972 return Builder.CreateAnd(Src1, Src2);
5973 case AtomicRMWInst::Nand:
5974 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
5975 case AtomicRMWInst::Or:
5976 return Builder.CreateOr(Src1, Src2);
5977 case AtomicRMWInst::Xor:
5978 return Builder.CreateXor(Src1, Src2);
5979 case AtomicRMWInst::Xchg:
5980 case AtomicRMWInst::FAdd:
5981 case AtomicRMWInst::FSub:
5982 case AtomicRMWInst::BAD_BINOP:
5983 case AtomicRMWInst::Max:
5984 case AtomicRMWInst::Min:
5985 case AtomicRMWInst::UMax:
5986 case AtomicRMWInst::UMin:
5987 case AtomicRMWInst::FMax:
5988 case AtomicRMWInst::FMin:
5989 case AtomicRMWInst::UIncWrap:
5990 case AtomicRMWInst::UDecWrap:
5991 llvm_unreachable("Unsupported atomic update operation");
5992 }
5993 llvm_unreachable("Unsupported atomic update operation");
5994}
5995
5996std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
5997 InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
5998 AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
5999 AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
6000 // TODO: handle the case where XElemTy is not byte-sized or not a power of 2
6001 // or a complex datatype.
6002 bool emitRMWOp = false;
6003 switch (RMWOp) {
6004 case AtomicRMWInst::Add:
6005 case AtomicRMWInst::And:
6006 case AtomicRMWInst::Nand:
6007 case AtomicRMWInst::Or:
6008 case AtomicRMWInst::Xor:
6009 case AtomicRMWInst::Xchg:
6010 emitRMWOp = XElemTy;
6011 break;
6012 case AtomicRMWInst::Sub:
6013 emitRMWOp = (IsXBinopExpr && XElemTy);
6014 break;
6015 default:
6016 emitRMWOp = false;
6017 }
6018 emitRMWOp &= XElemTy->isIntegerTy();
6019
6020 std::pair<Value *, Value *> Res;
6021 if (emitRMWOp) {
6022 Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
6023 // not needed except in case of postfix captures. Generate anyway for
6024 // consistency with the else part. Will be removed with any DCE pass.
6025 // AtomicRMWInst::Xchg does not have a corresponding instruction.
6026 if (RMWOp == AtomicRMWInst::Xchg)
6027 Res.second = Res.first;
6028 else
6029 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
6030 } else {
6031 IntegerType *IntCastTy =
6032 IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
6033 LoadInst *OldVal =
6034 Builder.CreateLoad(IntCastTy, X, X->getName() + ".atomic.load");
6035 OldVal->setAtomic(AO);
6036 // CurBB
6037 // | /---\
6038 // ContBB |
6039 // | \---/
6040 // ExitBB
6041 BasicBlock *CurBB = Builder.GetInsertBlock();
6042 Instruction *CurBBTI = CurBB->getTerminator();
6043 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
6044 BasicBlock *ExitBB =
6045 CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
6046 BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
6047 X->getName() + ".atomic.cont");
6048 ContBB->getTerminator()->eraseFromParent();
6049 Builder.restoreIP(AllocaIP);
6050 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
6051 NewAtomicAddr->setName(X->getName() + "x.new.val");
6052 Builder.SetInsertPoint(ContBB);
6053 llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
6054 PHI->addIncoming(OldVal, CurBB);
6055 bool IsIntTy = XElemTy->isIntegerTy();
6056 Value *OldExprVal = PHI;
6057 if (!IsIntTy) {
6058 if (XElemTy->isFloatingPointTy()) {
6059 OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
6060 X->getName() + ".atomic.fltCast");
6061 } else {
6062 OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
6063 X->getName() + ".atomic.ptrCast");
6064 }
6065 }
6066
6067 Value *Upd = UpdateOp(OldExprVal, Builder);
6068 Builder.CreateStore(Upd, NewAtomicAddr);
6069 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
6070 AtomicOrdering Failure =
6071 llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
6072 AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
6073 X, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
6074 Result->setVolatile(VolatileX);
6075 Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
6076 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
6077 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
6078 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
6079
6080 Res.first = OldExprVal;
6081 Res.second = Upd;
6082
6083 // set Insertion point in exit block
6084 if (UnreachableInst *ExitTI =
6085 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
6086 CurBBTI->eraseFromParent();
6087 Builder.SetInsertPoint(ExitBB);
6088 } else {
6089 Builder.SetInsertPoint(ExitTI);
6090 }
6091 }
6092
6093 return Res;
6094}
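// Sketch of the compare-exchange loop emitted on the non-atomicrmw path
// above (illustrative, names abbreviated):
//
//   %old = load atomic i32, ptr %x <AO>
//   br label %x.atomic.cont
//   x.atomic.cont:
//     %phi = phi i32 [ %old, %entry ], [ %prev, %x.atomic.cont ]
//     %upd = ... UpdateOp(%phi) ...
//     %pair = cmpxchg ptr %x, i32 %phi, i32 %upd <AO> <Failure>
//     %prev = extractvalue { i32, i1 } %pair, 0
//     %ok = extractvalue { i32, i1 } %pair, 1
//     br i1 %ok, label %x.atomic.exit, label %x.atomic.cont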
6095
6096OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
6097 const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
6098 AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
6099 AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
6100 bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
6101 if (!updateToLocation(Loc))
6102 return Loc.IP;
6103
6104 LLVM_DEBUG({
6105 Type *XTy = X.Var->getType();
6106 assert(XTy->isPointerTy() &&
6107 "OMP Atomic expects a pointer to target memory");
6108 Type *XElemTy = X.ElemTy;
6109 assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
6110 XElemTy->isPointerTy()) &&
6111 "OMP atomic capture expected a scalar type");
6112 assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
6113 "OpenMP atomic does not support LT or GT operations");
6114 });
6115
6116 // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
6117 // 'x' is simply atomically rewritten with 'expr'.
6118 AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
6119 std::pair<Value *, Value *> Result =
6120 emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
6121 X.IsVolatile, IsXBinopExpr);
6122
6123 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
6124 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
6125
6126 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
6127 return Builder.saveIP();
6128}
6129
6130OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
6131 const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
6132 AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
6133 omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
6134 bool IsFailOnly) {
6135
6136 AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
6137 return createAtomicCompare(Loc, X, V, R, E, D, AO, Op, IsXBinopExpr,
6138 IsPostfixUpdate, IsFailOnly, Failure);
6139}
6140
6141OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
6142 const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
6143 AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO,
6144 omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate,
6145 bool IsFailOnly, AtomicOrdering Failure) {
6146
6147 if (!updateToLocation(Loc))
6148 return Loc.IP;
6149
6150 assert(X.Var->getType()->isPointerTy() &&
6151 "OMP atomic expects a pointer to target memory");
6152 // compare capture
6153 if (V.Var) {
6154 assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
6155 assert(V.ElemTy == X.ElemTy && "x and v must be of same type");
6156 }
6157
6158 bool IsInteger = E->getType()->isIntegerTy();
6159
6160 if (Op == OMPAtomicCompareOp::EQ) {
6161 AtomicCmpXchgInst *Result = nullptr;
6162 if (!IsInteger) {
6163 IntegerType *IntCastTy =
6164 IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
6165 Value *EBCast = Builder.CreateBitCast(E, IntCastTy);
6166 Value *DBCast = Builder.CreateBitCast(D, IntCastTy);
6167 Result = Builder.CreateAtomicCmpXchg(X.Var, EBCast, DBCast, MaybeAlign(),
6168 AO, Failure);
6169 } else {
6170 Result =
6171 Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
6172 }
6173
6174 if (V.Var) {
6175 Value *OldValue = Builder.CreateExtractValue(Result, /*Idxs=*/0);
6176 if (!IsInteger)
6177 OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
6178 assert(OldValue->getType() == V.ElemTy &&
6179 "OldValue and V must be of same type");
6180 if (IsPostfixUpdate) {
6181 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
6182 } else {
6183 Value *SuccessOrFail = Builder.CreateExtractValue(Result, /*Idxs=*/1);
6184 if (IsFailOnly) {
6185 // CurBB----
6186 // | |
6187 // v |
6188 // ContBB |
6189 // | |
6190 // v |
6191 // ExitBB <-
6192 //
6193 // where ContBB only contains the store of old value to 'v'.
6194 BasicBlock *CurBB = Builder.GetInsertBlock();
6195 Instruction *CurBBTI = CurBB->getTerminator();
6196 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
6197 BasicBlock *ExitBB = CurBB->splitBasicBlock(
6198 CurBBTI, X.Var->getName() + ".atomic.exit");
6199 BasicBlock *ContBB = CurBB->splitBasicBlock(
6200 CurBB->getTerminator(), X.Var->getName() + ".atomic.cont");
6201 ContBB->getTerminator()->eraseFromParent();
6202 CurBB->getTerminator()->eraseFromParent();
6203
6204 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
6205
6206 Builder.SetInsertPoint(ContBB);
6207 Builder.CreateStore(OldValue, V.Var);
6208 Builder.CreateBr(ExitBB);
6209
6210 if (UnreachableInst *ExitTI =
6211 dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
6212 CurBBTI->eraseFromParent();
6213 Builder.SetInsertPoint(ExitBB);
6214 } else {
6215 Builder.SetInsertPoint(ExitTI);
6216 }
6217 } else {
6218 Value *CapturedValue =
6219 Builder.CreateSelect(SuccessOrFail, E, OldValue);
6220 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
6221 }
6222 }
6223 }
6224 // The comparison result has to be stored.
6225 if (R.Var) {
6226 assert(R.Var->getType()->isPointerTy() &&
6227 "r.var must be of pointer type");
6228 assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
6229
6230 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
6231 Value *ResultCast = R.IsSigned
6232 ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
6233 : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
6234 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
6235 }
6236 } else {
6237 assert((Op == OMPAtomicCompareOp::MAX || Op == OMPAtomicCompareOp::MIN) &&
6238 "Op should be either max or min at this point");
6239 assert(!IsFailOnly && "IsFailOnly is only valid when the comparison is ==");
6240
6241 // Reverse the ordop as the OpenMP forms are different from LLVM forms.
6242 // Let's take max as example.
6243 // OpenMP form:
6244 // x = x > expr ? expr : x;
6245 // LLVM form:
6246 // *ptr = *ptr > val ? *ptr : val;
6247 // We need to transform to LLVM form.
6248 // x = x <= expr ? x : expr;
6249 AtomicRMWInst::BinOp NewOp;
6250 if (IsXBinopExpr) {
6251 if (IsInteger) {
6252 if (X.IsSigned)
6253 NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Min
6254 : AtomicRMWInst::Max;
6255 else
6256 NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMin
6257 : AtomicRMWInst::UMax;
6258 } else {
6259 NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMin
6260 : AtomicRMWInst::FMax;
6261 }
6262 } else {
6263 if (IsInteger) {
6264 if (X.IsSigned)
6265 NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::Max
6266 : AtomicRMWInst::Min;
6267 else
6268 NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::UMax
6269 : AtomicRMWInst::UMin;
6270 } else {
6271 NewOp = Op == OMPAtomicCompareOp::MAX ? AtomicRMWInst::FMax
6272 : AtomicRMWInst::FMin;
6273 }
6274 }
6275
6276 AtomicRMWInst *OldValue =
6277 Builder.CreateAtomicRMW(NewOp, X.Var, E, MaybeAlign(), AO);
6278 if (V.Var) {
6279 Value *CapturedValue = nullptr;
6280 if (IsPostfixUpdate) {
6281 CapturedValue = OldValue;
6282 } else {
6283 CmpInst::Predicate Pred;
6284 switch (NewOp) {
6285 case AtomicRMWInst::Max:
6286 Pred = CmpInst::ICMP_SGT;
6287 break;
6288 case AtomicRMWInst::UMax:
6289 Pred = CmpInst::ICMP_UGT;
6290 break;
6291 case AtomicRMWInst::FMax:
6292 Pred = CmpInst::FCMP_OGT;
6293 break;
6294 case AtomicRMWInst::Min:
6295 Pred = CmpInst::ICMP_SLT;
6296 break;
6297 case AtomicRMWInst::UMin:
6298 Pred = CmpInst::ICMP_ULT;
6299 break;
6300 case AtomicRMWInst::FMin:
6301 Pred = CmpInst::FCMP_OLT;
6302 break;
6303 default:
6304 llvm_unreachable("unexpected comparison op");
6305 }
6306 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue, E);
6307 CapturedValue = Builder.CreateSelect(NonAtomicCmp, E, OldValue);
6308 }
6309 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
6310 }
6311 }
6312
6313 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
6314
6315 return Builder.saveIP();
6316}
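The min/max branch above boils down to a small selection table. The following standalone sketch (plain C++ with no LLVM dependency; the enums are local stand-ins for `OMPAtomicCompareOp` and `AtomicRMWInst::BinOp`, so this is an illustration, not the builder's API) mirrors the integer cases: with `x` on the left of the comparison (`IsXBinopExpr`), the OpenMP MAX form keeps the smaller value and therefore lowers to an atomic min, and vice versa.

```cpp
#include <cassert>

enum class CompareOp { MAX, MIN };         // stand-in for OMPAtomicCompareOp
enum class RMWOp { Max, Min, UMax, UMin }; // stand-in for AtomicRMWInst::BinOp

// Mirrors the integer branches of the selection above.
static RMWOp selectRMWOp(CompareOp Op, bool IsXBinopExpr, bool IsSigned) {
  if (IsXBinopExpr) // x = x > e ? e : x  -> the smaller value survives
    return Op == CompareOp::MAX ? (IsSigned ? RMWOp::Min : RMWOp::UMin)
                                : (IsSigned ? RMWOp::Max : RMWOp::UMax);
  // x = e > x ? e : x -> the larger value survives
  return Op == CompareOp::MAX ? (IsSigned ? RMWOp::Max : RMWOp::UMax)
                              : (IsSigned ? RMWOp::Min : RMWOp::UMin);
}

int main() {
  assert(selectRMWOp(CompareOp::MAX, /*IsXBinopExpr=*/true,
                     /*IsSigned=*/true) == RMWOp::Min);
  assert(selectRMWOp(CompareOp::MIN, /*IsXBinopExpr=*/false,
                     /*IsSigned=*/false) == RMWOp::UMin);
  return 0;
}
```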
6317
6318 OpenMPIRBuilder::InsertPointTy
6319 OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
6320                              BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower,
6321 Value *NumTeamsUpper, Value *ThreadLimit,
6322 Value *IfExpr) {
6323 if (!updateToLocation(Loc))
6324 return InsertPointTy();
6325
6326 uint32_t SrcLocStrSize;
6327 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6328 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6329 Function *CurrentFunction = Builder.GetInsertBlock()->getParent();
6330
6331 // Outer allocation basicblock is the entry block of the current function.
6332 BasicBlock &OuterAllocaBB = CurrentFunction->getEntryBlock();
6333 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
6334 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.entry");
6335 Builder.SetInsertPoint(BodyBB, BodyBB->begin());
6336 }
6337
6338 // The current basic block is split into four basic blocks. After outlining,
6339 // they will be mapped as follows:
6340 // ```
6341 // def current_fn() {
6342 // current_basic_block:
6343 // br label %teams.exit
6344 // teams.exit:
6345 // ; instructions after teams
6346 // }
6347 //
6348 // def outlined_fn() {
6349 // teams.alloca:
6350 // br label %teams.body
6351 // teams.body:
6352 // ; instructions within teams body
6353 // }
6354 // ```
6355 BasicBlock *ExitBB = splitBB(Builder, /*CreateBranch=*/true, "teams.exit");
6356 BasicBlock *BodyBB = splitBB(Builder, /*CreateBranch=*/true, "teams.body");
6357 BasicBlock *AllocaBB =
6358 splitBB(Builder, /*CreateBranch=*/true, "teams.alloca");
6359
6360 bool SubClausesPresent =
6361 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
6362 // Push num_teams
6363 if (!Config.isTargetDevice() && SubClausesPresent) {
6364 assert((NumTeamsLower == nullptr || NumTeamsUpper != nullptr) &&
6365 "if lowerbound is non-null, then upperbound must also be non-null "
6366 "for bounds on num_teams");
6367
6368 if (NumTeamsUpper == nullptr)
6369 NumTeamsUpper = Builder.getInt32(0);
6370
6371 if (NumTeamsLower == nullptr)
6372 NumTeamsLower = NumTeamsUpper;
6373
6374 if (IfExpr) {
6375 assert(IfExpr->getType()->isIntegerTy() &&
6376 "argument to if clause must be an integer value");
6377
6378 // upper = ifexpr ? upper : 1
6379 if (IfExpr->getType() != Int1)
6380 IfExpr = Builder.CreateICmpNE(IfExpr,
6381 ConstantInt::get(IfExpr->getType(), 0));
6382 NumTeamsUpper = Builder.CreateSelect(
6383 IfExpr, NumTeamsUpper, Builder.getInt32(1), "numTeamsUpper");
6384
6385 // lower = ifexpr ? lower : 1
6386 NumTeamsLower = Builder.CreateSelect(
6387 IfExpr, NumTeamsLower, Builder.getInt32(1), "numTeamsLower");
6388 }
6389
6390 if (ThreadLimit == nullptr)
6391 ThreadLimit = Builder.getInt32(0);
6392
6393 Value *ThreadNum = getOrCreateThreadID(Ident);
6394 Builder.CreateCall(
6395 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
6396 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
6397 }
6398 // Generate the body of teams.
6399 InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
6400 InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
6401 BodyGenCB(AllocaIP, CodeGenIP);
6402
6403 OutlineInfo OI;
6404 OI.EntryBB = AllocaBB;
6405 OI.ExitBB = ExitBB;
6406 OI.OuterAllocaBB = &OuterAllocaBB;
6407
6408 // Insert fake values for global tid and bound tid.
6409 std::stack<Instruction *> ToBeDeleted;
6410 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin());
6411 OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
6412 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true));
6413 OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
6414 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
6415
6416 auto HostPostOutlineCB = [this, Ident,
6417 ToBeDeleted](Function &OutlinedFn) mutable {
6418 // The stale call instruction will be replaced with a new call instruction
6419 // for runtime call with the outlined function.
6420
6421 assert(OutlinedFn.getNumUses() == 1 &&
6422 "there must be a single user for the outlined function");
6423 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
6424 ToBeDeleted.push(StaleCI);
6425
6426 assert((OutlinedFn.arg_size() == 2 || OutlinedFn.arg_size() == 3) &&
6427 "Outlined function must have two or three arguments only");
6428
6429 bool HasShared = OutlinedFn.arg_size() == 3;
6430
6431 OutlinedFn.getArg(0)->setName("global.tid.ptr");
6432 OutlinedFn.getArg(1)->setName("bound.tid.ptr");
6433 if (HasShared)
6434 OutlinedFn.getArg(2)->setName("data");
6435
6436 // Call to the runtime function for teams in the current function.
6437 assert(StaleCI && "Error while outlining - no CallInst user found for the "
6438 "outlined function.");
6439 Builder.SetInsertPoint(StaleCI);
6440 SmallVector<Value *> Args = {
6441 Ident, Builder.getInt32(StaleCI->arg_size() - 2), &OutlinedFn};
6442 if (HasShared)
6443 Args.push_back(StaleCI->getArgOperand(2));
6444 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
6445 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
6446 Args);
6447
6448 while (!ToBeDeleted.empty()) {
6449 ToBeDeleted.top()->eraseFromParent();
6450 ToBeDeleted.pop();
6451 }
6452 };
6453
6454 if (!Config.isTargetDevice())
6455 OI.PostOutlineCB = HostPostOutlineCB;
6456
6457 addOutlineInfo(std::move(OI));
6458
6459 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
6460
6461 return Builder.saveIP();
6462}
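For reference, here is a hedged sketch of the host-side shape this lowering produces: the region body is outlined, and the original function is left with a push of the team bounds followed by the fork call. The prototypes are abbreviated stand-ins for the real libomp declarations, the bound values are invented, and the snippet will not link without the OpenMP runtime.

```cpp
#include <cstdint>

// Abbreviated stand-ins for the libomp entry points used above.
struct ident_t;
extern "C" void __kmpc_push_num_teams_51(ident_t *, int32_t GTid,
                                         int32_t Lower, int32_t Upper,
                                         int32_t ThreadLimit);
extern "C" void __kmpc_fork_teams(ident_t *, int32_t NumArgs,
                                  void *OutlinedFn, ...);

// teams.alloca/teams.body end up outlined into a function of this shape.
static void outlined_teams_fn(int32_t *GlobalTidPtr, int32_t *BoundTidPtr) {
  (void)GlobalTidPtr;
  (void)BoundTidPtr;
  // ... instructions within the teams body ...
}

void current_fn(ident_t *Ident, int32_t GTid) {
  // Invented example bounds; with an if clause both bounds collapse to 1 on
  // the false branch, as in the selects above.
  __kmpc_push_num_teams_51(Ident, GTid, /*Lower=*/4, /*Upper=*/8,
                           /*ThreadLimit=*/64);
  __kmpc_fork_teams(Ident, /*NumArgs=*/0, (void *)&outlined_teams_fn);
}
```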
6463
6464 GlobalVariable *
6465 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
6466 std::string VarName) {
6467 llvm::Constant *MapNamesArrayInit = llvm::ConstantArray::get(
6468 llvm::ArrayType::get(llvm::PointerType::getUnqual(M.getContext()),
6469 Names.size()),
6470 Names);
6471 auto *MapNamesArrayGlobal = new llvm::GlobalVariable(
6472 M, MapNamesArrayInit->getType(),
6473 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage, MapNamesArrayInit,
6474 VarName);
6475 return MapNamesArrayGlobal;
6476}
6477
6478// Create all simple and struct types exposed by the runtime and remember
6479// the llvm::PointerTypes of them for easy access later.
6480void OpenMPIRBuilder::initializeTypes(Module &M) {
6481 LLVMContext &Ctx = M.getContext();
6482 StructType *T;
6483#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
6484#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
6485 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
6486 VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
6487#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
6488 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
6489 VarName##Ptr = PointerType::getUnqual(VarName);
6490#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
6491 T = StructType::getTypeByName(Ctx, StructName); \
6492 if (!T) \
6493 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
6494 VarName = T; \
6495 VarName##Ptr = PointerType::getUnqual(T);
6496#include "llvm/Frontend/OpenMP/OMPKinds.def"
6497}
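initializeTypes relies on the X-macro idiom: OMPKinds.def contains a list of OMP_TYPE/OMP_ARRAY_TYPE/OMP_FUNCTION_TYPE/OMP_STRUCT_TYPE invocations, and each (re)definition of those macros expands the same list into different code. A toy, self-contained illustration of the idiom follows; the list and the names in it are invented and are not the real OMPKinds.def.

```cpp
#include <cstdio>

// Stand-in for an OMPKinds.def-style list of entities.
#define TOY_TYPE_LIST(OMP_TYPE)                                                \
  OMP_TYPE(Int32)                                                              \
  OMP_TYPE(Int64)                                                              \
  OMP_TYPE(VoidPtr)

int main() {
  // First expansion of the list: print the names.
#define OMP_TYPE(VarName) std::printf("type: %s\n", #VarName);
  TOY_TYPE_LIST(OMP_TYPE)
#undef OMP_TYPE
  // Second expansion of the same list: count the entries.
  int Count = 0;
#define OMP_TYPE(VarName) ++Count;
  TOY_TYPE_LIST(OMP_TYPE)
#undef OMP_TYPE
  std::printf("count: %d\n", Count);
  return 0;
}
```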
6498
6499 void OpenMPIRBuilder::OutlineInfo::collectBlocks(
6500 SmallPtrSetImpl<BasicBlock *> &BlockSet,
6501 SmallVectorImpl<BasicBlock *> &BlockVector) {
6502 SmallVector<BasicBlock *, 32> Worklist;
6503 BlockSet.insert(EntryBB);
6504 BlockSet.insert(ExitBB);
6505
6506 Worklist.push_back(EntryBB);
6507 while (!Worklist.empty()) {
6508 BasicBlock *BB = Worklist.pop_back_val();
6509 BlockVector.push_back(BB);
6510 for (BasicBlock *SuccBB : successors(BB))
6511 if (BlockSet.insert(SuccBB).second)
6512 Worklist.push_back(SuccBB);
6513 }
6514}
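The traversal above is a plain worklist walk over successor edges; because ExitBB is pre-inserted into BlockSet, the exit itself is never pushed and therefore never appears in BlockVector. The same pattern on a toy adjacency list (a standalone sketch, no LLVM types):

```cpp
#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Toy CFG: 0 -> {1, 2}, 1 -> {3}, 2 -> {3}; entry = 0, exit = 3.
  std::vector<std::vector<int>> Succ = {{1, 2}, {3}, {3}, {}};
  std::set<int> BlockSet = {0, 3}; // entry and exit pre-seeded, as above
  std::vector<int> Worklist = {0}, BlockVector;
  while (!Worklist.empty()) {
    int BB = Worklist.back();
    Worklist.pop_back();
    BlockVector.push_back(BB);
    for (int S : Succ[BB])
      if (BlockSet.insert(S).second) // only newly-seen successors are queued
        Worklist.push_back(S);
  }
  for (int BB : BlockVector)
    std::printf("%d ", BB); // prints "0 2 1"; the exit (3) is never visited
  std::printf("\n");
  return 0;
}
```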
6515
6516 void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
6517 uint64_t Size, int32_t Flags,
6518 GlobalValue::LinkageTypes,
6519 StringRef Name) {
6520 if (!Config.isGPU()) {
6521 llvm::offloading::emitOffloadingEntry(
6522 M, ID, Name.empty() ? Addr->getName() : Name, Size, Flags, /*Data=*/0,
6523 "omp_offloading_entries");
6524 return;
6525 }
6526 // TODO: Add support for global variables on the device after declare target
6527 // support.
6528 Function *Fn = dyn_cast<Function>(Addr);
6529 if (!Fn)
6530 return;
6531
6532 Module &M = *(Fn->getParent());
6533 LLVMContext &Ctx = M.getContext();
6534
6535 // Get "nvvm.annotations" metadata node.
6536 NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
6537
6538 Metadata *MDVals[] = {
6539 ConstantAsMetadata::get(Fn), MDString::get(Ctx, "kernel"),
6540 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))};
6541 // Append metadata to nvvm.annotations.
6542 MD->addOperand(MDNode::get(Ctx, MDVals));
6543
6544 // Add a function attribute for the kernel.
6545 Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
6546 if (T.isAMDGCN())
6547 Fn->addFnAttr("uniform-work-group-size", "true");
6548 Fn->addFnAttr(Attribute::MustProgress);
6549}
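On the GPU path this emits both the legacy !nvvm.annotations entry and the "kernel" function attribute. A hedged sketch of the resulting IR shape, with an invented function name:

```cpp
// Illustrative shape of the annotated kernel after the code above runs:
//
//   define void @foo_kernel() #0 { ... }
//
//   attributes #0 = { mustprogress "kernel" "uniform-work-group-size"="true" }
//
//   !nvvm.annotations = !{!0}
//   !0 = !{ptr @foo_kernel, !"kernel", i32 1}
//
// "uniform-work-group-size" is only added for AMDGCN targets.
```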
6550
6551// We only generate metadata for functions that contain target regions.
6552 void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
6553 EmitMetadataErrorReportFunctionTy &ErrorFn) {
6554
6555 // If there are no entries, we don't need to do anything.
6556 if (OffloadInfoManager.empty())
6557 return;
6558
6559 LLVMContext &C = M.getContext();
6560 SmallVector<std::pair<const OffloadEntriesInfoManager::OffloadEntryInfo *,
6561 TargetRegionEntryInfo>,
6562 16>
6563 OrderedEntries(OffloadInfoManager.size());
6564
6565 // Auxiliary methods to create metadata values and strings.
6566 auto &&GetMDInt = [this](unsigned V) {
6567 return ConstantAsMetadata::get(ConstantInt::get(Builder.getInt32Ty(), V));
6568 };
6569
6570 auto &&GetMDString = [&C](StringRef V) { return MDString::get(C, V); };
6571
6572 // Create the offloading info metadata node.
6573 NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
6574 auto &&TargetRegionMetadataEmitter =
6575 [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
6576 const TargetRegionEntryInfo &EntryInfo,
6577 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &E) {
6578 // Generate metadata for target regions. Each entry of this metadata
6579 // contains:
6580 // - Entry 0 -> Kind of this type of metadata (0).
6581 // - Entry 1 -> Device ID of the file where the entry was identified.
6582 // - Entry 2 -> File ID of the file where the entry was identified.
6583 // - Entry 3 -> Mangled name of the function where the entry was
6584 // identified.
6585 // - Entry 4 -> Line in the file where the entry was identified.
6586 // - Entry 5 -> Count of regions at this DeviceID/FilesID/Line.
6587 // - Entry 6 -> Order the entry was created.
6588 // The first element of the metadata node is the kind.
6589 Metadata *Ops[] = {
6590 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
6591 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
6592 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
6593 GetMDInt(E.getOrder())};
6594
6595 // Save this entry in the right position of the ordered entries array.
6596 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
6597
6598 // Add metadata to the named metadata node.
6599 MD->addOperand(MDNode::get(C, Ops));
6600 };
6601
6602 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
6603
6604 // Create function that emits metadata for each device global variable entry;
6605 auto &&DeviceGlobalVarMetadataEmitter =
6606 [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
6607 StringRef MangledName,
6608 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &E) {
6609 // Generate metadata for global variables. Each entry of this metadata
6610 // contains:
6611 // - Entry 0 -> Kind of this type of metadata (1).
6612 // - Entry 1 -> Mangled name of the variable.
6613 // - Entry 2 -> Declare target kind.
6614 // - Entry 3 -> Order the entry was created.
6615 // The first element of the metadata node is the kind.
6616 Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
6617 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
6618
6619 // Save this entry in the right position of the ordered entries array.
6620 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
6621 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
6622
6623 // Add metadata to the named metadata node.
6624 MD->addOperand(MDNode::get(C, Ops));
6625 };
6626
6627 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
6628 DeviceGlobalVarMetadataEmitter);
6629
6630 for (const auto &E : OrderedEntries) {
6631 assert(E.first && "All ordered entries must exist!");
6632 if (const auto *CE =
6633 dyn_cast<OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion>(
6634 E.first)) {
6635 if (!CE->getID() || !CE->getAddress()) {
6636 // Do not blame the entry if the parent function is not emitted.
6637 TargetRegionEntryInfo EntryInfo = E.second;
6638 StringRef FnName = EntryInfo.ParentName;
6639 if (!M.getNamedValue(FnName))
6640 continue;
6641 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
6642 continue;
6643 }
6644 createOffloadEntry(CE->getID(), CE->getAddress(),
6645 /*Size=*/0, CE->getFlags(),
6646 GlobalValue::WeakAnyLinkage);
6647 } else if (const auto *CE = dyn_cast<
6648 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
6649 E.first)) {
6650 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
6651 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
6652 CE->getFlags());
6653 switch (Flags) {
6654 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
6655 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
6656 if (Config.hasRequiresUnifiedSharedMemory())
6657 continue;
6658 if (!CE->getAddress()) {
6659 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second);
6660 continue;
6661 }
6662 // The variable has no definition - no need to add the entry.
6663 if (CE->getVarSize() == 0)
6664 continue;
6665 break;
6666 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
6667 assert(((Config.isTargetDevice() && !CE->getAddress()) ||
6668 (!Config.isTargetDevice() && CE->getAddress())) &&
6669 "Declare target link address is set.");
6670 if (Config.isTargetDevice())
6671 continue;
6672 if (!CE->getAddress()) {
6673 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second);
6674 continue;
6675 }
6676 break;
6677 default:
6678 break;
6679 }
6680
6681 // Hidden or internal symbols on the device are not externally visible.
6682 // We should not attempt to register them by creating an offloading
6683 // entry. Indirect variables are handled separately on the device.
6684 if (auto *GV = dyn_cast<GlobalValue>(CE->getAddress()))
6685 if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
6686 Config.isTargetDevice())
6687 continue;
6688
6689 // Indirect globals need to use a special name that doesn't match the name
6690 // of the associated host global.
6691 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
6692 createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
6693 Flags, CE->getLinkage(), CE->getVarName());
6694 else
6695 createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
6696 Flags, CE->getLinkage());
6697
6698 } else {
6699 llvm_unreachable("Unsupported entry kind.");
6700 }
6701 }
6702
6703 // Emit requires directive globals to a special entry so the runtime can
6704 // register them when the device image is loaded.
6705 // TODO: This reduces the offloading entries to a 32-bit integer. Offloading
6706 // entries should be redesigned to better suit this use-case.
6707 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
6708 offloading::emitOffloadingEntry(
6709 M, Constant::getNullValue(PointerType::getUnqual(M.getContext())),
6710 /*Name=*/"",
6711 /*Size=*/0, OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
6712 Config.getRequiresFlags(), "omp_offloading_entries");
6713}
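Written out, the !omp_offload.info layout produced above looks like the following for one target region plus one declare-target global. This is a hedged example; every concrete number and name is invented.

```cpp
// !omp_offload.info = !{!0, !1}
// ; kind 0 = target region:
// ;   kind, device-id, file-id, parent name, line, count, order
// !0 = !{i32 0, i32 42, i32 7, !"foo", i32 12, i32 0, i32 0}
// ; kind 1 = declare-target global variable:
// ;   kind, mangled name, flags, order
// !1 = !{i32 1, !"bar", i32 0, i32 1}
```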
6714
6715 void TargetRegionEntryInfo::getTargetRegionEntryFnName(
6716 SmallVectorImpl<char> &Name, StringRef ParentName, unsigned DeviceID,
6717 unsigned FileID, unsigned Line, unsigned Count) {
6718 raw_svector_ostream OS(Name);
6719 OS << "__omp_offloading" << llvm::format("_%x", DeviceID)
6720 << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
6721 if (Count)
6722 OS << "_" << Count;
6723}
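The mangling scheme is `__omp_offloading_<device-id hex>_<file-id hex>_<parent>_l<line>` with an optional `_<count>` suffix. A standalone sketch of the same formatting using plain C I/O instead of raw_svector_ostream (all values are invented):

```cpp
#include <cstdio>

int main() {
  unsigned DeviceID = 0x2b17, FileID = 0x1a0f, Line = 42, Count = 0;
  const char *ParentName = "foo";
  char Name[128];
  int Len = std::snprintf(Name, sizeof(Name), "__omp_offloading_%x_%x_%s_l%u",
                          DeviceID, FileID, ParentName, Line);
  if (Count && Len > 0 && Len < (int)sizeof(Name))
    std::snprintf(Name + Len, sizeof(Name) - Len, "_%u", Count);
  std::puts(Name); // __omp_offloading_2b17_1a0f_foo_l42
  return 0;
}
```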
6724
6725 void OpenMPIRBuilder::getTargetRegionEntryFnName(
6726 SmallVectorImpl<char> &Name, const TargetRegionEntryInfo &EntryInfo) {
6727 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
6728 TargetRegionEntryInfo::getTargetRegionEntryFnName(
6729 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
6730 EntryInfo.Line, NewCount);
6731}
6732
6733 TargetRegionEntryInfo
6734 OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
6735 StringRef ParentName) {
6736 sys::fs::UniqueID ID;
6737 auto FileIDInfo = CallBack();
6738 if (auto EC = sys::fs::getUniqueID(std::get<0>(FileIDInfo), ID)) {
6739 report_fatal_error(("Unable to get unique ID for file, during "
6740 "getTargetEntryUniqueInfo, error message: " +
6741 EC.message())
6742 .c_str());
6743 }
6744
6745 return TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
6746 std::get<1>(FileIDInfo));
6747}
6748
6749 unsigned OpenMPIRBuilder::getFlagMemberOffset() {
6750 unsigned Offset = 0;
6751 for (uint64_t Remain =
6752 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
6753 omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6754 !(Remain & 1); Remain = Remain >> 1)
6755 Offset++;
6756 return Offset;
6757}
6758
6759 omp::OpenMPOffloadMappingFlags
6760 OpenMPIRBuilder::getMemberOfFlag(unsigned Position) {
6761 // Rotate by getFlagMemberOffset() bits.
6762 return static_cast<omp::OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
6763 << getFlagMemberOffset());
6764}
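A worked example of the two helpers above, assuming the usual OMP_MAP_MEMBER_OF mask that occupies the top sixteen bits; the concrete constant 0xffff000000000000 is an assumption of this sketch, not taken from the listing:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t MemberOfMask = 0xffff000000000000ULL; // assumed OMP_MAP_MEMBER_OF
  // getFlagMemberOffset(): position of the mask's lowest set bit.
  unsigned Offset = 0;
  for (uint64_t Remain = MemberOfMask; !(Remain & 1); Remain >>= 1)
    ++Offset;
  assert(Offset == 48);
  // getMemberOfFlag(Position): encode a 0-based member index as Position + 1
  // shifted into the MEMBER_OF field.
  unsigned Position = 2; // third member of the enclosing mapping
  uint64_t MemberOfFlag = (uint64_t)(Position + 1) << Offset;
  assert(MemberOfFlag == 0x0003000000000000ULL);
  return 0;
}
```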
6765
6766 void OpenMPIRBuilder::setCorrectMemberOfFlag(
6767 omp::OpenMPOffloadMappingFlags &Flags,
6768 omp::OpenMPOffloadMappingFlags MemberOfFlag) {
6769 // If the entry is PTR_AND_OBJ but has not been marked with the special
6770 // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
6771 // marked as MEMBER_OF.
6772 if (static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
6773 Flags & omp::OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
6774 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
6775 (Flags & omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
6776 omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
6777 return;
6778
6779 // Reset the placeholder value to prepare the flag for the assignment of the
6780 // proper MEMBER_OF value.
6781 Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
6782 Flags |= MemberOfFlag;
6783}
6784
6785 Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
6786 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
6787 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
6788 bool IsDeclaration, bool IsExternallyVisible,
6789 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
6790 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
6791 std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
6792 std::function<Constant *()> GlobalInitializer,
6793 std::function<GlobalValue::LinkageTypes()> VariableLinkage) {
6794 // TODO: convert this to utilise the IRBuilder Config rather than
6795 // a passed down argument.
6796 if (OpenMPSIMD)
6797 return nullptr;
6798
6799 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
6800 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
6801 CaptureClause ==
6802 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
6803 Config.hasRequiresUnifiedSharedMemory())) {
6804 SmallString<64> PtrName;
6805 {
6806 raw_svector_ostream OS(PtrName);
6807 OS << MangledName;
6808 if (!IsExternallyVisible)
6809 OS << format("_%x", EntryInfo.FileID);
6810 OS << "_decl_tgt_ref_ptr";
6811 }
6812
6813 Value *Ptr = M.getNamedValue(PtrName);
6814
6815 if (!Ptr) {
6816 GlobalValue *GlobalValue = M.getNamedValue(MangledName);
6817 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
6818
6819 auto *GV = cast<GlobalVariable>(Ptr);
6820 GV->setLinkage(GlobalValue::WeakAnyLinkage);
6821
6822 if (!Config.isTargetDevice()) {
6823 if (GlobalInitializer)
6824 GV->setInitializer(GlobalInitializer());
6825 else
6826 GV->setInitializer(GlobalValue);
6827 }
6828
6829 registerTargetGlobalVariable(
6830 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
6831 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
6832 GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(Ptr));
6833 }
6834
6835 return cast<Constant>(Ptr);
6836 }
6837
6838 return nullptr;
6839}
6840
6841 void OpenMPIRBuilder::registerTargetGlobalVariable(
6842 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
6843 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
6844 bool IsDeclaration, bool IsExternallyVisible,
6845 TargetRegionEntryInfo EntryInfo, StringRef MangledName,
6846 std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
6847 std::vector<Triple> TargetTriple,
6848 std::function<Constant *()> GlobalInitializer,
6849 std::function<GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy,
6850 Constant *Addr) {
6851 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
6852 (TargetTriple.empty() && !Config.isTargetDevice()))
6853 return;
6854
6855 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
6856 StringRef VarName;
6857 int64_t VarSize;
6858 GlobalValue::LinkageTypes Linkage;
6859
6860 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
6861 CaptureClause ==
6862 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
6863 !Config.hasRequiresUnifiedSharedMemory()) {
6864 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
6865 VarName = MangledName;
6866 GlobalValue *LlvmVal = M.getNamedValue(VarName);
6867
6868 if (!IsDeclaration)
6869 VarSize = divideCeil(
6870 M.getDataLayout().getTypeSizeInBits(LlvmVal->getValueType()), 8);
6871 else
6872 VarSize = 0;
6873 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->getLinkage();
6874
6875 // This is a workaround carried over from Clang which prevents undesired
6876 // optimisation of internal variables.
6877 if (Config.isTargetDevice() &&
6878 (!IsExternallyVisible || Linkage == GlobalValue::LinkOnceODRLinkage)) {
6879 // Do not create a "ref-variable" if the original is not also available
6880 // on the host.
6881 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
6882 return;
6883
6884 std::string RefName = createPlatformSpecificName({VarName, "ref"});
6885
6886 if (!M.getNamedValue(RefName)) {
6887 Constant *AddrRef =
6888 getOrCreateInternalVariable(Addr->getType(), RefName);
6889 auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
6890 GvAddrRef->setConstant(true);
6891 GvAddrRef->setLinkage(GlobalValue::InternalLinkage);
6892 GvAddrRef->setInitializer(Addr);
6893 GeneratedRefs.push_back(GvAddrRef);
6894 }
6895 }
6896 } else {
6897 if (Config.hasRequiresUnifiedSharedMemory())
6898 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
6899 else
6900 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
6901
6902 if (Config.isTargetDevice()) {
6903 VarName = (Addr) ? Addr->getName() : "";
6904 Addr = nullptr;
6905 } else {
6906 Addr = getAddrOfDeclareTargetVar(
6907 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
6908 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
6909 LlvmPtrTy, GlobalInitializer, VariableLinkage);
6910 VarName = (Addr) ? Addr->getName() : "";
6911 }
6912 VarSize = M.getDataLayout().getPointerSize();
6913 Linkage = GlobalValue::WeakAnyLinkage;
6914 }
6915
6916 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
6917 Flags, Linkage);
6918}
6919
6920/// Loads all the offload entries information from the host IR
6921/// metadata.
6922 void OpenMPIRBuilder::loadOffloadInfoMetadata(Module &M) {
6923 // If we are in target mode, load the metadata from the host IR. This code has
6924 // to match the metadata creation in createOffloadEntriesAndInfoMetadata().
6925
6926 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
6927 if (!MD)
6928 return;
6929
6930 for (MDNode *MN : MD->operands()) {
6931 auto &&GetMDInt = [MN](unsigned Idx) {
6932 auto *V = cast<ConstantAsMetadata>(MN->getOperand(Idx));
6933 return cast<ConstantInt>(V->getValue())->getZExtValue();
6934 };
6935
6936 auto &&GetMDString = [MN](unsigned Idx) {
6937 auto *V = cast<MDString>(MN->getOperand(Idx));
6938 return V->getString();
6939 };
6940
6941 switch (GetMDInt(0)) {
6942 default:
6943 llvm_unreachable("Unexpected metadata!");
6944 break;
6945 case OffloadEntriesInfoManager::OffloadEntryInfo::
6946 OffloadingEntryInfoTargetRegion: {
6947 TargetRegionEntryInfo EntryInfo(/*ParentName=*/GetMDString(3),
6948 /*DeviceID=*/GetMDInt(1),
6949 /*FileID=*/GetMDInt(2),
6950 /*Line=*/GetMDInt(4),
6951 /*Count=*/GetMDInt(5));
6952 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
6953 /*Order=*/GetMDInt(6));
6954 break;
6955 }
6956 case OffloadEntriesInfoManager::OffloadEntryInfo::
6957 OffloadingEntryInfoDeviceGlobalVar:
6958 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
6959 /*MangledName=*/GetMDString(1),
6960 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
6961 /*Flags=*/GetMDInt(2)),
6962 /*Order=*/GetMDInt(3));
6963 break;
6964 }
6965 }
6966}
6967
6968 void OpenMPIRBuilder::loadOffloadInfoMetadata(StringRef HostFilePath) {
6969 if (HostFilePath.empty())
6970 return;
6971
6972 auto Buf = MemoryBuffer::getFile(HostFilePath);
6973 if (std::error_code Err = Buf.getError()) {
6974 report_fatal_error(("error opening host file from host file path inside of "
6975 "OpenMPIRBuilder: " +
6976 Err.message())
6977 .c_str());
6978 }
6979
6980 LLVMContext Ctx;
6981 auto M = expectedToErrorOrAndEmitErrors(
6982 Ctx, parseBitcodeFile(Buf.get()->getMemBufferRef(), Ctx));
6983 if (std::error_code Err = M.getError()) {
6984 report_fatal_error(
6985 ("error parsing host file inside of OpenMPIRBuilder: " + Err.message())
6986 .c_str());
6987 }
6988
6989 loadOffloadInfoMetadata(*M.get());
6990}
6991
6992//===----------------------------------------------------------------------===//
6993// OffloadEntriesInfoManager
6994//===----------------------------------------------------------------------===//
6995
6996 bool OffloadEntriesInfoManager::empty() const {
6997 return OffloadEntriesTargetRegion.empty() &&
6998 OffloadEntriesDeviceGlobalVar.empty();
6999}
7000
7001unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
7002 const TargetRegionEntryInfo &EntryInfo) const {
7003 auto It = OffloadEntriesTargetRegionCount.find(
7004 getTargetRegionEntryCountKey(EntryInfo));
7005 if (It == OffloadEntriesTargetRegionCount.end())
7006 return 0;
7007 return It->second;
7008}
7009
7010void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
7011 const TargetRegionEntryInfo &EntryInfo) {
7012 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
7013 EntryInfo.Count + 1;
7014}
7015
7016/// Initialize target region entry.
7017 void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
7018 const TargetRegionEntryInfo &EntryInfo, unsigned Order) {
7019 OffloadEntriesTargetRegion[EntryInfo] =
7020 OffloadEntryInfoTargetRegion(Order, /*Addr=*/nullptr, /*ID=*/nullptr,
7021 OMPTargetRegionEntryTargetRegion);
7022 ++OffloadingEntriesNum;
7023}
7024
7025 void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
7026 TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID,
7027 OMPTargetRegionEntryKind Flags) {
7028 assert(EntryInfo.Count == 0 && "expected default EntryInfo");
7029
7030 // Update the EntryInfo with the next available count for this location.
7031 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
7032
7033 // If we are emitting code for a target, the entry is already initialized,
7034 // only has to be registered.
7035 if (OMPBuilder->Config.isTargetDevice()) {
7036 // This could happen if the device compilation is invoked standalone.
7037 if (!hasTargetRegionEntryInfo(EntryInfo)) {
7038 return;
7039 }
7040 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
7041 Entry.setAddress(Addr);
7042 Entry.setID(ID);
7043 Entry.setFlags(Flags);
7044 } else {
7045 if (OMPBuilder->Config.hasRequiresUnifiedSharedMemory() &&
7046 hasTargetRegionEntryInfo(EntryInfo, /*IgnoreAddressId*/ true))
7047 return;
7048 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
7049 "Target region entry already registered!");
7050 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
7051 OffloadEntriesTargetRegion[EntryInfo] = Entry;
7052 ++OffloadingEntriesNum;
7053 }
7054 incrementTargetRegionEntryInfoCount(EntryInfo);
7055}
7056
7057 bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
7058 TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId) const {
7059
7060 // Update the EntryInfo with the next available count for this location.
7061 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
7062
7063 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
7064 if (It == OffloadEntriesTargetRegion.end()) {
7065 return false;
7066 }
7067 // Fail if this entry is already registered.
7068 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
7069 return false;
7070 return true;
7071}
7072
7073 void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
7074 const OffloadTargetRegionEntryInfoActTy &Action) {
7075 // Scan all target region entries and perform the provided action.
7076 for (const auto &It : OffloadEntriesTargetRegion) {
7077 Action(It.first, It.second);
7078 }
7079}
7080
7081 void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
7082 StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order) {
7083 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
7084 ++OffloadingEntriesNum;
7085}
7086
7087 void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
7088 StringRef VarName, Constant *Addr, int64_t VarSize,
7089 OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage) {
7090 if (OMPBuilder->Config.isTargetDevice()) {
7091 // This could happen if the device compilation is invoked standalone.
7092 if (!hasDeviceGlobalVarEntryInfo(VarName))
7093 return;
7094 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
7095 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
7096 if (Entry.getVarSize() == 0) {
7097 Entry.setVarSize(VarSize);
7098 Entry.setLinkage(Linkage);
7099 }
7100 return;
7101 }
7102 Entry.setVarSize(VarSize);
7103 Entry.setLinkage(Linkage);
7104 Entry.setAddress(Addr);
7105 } else {
7106 if (hasDeviceGlobalVarEntryInfo(VarName)) {
7107 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
7108 assert(Entry.isValid() && Entry.getFlags() == Flags &&
7109 "Entry not initialized!");
7110 if (Entry.getVarSize() == 0) {
7111 Entry.setVarSize(VarSize);
7112 Entry.setLinkage(Linkage);
7113 }
7114 return;
7115 }
7116 if (Flags == OMPTargetGlobalVarEntryIndirect)
7117 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
7118 Addr, VarSize, Flags, Linkage,
7119 VarName.str());
7120 else
7121 OffloadEntriesDeviceGlobalVar.try_emplace(
7122 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, "");
7123 ++OffloadingEntriesNum;
7124 }
7125}
7126
7127 void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
7128 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
7129 // Scan all device global variable entries and perform the provided action.
7130 for (const auto &E : OffloadEntriesDeviceGlobalVar)
7131 Action(E.getKey(), E.getValue());
7132}
7133
7134//===----------------------------------------------------------------------===//
7135// CanonicalLoopInfo
7136//===----------------------------------------------------------------------===//
7137
7138void CanonicalLoopInfo::collectControlBlocks(
7139 SmallVectorImpl<BasicBlock *> &BBs) {
7140 // We only count those BBs as control block for which we do not need to
7141 // reverse the CFG, i.e. not the loop body which can contain arbitrary control
7142 // flow. For consistency, this also means we do not add the Body block, which
7143 // is just the entry to the body code.
7144 BBs.reserve(BBs.size() + 6);
7145 BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
7146}
7147
7148 BasicBlock *CanonicalLoopInfo::getPreheader() const {
7149 assert(isValid() && "Requires a valid canonical loop");
7150 for (BasicBlock *Pred : predecessors(Header)) {
7151 if (Pred != Latch)
7152 return Pred;
7153 }
7154 llvm_unreachable("Missing preheader");
7155}
7156
7157void CanonicalLoopInfo::setTripCount(Value *TripCount) {
7158 assert(isValid() && "Requires a valid canonical loop");
7159
7160 Instruction *CmpI = &getCond()->front();
7161 assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
7162 CmpI->setOperand(1, TripCount);
7163
7164#ifndef NDEBUG
7165 assertOK();
7166#endif
7167}
7168
7169void CanonicalLoopInfo::mapIndVar(
7170 llvm::function_ref<Value *(Instruction *)> Updater) {
7171 assert(isValid() && "Requires a valid canonical loop");
7172
7173 Instruction *OldIV = getIndVar();
7174
7175 // Record all uses excluding those introduced by the updater. Uses by the
7176 // CanonicalLoopInfo itself to keep track of the number of iterations are
7177 // excluded.
7178 SmallVector<Use *> ReplacableUses;
7179 for (Use &U : OldIV->uses()) {
7180 auto *User = dyn_cast<Instruction>(U.getUser());
7181 if (!User)
7182 continue;
7183 if (User->getParent() == getCond())
7184 continue;
7185 if (User->getParent() == getLatch())
7186 continue;
7187 ReplacableUses.push_back(&U);
7188 }
7189
7190 // Run the updater that may introduce new uses
7191 Value *NewIV = Updater(OldIV);
7192
7193 // Replace the old uses with the value returned by the updater.
7194 for (Use *U : ReplacableUses)
7195 U->set(NewIV);
7196
7197#ifndef NDEBUG
7198 assertOK();
7199#endif
7200}
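A hedged usage sketch of mapIndVar: shifting the logical induction variable by a loop-invariant start value. This is a fragment, not a complete program; `Builder`, `CLI`, and `Start` are assumed to exist in the surrounding frontend code. Because uses in the condition and latch blocks are excluded, the canonical 0..TripCount counter that controls the loop is untouched.

```cpp
// Assumes: IRBuilder<> Builder; CanonicalLoopInfo *CLI; Value *Start.
CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
  Builder.SetInsertPoint(CLI->getBody(),
                         CLI->getBody()->getFirstInsertionPt());
  // Body code now sees OldIV + Start; the loop-control uses of OldIV in the
  // condition and latch keep the original value.
  return Builder.CreateAdd(OldIV, Start, "iv.shifted");
});
```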
7201
7202 void CanonicalLoopInfo::assertOK() const {
7203#ifndef NDEBUG
7204 // No constraints if this object currently does not describe a loop.
7205 if (!isValid())
7206 return;
7207
7208 BasicBlock *Preheader = getPreheader();
7209 BasicBlock *Body = getBody();
7210 BasicBlock *After = getAfter();
7211
7212 // Verify standard control-flow we use for OpenMP loops.
7213 assert(Preheader);
7214 assert(isa<BranchInst>(Preheader->getTerminator()) &&
7215 "Preheader must terminate with unconditional branch");
7216 assert(Preheader->getSingleSuccessor() == Header &&
7217 "Preheader must jump to header");
7218
7219 assert(Header);
7220 assert(isa<BranchInst>(Header->getTerminator()) &&
7221 "Header must terminate with unconditional branch");
7222 assert(Header->getSingleSuccessor() == Cond &&
7223 "Header must jump to exiting block");
7224
7225 assert(Cond);
7226 assert(Cond->getSinglePredecessor() == Header &&
7227 "Exiting block only reachable from header");
7228
7229 assert(isa<BranchInst>(Cond->getTerminator()) &&
7230 "Exiting block must terminate with conditional branch");
7231 assert(size(successors(Cond)) == 2 &&
7232 "Exiting block must have two successors");
7233 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(0) == Body &&
7234 "Exiting block's first successor jump to the body");
7235 assert(cast<BranchInst>(Cond->getTerminator())->getSuccessor(1) == Exit &&
7236 "Exiting block's second successor must exit the loop");
7237
7238 assert(Body);
7239 assert(Body->getSinglePredecessor() == Cond &&
7240 "Body only reachable from exiting block");
7241 assert(!isa<PHINode>(Body->front()));
7242
7243 assert(Latch);
7244 assert(isa<BranchInst>(Latch->getTerminator()) &&
7245 "Latch must terminate with unconditional branch");
7246 assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
7247 // TODO: To support simple redirecting of the end of the body code that has
7248 // multiple; introduce another auxiliary basic block like preheader and after.
7249 assert(Latch->getSinglePredecessor() != nullptr);
7250 assert(!isa<PHINode>(Latch->front()));
7251
7252 assert(Exit);
7253 assert(isa<BranchInst>(Exit->getTerminator()) &&
7254 "Exit block must terminate with unconditional branch");
7255 assert(Exit->getSingleSuccessor() == After &&
7256 "Exit block must jump to after block");
7257
7258 assert(After);
7259 assert(After->getSinglePredecessor() == Exit &&
7260 "After block only reachable from exit block");
7261 assert(After->empty() || !isa<PHINode>(After->front()));
7262
7263 Instruction *IndVar = getIndVar();
7264 assert(IndVar && "Canonical induction variable not found?");
7265 assert(isa<IntegerType>(IndVar->getType()) &&
7266 "Induction variable must be an integer");
7267 assert(cast<PHINode>(IndVar)->getParent() == Header &&
7268 "Induction variable must be a PHI in the loop header");
7269 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
7270 assert(
7271 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->isZero());
7272 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
7273
7274 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
7275 assert(cast<Instruction>(NextIndVar)->getParent() == Latch);
7276 assert(cast<BinaryOperator>(NextIndVar)->getOpcode() == BinaryOperator::Add);
7277 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
7278 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
7279 ->isOne());
7280
7281 Value *TripCount = getTripCount();
7282 assert(TripCount && "Loop trip count not found?");
7283 assert(IndVar->getType() == TripCount->getType() &&
7284 "Trip count and induction variable must have the same type");
7285
7286 auto *CmpI = cast<CmpInst>(&Cond->front());
7287 assert(CmpI->getPredicate() == CmpInst::ICMP_ULT &&
7288 "Exit condition must be an unsigned less-than comparison");
7289 assert(CmpI->getOperand(0) == IndVar &&
7290 "Exit condition must compare the induction variable");
7291 assert(CmpI->getOperand(1) == TripCount &&
7292 "Exit condition must compare with the trip count");
7293#endif
7294}
7295
7296 void CanonicalLoopInfo::invalidate() {
7297 Header = nullptr;
7298 Cond = nullptr;
7299 Latch = nullptr;
7300 Exit = nullptr;
7301}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Rewrite undef for PHI
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used by LLVM_MARK_AS_BITMASK_EN...
Definition: BitmaskEnum.h:83
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue)
LLVM_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you can perform bitwise operatio...
Definition: BitmaskEnum.h:42
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
static LVOptions Options
Definition: LVOptions.cpp:25
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:528
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file contains the declarations for metadata subclasses.
IntegerType * Int32Ty
#define OMP_KERNEL_ARG_VERSION
Definition: OMPConstants.h:75
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, TargetRegionEntryInfo &EntryInfo, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static Function * createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, Function *OutlinedFn, Constant *OutlinedFnID, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static MDNode * getNVPTXMDNode(Function &Kernel, StringRef Name)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static void replaceConstantValueUsesInFuncWithInstr(llvm::Value *Input, Function *Func)
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void replaceConstatExprUsesInFuncWithInstr(ConstantExpr *ConstExpr, Function *Func)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, Type *ParallelTaskPtr, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
Value * createFakeIntVal(IRBuilder<> &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, std::stack< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
Function * getFreshReductionFunc(Module &M)
Create a function with a unique name and a "void (i8*, i8*)" signature in the given module and return...
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
#define P(N)
FunctionAnalysisManager FAM
const char LLVMTargetMachineRef TM
This header defines various interfaces for pass management in LLVM.
const SmallVectorImpl< MachineOperand > & Cond
Basic Register Allocator
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallSet class.
This file contains some functions that are useful when dealing with strings.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Definition: blake3_impl.h:78
Class for arbitrary precision integers.
Definition: APInt.h:76
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Definition: Instructions.h:59
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:107
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:125
unsigned getAddressSpace() const
Return the address space for the allocation.
Definition: Instructions.h:112
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
Definition: Instructions.h:136
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:103
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:535
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
Class to represent array types.
Definition: DerivedTypes.h:371
static ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Definition: Type.cpp:647
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:696
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ FAdd
*p = old + v
Definition: Instructions.h:785
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:778
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
@ FSub
*p = old - v
Definition: Instructions.h:788
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:776
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:782
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:796
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:780
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:792
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
Definition: Attributes.cpp:797
AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
Definition: Attributes.cpp:782
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:93
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
Definition: BasicBlock.cpp:657
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
reverse_iterator rbegin()
Definition: BasicBlock.h:446
bool empty() const
Definition: BasicBlock.h:452
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:360
const Instruction & front() const
Definition: BasicBlock.h:453
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:570
const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
Definition: BasicBlock.cpp:490
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
InstListType::reverse_iterator reverse_iterator
Definition: BasicBlock.h:167
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:276
reverse_iterator rend()
Definition: BasicBlock.h:448
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
Definition: BasicBlock.h:358
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition: BasicBlock.h:613
const Instruction & back() const
Definition: BasicBlock.h:455
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
Definition: BasicBlock.cpp:289
void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Definition: BasicBlock.cpp:509
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1662
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1668
unsigned arg_size() const
Definition: InstrTypes.h:1685
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_SLE
signed less or equal
Definition: InstrTypes.h:1023
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:999
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:997
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
@ ICMP_ULE
unsigned less or equal
Definition: InstrTypes.h:1019
A cache for the CodeExtractor analysis.
Definition: CodeExtractor.h:46
Utility class for extracting code into a new function.
Definition: CodeExtractor.h:85
void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, const ValueSet &Allocas) const
Compute the set of input values and output values for the code.
void findAllocas(const CodeExtractorAnalysisCache &CEAC, ValueSet &SinkCands, ValueSet &HoistCands, BasicBlock *&ExitBlock) const
Find the set of allocas whose life ranges are contained within the outlined region.
Function * extractCodeRegion(const CodeExtractorAnalysisCache &CEAC)
Perform the extraction, returning the new function.
bool isEligible() const
Test whether this code extractor is eligible.
void excludeArgFromAggregate(Value *Arg)
Exclude a value from aggregate argument passing when extracting a code region, passing it instead as ...
static Constant * get(ArrayType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1291
static ConstantAsMetadata * get(Constant *C)
Definition: Metadata.h:528
static Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
Definition: Constants.cpp:2881
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:705
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
Definition: Constants.cpp:2072
static Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
Definition: Constants.cpp:2087
static Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2152
Instruction * getAsInstruction() const
Returns an Instruction which implements the same operation as this ConstantExpr.
Definition: Constants.cpp:3310
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.h:123
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1775
static Constant * get(StructType *T, ArrayRef< Constant * > V)
Definition: Constants.cpp:1356
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
Debug location.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
unsigned getDefaultGlobalsAddressSpace() const
Definition: DataLayout.h:294
Align getABIIntegerTypeAlignment(unsigned BitWidth) const
Returns the minimum ABI-required alignment for an integer type of the specified bitwidth.
Definition: DataLayout.h:533
unsigned getAllocaAddrSpace() const
Definition: DataLayout.h:276
unsigned getPointerSize(unsigned AS=0) const
Layout pointer size in bytes, rounded up to a whole number of bytes.
Definition: DataLayout.cpp:750
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:420
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
A debug info location.
Definition: DebugLoc.h:33
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Definition: Dominators.cpp:371
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
Class to represent function types.
Definition: DerivedTypes.h:103
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
Definition: Function.cpp:585
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition: Function.h:163
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
bool empty() const
Definition: Function.h:805
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:201
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
Definition: Function.cpp:399
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:713
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:339
const Function & getFunction() const
Definition: Function.h:161
iterator begin()
Definition: Function.h:799
arg_iterator arg_begin()
Definition: Function.h:814
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Definition: Function.cpp:613
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Definition: Function.h:728
size_t arg_size() const
Definition: Function.h:847
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:206
iterator end()
Definition: Function.h:801
void setCallingConv(CallingConv::ID CC)
Definition: Function.h:267
Argument * getArg(unsigned i) const
Definition: Function.h:832
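Putting several of these Function members together, a minimal sketch that declares a function in a module; the symbol name "my_add" is hypothetical:

#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

Function *declareMyAdd(Module &M) {
  LLVMContext &Ctx = M.getContext();
  // i32 my_add(i32, i32)
  FunctionType *FTy = FunctionType::get(
      Type::getInt32Ty(Ctx), {Type::getInt32Ty(Ctx), Type::getInt32Ty(Ctx)},
      /*isVarArg=*/false);
  Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage,
                                 /*AddrSpace=*/0, "my_add", &M);
  F->addFnAttr(Attribute::NoUnwind);
  F->getArg(0)->setName("a");
  F->getArg(1)->setName("b");
  return F;
}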
bool hasMetadata() const
Return true if this value has any metadata attached to it.
Definition: Value.h:589
void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
Definition: Metadata.cpp:1522
LinkageTypes getLinkage() const
Definition: GlobalValue.h:546
void setLinkage(LinkageTypes LT)
Definition: GlobalValue.h:537
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void setDSOLocal(bool Local)
Definition: GlobalValue.h:303
PointerType * getType() const
Global values are always pointers.
Definition: GlobalValue.h:294
@ HiddenVisibility
The GV is hidden.
Definition: GlobalValue.h:68
@ ProtectedVisibility
The GV is protected.
Definition: GlobalValue.h:69
void setVisibility(VisibilityTypes V)
Definition: GlobalValue.h:254
LinkageTypes
An enumeration for the kinds of linkage for global values.
Definition: GlobalValue.h:51
@ PrivateLinkage
Like Internal, but omit from symbol table.
Definition: GlobalValue.h:60
@ CommonLinkage
Tentative definitions.
Definition: GlobalValue.h:62
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
@ WeakODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:57
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
Definition: GlobalValue.h:56
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Definition: GlobalValue.h:55
Type * getValueType() const
Definition: GlobalValue.h:296
InsertPoint - A saved insertion point.
Definition: IRBuilder.h:257
BasicBlock * getBlock() const
Definition: IRBuilder.h:272
bool isSet() const
Returns true if this insert point is set.
Definition: IRBuilder.h:270
BasicBlock::iterator getPoint() const
Definition: IRBuilder.h:273
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2257
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1841
AllocaInst * CreateAlloca(Type *Ty, unsigned AddrSpace, Value *ArraySize=nullptr, const Twine &Name="")
Definition: IRBuilder.h:1773
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2523
Constant * CreateGlobalStringPtr(StringRef Str, const Twine &Name="", unsigned AddressSpace=0, Module *M=nullptr)
Same as CreateGlobalString, but return a pointer with "i8*" type instead of a pointer to array of i8.
Definition: IRBuilder.h:1993
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2039
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1263
Value * CreatePointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2170
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
CallInst * CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, Value *OffsetValue=nullptr)
Create an assume intrinsic call that represents an alignment assumption on the provided pointer.
Definition: IRBuilder.cpp:1328
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1110
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateStructGEP(Type *Ty, Value *Ptr, unsigned Idx, const Twine &Name="")
Definition: IRBuilder.h:1973
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2033
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2122
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:526
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
IntegerType * getInt64Ty()
Fetch the type representing a 64-bit integer.
Definition: IRBuilder.h:531
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition: IRBuilder.h:1876
Value * CreatePointerBitCastOrAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2182
Value * CreateUDiv(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1378
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2245
ConstantInt * getInt64(uint64_t C)
Get a constant 64-bit value.
Definition: IRBuilder.h:491
Value * CreateNeg(Value *V, const Twine &Name="", bool HasNSW=false)
Definition: IRBuilder.h:1721
InsertPoint saveIP() const
Returns the current insert point.
Definition: IRBuilder.h:277
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:486
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition: IRBuilder.h:1143
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition: IRBuilder.h:145
DebugLoc getCurrentDebugLocation() const
Get location information used by debugging information.
Definition: IRBuilder.cpp:63
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2127
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:497
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
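As a worked example of CreateCondBr together with CreateBr and CreatePHI, a sketch of 'R = Cond ? A : B' built as an explicit diamond; it assumes the entry block of F is still open (has no terminator yet):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

Value *emitDiamond(Function *F, Value *Cond, Value *A, Value *B) {
  LLVMContext &Ctx = F->getContext();
  BasicBlock *Then = BasicBlock::Create(Ctx, "then", F);
  BasicBlock *Else = BasicBlock::Create(Ctx, "else", F);
  BasicBlock *Join = BasicBlock::Create(Ctx, "join", F);
  IRBuilder<>(&F->getEntryBlock()).CreateCondBr(Cond, Then, Else);
  IRBuilder<>(Then).CreateBr(Join);
  IRBuilder<>(Else).CreateBr(Join);
  IRBuilder<> JB(Join);
  PHINode *R = JB.CreatePHI(A->getType(), /*NumReservedValues=*/2, "r");
  R->addIncoming(A, Then);
  R->addIncoming(B, Else);
  return R;
}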
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1790
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
LLVMContext & getContext() const
Definition: IRBuilder.h:176
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1475
ReturnInst * CreateRetVoid()
Create a 'ret void' instruction.
Definition: IRBuilder.h:1090
Value * CreateConstInBoundsGEP2_32(Type *Ty, Value *Ptr, unsigned Idx0, unsigned Idx1, const Twine &Name="")
Definition: IRBuilder.h:1914
Value * CreateConstInBoundsGEP2_64(Type *Ty, Value *Ptr, uint64_t Idx0, uint64_t Idx1, const Twine &Name="")
Definition: IRBuilder.h:1960
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1803
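A small sketch combining CreateAlloca, CreateStore, and CreateLoad at a builder's current position; the helper name is made up:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Spill V to a fresh stack slot, then reload it.
Value *spillAndReload(IRBuilderBase &B, Value *V) {
  AllocaInst *Slot = B.CreateAlloca(V->getType(), /*AddrSpace=*/0,
                                    /*ArraySize=*/nullptr, "slot");
  B.CreateStore(V, Slot);
  return B.CreateLoad(V->getType(), Slot, "reload");
}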
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1327
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2117
Value * CreateIsNotNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg != 0.
Definition: IRBuilder.h:2549
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1854
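For the two atomic builders above, a sketch that emits a seq_cst fetch-add and a compare-exchange on the same pointer; passing an empty MaybeAlign lets the builder pick the default alignment:

#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/AtomicOrdering.h"
using namespace llvm;

void atomicSketch(IRBuilderBase &B, Value *Ptr, Value *Inc,
                  Value *Expected, Value *Desired) {
  B.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Inc, MaybeAlign(),
                    AtomicOrdering::SequentiallyConsistent);
  // cmpxchg yields a {old value, success flag} pair (see CreateExtractValue).
  B.CreateAtomicCmpXchg(Ptr, Expected, Desired, MaybeAlign(),
                        AtomicOrdering::SequentiallyConsistent,
                        AtomicOrdering::Monotonic);
}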
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:569
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1114
void ClearInsertionPoint()
Clear the insertion point: created instructions will not be inserted into a block.
Definition: IRBuilder.h:169
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateICmpUGE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2253
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition: IRBuilder.h:2196
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
Definition: IRBuilder.h:289
Value * CreateIsNull(Value *Arg, const Twine &Name="")
Return a boolean value testing if Arg == 0.
Definition: IRBuilder.h:2544
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
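saveIP, SetInsertPoint, and restoreIP are commonly combined to emit into another block temporarily; a sketch, assuming BB has no terminator so appending to its end is valid:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

void emitElsewhere(IRBuilderBase &B, BasicBlock *BB) {
  IRBuilderBase::InsertPoint Saved = B.saveIP();
  B.SetInsertPoint(BB);
  // ... create instructions at the end of BB here ...
  B.restoreIP(Saved); // back to the original position
}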
Type * getVoidTy()
Fetch the type representing void.
Definition: IRBuilder.h:564
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1519
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2351
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition: IRBuilder.h:516
Value * CreateURem(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1404
CallInst * CreateMemCpy(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, uint64_t Size, bool isVolatile=false, MDNode *TBAATag=nullptr, MDNode *TBAAStructTag=nullptr, MDNode *ScopeTag=nullptr, MDNode *NoAliasTag=nullptr)
Create and insert a memcpy between the specified pointers.
Definition: IRBuilder.h:659
Value * CreateSExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a SExt or Trunc from the integer value V to DestTy.
Definition: IRBuilder.h:2054
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1361
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:88
void moveBeforePreserving(Instruction *MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
static bool classof(const Value *V)
Methods for support type inquiry through isa, cast, and dyn_cast:
Definition: Instruction.h:916
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:359
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:266
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopInfo run(Function &F, FunctionAnalysisManager &AM)
Definition: LoopInfo.cpp:957
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
Definition: MDBuilder.cpp:107
Metadata node.
Definition: Metadata.h:1067
void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
Definition: Metadata.cpp:1071
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1549
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
ArrayRef< MDOperand > operands() const
Definition: Metadata.h:1426
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
static MDString * get(LLVMContext &Context, StringRef Str)
Definition: Metadata.cpp:600
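A sketch tying MDString, MDTuple, and Instruction::setMetadata together; the metadata kind name "my.tag" is purely illustrative:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Attach !{!"my.tag"} to I under a custom metadata kind.
void tagInstruction(Instruction &I) {
  LLVMContext &Ctx = I.getContext();
  MDNode *N = MDTuple::get(Ctx, {MDString::get(Ctx, "my.tag")});
  I.setMetadata(Ctx.getMDKindID("my.tag"), N);
}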
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
size_type size() const
Definition: MapVector.h:60
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:301
NamedMDNode * getNamedMetadata(const Twine &Name) const
Return the first NamedMDNode in the module with the specified name.
Definition: Module.cpp:260
StringRef getName() const
Get a short "name" for the module.
Definition: Module.h:284
const std::string & getTargetTriple() const
Get the target triple which is a string describing the target host.
Definition: Module.h:297
iterator_range< global_iterator > globals()
Definition: Module.h:699
const FunctionListType & getFunctionList() const
Get the Module's list of functions (constant).
Definition: Module.h:611
GlobalVariable * getGlobalVariable(StringRef Name) const
Look up the specified global variable in the module symbol table.
Definition: Module.h:446
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
Definition: Module.cpp:133
NamedMDNode * getOrInsertNamedMetadata(StringRef Name)
Return the named MDNode in the module with the specified name.
Definition: Module.cpp:269
const GlobalVariable * getNamedGlobal(StringRef Name) const
Return the global variable in the module with the specified name, of arbitrary type.
Definition: Module.h:461
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
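A sketch of the name-based Module lookups above; the symbol names are hypothetical:

#include "llvm/IR/Module.h"
using namespace llvm;

void moduleLookups(Module &M) {
  GlobalVariable *GV = M.getGlobalVariable("my_global"); // null if absent
  GlobalValue *Sym = M.getNamedValue("my_symbol");       // any global value
  NamedMDNode *MD = M.getOrInsertNamedMetadata("my.md"); // created if missing
  (void)GV; (void)Sym; (void)MD;
}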
A tuple of MDNodes.
Definition: Metadata.h:1729
iterator_range< op_iterator > operands()
Definition: Metadata.h:1825
void addOperand(MDNode *M)
Definition: Metadata.cpp:1388
@ OffloadingEntryInfoTargetRegion
Entry is a target region.
Definition: OMPIRBuilder.h:221
@ OffloadingEntryInfoDeviceGlobalVar
Entry is a declare target variable.
Definition: OMPIRBuilder.h:223
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions. NOTE: Currently not used as a part o...
Definition: OMPIRBuilder.h:354
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
Definition: OMPIRBuilder.h:356
void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
Definition: OMPIRBuilder.h:274
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
Definition: OMPIRBuilder.h:276
void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
unsigned size() const
Return number of entries defined so far.
Definition: OMPIRBuilder.h:265
void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
Definition: OMPIRBuilder.h:334
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
Definition: OMPIRBuilder.h:340
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
Definition: OMPIRBuilder.h:346
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
Definition: OMPIRBuilder.h:344
@ OMPTargetGlobalVarEntryLink
Mark the entry as a 'declare target link' entry.
Definition: OMPIRBuilder.h:338
@ OMPTargetGlobalVarEntryTo
Mark the entry as a 'declare target to' entry.
Definition: OMPIRBuilder.h:336
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
Definition: OMPIRBuilder.h:410
bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
Definition: OMPIRBuilder.h:90
StringRef separator() const
Definition: OMPIRBuilder.h:157
int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
StringRef firstSeparator() const
Definition: OMPIRBuilder.h:147
void setHasRequiresReverseOffload(bool Value)
bool hasRequiresUnifiedSharedMemory() const
void setHasRequiresUnifiedSharedMemory(bool Value)
bool hasRequiresDynamicAllocators() const
bool openMPOffloadMandatory() const
Definition: OMPIRBuilder.h:129
void setHasRequiresUnifiedAddress(bool Value)
void setHasRequiresDynamicAllocators(bool Value)
bool hasRequiresReverseOffload() const
bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
Definition: OMPIRBuilder.h:451
Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
std::function< void(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
Definition: OMPIRBuilder.h:497
InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, int32_t MinThreadsVal=0, int32_t MaxThreadsVal=0, int32_t MinTeamsVal=0, int32_t MaxTeamsVal=0)
The omp target interface.
void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for an OpenMP 'if' clause using the specified BodyGenCallbackTy. Here is the logic: if (Cond) { Th...
CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
void createTaskyield(const LocationDescription &Loc)
Generator for '#omp taskyield'.
void emitBranch(BasicBlock *Target)
InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO)
Emit atomic write for: X = Expr (only scalar data types).
static void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
InsertPointTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for '#omp critical'.
static TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from the callback.
void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
InsertPointTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr)
Emit atomic capture for constructs (only scalar data types): V = X; X = X BinOp Expr, ...
void initialize()
Initialize the internal state; this will put struct types and potentially other helpers into the ...
void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
CanonicalLoopInfo * createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
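A sketch of driving createCanonicalLoop, assuming an already-initialized OpenMPIRBuilder; the body callback receives the position to emit at and the induction variable:

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
using namespace llvm;

// Build a canonical loop iterating from 0 to TripCount.
CanonicalLoopInfo *makeLoop(OpenMPIRBuilder &OMPB, IRBuilderBase &B,
                            Value *TripCount) {
  auto BodyGen = [](OpenMPIRBuilder::InsertPointTy CodeGenIP, Value *IV) {
    // Emit the loop body for iteration IV at CodeGenIP.
  };
  OpenMPIRBuilder::LocationDescription Loc(B.saveIP(),
                                           B.getCurrentDebugLocation());
  return OMPB.createCanonicalLoop(Loc, BodyGen, TripCount);
}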
void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
InsertPointTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X. For complex operations: X = ...
InsertPointTy emitBarrierImpl(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall, bool CheckCancelFlag)
Generate a barrier runtime call.
void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
InsertPointTy emitKernelLaunch(const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for '#omp cancel'.
static std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO)
Emit atomic read for: V = X (only scalar data types).
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
InsertPointTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for '#omp ordered [threads | simd]'.
OpenMPIRBuilder::InsertPointTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for '#omp target data'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be freed.
void createTaskwait(const LocationDescription &Loc)
Generator for '#omp taskwait'.
CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
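A minimal sketch of createBarrier, assuming OMPBuilder.initialize() has already been called for the module; the helper name is made up:

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
using namespace llvm;

// Emit an explicit barrier at the builder's current position.
void emitBarrierHere(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &B) {
  OpenMPIRBuilder::LocationDescription Loc(B.saveIP(),
                                           B.getCurrentDebugLocation());
  B.restoreIP(OMPBuilder.createBarrier(Loc, omp::OMPD_barrier));
}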
std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the alloca instructions used in calls to mapper functions.
Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool EmitDebug=false, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
InsertPointTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for '#omp sections'.
InsertPointTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={})
Generator for '#omp task'.
void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
InsertPointTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, bool IsNoWait=false, bool IsByRef=false)
Generator for '#omp reduction'.
InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs (only scalar data types): cond-expr-stmt: x = x ordop expr ?...
InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for '#omp ordered depend (source | sink)'.
InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
void emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, function_ref< Value *(unsigned int)> CustomMapperCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
InsertPointTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp section'.
InsertPointTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop)
Modifies the canonical loop to be a workshare loop.
void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
InsertPointTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for '#omp master'.
IRBuilder ::InsertPoint createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for '#omp parallel'.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, unsigned AddressSpace=0)
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
InsertPointTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for '#omp single'.
CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
Definition: OMPIRBuilder.h:477
GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
static void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
std::function< Function *(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
void emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective, FinalizeCallbackTy ExitCB={})
Generate control flow and cleanup for cancellation.
Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
InsertPointTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for '#omp masked'.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
InsertPointTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
InsertPointTy createTarget(const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB)
Generator for '#omp target'.
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
InsertPointTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for '#omp teams'.
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
void createFlush(const LocationDescription &Loc)
Generator for '#omp flush'.
Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
The optimization diagnostic interface.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Definition: PassManager.h:296
Class to represent pointers.
Definition: DerivedTypes.h:646
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Analysis pass that exposes the ScalarEvolution for a function.
ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
Definition: SetVector.h:57
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
iterator end() const
Definition: SmallPtrSet.h:385
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
iterator begin() const
Definition: SmallPtrSet.h:380
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
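The insert().second idiom above enables a common dedup-while-preserving-order pattern; a sketch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Collect each value once, preserving first-seen order; insert().second
// is true only for newly added elements.
void collectUnique(ArrayRef<Value *> In, SmallVectorImpl<Value *> &Out) {
  SmallPtrSet<Value *, 8> Seen;
  for (Value *V : In)
    if (Seen.insert(V).second)
      Out.push_back(V);
}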
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
void setAlignment(Align Align)
Definition: Instructions.h:373
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:400
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
Definition: StringMap.h:127
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: StringMap.h:253
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:696
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
size_t count(char C) const
Return the number of occurrences of C in the string.
Definition: StringRef.h:447
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
Definition: StringRef.h:271
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Definition: StringRef.h:612
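A sketch exercising the StringRef operations above on a made-up string:

#include "llvm/ADT/StringRef.h"
using namespace llvm;

void stringRefSketch() {
  StringRef S = "foo.bar.cl";
  auto [Head, Rest] = S.split('.'); // "foo" and "bar.cl"
  bool HasExt = S.ends_with(".cl"); // true
  StringRef NoExt = S.drop_back(3); // "foo.bar"
  size_t Dots = S.count('.');       // 2
  (void)Head; (void)Rest; (void)HasExt; (void)NoExt; (void)Dots;
}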
Class to represent struct types.
Definition: DerivedTypes.h:216
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:513
Multiway switch.
void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
Result run(const Function &F, FunctionAnalysisManager &)
Analysis pass providing the TargetLibraryInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(StringRef TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
Definition: Triple.h:937
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
Definition: Triple.h:995
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Definition: Triple.h:1005
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
std::string str() const
Return the twine contents as a std::string.
Definition: Twine.cpp:17
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
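A sketch using the Type predicates above to classify a type; the function is illustrative only:

#include "llvm/IR/Type.h"
using namespace llvm;

const char *classify(Type *Ty) {
  if (Ty->isVoidTy())
    return "void";
  if (Ty->isIntegerTy())
    return "integer"; // width via Ty->getIntegerBitWidth()
  if (Ty->isFloatingPointTy())
    return "floating-point";
  if (Ty->isPointerTy())
    return "pointer";
  return "other";
}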
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
Definition: UnrollLoop.h:122
bool canUnroll() const
Whether it is legal to unroll this loop.
Definition: UnrollLoop.h:135
uint64_t getRolledLoopSize() const
Definition: UnrollLoop.h:137
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
ValueT lookup(const KeyT &Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: ValueMap.h:164
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
User * user_back()
Definition: Value.h:407
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
Definition: Value.cpp:542
User * getUniqueUndroppableUser()
Return the unique user of this value that cannot be dropped, if there is exactly one (that user can h...
Definition: Value.cpp:179
bool use_empty() const
Definition: Value.h:344
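replaceAllUsesWith and use_empty combine into the standard replace-then-erase pattern; a sketch:

#include "llvm/IR/Instruction.h"
using namespace llvm;

// Redirect all uses of From to To, then delete From once it is dead.
void replaceAndErase(Instruction *From, Value *To) {
  From->replaceAllUsesWith(To);
  if (From->use_empty())
    From->eraseFromParent();
}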
user_iterator user_end()
Definition: Value.h:405
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
iterator_range< use_iterator > uses()
Definition: Value.h:376
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
iterator insertAfter(iterator where, pointer New)
Definition: ilist.h:174
A raw_ostream that writes to an SmallVector or SmallString.
Definition: raw_ostream.h:690
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
void emitOffloadingEntry(Module &M, Constant *Addr, StringRef Name, uint64_t Size, int32_t Flags, int32_t Data, StringRef SectionName)
Create an offloading section struct used to register this global at runtime.
Definition: Utility.cpp:64
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
Definition: OMPConstants.h:195
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
Definition: OMPConstants.h:252
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
Definition: OMPConstants.h:65
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
Definition: OMPConstants.h:45
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
WorksharingLoopType
A type of worksharing loop construct.
Definition: OMPConstants.h:283
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
Definition: OMPConstants.h:267
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
std::error_code getUniqueID(const Twine Path, UniqueID &Result)
Definition: Path.cpp:788
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:853
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2406
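A sketch of the range helpers (enumerate, all_of) and divideCeil on toy data:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

void rangeSketch() {
  SmallVector<int, 4> A = {3, 6, 10};
  for (auto [Idx, V] : enumerate(A)) // yields (0,3), (1,6), (2,10)
    (void)Idx, (void)V;
  bool AllPos = all_of(A, [](int V) { return V > 0; }); // true
  uint64_t Chunks = divideCeil(10, 3);                  // 4: rounds up
  (void)AllPos; (void)Chunks;
}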
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:649
auto successors(const MachineBasicBlock *BB)
AddressSpace
Definition: NVPTXBaseInfo.h:21
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
Definition: BitcodeReader.h:66
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
Definition: LoopPeel.cpp:832
void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch)
Move the instructions after an InsertPoint to the beginning of another BasicBlock.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
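A minimal sketch of building an if/then/else diamond at a given instruction; emitDiamond is a hypothetical wrapper, and the elided parts stand for caller-specific code:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

// Split before SplitPt and branch on Cond into fresh then/else blocks.
// ThenTerm/ElseTerm come back as the unconditional branches to the tail.
static void emitDiamond(llvm::Value *Cond, llvm::Instruction *SplitPt) {
  llvm::Instruction *ThenTerm = nullptr;
  llvm::Instruction *ElseTerm = nullptr;
  llvm::SplitBlockAndInsertIfThenElse(Cond, SplitPt->getIterator(), &ThenTerm,
                                      &ElseTerm);
  llvm::IRBuilder<> Builder(ThenTerm);
  // ... emit the "then" side here, then Builder.SetInsertPoint(ElseTerm) ...
}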
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
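A short sketch of splitting at an IRBuilder's current position with this helper; the wrapper name and block suffix are hypothetical:

#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/IRBuilder.h"

// Split at the builder's current insert point; with CreateBranch the old
// block is terminated by an unconditional branch to the new block.
static llvm::BasicBlock *splitAtIP(llvm::IRBuilderBase &Builder) {
  return llvm::splitBB(Builder.saveIP(), /*CreateBranch=*/true, "split");
}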
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
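A hedged cleanup sketch (hypothetical helper): attempt the merge on every block, relying on the function returning false as a no-op when merging is illegal, and on make_early_inc_range to keep the traversal valid while merged blocks are erased:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

// Fold each block into its predecessor where legal (single predecessor
// ending in an unconditional branch); otherwise the call does nothing.
static void collapseTrivialBlocks(llvm::Function &F) {
  for (llvm::BasicBlock &BB : llvm::make_early_inc_range(F))
    (void)llvm::MergeBlockIntoPredecessor(&BB);
}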
DWARFExpression::Operation Op
void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user specified parameters.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the specified operands and indices.
@ Continue
Definition: DWP.h:21
void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from their parent function.
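A single-pass sketch (hypothetical helper) that collects non-entry blocks with no predecessors and deletes them in one call; DeleteDeadBlocks updates PHI nodes in the surviving successors. Note drop_begin from earlier in this index is used to skip the entry block:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

// One pass only: successors that become unreachable afterwards would need
// another round or a worklist.
static void removeUnreachable(llvm::Function &F) {
  llvm::SmallVector<llvm::BasicBlock *> Dead;
  for (llvm::BasicBlock &BB : llvm::drop_begin(F)) // skip the entry block
    if (llvm::pred_empty(&BB))
      Dead.push_back(&BB);
  llvm::DeleteDeadBlocks(Dead);
}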
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
static void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
Definition: CodeMetrics.cpp:70
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
A struct to pack relevant information while generating atomic Ops.
A struct to pack the relevant information for an OpenMP depend clause.
Description of an LLVM-IR insertion point (IP) and a debug/source location (filename, line, column, ...).
Definition: OMPIRBuilder.h:593
This structure contains combined information generated for mappable clauses, including base pointers, pointers, sizes, map types, user-defined mappers, and non-contiguous information.
MapDeviceInfoArrayTy DevicePointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
Information about an OpenMP reduction.
AtomicReductionGenTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenTy ReductionGen
Callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entire region.
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
Value * NumTeams
The number of teams.
Value * DynCGGroupMem
The size of the dynamic shared memory.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has a 'nowait' clause.
Value * NumThreads
The number of threads.
Data structure to contain the information needed to uniquely identify a target entry.
Definition: OMPIRBuilder.h:183
static void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static const Target * lookupTarget(StringRef Triple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Parameters that control the generic loop unrolling transformation.
unsigned Count
A forced unrolling factor (the number of concatenated bodies of the original loop in the unrolled loop body).
unsigned Threshold
The cost threshold for the unrolled loop.
bool Force
Apply loop unroll on any kind of loop (mainly to loops that fail runtime unrolling).
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
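A hedged sketch of filling these preferences from a hypothetical tuning hook; the field values are illustrative, not recommendations:

#include "llvm/Analysis/TargetTransformInfo.h"

// Hypothetical tuning: force a 4-way unroll and relax the size budget.
static void tunePrefs(llvm::TargetTransformInfo::UnrollingPreferences &UP) {
  UP.Count = 4;       // concatenate four copies of the loop body
  UP.Force = true;    // unroll even if the heuristics would decline
  UP.Threshold = 300; // permit a larger unrolled body
}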
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin), device RTL, and clang.
Definition: OMPGridValues.h:57