68#define DEBUG_TYPE "openmp-ir-builder"
75 cl::desc(
"Use optimistic attributes describing "
76 "'as-if' properties of runtime calls."),
80 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
81 cl::desc(
"Factor for the unroll threshold to account for code "
82 "simplifications still taking place"),
93 if (!IP1.isSet() || !IP2.isSet())
95 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
100 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
101 case OMPScheduleType::UnorderedStaticChunked:
102 case OMPScheduleType::UnorderedStatic:
103 case OMPScheduleType::UnorderedDynamicChunked:
104 case OMPScheduleType::UnorderedGuidedChunked:
105 case OMPScheduleType::UnorderedRuntime:
106 case OMPScheduleType::UnorderedAuto:
107 case OMPScheduleType::UnorderedTrapezoidal:
108 case OMPScheduleType::UnorderedGreedy:
109 case OMPScheduleType::UnorderedBalanced:
110 case OMPScheduleType::UnorderedGuidedIterativeChunked:
111 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
112 case OMPScheduleType::UnorderedSteal:
113 case OMPScheduleType::UnorderedStaticBalancedChunked:
114 case OMPScheduleType::UnorderedGuidedSimd:
115 case OMPScheduleType::UnorderedRuntimeSimd:
116 case OMPScheduleType::OrderedStaticChunked:
117 case OMPScheduleType::OrderedStatic:
118 case OMPScheduleType::OrderedDynamicChunked:
119 case OMPScheduleType::OrderedGuidedChunked:
120 case OMPScheduleType::OrderedRuntime:
121 case OMPScheduleType::OrderedAuto:
122 case OMPScheduleType::OrderdTrapezoidal:
123 case OMPScheduleType::NomergeUnorderedStaticChunked:
124 case OMPScheduleType::NomergeUnorderedStatic:
125 case OMPScheduleType::NomergeUnorderedDynamicChunked:
126 case OMPScheduleType::NomergeUnorderedGuidedChunked:
127 case OMPScheduleType::NomergeUnorderedRuntime:
128 case OMPScheduleType::NomergeUnorderedAuto:
129 case OMPScheduleType::NomergeUnorderedTrapezoidal:
130 case OMPScheduleType::NomergeUnorderedGreedy:
131 case OMPScheduleType::NomergeUnorderedBalanced:
132 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
133 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
134 case OMPScheduleType::NomergeUnorderedSteal:
135 case OMPScheduleType::NomergeOrderedStaticChunked:
136 case OMPScheduleType::NomergeOrderedStatic:
137 case OMPScheduleType::NomergeOrderedDynamicChunked:
138 case OMPScheduleType::NomergeOrderedGuidedChunked:
139 case OMPScheduleType::NomergeOrderedRuntime:
140 case OMPScheduleType::NomergeOrderedAuto:
141 case OMPScheduleType::NomergeOrderedTrapezoidal:
142 case OMPScheduleType::OrderedDistributeChunked:
143 case OMPScheduleType::OrderedDistribute:
151 SchedType & OMPScheduleType::MonotonicityMask;
152 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
164 Builder.restoreIP(IP);
174 Kernel->getFnAttribute(
"target-features").getValueAsString();
175 if (Features.
count(
"+wavefrontsize64"))
190 bool HasSimdModifier,
bool HasDistScheduleChunks) {
192 switch (ClauseKind) {
193 case OMP_SCHEDULE_Default:
194 case OMP_SCHEDULE_Static:
195 return HasChunks ? OMPScheduleType::BaseStaticChunked
196 : OMPScheduleType::BaseStatic;
197 case OMP_SCHEDULE_Dynamic:
198 return OMPScheduleType::BaseDynamicChunked;
199 case OMP_SCHEDULE_Guided:
200 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
201 : OMPScheduleType::BaseGuidedChunked;
202 case OMP_SCHEDULE_Auto:
204 case OMP_SCHEDULE_Runtime:
205 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
206 : OMPScheduleType::BaseRuntime;
207 case OMP_SCHEDULE_Distribute:
208 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
209 : OMPScheduleType::BaseDistribute;
217 bool HasOrderedClause) {
218 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
219 OMPScheduleType::None &&
220 "Must not have ordering nor monotonicity flags already set");
223 ? OMPScheduleType::ModifierOrdered
224 : OMPScheduleType::ModifierUnordered;
225 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
228 if (OrderingScheduleType ==
229 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
230 return OMPScheduleType::OrderedGuidedChunked;
231 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
232 OMPScheduleType::ModifierOrdered))
233 return OMPScheduleType::OrderedRuntime;
235 return OrderingScheduleType;
241 bool HasSimdModifier,
bool HasMonotonic,
242 bool HasNonmonotonic,
bool HasOrderedClause) {
243 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
244 OMPScheduleType::None &&
245 "Must not have monotonicity flags already set");
246 assert((!HasMonotonic || !HasNonmonotonic) &&
247 "Monotonic and Nonmonotonic are contradicting each other");
250 return ScheduleType | OMPScheduleType::ModifierMonotonic;
251 }
else if (HasNonmonotonic) {
252 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
262 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
263 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
269 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
277 bool HasSimdModifier,
bool HasMonotonicModifier,
278 bool HasNonmonotonicModifier,
bool HasOrderedClause,
279 bool HasDistScheduleChunks) {
281 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
285 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
286 HasNonmonotonicModifier, HasOrderedClause);
301 assert(!Br->isConditional() &&
302 "BB's terminator must be an unconditional branch (or degenerate)");
305 Br->setSuccessor(0,
Target);
310 NewBr->setDebugLoc(
DL);
315 assert(New->getFirstInsertionPt() == New->begin() &&
316 "Target BB must not have PHI nodes");
332 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
336 NewBr->setDebugLoc(
DL);
348 Builder.SetInsertPoint(Old);
352 Builder.SetCurrentDebugLocation(
DebugLoc);
362 New->replaceSuccessorsPhiUsesWith(Old, New);
371 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
373 Builder.SetInsertPoint(Builder.GetInsertBlock());
376 Builder.SetCurrentDebugLocation(
DebugLoc);
385 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
387 Builder.SetInsertPoint(Builder.GetInsertBlock());
390 Builder.SetCurrentDebugLocation(
DebugLoc);
407 const Twine &Name =
"",
bool AsPtr =
true,
408 bool Is64Bit =
false) {
409 Builder.restoreIP(OuterAllocaIP);
413 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
417 FakeVal = FakeValAddr;
419 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
424 Builder.restoreIP(InnerAllocaIP);
427 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
430 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
443enum OpenMPOffloadingRequiresDirFlags {
445 OMP_REQ_UNDEFINED = 0x000,
447 OMP_REQ_NONE = 0x001,
449 OMP_REQ_REVERSE_OFFLOAD = 0x002,
451 OMP_REQ_UNIFIED_ADDRESS = 0x004,
453 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
455 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
462 : RequiresFlags(OMP_REQ_UNDEFINED) {}
466 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
467 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
470 RequiresFlags(OMP_REQ_UNDEFINED) {
471 if (HasRequiresReverseOffload)
472 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
473 if (HasRequiresUnifiedAddress)
474 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
475 if (HasRequiresUnifiedSharedMemory)
476 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
477 if (HasRequiresDynamicAllocators)
478 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
482 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
486 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
490 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
494 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
499 :
static_cast<int64_t
>(OMP_REQ_NONE);
504 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
506 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
511 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
513 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
518 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
520 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
525 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
527 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
540 constexpr size_t MaxDim = 3;
545 Value *DynCGroupMemFallbackFlag =
547 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
548 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
554 Value *NumThreads3D =
585 auto FnAttrs = Attrs.getFnAttrs();
586 auto RetAttrs = Attrs.getRetAttrs();
588 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
593 bool Param =
true) ->
void {
594 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
595 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
596 if (HasSignExt || HasZeroExt) {
597 assert(AS.getNumAttributes() == 1 &&
598 "Currently not handling extension attr combined with others.");
600 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
603 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
610#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
611#include "llvm/Frontend/OpenMP/OMPKinds.def"
615#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
617 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
618 addAttrSet(RetAttrs, RetAttrSet, false); \
619 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
620 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
621 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
623#include "llvm/Frontend/OpenMP/OMPKinds.def"
637#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
639 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
641 Fn = M.getFunction(Str); \
643#include "llvm/Frontend/OpenMP/OMPKinds.def"
649#define OMP_RTL(Enum, Str, ...) \
651 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
653#include "llvm/Frontend/OpenMP/OMPKinds.def"
657 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
667 LLVMContext::MD_callback,
669 2, {-1, -1},
true)}));
682 assert(Fn &&
"Failed to create OpenMP runtime function");
693 Builder.SetInsertPoint(FiniBB);
705 FiniBB = OtherFiniBB;
707 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
715 auto EndIt = FiniBB->end();
716 if (FiniBB->size() >= 1)
717 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
722 FiniBB->replaceAllUsesWith(OtherFiniBB);
723 FiniBB->eraseFromParent();
724 FiniBB = OtherFiniBB;
731 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
754 for (
auto Inst =
Block->getReverseIterator()->begin();
755 Inst !=
Block->getReverseIterator()->end();) {
784 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
809 ParallelRegionBlockSet.
clear();
811 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
821 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
830 ".omp_par", ArgsInZeroAddressSpace);
834 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
835 assert(Extractor.isEligible() &&
836 "Expected OpenMP outlining to be possible!");
838 for (
auto *V : OI.ExcludeArgsFromAggregate)
839 Extractor.excludeArgFromAggregate(V);
842 Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
846 if (TargetCpuAttr.isStringAttribute())
849 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
850 if (TargetFeaturesAttr.isStringAttribute())
851 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
854 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
856 "OpenMP outlined functions should not return a value!");
861 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
868 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
875 "Expected instructions to add in the outlined region entry");
877 End = ArtificialEntry.
rend();
882 if (
I.isTerminator()) {
884 if (OI.EntryBB->getTerminator())
885 OI.EntryBB->getTerminator()->adoptDbgRecords(
886 &ArtificialEntry,
I.getIterator(),
false);
890 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
893 OI.EntryBB->moveBefore(&ArtificialEntry);
900 if (OI.PostOutlineCB)
901 OI.PostOutlineCB(*OutlinedFn);
903 if (OI.FixUpNonEntryAllocas)
935 errs() <<
"Error of kind: " << Kind
936 <<
" when emitting offload entries and metadata during "
937 "OMPIRBuilder finalization \n";
943 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
944 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
945 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
946 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
963 ConstantInt::get(I32Ty,
Value), Name);
976 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
980 if (UsedArray.
empty())
987 GV->setSection(
"llvm.metadata");
993 auto *Int8Ty =
Builder.getInt8Ty();
996 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1004 unsigned Reserve2Flags) {
1006 LocFlags |= OMP_IDENT_FLAG_KMPC;
1013 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1014 ConstantInt::get(Int32, Reserve2Flags),
1015 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1017 size_t SrcLocStrArgIdx = 4;
1018 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1022 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1029 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1030 if (
GV.getInitializer() == Initializer)
1035 M, OpenMPIRBuilder::Ident,
1038 M.getDataLayout().getDefaultGlobalsAddressSpace());
1050 SrcLocStrSize = LocStr.
size();
1059 if (
GV.isConstant() &&
GV.hasInitializer() &&
1060 GV.getInitializer() == Initializer)
1063 SrcLocStr =
Builder.CreateGlobalString(
1064 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1072 unsigned Line,
unsigned Column,
1078 Buffer.
append(FunctionName);
1080 Buffer.
append(std::to_string(Line));
1082 Buffer.
append(std::to_string(Column));
1090 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1101 if (
DIFile *DIF = DIL->getFile())
1102 if (std::optional<StringRef> Source = DIF->getSource())
1108 DIL->getColumn(), SrcLocStrSize);
1114 Loc.IP.getBlock()->getParent());
1120 "omp_global_thread_num");
1125 bool ForceSimpleCall,
bool CheckCancelFlag) {
1135 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1138 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1141 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1144 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1147 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1160 bool UseCancelBarrier =
1165 ? OMPRTL___kmpc_cancel_barrier
1166 : OMPRTL___kmpc_barrier),
1169 if (UseCancelBarrier && CheckCancelFlag)
1179 omp::Directive CanceledDirective) {
1184 auto *UI =
Builder.CreateUnreachable();
1192 Builder.SetInsertPoint(ElseTI);
1193 auto ElseIP =
Builder.saveIP();
1201 Builder.SetInsertPoint(ThenTI);
1203 Value *CancelKind =
nullptr;
1204 switch (CanceledDirective) {
1205#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1206 case DirectiveEnum: \
1207 CancelKind = Builder.getInt32(Value); \
1209#include "llvm/Frontend/OpenMP/OMPKinds.def"
1226 Builder.SetInsertPoint(UI->getParent());
1227 UI->eraseFromParent();
1234 omp::Directive CanceledDirective) {
1239 auto *UI =
Builder.CreateUnreachable();
1242 Value *CancelKind =
nullptr;
1243 switch (CanceledDirective) {
1244#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1245 case DirectiveEnum: \
1246 CancelKind = Builder.getInt32(Value); \
1248#include "llvm/Frontend/OpenMP/OMPKinds.def"
1265 Builder.SetInsertPoint(UI->getParent());
1266 UI->eraseFromParent();
1279 auto *KernelArgsPtr =
1280 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1285 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1288 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1292 NumThreads, HostPtr, KernelArgsPtr};
1319 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1323 Value *Return =
nullptr;
1343 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1344 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1351 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1353 auto CurFn =
Builder.GetInsertBlock()->getParent();
1360 emitBlock(OffloadContBlock, CurFn,
true);
1365 Value *CancelFlag, omp::Directive CanceledDirective) {
1367 "Unexpected cancellation!");
1387 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1396 Builder.SetInsertPoint(CancellationBlock);
1397 Builder.CreateBr(*FiniBBOrErr);
1400 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1419 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1422 "Expected at least tid and bounded tid as arguments");
1423 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1426 assert(CI &&
"Expected call instruction to outlined function");
1427 CI->
getParent()->setName(
"omp_parallel");
1429 Builder.SetInsertPoint(CI);
1430 Type *PtrTy = OMPIRBuilder->VoidPtr;
1434 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1438 Value *Args = ArgsAlloca;
1442 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1443 Builder.restoreIP(CurrentIP);
1446 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1448 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1450 Builder.CreateStore(V, StoreAddress);
1454 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1455 : Builder.getInt32(1);
1458 Value *Parallel60CallArgs[] = {
1462 NumThreads ? NumThreads : Builder.getInt32(-1),
1463 Builder.getInt32(-1),
1467 Builder.getInt64(NumCapturedVars),
1468 Builder.getInt32(0)};
1476 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1479 Builder.SetInsertPoint(PrivTID);
1481 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1488 I->eraseFromParent();
1511 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1519 F->addMetadata(LLVMContext::MD_callback,
1528 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1531 "Expected at least tid and bounded tid as arguments");
1532 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1535 CI->
getParent()->setName(
"omp_parallel");
1536 Builder.SetInsertPoint(CI);
1539 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1543 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1545 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1552 auto PtrTy = OMPIRBuilder->VoidPtr;
1553 if (IfCondition && NumCapturedVars == 0) {
1561 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1564 Builder.SetInsertPoint(PrivTID);
1566 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1573 I->eraseFromParent();
1581 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1590 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1591 (ProcBind != OMP_PROC_BIND_default);
1598 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1602 if (NumThreads && !
Config.isTargetDevice()) {
1605 Builder.CreateIntCast(NumThreads, Int32,
false)};
1610 if (ProcBind != OMP_PROC_BIND_default) {
1614 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1636 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1639 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1642 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1646 PointerType ::get(
M.getContext(), 0),
1647 "zero.addr.ascast");
1671 if (IP.getBlock()->end() == IP.getPoint()) {
1677 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1678 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1679 "Unexpected insertion point for finalization call!");
1691 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1697 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1715 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1718 assert(BodyGenCB &&
"Expected body generation callback!");
1720 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1723 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1726 if (
Config.isTargetDevice()) {
1729 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1731 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1732 ThreadID, ToBeDeletedVec);
1738 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1740 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1762 ".omp_par", ArgsInZeroAddressSpace);
1767 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1769 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1774 return GV->getValueType() == OpenMPIRBuilder::Ident;
1779 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1785 if (&V == TIDAddr || &V == ZeroAddr) {
1791 for (
Use &U : V.uses())
1793 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1803 if (!V.getType()->isPointerTy()) {
1807 Builder.restoreIP(OuterAllocaIP);
1809 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1813 Builder.SetInsertPoint(InsertBB,
1818 Builder.restoreIP(InnerAllocaIP);
1819 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1822 Value *ReplacementValue =
nullptr;
1825 ReplacementValue = PrivTID;
1828 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
1836 assert(ReplacementValue &&
1837 "Expected copy/create callback to set replacement value!");
1838 if (ReplacementValue == &V)
1843 UPtr->set(ReplacementValue);
1868 for (
Value *Output : Outputs)
1872 "OpenMP outlining should not produce live-out values!");
1874 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1876 for (
auto *BB : Blocks)
1877 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1885 assert(FiniInfo.DK == OMPD_parallel &&
1886 "Unexpected finalization stack state!");
1897 Builder.CreateBr(*FiniBBOrErr);
1901 Term->eraseFromParent();
1907 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1908 UI->eraseFromParent();
1975 if (Dependencies.
empty())
1995 Type *DependInfo = OMPBuilder.DependInfo;
1998 Value *DepArray =
nullptr;
2000 Builder.SetInsertPoint(
2004 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2006 Builder.restoreIP(OldIP);
2008 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2010 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2012 Value *Addr = Builder.CreateStructGEP(
2014 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2015 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
2016 Builder.CreateStore(DepValPtr, Addr);
2019 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2020 Builder.CreateStore(
2021 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
2024 Value *Flags = Builder.CreateStructGEP(
2026 static_cast<unsigned int>(RTLDependInfoFields::Flags));
2027 Builder.CreateStore(
2028 ConstantInt::get(Builder.getInt8Ty(),
2029 static_cast<unsigned int>(Dep.DepKind)),
2036Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2038 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2053 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2057 "omp_taskloop_dup",
M);
2060 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2061 DestTaskArg->
setName(
"dest_task");
2062 SrcTaskArg->
setName(
"src_task");
2063 LastprivateFlagArg->
setName(
"lastprivate_flag");
2065 IRBuilderBase::InsertPointGuard Guard(
Builder);
2069 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2070 Type *TaskWithPrivatesTy =
2073 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2075 PrivatesTy, TaskPrivates,
2080 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2081 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2083 DestTaskContextPtr->
setName(
"destPtr");
2084 SrcTaskContextPtr->
setName(
"srcPtr");
2089 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2090 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2091 if (!AfterIPOrError)
2093 Builder.restoreIP(*AfterIPOrError);
2103 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2105 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2107 Value *TaskContextStructPtrVal) {
2112 uint32_t SrcLocStrSize;
2128 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
2131 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2136 llvm::CanonicalLoopInfo *CLI = result.
get();
2138 OI.
EntryBB = TaskloopAllocaBB;
2139 OI.OuterAllocaBB = AllocaIP.getBlock();
2140 OI.ExitBB = TaskloopExitBB;
2146 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2148 TaskloopAllocaIP,
"lb",
false,
true);
2150 TaskloopAllocaIP,
"ub",
false,
true);
2152 TaskloopAllocaIP,
"step",
false,
true);
2155 OI.Inputs.insert(FakeLB);
2156 OI.Inputs.insert(FakeUB);
2157 OI.Inputs.insert(FakeStep);
2158 if (TaskContextStructPtrVal)
2159 OI.Inputs.insert(TaskContextStructPtrVal);
2160 assert(((TaskContextStructPtrVal && DupCB) ||
2161 (!TaskContextStructPtrVal && !DupCB)) &&
2162 "Task context struct ptr and duplication callback must be both set "
2168 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2172 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2173 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2176 if (!TaskDupFnOrErr) {
2179 Value *TaskDupFn = *TaskDupFnOrErr;
2181 OI.PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2182 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2183 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2184 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2185 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2187 assert(OutlinedFn.hasOneUse() &&
2188 "there must be a single user for the outlined function");
2194 IRBuilderBase::InsertPoint CurrentIp =
Builder.saveIP();
2196 Value *CastedLBVal =
2197 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2198 Value *CastedUBVal =
2199 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2200 Value *CastedStepVal =
2201 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2204 Builder.SetInsertPoint(StaleCI);
2217 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2238 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2240 AllocaInst *ArgStructAlloca =
2242 assert(ArgStructAlloca &&
2243 "Unable to find the alloca instruction corresponding to arguments "
2244 "for extracted function");
2245 std::optional<TypeSize> ArgAllocSize =
2248 "Unable to determine size of arguments for extracted function");
2249 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2254 CallInst *TaskData =
Builder.CreateCall(
2255 TaskAllocFn, {Ident, ThreadID,
Flags,
2256 TaskSize, SharedsSize,
2261 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2262 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2267 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2270 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2273 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2279 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2285 Value *GrainSizeVal =
2286 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2288 Value *TaskDup = TaskDupFn;
2290 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2291 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2296 Builder.CreateCall(TaskloopFn, Args);
2303 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2308 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2310 LoadInst *SharedsOutlined =
2311 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2312 OutlinedFn.getArg(1)->replaceUsesWithIf(
2314 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2317 Type *IVTy =
IV->getType();
2323 Value *TaskLB =
nullptr;
2324 Value *TaskUB =
nullptr;
2325 Value *LoadTaskLB =
nullptr;
2326 Value *LoadTaskUB =
nullptr;
2327 for (Instruction &
I : *TaskloopAllocaBB) {
2328 if (
I.getOpcode() == Instruction::GetElementPtr) {
2331 switch (CI->getZExtValue()) {
2340 }
else if (
I.getOpcode() == Instruction::Load) {
2342 if (
Load.getPointerOperand() == TaskLB) {
2343 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2345 }
else if (
Load.getPointerOperand() == TaskUB) {
2346 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2352 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2354 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2355 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2356 Value *TripCountMinusOne =
2357 Builder.CreateSDiv(
Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
2358 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2359 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2360 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2362 CLI->setTripCount(CastedTripCount);
2364 Builder.SetInsertPoint(CLI->getBody(),
2365 CLI->getBody()->getFirstInsertionPt());
2367 if (NumOfCollapseLoops > 1) {
2373 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2376 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2377 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2378 User *IVUser = IVUse->getUser();
2380 if (
Op->getOpcode() == Instruction::URem ||
2381 Op->getOpcode() == Instruction::UDiv) {
2386 for (User *User : UsersToReplace) {
2387 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2404 assert(CLI->getIndVar()->getNumUses() == 3 &&
2405 "Canonical loop should have exactly three uses of the ind var");
2406 for (User *IVUser : CLI->getIndVar()->users()) {
2408 if (
Mul->getOpcode() == Instruction::Mul) {
2409 for (User *MulUser :
Mul->users()) {
2411 if (
Add->getOpcode() == Instruction::Add) {
2412 Add->setOperand(1, CastedTaskLB);
2421 FakeLB->replaceAllUsesWith(CastedLBVal);
2422 FakeUB->replaceAllUsesWith(CastedUBVal);
2423 FakeStep->replaceAllUsesWith(CastedStepVal);
2425 I->eraseFromParent();
2430 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2470 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
2481 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2483 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2484 Mergeable, Priority, EventHandle, TaskAllocaBB,
2485 ToBeDeleted](
Function &OutlinedFn)
mutable {
2487 assert(OutlinedFn.hasOneUse() &&
2488 "there must be a single user for the outlined function");
2493 bool HasShareds = StaleCI->
arg_size() > 1;
2494 Builder.SetInsertPoint(StaleCI);
2519 Flags =
Builder.CreateOr(FinalFlag, Flags);
2532 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2541 assert(ArgStructAlloca &&
2542 "Unable to find the alloca instruction corresponding to arguments "
2543 "for extracted function");
2544 std::optional<TypeSize> ArgAllocSize =
2547 "Unable to determine size of arguments for extracted function");
2548 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2554 TaskAllocFn, {Ident, ThreadID, Flags,
2555 TaskSize, SharedsSize,
2563 OMPRTL___kmpc_task_allow_completion_event);
2567 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2569 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2570 Builder.CreateStore(EventVal, EventHandleAddr);
2576 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2577 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2595 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2598 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2600 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2603 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2604 PriorityData, {Zero, Zero});
2605 Builder.CreateStore(Priority, CmplrData);
2632 Builder.GetInsertPoint()->getParent()->getTerminator();
2633 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2634 Builder.SetInsertPoint(IfTerminator);
2637 Builder.SetInsertPoint(ElseTI);
2639 if (Dependencies.
size()) {
2644 {Ident, ThreadID,
Builder.getInt32(Dependencies.
size()), DepArray,
2645 ConstantInt::get(
Builder.getInt32Ty(), 0),
2660 Builder.SetInsertPoint(ThenTI);
2663 if (Dependencies.
size()) {
2668 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
2669 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
2680 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2682 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2683 OutlinedFn.getArg(1)->replaceUsesWithIf(
2684 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2688 I->eraseFromParent();
2692 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2718 Builder.SetInsertPoint(TaskgroupExitBB);
2761 unsigned CaseNumber = 0;
2762 for (
auto SectionCB : SectionCBs) {
2764 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2766 Builder.SetInsertPoint(CaseBB);
2780 Value *LB = ConstantInt::get(I32Ty, 0);
2781 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2782 Value *ST = ConstantInt::get(I32Ty, 1);
2784 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2789 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2790 WorksharingLoopType::ForStaticLoop, !IsNowait);
2796 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2800 assert(FiniInfo.DK == OMPD_sections &&
2801 "Unexpected finalization stack state!");
2802 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
2816 if (IP.getBlock()->end() != IP.getPoint())
2827 auto *CaseBB =
Loc.IP.getBlock();
2828 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2829 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2835 Directive OMPD = Directive::OMPD_sections;
2838 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2849Value *OpenMPIRBuilder::getGPUThreadID() {
2852 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2856Value *OpenMPIRBuilder::getGPUWarpSize() {
2861Value *OpenMPIRBuilder::getNVPTXWarpID() {
2862 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2863 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2866Value *OpenMPIRBuilder::getNVPTXLaneID() {
2867 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2868 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2869 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2870 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
2877 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
2878 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
2879 assert(FromSize > 0 &&
"From size must be greater than zero");
2880 assert(ToSize > 0 &&
"To size must be greater than zero");
2881 if (FromType == ToType)
2883 if (FromSize == ToSize)
2884 return Builder.CreateBitCast(From, ToType);
2886 return Builder.CreateIntCast(From, ToType,
true);
2892 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2893 CastItem,
Builder.getPtrTy(0));
2894 Builder.CreateStore(From, ValCastItem);
2895 return Builder.CreateLoad(ToType, CastItem);
2902 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
2903 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2907 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2909 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
2911 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2912 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2913 Value *WarpSizeCast =
2915 Value *ShuffleCall =
2917 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2924 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
2936 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
2937 Value *ElemPtr = DstAddr;
2938 Value *Ptr = SrcAddr;
2939 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2943 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2946 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2947 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2951 if ((
Size / IntSize) > 1) {
2952 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2953 SrcAddrGEP,
Builder.getPtrTy());
2970 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
2972 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
2975 Value *Res = createRuntimeShuffleFunction(
2978 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
2980 Builder.CreateAlignedStore(Res, ElemPtr,
2981 M.getDataLayout().getPrefTypeAlign(ElemType));
2983 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2984 Value *LocalElemPtr =
2985 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2991 Value *Res = createRuntimeShuffleFunction(
2992 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
2995 Res =
Builder.CreateTrunc(Res, ElemType);
2996 Builder.CreateStore(Res, ElemPtr);
2997 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2999 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3005Error OpenMPIRBuilder::emitReductionListCopy(
3010 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3011 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3015 for (
auto En :
enumerate(ReductionInfos)) {
3017 Value *SrcElementAddr =
nullptr;
3018 AllocaInst *DestAlloca =
nullptr;
3019 Value *DestElementAddr =
nullptr;
3020 Value *DestElementPtrAddr =
nullptr;
3022 bool ShuffleInElement =
false;
3025 bool UpdateDestListPtr =
false;
3029 ReductionArrayTy, SrcBase,
3030 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3031 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3035 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3036 ReductionArrayTy, DestBase,
3037 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3038 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3044 Type *DestAllocaType =
3045 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3046 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3047 ".omp.reduction.element");
3049 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3050 DestElementAddr = DestAlloca;
3053 DestElementAddr->
getName() +
".ascast");
3055 ShuffleInElement =
true;
3056 UpdateDestListPtr =
true;
3068 if (ShuffleInElement) {
3069 Type *ShuffleType = RI.ElementType;
3070 Value *ShuffleSrcAddr = SrcElementAddr;
3071 Value *ShuffleDestAddr = DestElementAddr;
3072 AllocaInst *LocalStorage =
nullptr;
3075 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3076 assert(RI.ByRefAllocatedType &&
3077 "Expected by-ref allocated type to be set");
3082 ShuffleType = RI.ByRefElementType;
3085 RI.DataPtrPtrGen(
Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3088 return GenResult.takeError();
3090 ShuffleSrcAddr =
Builder.CreateLoad(
Builder.getPtrTy(), ShuffleSrcAddr);
3096 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3098 ShuffleDestAddr = LocalStorage;
3102 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3103 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3107 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3108 DestAlloca,
Builder.getPtrTy(),
".ascast");
3111 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3112 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3115 return GenResult.takeError();
3118 switch (RI.EvaluationKind) {
3120 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3122 Builder.CreateStore(Elem, DestElementAddr);
3126 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3127 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3129 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3131 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3133 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3135 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3136 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3137 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3138 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3139 Builder.CreateStore(SrcReal, DestRealPtr);
3140 Builder.CreateStore(SrcImg, DestImgPtr);
3145 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3147 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3148 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3160 if (UpdateDestListPtr) {
3161 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3162 DestElementAddr,
Builder.getPtrTy(),
3163 DestElementAddr->
getName() +
".ascast");
3164 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3171Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3175 LLVMContext &Ctx =
M.getContext();
3177 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3181 "_omp_reduction_inter_warp_copy_func", &
M);
3186 Builder.SetInsertPoint(EntryBB);
3203 StringRef TransferMediumName =
3204 "__openmp_nvptx_data_transfer_temporary_storage";
3205 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3206 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3208 if (!TransferMedium) {
3209 TransferMedium =
new GlobalVariable(
3217 Value *GPUThreadID = getGPUThreadID();
3219 Value *LaneID = getNVPTXLaneID();
3221 Value *WarpID = getNVPTXWarpID();
3225 Builder.GetInsertBlock()->getFirstInsertionPt());
3229 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3230 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3231 AllocaInst *NumWarpsAlloca =
3232 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3233 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3234 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3235 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3236 NumWarpsAlloca,
Builder.getPtrTy(0),
3237 NumWarpsAlloca->
getName() +
".ascast");
3238 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3239 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3248 for (
auto En :
enumerate(ReductionInfos)) {
3254 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3255 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3256 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3257 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3260 unsigned NumIters = RealTySize / TySize;
3263 Value *Cnt =
nullptr;
3264 Value *CntAddr =
nullptr;
3271 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3273 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3274 CntAddr->
getName() +
".ascast");
3286 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3287 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3294 omp::Directive::OMPD_unknown,
3298 return BarrierIP1.takeError();
3304 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3305 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3309 auto *RedListArrayTy =
3312 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3314 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3315 {ConstantInt::get(IndexTy, 0),
3316 ConstantInt::get(IndexTy, En.index())});
3322 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3325 return GenRes.takeError();
3336 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3341 Builder.CreateStore(Elem, MediumPtr,
3353 omp::Directive::OMPD_unknown,
3357 return BarrierIP2.takeError();
3364 Value *NumWarpsVal =
3367 Value *IsActiveThread =
3368 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3369 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3376 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3378 Value *TargetElemPtrPtr =
3379 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3380 {ConstantInt::get(IndexTy, 0),
3381 ConstantInt::get(IndexTy, En.index())});
3382 Value *TargetElemPtrVal =
3384 Value *TargetElemPtr = TargetElemPtrVal;
3388 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3391 return GenRes.takeError();
3393 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3401 Value *SrcMediumValue =
3402 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3403 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3413 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3414 Builder.CreateStore(Cnt, CntAddr,
false);
3416 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3420 RealTySize %= TySize;
3430Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3433 LLVMContext &Ctx =
M.getContext();
3434 FunctionType *FuncTy =
3436 {Builder.getPtrTy(), Builder.getInt16Ty(),
3437 Builder.getInt16Ty(), Builder.getInt16Ty()},
3441 "_omp_reduction_shuffle_and_reduce_func", &
M);
3451 Builder.SetInsertPoint(EntryBB);
3462 Type *ReduceListArgType = ReduceListArg->
getType();
3466 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3467 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3468 LaneIDArg->
getName() +
".addr");
3470 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3471 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3472 AlgoVerArg->
getName() +
".addr");
3479 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3481 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3482 ReduceListAlloca, ReduceListArgType,
3483 ReduceListAlloca->
getName() +
".ascast");
3484 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3485 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3486 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3487 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3488 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3489 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3490 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3491 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3492 RemoteReductionListAlloca,
Builder.getPtrTy(),
3493 RemoteReductionListAlloca->
getName() +
".ascast");
3495 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3496 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3497 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3498 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3500 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3501 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3502 Value *RemoteLaneOffset =
3503 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3504 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3511 Error EmitRedLsCpRes = emitReductionListCopy(
3513 ReduceList, RemoteListAddrCast, IsByRef,
3514 {RemoteLaneOffset,
nullptr,
nullptr});
3517 return EmitRedLsCpRes;
3542 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3547 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3548 Value *RemoteOffsetComp =
3550 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3551 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3552 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3558 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3560 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3561 ReduceList,
Builder.getPtrTy());
3562 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3563 RemoteListAddrCast,
Builder.getPtrTy());
3565 ->addFnAttr(Attribute::NoUnwind);
3576 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3577 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3582 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3586 EmitRedLsCpRes = emitReductionListCopy(
3588 RemoteListAddrCast, ReduceList, IsByRef);
3591 return EmitRedLsCpRes;
3606OpenMPIRBuilder::generateReductionDescriptor(
3608 Type *DescriptorType,
3614 Value *DescriptorSize =
3615 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3617 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3618 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3622 Value *DataPtrField;
3624 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3627 return GenResult.takeError();
3630 DataPtr,
Builder.getPtrTy(),
".ascast"),
3636Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3640 LLVMContext &Ctx =
M.getContext();
3643 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3647 "_omp_reduction_list_to_global_copy_func", &
M);
3654 Builder.SetInsertPoint(EntryBlock);
3664 BufferArg->
getName() +
".addr");
3668 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3669 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3670 BufferArgAlloca,
Builder.getPtrTy(),
3671 BufferArgAlloca->
getName() +
".ascast");
3672 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3673 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3674 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3675 ReduceListArgAlloca,
Builder.getPtrTy(),
3676 ReduceListArgAlloca->
getName() +
".ascast");
3678 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3679 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3680 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3682 Value *LocalReduceList =
3684 Value *BufferArgVal =
3688 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3689 for (
auto En :
enumerate(ReductionInfos)) {
3691 auto *RedListArrayTy =
3695 RedListArrayTy, LocalReduceList,
3696 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3702 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3704 ReductionsBufferTy, BufferVD, 0, En.index());
3706 switch (RI.EvaluationKind) {
3708 Value *TargetElement;
3710 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3711 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3714 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3717 return GenResult.takeError();
3720 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3723 Builder.CreateStore(TargetElement, GlobVal);
3727 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3728 RI.ElementType, ElemPtr, 0, 0,
".realp");
3730 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3732 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3734 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3736 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3737 RI.ElementType, GlobVal, 0, 0,
".realp");
3738 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3739 RI.ElementType, GlobVal, 0, 1,
".imagp");
3740 Builder.CreateStore(SrcReal, DestRealPtr);
3741 Builder.CreateStore(SrcImg, DestImgPtr);
3746 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
3748 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3749 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3760Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
3764 LLVMContext &Ctx =
M.getContext();
3767 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3771 "_omp_reduction_list_to_global_reduce_func", &
M);
3778 Builder.SetInsertPoint(EntryBlock);
3788 BufferArg->
getName() +
".addr");
3792 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3793 auto *RedListArrayTy =
3798 Value *LocalReduceList =
3799 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3803 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3804 BufferArgAlloca,
Builder.getPtrTy(),
3805 BufferArgAlloca->
getName() +
".ascast");
3806 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3807 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3808 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3809 ReduceListArgAlloca,
Builder.getPtrTy(),
3810 ReduceListArgAlloca->
getName() +
".ascast");
3811 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3812 LocalReduceList,
Builder.getPtrTy(),
3813 LocalReduceList->
getName() +
".ascast");
3815 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3816 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3817 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3822 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3823 for (
auto En :
enumerate(ReductionInfos)) {
3827 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3831 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
3832 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3833 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3838 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
3839 RedListArrayTy, LocalReduceListAddrCast,
3840 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3842 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3844 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3845 ReductionsBufferTy, BufferVD, 0, En.index());
3847 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3851 Value *SrcElementPtrPtr =
3852 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3853 {ConstantInt::get(IndexTy, 0),
3854 ConstantInt::get(IndexTy, En.index())});
3855 Value *SrcDescriptorAddr =
3860 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
3861 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3864 return GenResult.takeError();
3866 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3868 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3876 ->addFnAttr(Attribute::NoUnwind);
3882Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
3886 LLVMContext &Ctx =
M.getContext();
3889 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3893 "_omp_reduction_global_to_list_copy_func", &
M);
3900 Builder.SetInsertPoint(EntryBlock);
3910 BufferArg->
getName() +
".addr");
3914 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3915 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3916 BufferArgAlloca,
Builder.getPtrTy(),
3917 BufferArgAlloca->
getName() +
".ascast");
3918 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3919 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3920 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3921 ReduceListArgAlloca,
Builder.getPtrTy(),
3922 ReduceListArgAlloca->
getName() +
".ascast");
3923 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3924 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3925 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3927 Value *LocalReduceList =
3932 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3933 for (
auto En :
enumerate(ReductionInfos)) {
3934 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3935 auto *RedListArrayTy =
3939 RedListArrayTy, LocalReduceList,
3940 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3945 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3946 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3947 ReductionsBufferTy, BufferVD, 0, En.index());
3953 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3959 return GenResult.takeError();
3964 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
3965 Builder.CreateStore(TargetElement, ElemPtr);
3969 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3978 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3980 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3982 Builder.CreateStore(SrcReal, DestRealPtr);
3983 Builder.CreateStore(SrcImg, DestImgPtr);
3990 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
3991 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4003Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4007 LLVMContext &Ctx =
M.getContext();
4010 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4014 "_omp_reduction_global_to_list_reduce_func", &
M);
4021 Builder.SetInsertPoint(EntryBlock);
4031 BufferArg->
getName() +
".addr");
4035 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4041 Value *LocalReduceList =
4042 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4046 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4047 BufferArgAlloca,
Builder.getPtrTy(),
4048 BufferArgAlloca->
getName() +
".ascast");
4049 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4050 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4051 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4052 ReduceListArgAlloca,
Builder.getPtrTy(),
4053 ReduceListArgAlloca->
getName() +
".ascast");
4054 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4055 LocalReduceList,
Builder.getPtrTy(),
4056 LocalReduceList->
getName() +
".ascast");
4058 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4059 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4060 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4065 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4066 for (
auto En :
enumerate(ReductionInfos)) {
4070 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4074 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4075 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4076 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4081 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4082 RedListArrayTy, ReductionList,
4083 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4086 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4087 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4088 ReductionsBufferTy, BufferVD, 0, En.index());
4090 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4092 Value *ReduceListVal =
4094 Value *SrcElementPtrPtr =
4095 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4096 {ConstantInt::get(IndexTy, 0),
4097 ConstantInt::get(IndexTy, En.index())});
4098 Value *SrcDescriptorAddr =
4103 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4104 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4106 return GenResult.takeError();
4108 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4110 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4118 ->addFnAttr(Attribute::NoUnwind);
4124std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4125 std::string Suffix =
4127 return (Name + Suffix).str();
4130Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4133 AttributeList FuncAttrs) {
4135 {Builder.getPtrTy(), Builder.getPtrTy()},
4137 std::string
Name = getReductionFuncName(ReducerName);
4145 Builder.SetInsertPoint(EntryBB);
4149 Value *LHSArrayPtr =
nullptr;
4150 Value *RHSArrayPtr =
nullptr;
4157 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4159 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4160 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4161 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4162 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4163 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4164 Builder.CreateStore(Arg0, LHSAddrCast);
4165 Builder.CreateStore(Arg1, RHSAddrCast);
4166 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4167 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4171 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4173 for (
auto En :
enumerate(ReductionInfos)) {
4176 RedArrayTy, RHSArrayPtr,
4177 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4179 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4180 RHSI8Ptr, RI.PrivateVariable->getType(),
4181 RHSI8Ptr->
getName() +
".ascast");
4184 RedArrayTy, LHSArrayPtr,
4185 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4187 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4188 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4197 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4198 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4199 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4206 return AfterIP.takeError();
4207 if (!
Builder.GetInsertBlock())
4208 return ReductionFunc;
4212 if (!IsByRef.
empty() && !IsByRef[En.index()])
4213 Builder.CreateStore(Reduced, LHSPtr);
4218 for (
auto En :
enumerate(ReductionInfos)) {
4219 unsigned Index = En.index();
4221 Value *LHSFixupPtr, *RHSFixupPtr;
4222 Builder.restoreIP(RI.ReductionGenClang(
4223 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4228 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4233 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4247 return ReductionFunc;
4255 assert(RI.Variable &&
"expected non-null variable");
4256 assert(RI.PrivateVariable &&
"expected non-null private variable");
4257 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4258 "expected non-null reduction generator callback");
4261 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4262 "expected variables and their private equivalents to have the same "
4265 assert(RI.Variable->getType()->isPointerTy() &&
4266 "expected variables to be pointers");
4275 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4289 if (ReductionInfos.
size() == 0)
4299 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4303 AttributeList FuncAttrs;
4304 AttrBuilder AttrBldr(Ctx);
4306 AttrBldr.addAttribute(Attr);
4307 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4308 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4312 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4314 if (!ReductionResult)
4316 Function *ReductionFunc = *ReductionResult;
4320 if (GridValue.has_value())
4321 Config.setGridValue(GridValue.value());
4336 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4340 Value *ReductionListAlloca =
4341 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4342 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4343 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4346 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4347 for (
auto En :
enumerate(ReductionInfos)) {
4350 RedArrayTy, ReductionList,
4351 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4354 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4359 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4360 Builder.CreateStore(CastElem, ElemPtr);
4364 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4370 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4376 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4378 unsigned MaxDataSize = 0;
4380 for (
auto En :
enumerate(ReductionInfos)) {
4381 auto Size =
M.getDataLayout().getTypeStoreSize(En.value().ElementType);
4382 if (
Size > MaxDataSize)
4384 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4385 ? En.value().ByRefElementType
4386 : En.value().ElementType;
4389 Value *ReductionDataSize =
4390 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4391 if (!IsTeamsReduction) {
4392 Value *SarFuncCast =
4393 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4395 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4396 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4399 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4404 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4406 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4409 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4414 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4419 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4424 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4431 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4433 Value *Args3[] = {SrcLocInfo,
4434 KernelTeamsReductionPtr,
4435 Builder.getInt32(ReductionBufNum),
4446 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4463 for (
auto En :
enumerate(ReductionInfos)) {
4471 Value *LHSPtr, *RHSPtr;
4473 &LHSPtr, &RHSPtr, CurFunc));
4486 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4488 "red.value." +
Twine(En.index()));
4499 if (!IsByRef.
empty() && !IsByRef[En.index()])
4504 if (ContinuationBlock) {
4505 Builder.CreateBr(ContinuationBlock);
4506 Builder.SetInsertPoint(ContinuationBlock);
4508 Config.setEmitLLVMUsed();
4519 ".omp.reduction.func", &M);
4529 Builder.SetInsertPoint(ReductionFuncBlock);
4530 Value *LHSArrayPtr =
nullptr;
4531 Value *RHSArrayPtr =
nullptr;
4542 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4544 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4545 Value *LHSAddrCast =
4546 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4547 Value *RHSAddrCast =
4548 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4549 Builder.CreateStore(Arg0, LHSAddrCast);
4550 Builder.CreateStore(Arg1, RHSAddrCast);
4551 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4552 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4554 LHSArrayPtr = ReductionFunc->
getArg(0);
4555 RHSArrayPtr = ReductionFunc->
getArg(1);
4558 unsigned NumReductions = ReductionInfos.
size();
4561 for (
auto En :
enumerate(ReductionInfos)) {
4563 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4564 RedArrayTy, LHSArrayPtr, 0, En.index());
4565 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4566 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4569 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4570 RedArrayTy, RHSArrayPtr, 0, En.index());
4571 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4572 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4581 Builder.restoreIP(*AfterIP);
4583 if (!Builder.GetInsertBlock())
4587 if (!IsByRef[En.index()])
4588 Builder.CreateStore(Reduced, LHSPtr);
4590 Builder.CreateRetVoid();
4597 bool IsNoWait,
bool IsTeamsReduction) {
4601 IsByRef, IsNoWait, IsTeamsReduction);
4608 if (ReductionInfos.
size() == 0)
4618 unsigned NumReductions = ReductionInfos.
size();
4621 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4623 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4625 for (
auto En :
enumerate(ReductionInfos)) {
4626 unsigned Index = En.index();
4628 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4629 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4636 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4646 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4651 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4652 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4654 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4656 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4657 : RuntimeFunction::OMPRTL___kmpc_reduce);
4660 {Ident, ThreadId, NumVariables, RedArraySize,
4661 RedArray, ReductionFunc, Lock},
4672 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4673 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4674 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4679 Builder.SetInsertPoint(NonAtomicRedBlock);
4680 for (
auto En :
enumerate(ReductionInfos)) {
4686 if (!IsByRef[En.index()]) {
4688 "red.value." +
Twine(En.index()));
4690 Value *PrivateRedValue =
4692 "red.private.value." +
Twine(En.index()));
4700 if (!
Builder.GetInsertBlock())
4703 if (!IsByRef[En.index()])
4707 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4708 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4710 Builder.CreateBr(ContinuationBlock);
4715 Builder.SetInsertPoint(AtomicRedBlock);
4716 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4723 if (!
Builder.GetInsertBlock())
4726 Builder.CreateBr(ContinuationBlock);
4739 if (!
Builder.GetInsertBlock())
4742 Builder.SetInsertPoint(ContinuationBlock);
4753 Directive OMPD = Directive::OMPD_master;
4758 Value *Args[] = {Ident, ThreadId};
4766 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4777 Directive OMPD = Directive::OMPD_masked;
4783 Value *ArgsEnd[] = {Ident, ThreadId};
4791 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4801 Call->setDoesNotThrow();
4816 bool IsInclusive,
ScanInfo *ScanRedInfo) {
4818 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4819 ScanVarsType, ScanRedInfo);
4830 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4833 Type *DestTy = ScanVarsType[i];
4834 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4837 Builder.CreateStore(Src, Val);
4842 Builder.GetInsertBlock()->getParent());
4845 IV = ScanRedInfo->
IV;
4848 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4851 Type *DestTy = ScanVarsType[i];
4853 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4855 Builder.CreateStore(Src, ScanVars[i]);
4869 Builder.GetInsertBlock()->getParent());
4874Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4878 Builder.restoreIP(AllocaIP);
4880 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4882 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4889 Builder.restoreIP(CodeGenIP);
4891 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
4892 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4896 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4897 AllocSpan,
nullptr,
"arr");
4898 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
4916 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
4925Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4931 Value *PrivateVar = RedInfo.PrivateVariable;
4932 Value *OrigVar = RedInfo.Variable;
4936 Type *SrcTy = RedInfo.ElementType;
4941 Builder.CreateStore(Src, OrigVar);
4964 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
4989 Builder.GetInsertBlock()->getModule(),
4996 Builder.GetInsertBlock()->getModule(),
5002 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5003 Builder.SetInsertPoint(InputBB);
5006 Builder.SetInsertPoint(LoopBB);
5022 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5024 Builder.SetInsertPoint(InnerLoopBB);
5028 Value *ReductionVal = RedInfo.PrivateVariable;
5031 Type *DestTy = RedInfo.ElementType;
5034 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5037 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5042 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5045 Builder.CreateStore(Result, LHSPtr);
5048 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5050 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5051 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5054 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5060 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5081 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5088Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5100 Error Err = InputLoopGen();
5111 Error Err = ScanLoopGen(Builder.saveIP());
5118void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5155 Builder.SetInsertPoint(Preheader);
5158 Builder.SetInsertPoint(Header);
5159 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5160 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5165 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5166 Builder.CreateCondBr(Cmp, Body, Exit);
5171 Builder.SetInsertPoint(Latch);
5173 "omp_" + Name +
".next",
true);
5184 CL->Header = Header;
5203 NextBB, NextBB, Name);
5235 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5244 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5245 ScanRedInfo->
Span = TripCount;
5251 ScanRedInfo->
IV =
IV;
5252 createScanBBs(ScanRedInfo);
5255 assert(Terminator->getNumSuccessors() == 1);
5256 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5259 Builder.GetInsertBlock()->getParent());
5262 Builder.GetInsertBlock()->getParent());
5263 Builder.CreateBr(ContinueBlock);
5269 const auto &&InputLoopGen = [&]() ->
Error {
5271 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5272 ComputeIP, Name,
true, ScanRedInfo);
5276 Builder.restoreIP((*LoopInfo)->getAfterIP());
5282 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5286 Builder.restoreIP((*LoopInfo)->getAfterIP());
5290 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5298 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5308 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5309 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5313 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5329 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5332 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5336 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5341 Value *CountIfLooping;
5342 if (InclusiveStop) {
5343 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5349 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5352 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5353 "omp_" + Name +
".tripcount");
5358 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5365 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5372 ScanRedInfo->
IV = IndVar;
5373 return BodyGenCB(
Builder.saveIP(), IndVar);
5379 Builder.getCurrentDebugLocation());
5390 unsigned Bitwidth = Ty->getIntegerBitWidth();
5393 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5396 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5406 unsigned Bitwidth = Ty->getIntegerBitWidth();
5409 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5412 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5420 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5422 "Require dedicated allocate IP");
5428 uint32_t SrcLocStrSize;
5434 Type *IVTy =
IV->getType();
5435 FunctionCallee StaticInit =
5436 LoopType == WorksharingLoopType::DistributeForStaticLoop
5439 FunctionCallee StaticFini =
5443 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5446 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5447 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5448 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5449 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5458 Constant *One = ConstantInt::get(IVTy, 1);
5459 Builder.CreateStore(Zero, PLowerBound);
5461 Builder.CreateStore(UpperBound, PUpperBound);
5462 Builder.CreateStore(One, PStride);
5467 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5468 ? OMPScheduleType::OrderedDistribute
5471 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5475 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5476 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5479 PLowerBound, PUpperBound});
5480 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5481 Value *PDistUpperBound =
5482 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5483 Args.push_back(PDistUpperBound);
5488 BuildInitCall(SchedulingType,
Builder);
5489 if (HasDistSchedule &&
5490 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5491 Constant *DistScheduleSchedType = ConstantInt::get(
5496 BuildInitCall(DistScheduleSchedType,
Builder);
5498 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5499 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5500 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5501 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5502 CLI->setTripCount(TripCount);
5508 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5512 return Builder.CreateAdd(OldIV, LowerBound);
5524 omp::Directive::OMPD_for,
false,
5527 return BarrierIP.takeError();
5554 Reachable.insert(
Block);
5564 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5568OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5572 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5573 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5578 Type *IVTy =
IV->getType();
5580 "Max supported tripcount bitwidth is 64 bits");
5582 :
Type::getInt64Ty(Ctx);
5585 Constant *One = ConstantInt::get(InternalIVTy, 1);
5592 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5595 if (ChunkSize || DistScheduleChunkSize)
5600 FunctionCallee StaticInit =
5602 FunctionCallee StaticFini =
5608 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5609 Value *PLowerBound =
5610 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5611 Value *PUpperBound =
5612 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5613 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5622 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5623 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5624 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5625 "distschedulechunksize");
5626 Value *CastedTripCount =
5627 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5630 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5632 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5633 Builder.CreateStore(Zero, PLowerBound);
5634 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5635 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5637 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5638 Builder.CreateStore(UpperBound, PUpperBound);
5639 Builder.CreateStore(One, PStride);
5643 uint32_t SrcLocStrSize;
5647 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5648 PUpperBound, PStride, One,
5649 this](
Value *SchedulingType,
Value *ChunkSize,
5652 StaticInit, {SrcLoc, ThreadNum,
5653 SchedulingType, PLastIter,
5654 PLowerBound, PUpperBound,
5658 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5659 if (DistScheduleSchedType != OMPScheduleType::None &&
5660 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5661 SchedType != OMPScheduleType::OrderedDistribute) {
5665 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5669 Value *FirstChunkStart =
5670 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5671 Value *FirstChunkStop =
5672 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5673 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5675 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5676 Value *NextChunkStride =
5677 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5681 Value *DispatchCounter;
5689 DispatchCounter = Counter;
5692 FirstChunkStart, CastedTripCount, NextChunkStride,
5715 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
5716 Value *IsLastChunk =
5717 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5718 Value *CountUntilOrigTripCount =
5719 Builder.CreateSub(CastedTripCount, DispatchCounter);
5721 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5722 Value *BackcastedChunkTC =
5723 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5724 CLI->setTripCount(BackcastedChunkTC);
5729 Value *BackcastedDispatchCounter =
5730 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5731 CLI->mapIndVar([&](Instruction *) ->
Value * {
5733 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
5746 return AfterIP.takeError();
5761static FunctionCallee
5764 unsigned Bitwidth = Ty->getIntegerBitWidth();
5767 case WorksharingLoopType::ForStaticLoop:
5770 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5773 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5775 case WorksharingLoopType::DistributeStaticLoop:
5778 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5781 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5783 case WorksharingLoopType::DistributeForStaticLoop:
5786 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5789 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5792 if (Bitwidth != 32 && Bitwidth != 64) {
5804 Function &LoopBodyFn,
bool NoLoop) {
5815 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5816 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5817 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5818 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5823 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5824 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5828 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5829 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5830 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5831 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5832 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5834 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5858 Builder.restoreIP({Preheader, Preheader->
end()});
5861 Builder.CreateBr(CLI->
getExit());
5869 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5877 "Expected unique undroppable user of outlined function");
5879 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5881 "Expected outlined function call to be located in loop preheader");
5883 if (OutlinedFnCallInstruction->
arg_size() > 1)
5890 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5892 for (
auto &ToBeDeletedItem : ToBeDeleted)
5893 ToBeDeletedItem->eraseFromParent();
5900 uint32_t SrcLocStrSize;
5909 SmallVector<Instruction *, 4> ToBeDeleted;
5911 OI.OuterAllocaBB = AllocaIP.getBlock();
5934 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
5936 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5938 CodeExtractorAnalysisCache CEAC(*OuterFn);
5939 CodeExtractor Extractor(Blocks,
5952 SetVector<Value *> SinkingCands, HoistingCands;
5956 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5963 for (
auto Use :
Users) {
5965 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5966 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
5972 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5979 OI.PostOutlineCB = [=, ToBeDeletedVec =
5980 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5990 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5991 bool HasSimdModifier,
bool HasMonotonicModifier,
5992 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5994 Value *DistScheduleChunkSize) {
5995 if (
Config.isTargetDevice())
5996 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
5998 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5999 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6001 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6002 OMPScheduleType::ModifierOrdered;
6004 if (HasDistSchedule) {
6005 DistScheduleSchedType = DistScheduleChunkSize
6006 ? OMPScheduleType::OrderedDistributeChunked
6007 : OMPScheduleType::OrderedDistribute;
6009 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6010 case OMPScheduleType::BaseStatic:
6011 case OMPScheduleType::BaseDistribute:
6012 assert((!ChunkSize || !DistScheduleChunkSize) &&
6013 "No chunk size with static-chunked schedule");
6014 if (IsOrdered && !HasDistSchedule)
6015 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6016 NeedsBarrier, ChunkSize);
6018 if (DistScheduleChunkSize)
6019 return applyStaticChunkedWorkshareLoop(
6020 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6021 DistScheduleChunkSize, DistScheduleSchedType);
6022 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6025 case OMPScheduleType::BaseStaticChunked:
6026 case OMPScheduleType::BaseDistributeChunked:
6027 if (IsOrdered && !HasDistSchedule)
6028 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6029 NeedsBarrier, ChunkSize);
6031 return applyStaticChunkedWorkshareLoop(
6032 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6033 DistScheduleChunkSize, DistScheduleSchedType);
6035 case OMPScheduleType::BaseRuntime:
6036 case OMPScheduleType::BaseAuto:
6037 case OMPScheduleType::BaseGreedy:
6038 case OMPScheduleType::BaseBalanced:
6039 case OMPScheduleType::BaseSteal:
6040 case OMPScheduleType::BaseRuntimeSimd:
6042 "schedule type does not support user-defined chunk sizes");
6044 case OMPScheduleType::BaseGuidedSimd:
6045 case OMPScheduleType::BaseDynamicChunked:
6046 case OMPScheduleType::BaseGuidedChunked:
6047 case OMPScheduleType::BaseGuidedIterativeChunked:
6048 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6049 case OMPScheduleType::BaseStaticBalancedChunked:
6050 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6051 NeedsBarrier, ChunkSize);
6064 unsigned Bitwidth = Ty->getIntegerBitWidth();
6067 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6070 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6078static FunctionCallee
6080 unsigned Bitwidth = Ty->getIntegerBitWidth();
6083 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6086 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6093static FunctionCallee
6095 unsigned Bitwidth = Ty->getIntegerBitWidth();
6098 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6101 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6106OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6109 bool NeedsBarrier,
Value *Chunk) {
6110 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6112 "Require dedicated allocate IP");
6114 "Require valid schedule type");
6116 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6117 OMPScheduleType::ModifierOrdered;
6122 uint32_t SrcLocStrSize;
6128 Type *IVTy =
IV->getType();
6133 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6135 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6136 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6137 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6138 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6147 Constant *One = ConstantInt::get(IVTy, 1);
6148 Builder.CreateStore(One, PLowerBound);
6150 Builder.CreateStore(UpperBound, PUpperBound);
6151 Builder.CreateStore(One, PStride);
6168 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6180 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6183 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6184 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6187 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6188 Builder.CreateCondBr(MoreWork, Header, Exit);
6194 PI->setIncomingBlock(0, OuterCond);
6195 PI->setIncomingValue(0, LowerBound);
6200 Br->setSuccessor(0, OuterCond);
6206 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6209 CI->setOperand(1, UpperBound);
6213 assert(BI->getSuccessor(1) == Exit);
6214 BI->setSuccessor(1, OuterCond);
6228 omp::Directive::OMPD_for,
false,
6231 return BarrierIP.takeError();
6250 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6255 if (BBsToErase.
count(UseInst->getParent()))
6262 while (BBsToErase.
remove_if(HasRemainingUses)) {
6273 assert(
Loops.size() >= 1 &&
"At least one loop required");
6274 size_t NumLoops =
Loops.size();
6278 return Loops.front();
6290 Loop->collectControlBlocks(OldControlBBs);
6294 if (ComputeIP.
isSet())
6301 Value *CollapsedTripCount =
nullptr;
6304 "All loops to collapse must be valid canonical loops");
6305 Value *OrigTripCount = L->getTripCount();
6306 if (!CollapsedTripCount) {
6307 CollapsedTripCount = OrigTripCount;
6312 CollapsedTripCount =
6313 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6319 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6325 Builder.restoreIP(Result->getBodyIP());
6327 Value *Leftover = Result->getIndVar();
6329 NewIndVars.
resize(NumLoops);
6330 for (
int i = NumLoops - 1; i >= 1; --i) {
6331 Value *OrigTripCount =
Loops[i]->getTripCount();
6333 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6334 NewIndVars[i] = NewIndVar;
6336 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6339 NewIndVars[0] = Leftover;
6348 BasicBlock *ContinueBlock = Result->getBody();
6350 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6357 ContinueBlock =
nullptr;
6358 ContinuePred = NextSrc;
6365 for (
size_t i = 0; i < NumLoops - 1; ++i)
6366 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6372 for (
size_t i = NumLoops - 1; i > 0; --i)
6373 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6376 ContinueWith(Result->getLatch(),
nullptr);
6383 for (
size_t i = 0; i < NumLoops; ++i)
6384 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6398std::vector<CanonicalLoopInfo *>
6402 "Must pass as many tile sizes as there are loops");
6403 int NumLoops =
Loops.size();
6404 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6416 Loop->collectControlBlocks(OldControlBBs);
6424 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6425 OrigTripCounts.
push_back(L->getTripCount());
6436 for (
int i = 0; i < NumLoops - 1; ++i) {
6449 for (
int i = 0; i < NumLoops; ++i) {
6451 Value *OrigTripCount = OrigTripCounts[i];
6464 Value *FloorTripOverflow =
6465 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6467 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6468 Value *FloorTripCount =
6469 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6470 "omp_floor" +
Twine(i) +
".tripcount",
true);
6473 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6479 std::vector<CanonicalLoopInfo *> Result;
6480 Result.reserve(NumLoops * 2);
6493 auto EmbeddNewLoop =
6494 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6497 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6502 Enter = EmbeddedLoop->
getBody();
6504 OutroInsertBefore = EmbeddedLoop->
getLatch();
6505 return EmbeddedLoop;
6509 const Twine &NameBase) {
6512 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6513 Result.push_back(EmbeddedLoop);
6517 EmbeddNewLoops(FloorCount,
"floor");
6523 for (
int i = 0; i < NumLoops; ++i) {
6527 Value *FloorIsEpilogue =
6529 Value *TileTripCount =
6536 EmbeddNewLoops(TileCounts,
"tile");
6541 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6550 BodyEnter =
nullptr;
6551 BodyEntered = ExitBB;
6563 Builder.restoreIP(Result.back()->getBodyIP());
6564 for (
int i = 0; i < NumLoops; ++i) {
6567 Value *OrigIndVar = OrigIndVars[i];
6595 if (Properties.
empty())
6618 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6622 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6630 if (
I.mayReadOrWriteMemory()) {
6634 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6648 Loop->collectControlBlocks(oldControlBBs);
6653 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6654 origTripCounts.
push_back(L->getTripCount());
6663 Builder.SetInsertPoint(TCBlock);
6664 Value *fusedTripCount =
nullptr;
6666 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6667 Value *origTripCount = L->getTripCount();
6668 if (!fusedTripCount) {
6669 fusedTripCount = origTripCount;
6672 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
6673 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
6687 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6688 Loops[i]->getPreheader()->moveBefore(TCBlock);
6689 Loops[i]->getAfter()->moveBefore(TCBlock);
6693 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6705 for (
size_t i = 0; i <
Loops.size(); ++i) {
6707 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
6708 Builder.SetInsertPoint(condBlock);
6716 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6717 Builder.SetInsertPoint(condBBs[i]);
6718 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
6734 "omp.fused.pre_latch");
6767 const Twine &NamePrefix) {
6796 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6798 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
6801 Builder.SetInsertPoint(SplitBeforeIt);
6803 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6806 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6809 Builder.SetInsertPoint(ElseBlock);
6815 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
6817 ExistingBlocks.
append(L->block_begin(), L->block_end());
6823 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6825 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
6832 if (
Block == ThenBlock)
6833 NewBB->
setName(NamePrefix +
".if.else");
6836 VMap[
Block] = NewBB;
6844 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
6845 NamePrefix +
".pre_latch");
6849 L->addBasicBlockToLoop(ThenBlock, LI);
6855 if (TargetTriple.
isX86()) {
6856 if (Features.
lookup(
"avx512f"))
6858 else if (Features.
lookup(
"avx"))
6862 if (TargetTriple.
isPPC())
6864 if (TargetTriple.
isWasm())
6871 Value *IfCond, OrderKind Order,
6890 if (AlignedVars.
size()) {
6892 for (
auto &AlignedItem : AlignedVars) {
6893 Value *AlignedPtr = AlignedItem.first;
6894 Value *Alignment = AlignedItem.second;
6897 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
6905 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
6918 Reachable.insert(
Block);
6928 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
6944 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6946 if (Simdlen || Safelen) {
6950 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6976static std::unique_ptr<TargetMachine>
6980 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6981 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6992 std::nullopt, OptLevel));
7016 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7017 FAM.registerPass([&]() {
return TIRA; });
7031 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7036 nullptr, ORE,
static_cast<int>(OptLevel),
7057 <<
" Threshold=" << UP.
Threshold <<
"\n"
7060 <<
" PartialOptSizeThreshold="
7080 Ptr = Load->getPointerOperand();
7082 Ptr = Store->getPointerOperand();
7089 if (Alloca->getParent() == &
F->getEntryBlock())
7109 int MaxTripCount = 0;
7110 bool MaxOrZero =
false;
7111 unsigned TripMultiple = 0;
7113 bool UseUpperBound =
false;
7115 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
7117 unsigned Factor = UP.
Count;
7118 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7129 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7145 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7158 *UnrolledCLI =
Loop;
7163 "unrolling only makes sense with a factor of 2 or larger");
7165 Type *IndVarTy =
Loop->getIndVarType();
7172 std::vector<CanonicalLoopInfo *>
LoopNest =
7187 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7190 (*UnrolledCLI)->assertOK();
7208 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7227 if (!CPVars.
empty()) {
7232 Directive OMPD = Directive::OMPD_single;
7237 Value *Args[] = {Ident, ThreadId};
7246 if (
Error Err = FiniCB(IP))
7267 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7274 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7277 ConstantInt::get(Int64, 0), CPVars[
I],
7280 }
else if (!IsNowait) {
7283 omp::Directive::OMPD_unknown,
false,
7298 Directive OMPD = Directive::OMPD_critical;
7303 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7304 Value *Args[] = {Ident, ThreadId, LockVar};
7321 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7329 const Twine &Name,
bool IsDependSource) {
7333 "OpenMP runtime requires depend vec with i64 type");
7346 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7360 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7378 Directive OMPD = Directive::OMPD_ordered;
7387 Value *Args[] = {Ident, ThreadId};
7397 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7404 bool HasFinalize,
bool IsCancellable) {
7411 BasicBlock *EntryBB = Builder.GetInsertBlock();
7420 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7431 "Unexpected control flow graph state!!");
7433 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7435 return AfterIP.takeError();
7440 "Unexpected Insertion point location!");
7443 auto InsertBB = merged ? ExitPredBB : ExitBB;
7446 Builder.SetInsertPoint(InsertBB);
7448 return Builder.saveIP();
7452 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7454 if (!Conditional || !EntryCall)
7460 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7470 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7474 UI->eraseFromParent();
7482 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7490 "Unexpected finalization stack state!");
7493 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7495 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7496 return std::move(Err);
7500 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7510 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7544 "copyin.not.master.end");
7551 Builder.SetInsertPoint(OMP_Entry);
7552 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7553 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7554 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7555 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7557 Builder.SetInsertPoint(CopyBegin);
7574 Value *Args[] = {ThreadId,
Size, Allocator};
7591 Value *Args[] = {ThreadId, Addr, Allocator};
7599 Value *DependenceAddress,
bool HaveNowaitClause) {
7607 if (Device ==
nullptr)
7609 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
7610 if (NumDependences ==
nullptr) {
7611 NumDependences = ConstantInt::get(Int32, 0);
7615 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7617 Ident, ThreadId, InteropVar, InteropTypeVal,
7618 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
7627 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
7635 if (Device ==
nullptr)
7637 if (NumDependences ==
nullptr) {
7638 NumDependences = ConstantInt::get(Int32, 0);
7642 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7644 Ident, ThreadId, InteropVar, Device,
7645 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7654 Value *NumDependences,
7655 Value *DependenceAddress,
7656 bool HaveNowaitClause) {
7663 if (Device ==
nullptr)
7665 if (NumDependences ==
nullptr) {
7666 NumDependences = ConstantInt::get(Int32, 0);
7670 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7672 Ident, ThreadId, InteropVar, Device,
7673 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7703 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
7704 "expected num_threads and num_teams to be specified");
7723 const std::string DebugPrefix =
"_debug__";
7724 if (KernelName.
ends_with(DebugPrefix)) {
7725 KernelName = KernelName.
drop_back(DebugPrefix.length());
7726 Kernel =
M.getFunction(KernelName);
7732 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
7737 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
7738 if (MaxThreadsVal < 0)
7739 MaxThreadsVal = std::max(
7742 if (MaxThreadsVal > 0)
7755 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7758 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7759 Constant *DynamicEnvironmentInitializer =
7763 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7765 DL.getDefaultGlobalsAddressSpace());
7769 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7770 ? DynamicEnvironmentGV
7772 DynamicEnvironmentPtr);
7775 ConfigurationEnvironment, {
7776 UseGenericStateMachineVal,
7777 MayUseNestedParallelismVal,
7784 ReductionBufferLength,
7787 KernelEnvironment, {
7788 ConfigurationEnvironmentInitializer,
7792 std::string KernelEnvironmentName =
7793 (KernelName +
"_kernel_environment").str();
7796 KernelEnvironmentInitializer, KernelEnvironmentName,
7798 DL.getDefaultGlobalsAddressSpace());
7802 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7803 ? KernelEnvironmentGV
7805 KernelEnvironmentPtr);
7806 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
7808 KernelLaunchEnvironment =
7809 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7810 ? KernelLaunchEnvironment
7811 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7812 KernelLaunchEnvParamTy);
7814 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7826 auto *UI =
Builder.CreateUnreachable();
7832 Builder.SetInsertPoint(WorkerExitBB);
7836 Builder.SetInsertPoint(CheckBBTI);
7837 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7839 CheckBBTI->eraseFromParent();
7840 UI->eraseFromParent();
7848 int32_t TeamsReductionDataSize,
7849 int32_t TeamsReductionBufferLength) {
7854 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7858 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7864 const std::string DebugPrefix =
"_debug__";
7866 KernelName = KernelName.
drop_back(DebugPrefix.length());
7867 auto *KernelEnvironmentGV =
7868 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7869 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7870 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
7872 KernelEnvironmentInitializer,
7873 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7875 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7877 KernelEnvironmentGV->setInitializer(NewInitializer);
7882 if (
Kernel.hasFnAttribute(Name)) {
7883 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7889std::pair<int32_t, int32_t>
7891 int32_t ThreadLimit =
7892 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7895 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7896 if (!Attr.isValid() || !Attr.isStringAttribute())
7897 return {0, ThreadLimit};
7898 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7901 return {0, ThreadLimit};
7902 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7908 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
7909 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
7910 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7912 return {0, ThreadLimit};
7918 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7921 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7929std::pair<int32_t, int32_t>
7932 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7936 int32_t LB, int32_t UB) {
7943 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7946void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7955 else if (
T.isNVPTX())
7957 else if (
T.isSPIRV())
7962Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
7963 StringRef EntryFnIDName) {
7964 if (
Config.isTargetDevice()) {
7965 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7969 return new GlobalVariable(
7974Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
7975 StringRef EntryFnName) {
7979 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
7980 "Named kernel already exists?");
7981 return new GlobalVariable(
7994 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
7998 OutlinedFn = *CBResult;
8000 OutlinedFn =
nullptr;
8006 if (!IsOffloadEntry)
8009 std::string EntryFnIDName =
8011 ? std::string(EntryFnName)
8015 EntryFnName, EntryFnIDName);
8023 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8024 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8025 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8027 EntryInfo, EntryAddr, OutlinedFnID,
8029 return OutlinedFnID;
8046 bool IsStandAlone = !BodyGenCB;
8053 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8055 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8056 true, DeviceAddrCB))
8063 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8073 SrcLocInfo, DeviceID,
8080 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8084 if (Info.HasNoWait) {
8094 if (Info.HasNoWait) {
8098 emitBlock(OffloadContBlock, CurFn,
true);
8104 bool RequiresOuterTargetTask = Info.HasNoWait;
8105 if (!RequiresOuterTargetTask)
8106 cantFail(TaskBodyCB(
nullptr,
nullptr,
8110 {}, RTArgs, Info.HasNoWait));
8113 omp::OMPRTL___tgt_target_data_begin_mapper);
8117 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8121 Builder.CreateStore(LI, DeviceMap.second.second);
8157 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8166 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8188 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8189 return BeginThenGen(AllocaIP,
Builder.saveIP());
8204 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8205 return EndThenGen(AllocaIP,
Builder.saveIP());
8208 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8209 return BeginThenGen(AllocaIP,
Builder.saveIP());
8220 bool IsGPUDistribute) {
8221 assert((IVSize == 32 || IVSize == 64) &&
8222 "IV size is not compatible with the omp runtime");
8224 if (IsGPUDistribute)
8226 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8227 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8228 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8229 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8231 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8232 : omp::OMPRTL___kmpc_for_static_init_4u)
8233 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8234 : omp::OMPRTL___kmpc_for_static_init_8u);
8241 assert((IVSize == 32 || IVSize == 64) &&
8242 "IV size is not compatible with the omp runtime");
8244 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8245 : omp::OMPRTL___kmpc_dispatch_init_4u)
8246 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8247 : omp::OMPRTL___kmpc_dispatch_init_8u);
8254 assert((IVSize == 32 || IVSize == 64) &&
8255 "IV size is not compatible with the omp runtime");
8257 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8258 : omp::OMPRTL___kmpc_dispatch_next_4u)
8259 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8260 : omp::OMPRTL___kmpc_dispatch_next_8u);
8267 assert((IVSize == 32 || IVSize == 64) &&
8268 "IV size is not compatible with the omp runtime");
8270 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8271 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8272 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8273 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8284 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8292 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8296 if (NewVar && (arg == NewVar->
getArg()))
8306 auto UpdateDebugRecord = [&](
auto *DR) {
8309 for (
auto Loc : DR->location_ops()) {
8310 auto Iter = ValueReplacementMap.find(
Loc);
8311 if (Iter != ValueReplacementMap.end()) {
8312 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8313 ArgNo = std::get<1>(Iter->second) + 1;
8317 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8324 "Unexpected debug intrinsic");
8326 UpdateDebugRecord(&DVR);
8331 Module *M = Func->getParent();
8334 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8336 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
8337 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
8339 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
8365 for (
auto &Arg : Inputs)
8366 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8370 for (
auto &Arg : Inputs)
8371 ParameterTypes.
push_back(Arg->getType());
8374 auto BB = Builder.GetInsertBlock();
8375 auto M = BB->getModule();
8386 if (TargetCpuAttr.isStringAttribute())
8387 Func->addFnAttr(TargetCpuAttr);
8389 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8390 if (TargetFeaturesAttr.isStringAttribute())
8391 Func->addFnAttr(TargetFeaturesAttr);
8396 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8407 Builder.SetInsertPoint(EntryBB);
8413 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8423 splitBB(Builder,
true,
"outlined.body");
8429 Builder.restoreIP(*AfterIP);
8434 Builder.CreateRetVoid();
8438 auto AllocaIP = Builder.saveIP();
8443 const auto &ArgRange =
8445 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
8478 if (Instr->getFunction() == Func)
8479 Instr->replaceUsesOfWith(
Input, InputCopy);
8485 for (
auto InArg :
zip(Inputs, ArgRange)) {
8487 Argument &Arg = std::get<1>(InArg);
8488 Value *InputCopy =
nullptr;
8491 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
8494 Builder.restoreIP(*AfterIP);
8495 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8515 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8522 ReplaceValue(
Input, InputCopy, Func);
8526 for (
auto Deferred : DeferredReplacement)
8527 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8530 ValueReplacementMap);
8538 Value *TaskWithPrivates,
8539 Type *TaskWithPrivatesTy) {
8541 Type *TaskTy = OMPIRBuilder.Task;
8544 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
8545 Value *Shareds = TaskT;
8555 if (TaskWithPrivatesTy != TaskTy)
8556 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
8573 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
8578 assert((!NumOffloadingArrays || PrivatesTy) &&
8579 "PrivatesTy cannot be nullptr when there are offloadingArrays"
8612 Type *TaskPtrTy = OMPBuilder.TaskPtr;
8613 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
8619 ".omp_target_task_proxy_func",
8620 Builder.GetInsertBlock()->getModule());
8621 Value *ThreadId = ProxyFn->getArg(0);
8622 Value *TaskWithPrivates = ProxyFn->getArg(1);
8623 ThreadId->
setName(
"thread.id");
8624 TaskWithPrivates->
setName(
"task");
8626 bool HasShareds = SharedArgsOperandNo > 0;
8627 bool HasOffloadingArrays = NumOffloadingArrays > 0;
8630 Builder.SetInsertPoint(EntryBB);
8636 if (HasOffloadingArrays) {
8637 assert(TaskTy != TaskWithPrivatesTy &&
8638 "If there are offloading arrays to pass to the target"
8639 "TaskTy cannot be the same as TaskWithPrivatesTy");
8642 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
8643 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
8645 Builder.CreateStructGEP(PrivatesTy, Privates, i));
8649 auto *ArgStructAlloca =
8651 assert(ArgStructAlloca &&
8652 "Unable to find the alloca instruction corresponding to arguments "
8653 "for extracted function");
8655 std::optional<TypeSize> ArgAllocSize =
8657 assert(ArgStructType && ArgAllocSize &&
8658 "Unable to determine size of arguments for extracted function");
8659 uint64_t StructSize = ArgAllocSize->getFixedValue();
8662 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
8664 Value *SharedsSize = Builder.getInt64(StructSize);
8667 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
8669 Builder.CreateMemCpy(
8670 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
8672 KernelLaunchArgs.
push_back(NewArgStructAlloca);
8675 Builder.CreateRetVoid();
8681 return GEP->getSourceElementType();
8683 return Alloca->getAllocatedType();
8706 if (OffloadingArraysToPrivatize.
empty())
8707 return OMPIRBuilder.Task;
8710 for (
Value *V : OffloadingArraysToPrivatize) {
8711 assert(V->getType()->isPointerTy() &&
8712 "Expected pointer to array to privatize. Got a non-pointer value "
8715 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8721 "struct.task_with_privates");
8735 EntryFnName, Inputs, CBFunc,
8740 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8877 TargetTaskAllocaBB->
begin());
8881 OI.
EntryBB = TargetTaskAllocaBB;
8887 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8890 Builder.restoreIP(TargetTaskBodyIP);
8891 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8909 bool NeedsTargetTask = HasNoWait && DeviceID;
8910 if (NeedsTargetTask) {
8916 OffloadingArraysToPrivatize.
push_back(V);
8921 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8922 DeviceID, OffloadingArraysToPrivatize](
8925 "there must be a single user for the outlined function");
8939 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8940 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8942 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8943 "Wrong number of arguments for StaleCI when shareds are present");
8944 int SharedArgOperandNo =
8945 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8951 if (!OffloadingArraysToPrivatize.
empty())
8956 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8957 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8959 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8962 Builder.SetInsertPoint(StaleCI);
8979 OMPRTL___kmpc_omp_target_task_alloc);
8991 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8998 auto *ArgStructAlloca =
9000 assert(ArgStructAlloca &&
9001 "Unable to find the alloca instruction corresponding to arguments "
9002 "for extracted function");
9003 std::optional<TypeSize> ArgAllocSize =
9006 "Unable to determine size of arguments for extracted function");
9007 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9026 TaskSize, SharedsSize,
9029 if (NeedsTargetTask) {
9030 assert(DeviceID &&
"Expected non-empty device ID.");
9040 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9041 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9044 if (!OffloadingArraysToPrivatize.
empty()) {
9046 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9047 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9048 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9055 "ElementType should match ArrayType");
9058 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9060 Dst, Alignment, PtrToPrivatize, Alignment,
9061 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9075 if (!NeedsTargetTask) {
9084 ConstantInt::get(
Builder.getInt32Ty(), 0),
9097 }
else if (DepArray) {
9105 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
9106 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
9116 I->eraseFromParent();
9121 << *(
Builder.GetInsertBlock()) <<
"\n");
9123 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9135 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9152 bool HasNoWait,
Value *DynCGroupMem,
9159 Builder.restoreIP(IP);
9161 return Builder.saveIP();
9164 bool HasDependencies = Dependencies.
size() > 0;
9165 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9182 if (OutlinedFnID && DeviceID)
9184 EmitTargetCallFallbackCB, KArgs,
9185 DeviceID, RTLoc, TargetTaskAllocaIP);
9193 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9200 auto &&EmitTargetCallElse =
9206 if (RequiresOuterTargetTask) {
9213 Dependencies, EmptyRTArgs, HasNoWait);
9215 return EmitTargetCallFallbackCB(Builder.saveIP());
9218 Builder.restoreIP(AfterIP);
9222 auto &&EmitTargetCallThen =
9225 Info.HasNoWait = HasNoWait;
9229 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9237 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9242 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9244 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9248 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9251 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9259 Value *MaxThreadsClause =
9261 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9264 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9266 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9267 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9269 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9270 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9272 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9275 unsigned NumTargetItems = Info.NumberOfPtrs;
9283 Builder.getInt64Ty(),
9285 : Builder.getInt64(0);
9289 DynCGroupMem = Builder.getInt32(0);
9292 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9293 HasNoWait, DynCGroupMemFallback);
9300 if (RequiresOuterTargetTask)
9302 RTLoc, AllocaIP, Dependencies,
9303 KArgs.
RTArgs, Info.HasNoWait);
9306 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9307 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9310 Builder.restoreIP(AfterIP);
9317 if (!OutlinedFnID) {
9318 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
9324 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
9329 EmitTargetCallElse, AllocaIP));
9356 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9357 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9363 if (!
Config.isTargetDevice())
9365 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
9366 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
9367 DynCGroupMemFallback);
9381 return OS.
str().str();
9386 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9392 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9394 assert(Elem.second->getValueType() == Ty &&
9395 "OMP internal variable has different type than requested");
9408 :
M.getTargetTriple().isAMDGPU()
9410 :
DL.getDefaultGlobalsAddressSpace();
9419 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9420 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9427Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9428 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9429 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9440 return SizePtrToInt;
9445 std::string VarName) {
9453 return MaptypesArrayGlobal;
9458 unsigned NumOperands,
9467 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9471 ArrI64Ty,
nullptr,
".offload_sizes");
9482 int64_t DeviceID,
unsigned NumOperands) {
9488 Value *ArgsBaseGEP =
9490 {Builder.getInt32(0), Builder.getInt32(0)});
9493 {Builder.getInt32(0), Builder.getInt32(0)});
9494 Value *ArgSizesGEP =
9496 {Builder.getInt32(0), Builder.getInt32(0)});
9500 Builder.getInt32(NumOperands),
9501 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9502 MaptypesArg, MapnamesArg, NullPtr});
9509 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9510 "expected region end call to runtime only when end call is separate");
9512 auto VoidPtrTy = UnqualPtrTy;
9513 auto VoidPtrPtrTy = UnqualPtrTy;
9515 auto Int64PtrTy = UnqualPtrTy;
9517 if (!Info.NumberOfPtrs) {
9529 Info.RTArgs.BasePointersArray,
9532 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
9536 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
9540 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
9541 : Info.RTArgs.MapTypesArray,
9547 if (!Info.EmitDebug)
9551 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
9556 if (!Info.HasMapper)
9560 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
9581 "struct.descriptor_dim");
9583 enum { OffsetFD = 0, CountFD, StrideFD };
9587 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
9590 if (NonContigInfo.
Dims[
I] == 1)
9595 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9597 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
9598 unsigned RevIdx = EE -
II - 1;
9602 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9604 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
9605 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9607 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9609 NonContigInfo.
Counts[L][RevIdx], CountLVal,
9610 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9612 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9614 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
9615 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9619 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
9620 DimsAddr,
Builder.getPtrTy());
9623 Info.RTArgs.PointersArray, 0,
I);
9625 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
9630void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9634 StringRef Prefix = IsInit ?
".init" :
".del";
9640 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
9641 Value *DeleteBit = Builder.CreateAnd(
9644 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9645 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9650 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
9651 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
9652 DeleteCond = Builder.CreateIsNull(
9657 DeleteCond =
Builder.CreateIsNotNull(
9673 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9674 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9675 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9676 MapTypeArg =
Builder.CreateOr(
9679 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9680 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9684 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
9685 ArraySize, MapTypeArg, MapName};
9711 MapperFn->
addFnAttr(Attribute::NoInline);
9712 MapperFn->
addFnAttr(Attribute::NoUnwind);
9722 auto SavedIP =
Builder.saveIP();
9723 Builder.SetInsertPoint(EntryBB);
9735 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
9737 Value *PtrBegin = BeginIn;
9743 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9744 MapType, MapName, ElementSize, HeadBB,
9755 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9756 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9762 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9763 PtrPHI->addIncoming(PtrBegin, HeadBB);
9768 return Info.takeError();
9772 Value *OffloadingArgs[] = {MapperHandle};
9776 Value *ShiftedPreviousSize =
9780 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
9781 Value *CurBaseArg = Info->BasePointers[
I];
9782 Value *CurBeginArg = Info->Pointers[
I];
9783 Value *CurSizeArg = Info->Sizes[
I];
9784 Value *CurNameArg = Info->Names.size()
9790 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9792 Value *MemberMapType =
9793 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9810 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9811 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9812 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9822 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9828 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9829 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9830 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9836 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9837 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9838 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9844 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9845 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9851 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9852 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9853 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9859 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9860 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9871 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9872 CurSizeArg, CurMapType, CurNameArg};
9874 auto ChildMapperFn = CustomMapperCB(
I);
9876 return ChildMapperFn.takeError();
9877 if (*ChildMapperFn) {
9892 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9893 "omp.arraymap.next");
9894 PtrPHI->addIncoming(PtrNext, LastBB);
9895 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9897 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9902 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9903 MapType, MapName, ElementSize, DoneBB,
9917 bool IsNonContiguous,
9921 Info.clearArrayInfo();
9924 if (Info.NumberOfPtrs == 0)
9933 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
9934 PointerArrayType,
nullptr,
".offload_baseptrs");
9936 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
9937 PointerArrayType,
nullptr,
".offload_ptrs");
9939 PointerArrayType,
nullptr,
".offload_mappers");
9940 Info.RTArgs.MappersArray = MappersArray;
9947 ConstantInt::get(Int64Ty, 0));
9949 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
9952 if (IsNonContiguous &&
9953 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9955 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9963 RuntimeSizes.
set(
I);
9966 if (RuntimeSizes.
all()) {
9968 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
9969 SizeArrayType,
nullptr,
".offload_sizes");
9975 auto *SizesArrayGbl =
9980 if (!RuntimeSizes.
any()) {
9981 Info.RTArgs.SizesArray = SizesArrayGbl;
9983 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
9984 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
9987 SizeArrayType,
nullptr,
".offload_sizes");
9991 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9992 SizesArrayGbl, OffloadSizeAlign,
9997 Info.RTArgs.SizesArray = Buffer;
10005 for (
auto mapFlag : CombinedInfo.
Types)
10007 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10011 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10017 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10018 Info.EmitDebug =
true;
10020 Info.RTArgs.MapNamesArray =
10022 Info.EmitDebug =
false;
10027 if (Info.separateBeginEndCalls()) {
10028 bool EndMapTypesDiffer =
false;
10030 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10031 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10032 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10033 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10034 EndMapTypesDiffer =
true;
10037 if (EndMapTypesDiffer) {
10039 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10044 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10047 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10049 Builder.CreateAlignedStore(BPVal, BP,
10050 M.getDataLayout().getPrefTypeAlign(PtrTy));
10052 if (Info.requiresDevicePointerInfo()) {
10054 CodeGenIP =
Builder.saveIP();
10056 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10057 Builder.restoreIP(CodeGenIP);
10059 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10061 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10063 DeviceAddrCB(
I, BP);
10069 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10072 Builder.CreateAlignedStore(PVal,
P,
10073 M.getDataLayout().getPrefTypeAlign(PtrTy));
10075 if (RuntimeSizes.
test(
I)) {
10077 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10083 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10086 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10089 auto CustomMFunc = CustomMapperCB(
I);
10091 return CustomMFunc.takeError();
10093 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10096 PointerArrayType, MappersArray,
10099 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10103 Info.NumberOfPtrs == 0)
10120 Builder.ClearInsertionPoint();
10150 auto CondConstant = CI->getSExtValue();
10152 return ThenGen(AllocaIP,
Builder.saveIP());
10154 return ElseGen(AllocaIP,
Builder.saveIP());
10164 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10182bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10186 "Unexpected Atomic Ordering.");
10188 bool Flush =
false;
10250 assert(
X.Var->getType()->isPointerTy() &&
10251 "OMP Atomic expects a pointer to target memory");
10252 Type *XElemTy =
X.ElemTy;
10255 "OMP atomic read expected a scalar type");
10257 Value *XRead =
nullptr;
10261 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10270 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10273 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10275 XRead = AtomicLoadRes.first;
10282 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10285 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10287 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10290 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10291 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10302 assert(
X.Var->getType()->isPointerTy() &&
10303 "OMP Atomic expects a pointer to target memory");
10304 Type *XElemTy =
X.ElemTy;
10307 "OMP atomic write expected a scalar type");
10315 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10318 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10326 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10331 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10338 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10339 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10345 Type *XTy =
X.Var->getType();
10347 "OMP Atomic expects a pointer to target memory");
10348 Type *XElemTy =
X.ElemTy;
10351 "OMP atomic update expected a scalar type");
10354 "OpenMP atomic does not support LT or GT operations");
10358 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10359 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10361 return AtomicResult.takeError();
10362 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10367Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10371 return Builder.CreateAdd(Src1, Src2);
10373 return Builder.CreateSub(Src1, Src2);
10375 return Builder.CreateAnd(Src1, Src2);
10377 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10379 return Builder.CreateOr(Src1, Src2);
10381 return Builder.CreateXor(Src1, Src2);
10403Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10406 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10407 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10410 bool emitRMWOp =
false;
10418 emitRMWOp = XElemTy;
10421 emitRMWOp = (IsXBinopExpr && XElemTy);
10428 std::pair<Value *, Value *> Res;
10430 AtomicRMWInst *RMWInst =
10431 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10432 if (
T.isAMDGPU()) {
10433 if (IsIgnoreDenormalMode)
10434 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10436 if (!IsFineGrainedMemory)
10437 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10439 if (!IsRemoteMemory)
10443 Res.first = RMWInst;
10448 Res.second = Res.first;
10450 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10454 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10457 unsigned LoadSize =
10460 OpenMPIRBuilder::AtomicInfo atomicInfo(
10462 OldVal->
getAlign(),
true , AllocaIP,
X);
10463 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10466 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10473 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10474 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10475 Builder.SetInsertPoint(ContBB);
10477 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10479 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10482 Value *Upd = *CBResult;
10483 Builder.CreateStore(Upd, NewAtomicAddr);
10486 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10487 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10488 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10489 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10492 Res.first = OldExprVal;
10495 if (UnreachableInst *ExitTI =
10498 Builder.SetInsertPoint(ExitBB);
10500 Builder.SetInsertPoint(ExitTI);
10503 IntegerType *IntCastTy =
10506 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
10515 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10522 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10523 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10524 Builder.SetInsertPoint(ContBB);
10526 PHI->addIncoming(OldVal, CurBB);
10531 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
10532 X->getName() +
".atomic.fltCast");
10534 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
10535 X->getName() +
".atomic.ptrCast");
10539 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10542 Value *Upd = *CBResult;
10543 Builder.CreateStore(Upd, NewAtomicAddr);
10544 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
10548 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
10549 Result->setVolatile(VolatileX);
10550 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
10551 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10552 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
10553 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
10555 Res.first = OldExprVal;
10559 if (UnreachableInst *ExitTI =
10562 Builder.SetInsertPoint(ExitBB);
10564 Builder.SetInsertPoint(ExitTI);
10575 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
10576 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10581 Type *XTy =
X.Var->getType();
10583 "OMP Atomic expects a pointer to target memory");
10584 Type *XElemTy =
X.ElemTy;
10587 "OMP atomic capture expected a scalar type");
10589 "OpenMP atomic does not support LT or GT operations");
10596 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
10597 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10600 Value *CapturedVal =
10601 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10602 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10604 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
10616 IsPostfixUpdate, IsFailOnly, Failure);
10628 assert(
X.Var->getType()->isPointerTy() &&
10629 "OMP atomic expects a pointer to target memory");
10632 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
10633 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
10636 bool IsInteger = E->getType()->isIntegerTy();
10638 if (
Op == OMPAtomicCompareOp::EQ) {
10653 Value *OldValue =
Builder.CreateExtractValue(Result, 0);
10655 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
10657 "OldValue and V must be of same type");
10658 if (IsPostfixUpdate) {
10659 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10661 Value *SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
10674 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10676 CurBBTI,
X.Var->getName() +
".atomic.exit");
10682 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10684 Builder.SetInsertPoint(ContBB);
10685 Builder.CreateStore(OldValue, V.Var);
10691 Builder.SetInsertPoint(ExitBB);
10693 Builder.SetInsertPoint(ExitTI);
10696 Value *CapturedValue =
10697 Builder.CreateSelect(SuccessOrFail, E, OldValue);
10698 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10704 assert(R.Var->getType()->isPointerTy() &&
10705 "r.var must be of pointer type");
10706 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10708 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10709 Value *ResultCast = R.IsSigned
10710 ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
10711 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
10712 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
10715 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10716 "Op should be either max or min at this point");
10717 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10728 if (IsXBinopExpr) {
10757 Value *CapturedValue =
nullptr;
10758 if (IsPostfixUpdate) {
10759 CapturedValue = OldValue;
10784 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
10785 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
10787 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10791 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10811 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
10838 bool SubClausesPresent =
10839 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10841 if (!
Config.isTargetDevice() && SubClausesPresent) {
10842 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10843 "if lowerbound is non-null, then upperbound must also be non-null "
10844 "for bounds on num_teams");
10846 if (NumTeamsUpper ==
nullptr)
10847 NumTeamsUpper =
Builder.getInt32(0);
10849 if (NumTeamsLower ==
nullptr)
10850 NumTeamsLower = NumTeamsUpper;
10854 "argument to if clause must be an integer value");
10858 IfExpr =
Builder.CreateICmpNE(IfExpr,
10859 ConstantInt::get(IfExpr->
getType(), 0));
10860 NumTeamsUpper =
Builder.CreateSelect(
10861 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
10864 NumTeamsLower =
Builder.CreateSelect(
10865 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
10868 if (ThreadLimit ==
nullptr)
10869 ThreadLimit =
Builder.getInt32(0);
10873 Value *NumTeamsLowerInt32 =
10875 Value *NumTeamsUpperInt32 =
10877 Value *ThreadLimitInt32 =
10884 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
10885 ThreadLimitInt32});
10890 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10902 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
10904 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
10906 auto HostPostOutlineCB = [
this, Ident,
10907 ToBeDeleted](
Function &OutlinedFn)
mutable {
10912 "there must be a single user for the outlined function");
10917 "Outlined function must have two or three arguments only");
10919 bool HasShared = OutlinedFn.
arg_size() == 3;
10927 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10928 "outlined function.");
10929 Builder.SetInsertPoint(StaleCI);
10936 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10940 I->eraseFromParent();
10943 if (!
Config.isTargetDevice())
10962 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
10977 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10982 if (
Config.isTargetDevice()) {
10997 std::string VarName) {
11006 return MapNamesArrayGlobal;
11011void OpenMPIRBuilder::initializeTypes(
Module &M) {
11015 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11016#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11017#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11018 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11019 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11020#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11021 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11022 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11023#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11024 T = StructType::getTypeByName(Ctx, StructName); \
11026 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11028 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11029#include "llvm/Frontend/OpenMP/OMPKinds.def"
11040 while (!Worklist.
empty()) {
11044 if (
BlockSet.insert(SuccBB).second)
11056 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11068 Fn->
addFnAttr(
"uniform-work-group-size");
11069 Fn->
addFnAttr(Attribute::MustProgress);
11087 auto &&GetMDInt = [
this](
unsigned V) {
11094 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11095 auto &&TargetRegionMetadataEmitter =
11096 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11111 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11112 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11113 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11114 GetMDInt(E.getOrder())};
11117 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11126 auto &&DeviceGlobalVarMetadataEmitter =
11127 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11137 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11138 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11142 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11149 DeviceGlobalVarMetadataEmitter);
11151 for (
const auto &E : OrderedEntries) {
11152 assert(E.first &&
"All ordered entries must exist!");
11153 if (
const auto *CE =
11156 if (!CE->getID() || !CE->getAddress()) {
11160 if (!
M.getNamedValue(FnName))
11168 }
else if (
const auto *CE =
dyn_cast<
11177 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11179 if (!CE->getAddress()) {
11184 if (CE->getVarSize() == 0)
11188 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11189 (!
Config.isTargetDevice() && CE->getAddress())) &&
11190 "Declaret target link address is set.");
11191 if (
Config.isTargetDevice())
11193 if (!CE->getAddress()) {
11200 if (!CE->getAddress()) {
11213 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11217 OMPTargetGlobalVarEntryIndirectVTable))
11226 Flags, CE->getLinkage(), CE->getVarName());
11229 Flags, CE->getLinkage());
11240 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11246 Config.getRequiresFlags());
11256 OS <<
"_" <<
Count;
11261 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11264 EntryInfo.
Line, NewCount);
11272 auto FileIDInfo = CallBack();
11276 FileID =
Status->getUniqueID().getFile();
11280 FileID =
hash_value(std::get<0>(FileIDInfo));
11284 std::get<1>(FileIDInfo));
11290 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11292 !(Remain & 1); Remain = Remain >> 1)
11310 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11312 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11319 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11325 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11326 Flags |= MemberOfFlag;
11332 bool IsDeclaration,
bool IsExternallyVisible,
11334 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11335 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11336 std::function<
Constant *()> GlobalInitializer,
11347 Config.hasRequiresUnifiedSharedMemory())) {
11352 if (!IsExternallyVisible)
11354 OS <<
"_decl_tgt_ref_ptr";
11357 Value *Ptr =
M.getNamedValue(PtrName);
11366 if (!
Config.isTargetDevice()) {
11367 if (GlobalInitializer)
11368 GV->setInitializer(GlobalInitializer());
11374 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11375 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11376 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11388 bool IsDeclaration,
bool IsExternallyVisible,
11390 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11391 std::vector<Triple> TargetTriple,
11392 std::function<
Constant *()> GlobalInitializer,
11396 (TargetTriple.empty() && !
Config.isTargetDevice()))
11407 !
Config.hasRequiresUnifiedSharedMemory()) {
11409 VarName = MangledName;
11412 if (!IsDeclaration)
11414 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11417 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
11421 if (
Config.isTargetDevice() &&
11430 if (!
M.getNamedValue(RefName)) {
11434 GvAddrRef->setConstant(
true);
11436 GvAddrRef->setInitializer(Addr);
11437 GeneratedRefs.push_back(GvAddrRef);
11446 if (
Config.isTargetDevice()) {
11447 VarName = (Addr) ? Addr->
getName() :
"";
11451 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11452 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11453 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11454 VarName = (Addr) ? Addr->
getName() :
"";
11456 VarSize =
M.getDataLayout().getPointerSize();
11475 auto &&GetMDInt = [MN](
unsigned Idx) {
11480 auto &&GetMDString = [MN](
unsigned Idx) {
11482 return V->getString();
11485 switch (GetMDInt(0)) {
11489 case OffloadEntriesInfoManager::OffloadEntryInfo::
11490 OffloadingEntryInfoTargetRegion: {
11500 case OffloadEntriesInfoManager::OffloadEntryInfo::
11501 OffloadingEntryInfoDeviceGlobalVar:
11514 if (HostFilePath.
empty())
11518 if (std::error_code Err = Buf.getError()) {
11520 "OpenMPIRBuilder: " +
11528 if (std::error_code Err =
M.getError()) {
11530 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11542 return OffloadEntriesTargetRegion.empty() &&
11543 OffloadEntriesDeviceGlobalVar.empty();
11546unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
11548 auto It = OffloadEntriesTargetRegionCount.find(
11549 getTargetRegionEntryCountKey(EntryInfo));
11550 if (It == OffloadEntriesTargetRegionCount.end())
11555void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
11557 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
11558 EntryInfo.
Count + 1;
11564 OffloadEntriesTargetRegion[EntryInfo] =
11567 ++OffloadingEntriesNum;
11573 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
11576 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11580 if (OMPBuilder->Config.isTargetDevice()) {
11585 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
11586 Entry.setAddress(Addr);
11588 Entry.setFlags(Flags);
11594 "Target region entry already registered!");
11596 OffloadEntriesTargetRegion[EntryInfo] = Entry;
11597 ++OffloadingEntriesNum;
11599 incrementTargetRegionEntryInfoCount(EntryInfo);
11606 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11608 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
11609 if (It == OffloadEntriesTargetRegion.end()) {
11613 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
11621 for (
const auto &It : OffloadEntriesTargetRegion) {
11622 Action(It.first, It.second);
11628 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
11629 ++OffloadingEntriesNum;
11635 if (OMPBuilder->Config.isTargetDevice()) {
11639 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11641 if (Entry.getVarSize() == 0) {
11642 Entry.setVarSize(VarSize);
11643 Entry.setLinkage(Linkage);
11647 Entry.setVarSize(VarSize);
11648 Entry.setLinkage(Linkage);
11649 Entry.setAddress(Addr);
11652 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11653 assert(Entry.isValid() && Entry.getFlags() == Flags &&
11654 "Entry not initialized!");
11655 if (Entry.getVarSize() == 0) {
11656 Entry.setVarSize(VarSize);
11657 Entry.setLinkage(Linkage);
11664 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
11665 Addr, VarSize, Flags, Linkage,
11668 OffloadEntriesDeviceGlobalVar.try_emplace(
11669 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
11670 ++OffloadingEntriesNum;
11677 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
11678 Action(E.getKey(), E.getValue());
11685void CanonicalLoopInfo::collectControlBlocks(
11692 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
11704void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
11716void CanonicalLoopInfo::mapIndVar(
11726 for (
Use &U : OldIV->
uses()) {
11730 if (
User->getParent() == getCond())
11732 if (
User->getParent() == getLatch())
11738 Value *NewIV = Updater(OldIV);
11741 for (Use *U : ReplacableUses)
11762 "Preheader must terminate with unconditional branch");
11764 "Preheader must jump to header");
11768 "Header must terminate with unconditional branch");
11769 assert(Header->getSingleSuccessor() == Cond &&
11770 "Header must jump to exiting block");
11773 assert(Cond->getSinglePredecessor() == Header &&
11774 "Exiting block only reachable from header");
11777 "Exiting block must terminate with conditional branch");
11779 "Exiting block must have two successors");
11781 "Exiting block's first successor jump to the body");
11783 "Exiting block's second successor must exit the loop");
11787 "Body only reachable from exiting block");
11792 "Latch must terminate with unconditional branch");
11793 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
11796 assert(Latch->getSinglePredecessor() !=
nullptr);
11801 "Exit block must terminate with unconditional branch");
11802 assert(Exit->getSingleSuccessor() == After &&
11803 "Exit block must jump to after block");
11807 "After block only reachable from exit block");
11811 assert(IndVar &&
"Canonical induction variable not found?");
11813 "Induction variable must be an integer");
11815 "Induction variable must be a PHI in the loop header");
11821 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
11829 assert(TripCount &&
"Loop trip count not found?");
11831 "Trip count and induction variable must have the same type");
11835 "Exit condition must be a signed less-than comparison");
11837 "Exit condition must compare the induction variable");
11839 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetGenArgAccessorsCallbackTy
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetBodyGenCallbackTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from the given callback.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write a bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
a struct to pack relevant information while generating atomic Ops
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
bool FixUpNonEntryAllocas
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...