69#define DEBUG_TYPE "openmp-ir-builder"
76 cl::desc(
"Use optimistic attributes describing "
77 "'as-if' properties of runtime calls."),
81 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
82 cl::desc(
"Factor for the unroll threshold to account for code "
83 "simplifications still taking place"),
94 if (!IP1.isSet() || !IP2.isSet())
96 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
101 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
102 case OMPScheduleType::UnorderedStaticChunked:
103 case OMPScheduleType::UnorderedStatic:
104 case OMPScheduleType::UnorderedDynamicChunked:
105 case OMPScheduleType::UnorderedGuidedChunked:
106 case OMPScheduleType::UnorderedRuntime:
107 case OMPScheduleType::UnorderedAuto:
108 case OMPScheduleType::UnorderedTrapezoidal:
109 case OMPScheduleType::UnorderedGreedy:
110 case OMPScheduleType::UnorderedBalanced:
111 case OMPScheduleType::UnorderedGuidedIterativeChunked:
112 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
113 case OMPScheduleType::UnorderedSteal:
114 case OMPScheduleType::UnorderedStaticBalancedChunked:
115 case OMPScheduleType::UnorderedGuidedSimd:
116 case OMPScheduleType::UnorderedRuntimeSimd:
117 case OMPScheduleType::OrderedStaticChunked:
118 case OMPScheduleType::OrderedStatic:
119 case OMPScheduleType::OrderedDynamicChunked:
120 case OMPScheduleType::OrderedGuidedChunked:
121 case OMPScheduleType::OrderedRuntime:
122 case OMPScheduleType::OrderedAuto:
123 case OMPScheduleType::OrderdTrapezoidal:
124 case OMPScheduleType::NomergeUnorderedStaticChunked:
125 case OMPScheduleType::NomergeUnorderedStatic:
126 case OMPScheduleType::NomergeUnorderedDynamicChunked:
127 case OMPScheduleType::NomergeUnorderedGuidedChunked:
128 case OMPScheduleType::NomergeUnorderedRuntime:
129 case OMPScheduleType::NomergeUnorderedAuto:
130 case OMPScheduleType::NomergeUnorderedTrapezoidal:
131 case OMPScheduleType::NomergeUnorderedGreedy:
132 case OMPScheduleType::NomergeUnorderedBalanced:
133 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
134 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
135 case OMPScheduleType::NomergeUnorderedSteal:
136 case OMPScheduleType::NomergeOrderedStaticChunked:
137 case OMPScheduleType::NomergeOrderedStatic:
138 case OMPScheduleType::NomergeOrderedDynamicChunked:
139 case OMPScheduleType::NomergeOrderedGuidedChunked:
140 case OMPScheduleType::NomergeOrderedRuntime:
141 case OMPScheduleType::NomergeOrderedAuto:
142 case OMPScheduleType::NomergeOrderedTrapezoidal:
143 case OMPScheduleType::OrderedDistributeChunked:
144 case OMPScheduleType::OrderedDistribute:
152 SchedType & OMPScheduleType::MonotonicityMask;
153 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
165 Builder.restoreIP(IP);
173 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
179 Kernel->getFnAttribute(
"target-features").getValueAsString();
180 if (Features.
count(
"+wavefrontsize64"))
195 bool HasSimdModifier,
bool HasDistScheduleChunks) {
197 switch (ClauseKind) {
198 case OMP_SCHEDULE_Default:
199 case OMP_SCHEDULE_Static:
200 return HasChunks ? OMPScheduleType::BaseStaticChunked
201 : OMPScheduleType::BaseStatic;
202 case OMP_SCHEDULE_Dynamic:
203 return OMPScheduleType::BaseDynamicChunked;
204 case OMP_SCHEDULE_Guided:
205 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
206 : OMPScheduleType::BaseGuidedChunked;
207 case OMP_SCHEDULE_Auto:
209 case OMP_SCHEDULE_Runtime:
210 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
211 : OMPScheduleType::BaseRuntime;
212 case OMP_SCHEDULE_Distribute:
213 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
214 : OMPScheduleType::BaseDistribute;
222 bool HasOrderedClause) {
223 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
224 OMPScheduleType::None &&
225 "Must not have ordering nor monotonicity flags already set");
228 ? OMPScheduleType::ModifierOrdered
229 : OMPScheduleType::ModifierUnordered;
230 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
233 if (OrderingScheduleType ==
234 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
235 return OMPScheduleType::OrderedGuidedChunked;
236 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
237 OMPScheduleType::ModifierOrdered))
238 return OMPScheduleType::OrderedRuntime;
240 return OrderingScheduleType;
246 bool HasSimdModifier,
bool HasMonotonic,
247 bool HasNonmonotonic,
bool HasOrderedClause) {
248 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
249 OMPScheduleType::None &&
250 "Must not have monotonicity flags already set");
251 assert((!HasMonotonic || !HasNonmonotonic) &&
252 "Monotonic and Nonmonotonic are contradicting each other");
255 return ScheduleType | OMPScheduleType::ModifierMonotonic;
256 }
else if (HasNonmonotonic) {
257 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
267 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
268 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
274 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
282 bool HasSimdModifier,
bool HasMonotonicModifier,
283 bool HasNonmonotonicModifier,
bool HasOrderedClause,
284 bool HasDistScheduleChunks) {
286 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
290 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
291 HasNonmonotonicModifier, HasOrderedClause);
304 if (
Instruction *Term = Source->getTerminatorOrNull()) {
313 NewBr->setDebugLoc(
DL);
318 assert(New->getFirstInsertionPt() == New->begin() &&
319 "Target BB must not have PHI nodes");
335 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
339 NewBr->setDebugLoc(
DL);
351 Builder.SetInsertPoint(Old);
355 Builder.SetCurrentDebugLocation(
DebugLoc);
365 New->replaceSuccessorsPhiUsesWith(Old, New);
374 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
376 Builder.SetInsertPoint(Builder.GetInsertBlock());
379 Builder.SetCurrentDebugLocation(
DebugLoc);
388 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
390 Builder.SetInsertPoint(Builder.GetInsertBlock());
393 Builder.SetCurrentDebugLocation(
DebugLoc);
410 const Twine &Name =
"",
bool AsPtr =
true,
411 bool Is64Bit =
false) {
412 Builder.restoreIP(OuterAllocaIP);
416 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
420 FakeVal = FakeValAddr;
422 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
427 Builder.restoreIP(InnerAllocaIP);
430 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
433 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
446enum OpenMPOffloadingRequiresDirFlags {
448 OMP_REQ_UNDEFINED = 0x000,
450 OMP_REQ_NONE = 0x001,
452 OMP_REQ_REVERSE_OFFLOAD = 0x002,
454 OMP_REQ_UNIFIED_ADDRESS = 0x004,
456 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
458 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
465 : RequiresFlags(OMP_REQ_UNDEFINED) {}
469 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
470 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
473 RequiresFlags(OMP_REQ_UNDEFINED) {
474 if (HasRequiresReverseOffload)
475 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
476 if (HasRequiresUnifiedAddress)
477 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
478 if (HasRequiresUnifiedSharedMemory)
479 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
480 if (HasRequiresDynamicAllocators)
481 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
485 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
489 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
493 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
497 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
502 :
static_cast<int64_t
>(OMP_REQ_NONE);
507 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
509 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
514 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
516 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
521 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
523 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
528 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
530 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
543 constexpr size_t MaxDim = 3;
548 Value *DynCGroupMemFallbackFlag =
550 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
551 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
557 Value *NumThreads3D =
588 auto FnAttrs = Attrs.getFnAttrs();
589 auto RetAttrs = Attrs.getRetAttrs();
591 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
596 bool Param =
true) ->
void {
597 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
598 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
599 if (HasSignExt || HasZeroExt) {
600 assert(AS.getNumAttributes() == 1 &&
601 "Currently not handling extension attr combined with others.");
603 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
606 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
613#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
614#include "llvm/Frontend/OpenMP/OMPKinds.def"
618#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
620 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
621 addAttrSet(RetAttrs, RetAttrSet, false); \
622 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
623 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
624 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
626#include "llvm/Frontend/OpenMP/OMPKinds.def"
640#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
642 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
644 Fn = M.getFunction(Str); \
646#include "llvm/Frontend/OpenMP/OMPKinds.def"
652#define OMP_RTL(Enum, Str, ...) \
654 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
656#include "llvm/Frontend/OpenMP/OMPKinds.def"
660 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
670 LLVMContext::MD_callback,
672 2, {-1, -1},
true)}));
685 assert(Fn &&
"Failed to create OpenMP runtime function");
696 Builder.SetInsertPoint(FiniBB);
708 FiniBB = OtherFiniBB;
710 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
718 auto EndIt = FiniBB->end();
719 if (FiniBB->size() >= 1)
720 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
725 FiniBB->replaceAllUsesWith(OtherFiniBB);
726 FiniBB->eraseFromParent();
727 FiniBB = OtherFiniBB;
734 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
757 for (
auto Inst =
Block->getReverseIterator()->begin();
758 Inst !=
Block->getReverseIterator()->end();) {
787 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
812 ParallelRegionBlockSet.
clear();
814 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
824 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
833 ".omp_par", ArgsInZeroAddressSpace);
837 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
838 assert(Extractor.isEligible() &&
839 "Expected OpenMP outlining to be possible!");
841 for (
auto *V : OI.ExcludeArgsFromAggregate)
842 Extractor.excludeArgFromAggregate(V);
845 Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
849 if (TargetCpuAttr.isStringAttribute())
852 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
853 if (TargetFeaturesAttr.isStringAttribute())
854 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
857 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
859 "OpenMP outlined functions should not return a value!");
864 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
871 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
878 "Expected instructions to add in the outlined region entry");
880 End = ArtificialEntry.
rend();
885 if (
I.isTerminator()) {
887 if (
Instruction *TI = OI.EntryBB->getTerminatorOrNull())
888 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
892 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
895 OI.EntryBB->moveBefore(&ArtificialEntry);
902 if (OI.PostOutlineCB)
903 OI.PostOutlineCB(*OutlinedFn);
905 if (OI.FixUpNonEntryAllocas)
937 errs() <<
"Error of kind: " << Kind
938 <<
" when emitting offload entries and metadata during "
939 "OMPIRBuilder finalization \n";
945 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
946 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
947 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
948 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
965 ConstantInt::get(I32Ty,
Value), Name);
978 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
982 if (UsedArray.
empty())
989 GV->setSection(
"llvm.metadata");
995 auto *Int8Ty =
Builder.getInt8Ty();
998 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1006 unsigned Reserve2Flags) {
1008 LocFlags |= OMP_IDENT_FLAG_KMPC;
1015 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1016 ConstantInt::get(Int32, Reserve2Flags),
1017 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1019 size_t SrcLocStrArgIdx = 4;
1020 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1024 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1031 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1032 if (
GV.getInitializer() == Initializer)
1037 M, OpenMPIRBuilder::Ident,
1040 M.getDataLayout().getDefaultGlobalsAddressSpace());
1052 SrcLocStrSize = LocStr.
size();
1061 if (
GV.isConstant() &&
GV.hasInitializer() &&
1062 GV.getInitializer() == Initializer)
1065 SrcLocStr =
Builder.CreateGlobalString(
1066 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1074 unsigned Line,
unsigned Column,
1080 Buffer.
append(FunctionName);
1082 Buffer.
append(std::to_string(Line));
1084 Buffer.
append(std::to_string(Column));
1092 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1103 if (
DIFile *DIF = DIL->getFile())
1104 if (std::optional<StringRef> Source = DIF->getSource())
1110 DIL->getColumn(), SrcLocStrSize);
1116 Loc.IP.getBlock()->getParent());
1122 "omp_global_thread_num");
1127 bool ForceSimpleCall,
bool CheckCancelFlag) {
1137 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1140 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1143 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1146 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1149 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1162 bool UseCancelBarrier =
1167 ? OMPRTL___kmpc_cancel_barrier
1168 : OMPRTL___kmpc_barrier),
1171 if (UseCancelBarrier && CheckCancelFlag)
1181 omp::Directive CanceledDirective) {
1186 auto *UI =
Builder.CreateUnreachable();
1194 Builder.SetInsertPoint(ElseTI);
1195 auto ElseIP =
Builder.saveIP();
1203 Builder.SetInsertPoint(ThenTI);
1205 Value *CancelKind =
nullptr;
1206 switch (CanceledDirective) {
1207#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1208 case DirectiveEnum: \
1209 CancelKind = Builder.getInt32(Value); \
1211#include "llvm/Frontend/OpenMP/OMPKinds.def"
1228 Builder.SetInsertPoint(UI->getParent());
1229 UI->eraseFromParent();
1236 omp::Directive CanceledDirective) {
1241 auto *UI =
Builder.CreateUnreachable();
1244 Value *CancelKind =
nullptr;
1245 switch (CanceledDirective) {
1246#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1247 case DirectiveEnum: \
1248 CancelKind = Builder.getInt32(Value); \
1250#include "llvm/Frontend/OpenMP/OMPKinds.def"
1267 Builder.SetInsertPoint(UI->getParent());
1268 UI->eraseFromParent();
1281 auto *KernelArgsPtr =
1282 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1287 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1290 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1294 NumThreads, HostPtr, KernelArgsPtr};
1321 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1325 Value *Return =
nullptr;
1345 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1346 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1353 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1355 auto CurFn =
Builder.GetInsertBlock()->getParent();
1362 emitBlock(OffloadContBlock, CurFn,
true);
1367 Value *CancelFlag, omp::Directive CanceledDirective) {
1369 "Unexpected cancellation!");
1389 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1398 Builder.SetInsertPoint(CancellationBlock);
1399 Builder.CreateBr(*FiniBBOrErr);
1402 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1421 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1424 "Expected at least tid and bounded tid as arguments");
1425 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1428 assert(CI &&
"Expected call instruction to outlined function");
1429 CI->
getParent()->setName(
"omp_parallel");
1431 Builder.SetInsertPoint(CI);
1432 Type *PtrTy = OMPIRBuilder->VoidPtr;
1436 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1440 Value *Args = ArgsAlloca;
1444 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1445 Builder.restoreIP(CurrentIP);
1448 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1450 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1452 Builder.CreateStore(V, StoreAddress);
1456 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1457 : Builder.getInt32(1);
1460 Value *Parallel60CallArgs[] = {
1464 NumThreads ? NumThreads : Builder.getInt32(-1),
1465 Builder.getInt32(-1),
1469 Builder.getInt64(NumCapturedVars),
1470 Builder.getInt32(0)};
1478 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1481 Builder.SetInsertPoint(PrivTID);
1483 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1490 I->eraseFromParent();
1513 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1521 F->addMetadata(LLVMContext::MD_callback,
1530 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1533 "Expected at least tid and bounded tid as arguments");
1534 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1537 CI->
getParent()->setName(
"omp_parallel");
1538 Builder.SetInsertPoint(CI);
1541 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1545 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1547 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1554 auto PtrTy = OMPIRBuilder->VoidPtr;
1555 if (IfCondition && NumCapturedVars == 0) {
1563 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1566 Builder.SetInsertPoint(PrivTID);
1568 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1575 I->eraseFromParent();
1583 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1592 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1593 (ProcBind != OMP_PROC_BIND_default);
1600 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1604 if (NumThreads && !
Config.isTargetDevice()) {
1607 Builder.CreateIntCast(NumThreads, Int32,
false)};
1612 if (ProcBind != OMP_PROC_BIND_default) {
1616 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1638 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1641 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1644 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1648 PointerType ::get(
M.getContext(), 0),
1649 "zero.addr.ascast");
1673 if (IP.getBlock()->end() == IP.getPoint()) {
1679 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1680 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1681 "Unexpected insertion point for finalization call!");
1693 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1699 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1717 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1720 assert(BodyGenCB &&
"Expected body generation callback!");
1722 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1725 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1728 if (
Config.isTargetDevice()) {
1731 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1733 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1734 ThreadID, ToBeDeletedVec);
1740 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1742 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1764 ".omp_par", ArgsInZeroAddressSpace);
1769 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1771 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1776 return GV->getValueType() == OpenMPIRBuilder::Ident;
1781 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1787 if (&V == TIDAddr || &V == ZeroAddr) {
1793 for (
Use &U : V.uses())
1795 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1805 if (!V.getType()->isPointerTy()) {
1809 Builder.restoreIP(OuterAllocaIP);
1811 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1815 Builder.SetInsertPoint(InsertBB,
1820 Builder.restoreIP(InnerAllocaIP);
1821 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1824 Value *ReplacementValue =
nullptr;
1827 ReplacementValue = PrivTID;
1830 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
1838 assert(ReplacementValue &&
1839 "Expected copy/create callback to set replacement value!");
1840 if (ReplacementValue == &V)
1845 UPtr->set(ReplacementValue);
1870 for (
Value *Output : Outputs)
1874 "OpenMP outlining should not produce live-out values!");
1876 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1878 for (
auto *BB : Blocks)
1879 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1887 assert(FiniInfo.DK == OMPD_parallel &&
1888 "Unexpected finalization stack state!");
1899 Builder.CreateBr(*FiniBBOrErr);
1903 Term->eraseFromParent();
1909 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1910 UI->eraseFromParent();
1977 if (Dependencies.
empty())
1997 Type *DependInfo = OMPBuilder.DependInfo;
2000 Value *DepArray =
nullptr;
2002 Builder.SetInsertPoint(
2006 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2008 Builder.restoreIP(OldIP);
2010 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2012 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2014 Value *Addr = Builder.CreateStructGEP(
2016 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2017 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
2018 Builder.CreateStore(DepValPtr, Addr);
2021 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2022 Builder.CreateStore(
2023 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
2026 Value *Flags = Builder.CreateStructGEP(
2028 static_cast<unsigned int>(RTLDependInfoFields::Flags));
2029 Builder.CreateStore(
2030 ConstantInt::get(Builder.getInt8Ty(),
2031 static_cast<unsigned int>(Dep.DepKind)),
2038Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2040 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2055 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2059 "omp_taskloop_dup",
M);
2062 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2063 DestTaskArg->
setName(
"dest_task");
2064 SrcTaskArg->
setName(
"src_task");
2065 LastprivateFlagArg->
setName(
"lastprivate_flag");
2067 IRBuilderBase::InsertPointGuard Guard(
Builder);
2071 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2072 Type *TaskWithPrivatesTy =
2075 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2077 PrivatesTy, TaskPrivates,
2082 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2083 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2085 DestTaskContextPtr->
setName(
"destPtr");
2086 SrcTaskContextPtr->
setName(
"srcPtr");
2091 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2092 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2093 if (!AfterIPOrError)
2095 Builder.restoreIP(*AfterIPOrError);
2105 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2107 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2109 Value *TaskContextStructPtrVal) {
2114 uint32_t SrcLocStrSize;
2130 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
2133 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2138 llvm::CanonicalLoopInfo *CLI = result.
get();
2140 OI.
EntryBB = TaskloopAllocaBB;
2141 OI.OuterAllocaBB = AllocaIP.getBlock();
2142 OI.ExitBB = TaskloopExitBB;
2148 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2150 TaskloopAllocaIP,
"lb",
false,
true);
2152 TaskloopAllocaIP,
"ub",
false,
true);
2154 TaskloopAllocaIP,
"step",
false,
true);
2157 OI.Inputs.insert(FakeLB);
2158 OI.Inputs.insert(FakeUB);
2159 OI.Inputs.insert(FakeStep);
2160 if (TaskContextStructPtrVal)
2161 OI.Inputs.insert(TaskContextStructPtrVal);
2162 assert(((TaskContextStructPtrVal && DupCB) ||
2163 (!TaskContextStructPtrVal && !DupCB)) &&
2164 "Task context struct ptr and duplication callback must be both set "
2170 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2174 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2175 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2178 if (!TaskDupFnOrErr) {
2181 Value *TaskDupFn = *TaskDupFnOrErr;
2183 OI.PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2184 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2185 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2186 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2187 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2189 assert(OutlinedFn.hasOneUse() &&
2190 "there must be a single user for the outlined function");
2197 Value *CastedLBVal =
2198 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2199 Value *CastedUBVal =
2200 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2201 Value *CastedStepVal =
2202 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2204 Builder.SetInsertPoint(StaleCI);
2217 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2238 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2240 AllocaInst *ArgStructAlloca =
2242 assert(ArgStructAlloca &&
2243 "Unable to find the alloca instruction corresponding to arguments "
2244 "for extracted function");
2245 std::optional<TypeSize> ArgAllocSize =
2248 "Unable to determine size of arguments for extracted function");
2249 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2254 CallInst *TaskData =
Builder.CreateCall(
2255 TaskAllocFn, {Ident, ThreadID,
Flags,
2256 TaskSize, SharedsSize,
2261 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2262 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2267 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2270 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2273 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2279 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2285 Value *GrainSizeVal =
2286 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2288 Value *TaskDup = TaskDupFn;
2290 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2291 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2296 Builder.CreateCall(TaskloopFn, Args);
2303 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2308 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2310 LoadInst *SharedsOutlined =
2311 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2312 OutlinedFn.getArg(1)->replaceUsesWithIf(
2314 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2317 Type *IVTy =
IV->getType();
2323 Value *TaskLB =
nullptr;
2324 Value *TaskUB =
nullptr;
2325 Value *TaskStep =
nullptr;
2326 Value *LoadTaskLB =
nullptr;
2327 Value *LoadTaskUB =
nullptr;
2328 Value *LoadTaskStep =
nullptr;
2329 for (Instruction &
I : *TaskloopAllocaBB) {
2330 if (
I.getOpcode() == Instruction::GetElementPtr) {
2333 switch (CI->getZExtValue()) {
2345 }
else if (
I.getOpcode() == Instruction::Load) {
2347 if (
Load.getPointerOperand() == TaskLB) {
2348 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2350 }
else if (
Load.getPointerOperand() == TaskUB) {
2351 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2353 }
else if (
Load.getPointerOperand() == TaskStep) {
2354 assert(TaskStep !=
nullptr &&
"Expected value for TaskStep");
2360 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2362 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2363 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2364 assert(LoadTaskStep !=
nullptr &&
"Expected value for LoadTaskStep");
2366 Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
2367 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2368 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2369 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2371 CLI->setTripCount(CastedTripCount);
2373 Builder.SetInsertPoint(CLI->getBody(),
2374 CLI->getBody()->getFirstInsertionPt());
2376 if (NumOfCollapseLoops > 1) {
2382 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2385 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2386 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2387 User *IVUser = IVUse->getUser();
2389 if (
Op->getOpcode() == Instruction::URem ||
2390 Op->getOpcode() == Instruction::UDiv) {
2395 for (User *User : UsersToReplace) {
2396 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2413 assert(CLI->getIndVar()->getNumUses() == 3 &&
2414 "Canonical loop should have exactly three uses of the ind var");
2415 for (User *IVUser : CLI->getIndVar()->users()) {
2417 if (
Mul->getOpcode() == Instruction::Mul) {
2418 for (User *MulUser :
Mul->users()) {
2420 if (
Add->getOpcode() == Instruction::Add) {
2421 Add->setOperand(1, CastedTaskLB);
2430 FakeLB->replaceAllUsesWith(CastedLBVal);
2431 FakeUB->replaceAllUsesWith(CastedUBVal);
2432 FakeStep->replaceAllUsesWith(CastedStepVal);
2434 I->eraseFromParent();
2439 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2445 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2454 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2486 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
2497 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2499 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2500 Affinities, Mergeable, Priority, EventHandle,
2501 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
2503 assert(OutlinedFn.hasOneUse() &&
2504 "there must be a single user for the outlined function");
2509 bool HasShareds = StaleCI->
arg_size() > 1;
2510 Builder.SetInsertPoint(StaleCI);
2535 Flags =
Builder.CreateOr(FinalFlag, Flags);
2548 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2557 assert(ArgStructAlloca &&
2558 "Unable to find the alloca instruction corresponding to arguments "
2559 "for extracted function");
2560 std::optional<TypeSize> ArgAllocSize =
2563 "Unable to determine size of arguments for extracted function");
2564 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2570 TaskAllocFn, {Ident, ThreadID, Flags,
2571 TaskSize, SharedsSize,
2574 if (Affinities.
Count && Affinities.
Info) {
2576 OMPRTL___kmpc_omp_reg_task_with_affinity);
2587 OMPRTL___kmpc_task_allow_completion_event);
2591 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2593 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2594 Builder.CreateStore(EventVal, EventHandleAddr);
2600 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2601 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2619 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2622 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2624 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2627 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2628 PriorityData, {Zero, Zero});
2629 Builder.CreateStore(Priority, CmplrData);
2656 Builder.GetInsertPoint()->getParent()->getTerminator();
2657 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2658 Builder.SetInsertPoint(IfTerminator);
2661 Builder.SetInsertPoint(ElseTI);
2663 if (Dependencies.
size()) {
2668 {Ident, ThreadID,
Builder.getInt32(Dependencies.
size()), DepArray,
2669 ConstantInt::get(
Builder.getInt32Ty(), 0),
2684 Builder.SetInsertPoint(ThenTI);
2687 if (Dependencies.
size()) {
2692 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
2693 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
2704 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2706 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2707 OutlinedFn.getArg(1)->replaceUsesWithIf(
2708 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2712 I->eraseFromParent();
2716 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2742 Builder.SetInsertPoint(TaskgroupExitBB);
2785 unsigned CaseNumber = 0;
2786 for (
auto SectionCB : SectionCBs) {
2788 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2790 Builder.SetInsertPoint(CaseBB);
2793 CaseEndBr->getIterator()}))
2804 Value *LB = ConstantInt::get(I32Ty, 0);
2805 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2806 Value *ST = ConstantInt::get(I32Ty, 1);
2808 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2813 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2814 WorksharingLoopType::ForStaticLoop, !IsNowait);
2820 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2824 assert(FiniInfo.DK == OMPD_sections &&
2825 "Unexpected finalization stack state!");
2826 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
2840 if (IP.getBlock()->end() != IP.getPoint())
2851 auto *CaseBB =
Loc.IP.getBlock();
2852 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2853 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2859 Directive OMPD = Directive::OMPD_sections;
2862 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2873Value *OpenMPIRBuilder::getGPUThreadID() {
2876 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2880Value *OpenMPIRBuilder::getGPUWarpSize() {
2885Value *OpenMPIRBuilder::getNVPTXWarpID() {
2886 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2887 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2890Value *OpenMPIRBuilder::getNVPTXLaneID() {
2891 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2892 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2893 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2894 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
2901 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
2902 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
2903 assert(FromSize > 0 &&
"From size must be greater than zero");
2904 assert(ToSize > 0 &&
"To size must be greater than zero");
2905 if (FromType == ToType)
2907 if (FromSize == ToSize)
2908 return Builder.CreateBitCast(From, ToType);
2910 return Builder.CreateIntCast(From, ToType,
true);
2916 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2917 CastItem,
Builder.getPtrTy(0));
2918 Builder.CreateStore(From, ValCastItem);
2919 return Builder.CreateLoad(ToType, CastItem);
2926 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
2927 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2931 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2933 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
2935 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2936 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2937 Value *WarpSizeCast =
2939 Value *ShuffleCall =
2941 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2948 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
2960 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
2961 Value *ElemPtr = DstAddr;
2962 Value *Ptr = SrcAddr;
2963 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2967 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2970 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2971 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2975 if ((
Size / IntSize) > 1) {
2976 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2977 SrcAddrGEP,
Builder.getPtrTy());
2994 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
2996 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
2999 Value *Res = createRuntimeShuffleFunction(
3002 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
3004 Builder.CreateAlignedStore(Res, ElemPtr,
3005 M.getDataLayout().getPrefTypeAlign(ElemType));
3007 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3008 Value *LocalElemPtr =
3009 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3015 Value *Res = createRuntimeShuffleFunction(
3016 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3019 Res =
Builder.CreateTrunc(Res, ElemType);
3020 Builder.CreateStore(Res, ElemPtr);
3021 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3023 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3029Error OpenMPIRBuilder::emitReductionListCopy(
3034 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3035 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3039 for (
auto En :
enumerate(ReductionInfos)) {
3041 Value *SrcElementAddr =
nullptr;
3042 AllocaInst *DestAlloca =
nullptr;
3043 Value *DestElementAddr =
nullptr;
3044 Value *DestElementPtrAddr =
nullptr;
3046 bool ShuffleInElement =
false;
3049 bool UpdateDestListPtr =
false;
3053 ReductionArrayTy, SrcBase,
3054 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3055 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3059 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3060 ReductionArrayTy, DestBase,
3061 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3062 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3068 Type *DestAllocaType =
3069 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3070 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3071 ".omp.reduction.element");
3073 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3074 DestElementAddr = DestAlloca;
3077 DestElementAddr->
getName() +
".ascast");
3079 ShuffleInElement =
true;
3080 UpdateDestListPtr =
true;
3092 if (ShuffleInElement) {
3093 Type *ShuffleType = RI.ElementType;
3094 Value *ShuffleSrcAddr = SrcElementAddr;
3095 Value *ShuffleDestAddr = DestElementAddr;
3096 AllocaInst *LocalStorage =
nullptr;
3099 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3100 assert(RI.ByRefAllocatedType &&
3101 "Expected by-ref allocated type to be set");
3106 ShuffleType = RI.ByRefElementType;
3109 RI.DataPtrPtrGen(
Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3112 return GenResult.takeError();
3114 ShuffleSrcAddr =
Builder.CreateLoad(
Builder.getPtrTy(), ShuffleSrcAddr);
3120 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3122 ShuffleDestAddr = LocalStorage;
3126 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3127 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3131 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3132 DestAlloca,
Builder.getPtrTy(),
".ascast");
3135 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3136 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3139 return GenResult.takeError();
3142 switch (RI.EvaluationKind) {
3144 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3146 Builder.CreateStore(Elem, DestElementAddr);
3150 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3151 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3153 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3155 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3157 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3159 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3160 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3161 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3162 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3163 Builder.CreateStore(SrcReal, DestRealPtr);
3164 Builder.CreateStore(SrcImg, DestImgPtr);
3169 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3171 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3172 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3184 if (UpdateDestListPtr) {
3185 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3186 DestElementAddr,
Builder.getPtrTy(),
3187 DestElementAddr->
getName() +
".ascast");
3188 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3195Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3199 LLVMContext &Ctx =
M.getContext();
3201 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3205 "_omp_reduction_inter_warp_copy_func", &
M);
3210 Builder.SetInsertPoint(EntryBB);
3227 StringRef TransferMediumName =
3228 "__openmp_nvptx_data_transfer_temporary_storage";
3229 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3230 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3232 if (!TransferMedium) {
3233 TransferMedium =
new GlobalVariable(
3241 Value *GPUThreadID = getGPUThreadID();
3243 Value *LaneID = getNVPTXLaneID();
3245 Value *WarpID = getNVPTXWarpID();
3249 Builder.GetInsertBlock()->getFirstInsertionPt());
3253 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3254 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3255 AllocaInst *NumWarpsAlloca =
3256 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3257 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3258 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3259 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3260 NumWarpsAlloca,
Builder.getPtrTy(0),
3261 NumWarpsAlloca->
getName() +
".ascast");
3262 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3263 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3272 for (
auto En :
enumerate(ReductionInfos)) {
3278 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3279 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3280 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3281 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3284 unsigned NumIters = RealTySize / TySize;
3287 Value *Cnt =
nullptr;
3288 Value *CntAddr =
nullptr;
3295 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3297 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3298 CntAddr->
getName() +
".ascast");
3310 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3311 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3318 omp::Directive::OMPD_unknown,
3322 return BarrierIP1.takeError();
3328 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3329 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3333 auto *RedListArrayTy =
3336 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3338 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3339 {ConstantInt::get(IndexTy, 0),
3340 ConstantInt::get(IndexTy, En.index())});
3346 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3349 return GenRes.takeError();
3360 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3365 Builder.CreateStore(Elem, MediumPtr,
3377 omp::Directive::OMPD_unknown,
3381 return BarrierIP2.takeError();
3388 Value *NumWarpsVal =
3391 Value *IsActiveThread =
3392 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3393 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3400 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3402 Value *TargetElemPtrPtr =
3403 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3404 {ConstantInt::get(IndexTy, 0),
3405 ConstantInt::get(IndexTy, En.index())});
3406 Value *TargetElemPtrVal =
3408 Value *TargetElemPtr = TargetElemPtrVal;
3412 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3415 return GenRes.takeError();
3417 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3425 Value *SrcMediumValue =
3426 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3427 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3437 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3438 Builder.CreateStore(Cnt, CntAddr,
false);
3440 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3444 RealTySize %= TySize;
3454Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3457 LLVMContext &Ctx =
M.getContext();
3458 FunctionType *FuncTy =
3460 {Builder.getPtrTy(), Builder.getInt16Ty(),
3461 Builder.getInt16Ty(), Builder.getInt16Ty()},
3465 "_omp_reduction_shuffle_and_reduce_func", &
M);
3475 Builder.SetInsertPoint(EntryBB);
3486 Type *ReduceListArgType = ReduceListArg->
getType();
3490 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3491 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3492 LaneIDArg->
getName() +
".addr");
3494 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3495 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3496 AlgoVerArg->
getName() +
".addr");
3503 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3505 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3506 ReduceListAlloca, ReduceListArgType,
3507 ReduceListAlloca->
getName() +
".ascast");
3508 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3509 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3510 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3511 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3512 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3513 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3514 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3515 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3516 RemoteReductionListAlloca,
Builder.getPtrTy(),
3517 RemoteReductionListAlloca->
getName() +
".ascast");
3519 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3520 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3521 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3522 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3524 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3525 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3526 Value *RemoteLaneOffset =
3527 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3528 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3535 Error EmitRedLsCpRes = emitReductionListCopy(
3537 ReduceList, RemoteListAddrCast, IsByRef,
3538 {RemoteLaneOffset,
nullptr,
nullptr});
3541 return EmitRedLsCpRes;
3566 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3571 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3572 Value *RemoteOffsetComp =
3574 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3575 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3576 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3582 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3584 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3585 ReduceList,
Builder.getPtrTy());
3586 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3587 RemoteListAddrCast,
Builder.getPtrTy());
3589 ->addFnAttr(Attribute::NoUnwind);
3600 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3601 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3606 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3610 EmitRedLsCpRes = emitReductionListCopy(
3612 RemoteListAddrCast, ReduceList, IsByRef);
3615 return EmitRedLsCpRes;
3630OpenMPIRBuilder::generateReductionDescriptor(
3632 Type *DescriptorType,
3638 Value *DescriptorSize =
3639 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3641 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3642 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3646 Value *DataPtrField;
3648 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3651 return GenResult.takeError();
3654 DataPtr,
Builder.getPtrTy(),
".ascast"),
3660Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3664 LLVMContext &Ctx =
M.getContext();
3667 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3671 "_omp_reduction_list_to_global_copy_func", &
M);
3678 Builder.SetInsertPoint(EntryBlock);
3688 BufferArg->
getName() +
".addr");
3692 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3693 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3694 BufferArgAlloca,
Builder.getPtrTy(),
3695 BufferArgAlloca->
getName() +
".ascast");
3696 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3697 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3698 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3699 ReduceListArgAlloca,
Builder.getPtrTy(),
3700 ReduceListArgAlloca->
getName() +
".ascast");
3702 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3703 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3704 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3706 Value *LocalReduceList =
3708 Value *BufferArgVal =
3712 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3713 for (
auto En :
enumerate(ReductionInfos)) {
3715 auto *RedListArrayTy =
3719 RedListArrayTy, LocalReduceList,
3720 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3726 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3728 ReductionsBufferTy, BufferVD, 0, En.index());
3730 switch (RI.EvaluationKind) {
3732 Value *TargetElement;
3734 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3735 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3738 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3741 return GenResult.takeError();
3744 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3747 Builder.CreateStore(TargetElement, GlobVal);
3751 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3752 RI.ElementType, ElemPtr, 0, 0,
".realp");
3754 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3756 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3758 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3760 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3761 RI.ElementType, GlobVal, 0, 0,
".realp");
3762 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3763 RI.ElementType, GlobVal, 0, 1,
".imagp");
3764 Builder.CreateStore(SrcReal, DestRealPtr);
3765 Builder.CreateStore(SrcImg, DestImgPtr);
3770 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
3772 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3773 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3784Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
3788 LLVMContext &Ctx =
M.getContext();
3791 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3795 "_omp_reduction_list_to_global_reduce_func", &
M);
3802 Builder.SetInsertPoint(EntryBlock);
3812 BufferArg->
getName() +
".addr");
3816 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3817 auto *RedListArrayTy =
3822 Value *LocalReduceList =
3823 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3827 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3828 BufferArgAlloca,
Builder.getPtrTy(),
3829 BufferArgAlloca->
getName() +
".ascast");
3830 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3831 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3832 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3833 ReduceListArgAlloca,
Builder.getPtrTy(),
3834 ReduceListArgAlloca->
getName() +
".ascast");
3835 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3836 LocalReduceList,
Builder.getPtrTy(),
3837 LocalReduceList->
getName() +
".ascast");
3839 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3840 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3841 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3846 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3847 for (
auto En :
enumerate(ReductionInfos)) {
3851 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3855 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
3856 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3857 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3862 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
3863 RedListArrayTy, LocalReduceListAddrCast,
3864 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3866 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3868 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3869 ReductionsBufferTy, BufferVD, 0, En.index());
3871 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3875 Value *SrcElementPtrPtr =
3876 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3877 {ConstantInt::get(IndexTy, 0),
3878 ConstantInt::get(IndexTy, En.index())});
3879 Value *SrcDescriptorAddr =
3884 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
3885 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3888 return GenResult.takeError();
3890 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3892 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3900 ->addFnAttr(Attribute::NoUnwind);
3906Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
3910 LLVMContext &Ctx =
M.getContext();
3913 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3917 "_omp_reduction_global_to_list_copy_func", &
M);
3924 Builder.SetInsertPoint(EntryBlock);
3934 BufferArg->
getName() +
".addr");
3938 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3939 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3940 BufferArgAlloca,
Builder.getPtrTy(),
3941 BufferArgAlloca->
getName() +
".ascast");
3942 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3943 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3944 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3945 ReduceListArgAlloca,
Builder.getPtrTy(),
3946 ReduceListArgAlloca->
getName() +
".ascast");
3947 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3948 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3949 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3951 Value *LocalReduceList =
3956 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3957 for (
auto En :
enumerate(ReductionInfos)) {
3958 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3959 auto *RedListArrayTy =
3963 RedListArrayTy, LocalReduceList,
3964 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3969 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3970 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3971 ReductionsBufferTy, BufferVD, 0, En.index());
3977 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3983 return GenResult.takeError();
3988 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
3989 Builder.CreateStore(TargetElement, ElemPtr);
3993 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4002 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4004 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4006 Builder.CreateStore(SrcReal, DestRealPtr);
4007 Builder.CreateStore(SrcImg, DestImgPtr);
4014 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4015 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4027Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4031 LLVMContext &Ctx =
M.getContext();
4034 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4038 "_omp_reduction_global_to_list_reduce_func", &
M);
4045 Builder.SetInsertPoint(EntryBlock);
4055 BufferArg->
getName() +
".addr");
4059 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4065 Value *LocalReduceList =
4066 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4070 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4071 BufferArgAlloca,
Builder.getPtrTy(),
4072 BufferArgAlloca->
getName() +
".ascast");
4073 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4074 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4075 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4076 ReduceListArgAlloca,
Builder.getPtrTy(),
4077 ReduceListArgAlloca->
getName() +
".ascast");
4078 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4079 LocalReduceList,
Builder.getPtrTy(),
4080 LocalReduceList->
getName() +
".ascast");
4082 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4083 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4084 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4089 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4090 for (
auto En :
enumerate(ReductionInfos)) {
4094 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4098 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4099 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4100 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4105 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4106 RedListArrayTy, ReductionList,
4107 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4110 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4111 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4112 ReductionsBufferTy, BufferVD, 0, En.index());
4114 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4116 Value *ReduceListVal =
4118 Value *SrcElementPtrPtr =
4119 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4120 {ConstantInt::get(IndexTy, 0),
4121 ConstantInt::get(IndexTy, En.index())});
4122 Value *SrcDescriptorAddr =
4127 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4128 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4130 return GenResult.takeError();
4132 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4134 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4142 ->addFnAttr(Attribute::NoUnwind);
4148std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4149 std::string Suffix =
4151 return (Name + Suffix).str();
4154Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4157 AttributeList FuncAttrs) {
4159 {Builder.getPtrTy(), Builder.getPtrTy()},
4161 std::string
Name = getReductionFuncName(ReducerName);
4169 Builder.SetInsertPoint(EntryBB);
4173 Value *LHSArrayPtr =
nullptr;
4174 Value *RHSArrayPtr =
nullptr;
4181 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4183 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4184 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4185 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4186 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4187 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4188 Builder.CreateStore(Arg0, LHSAddrCast);
4189 Builder.CreateStore(Arg1, RHSAddrCast);
4190 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4191 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4195 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4197 for (
auto En :
enumerate(ReductionInfos)) {
4200 RedArrayTy, RHSArrayPtr,
4201 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4203 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4204 RHSI8Ptr, RI.PrivateVariable->getType(),
4205 RHSI8Ptr->
getName() +
".ascast");
4208 RedArrayTy, LHSArrayPtr,
4209 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4211 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4212 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4221 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4222 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4223 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4230 return AfterIP.takeError();
4231 if (!
Builder.GetInsertBlock())
4232 return ReductionFunc;
4236 if (!IsByRef.
empty() && !IsByRef[En.index()])
4237 Builder.CreateStore(Reduced, LHSPtr);
4242 for (
auto En :
enumerate(ReductionInfos)) {
4243 unsigned Index = En.index();
4245 Value *LHSFixupPtr, *RHSFixupPtr;
4246 Builder.restoreIP(RI.ReductionGenClang(
4247 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4252 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4257 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4271 return ReductionFunc;
4279 assert(RI.Variable &&
"expected non-null variable");
4280 assert(RI.PrivateVariable &&
"expected non-null private variable");
4281 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4282 "expected non-null reduction generator callback");
4285 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4286 "expected variables and their private equivalents to have the same "
4289 assert(RI.Variable->getType()->isPointerTy() &&
4290 "expected variables to be pointers");
4299 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4313 if (ReductionInfos.
size() == 0)
4323 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4327 AttributeList FuncAttrs;
4328 AttrBuilder AttrBldr(Ctx);
4330 AttrBldr.addAttribute(Attr);
4331 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4332 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4336 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4338 if (!ReductionResult)
4340 Function *ReductionFunc = *ReductionResult;
4344 if (GridValue.has_value())
4345 Config.setGridValue(GridValue.value());
4360 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4364 Value *ReductionListAlloca =
4365 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4366 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4367 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4370 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4371 for (
auto En :
enumerate(ReductionInfos)) {
4374 RedArrayTy, ReductionList,
4375 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4378 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4383 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4384 Builder.CreateStore(CastElem, ElemPtr);
4388 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4394 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4400 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4412 unsigned MaxDataSize = 0;
4414 for (
auto En :
enumerate(ReductionInfos)) {
4418 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4419 ? En.value().ByRefElementType
4420 : En.value().ElementType;
4421 auto Size =
M.getDataLayout().getTypeStoreSize(RedTypeArg);
4422 if (
Size > MaxDataSize)
4426 Value *ReductionDataSize =
4427 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4428 if (!IsTeamsReduction) {
4429 Value *SarFuncCast =
4430 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4432 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4433 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4436 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4441 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4443 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4446 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4451 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4456 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4461 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4468 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4470 Value *Args3[] = {SrcLocInfo,
4471 KernelTeamsReductionPtr,
4472 Builder.getInt32(ReductionBufNum),
4483 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4500 for (
auto En :
enumerate(ReductionInfos)) {
4508 Value *LHSPtr, *RHSPtr;
4510 &LHSPtr, &RHSPtr, CurFunc));
4523 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4525 "red.value." +
Twine(En.index()));
4536 if (!IsByRef.
empty() && !IsByRef[En.index()])
4541 if (ContinuationBlock) {
4542 Builder.CreateBr(ContinuationBlock);
4543 Builder.SetInsertPoint(ContinuationBlock);
4545 Config.setEmitLLVMUsed();
4556 ".omp.reduction.func", &M);
4566 Builder.SetInsertPoint(ReductionFuncBlock);
4567 Value *LHSArrayPtr =
nullptr;
4568 Value *RHSArrayPtr =
nullptr;
4579 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4581 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4582 Value *LHSAddrCast =
4583 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4584 Value *RHSAddrCast =
4585 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4586 Builder.CreateStore(Arg0, LHSAddrCast);
4587 Builder.CreateStore(Arg1, RHSAddrCast);
4588 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4589 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4591 LHSArrayPtr = ReductionFunc->
getArg(0);
4592 RHSArrayPtr = ReductionFunc->
getArg(1);
4595 unsigned NumReductions = ReductionInfos.
size();
4598 for (
auto En :
enumerate(ReductionInfos)) {
4600 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4601 RedArrayTy, LHSArrayPtr, 0, En.index());
4602 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4603 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4606 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4607 RedArrayTy, RHSArrayPtr, 0, En.index());
4608 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4609 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4618 Builder.restoreIP(*AfterIP);
4620 if (!Builder.GetInsertBlock())
4624 if (!IsByRef[En.index()])
4625 Builder.CreateStore(Reduced, LHSPtr);
4627 Builder.CreateRetVoid();
4634 bool IsNoWait,
bool IsTeamsReduction) {
4638 IsByRef, IsNoWait, IsTeamsReduction);
4645 if (ReductionInfos.
size() == 0)
4655 unsigned NumReductions = ReductionInfos.
size();
4658 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4660 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4662 for (
auto En :
enumerate(ReductionInfos)) {
4663 unsigned Index = En.index();
4665 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4666 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4673 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4683 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4688 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4689 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4691 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4693 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4694 : RuntimeFunction::OMPRTL___kmpc_reduce);
4697 {Ident, ThreadId, NumVariables, RedArraySize,
4698 RedArray, ReductionFunc, Lock},
4709 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4710 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4711 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4716 Builder.SetInsertPoint(NonAtomicRedBlock);
4717 for (
auto En :
enumerate(ReductionInfos)) {
4723 if (!IsByRef[En.index()]) {
4725 "red.value." +
Twine(En.index()));
4727 Value *PrivateRedValue =
4729 "red.private.value." +
Twine(En.index()));
4737 if (!
Builder.GetInsertBlock())
4740 if (!IsByRef[En.index()])
4744 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4745 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4747 Builder.CreateBr(ContinuationBlock);
4752 Builder.SetInsertPoint(AtomicRedBlock);
4753 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4760 if (!
Builder.GetInsertBlock())
4763 Builder.CreateBr(ContinuationBlock);
4776 if (!
Builder.GetInsertBlock())
4779 Builder.SetInsertPoint(ContinuationBlock);
4790 Directive OMPD = Directive::OMPD_master;
4795 Value *Args[] = {Ident, ThreadId};
4803 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4814 Directive OMPD = Directive::OMPD_masked;
4820 Value *ArgsEnd[] = {Ident, ThreadId};
4828 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4838 Call->setDoesNotThrow();
4853 bool IsInclusive,
ScanInfo *ScanRedInfo) {
4855 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4856 ScanVarsType, ScanRedInfo);
4867 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4870 Type *DestTy = ScanVarsType[i];
4871 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4874 Builder.CreateStore(Src, Val);
4879 Builder.GetInsertBlock()->getParent());
4882 IV = ScanRedInfo->
IV;
4885 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4888 Type *DestTy = ScanVarsType[i];
4890 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4892 Builder.CreateStore(Src, ScanVars[i]);
4906 Builder.GetInsertBlock()->getParent());
4911Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4915 Builder.restoreIP(AllocaIP);
4917 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4919 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4926 Builder.restoreIP(CodeGenIP);
4928 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
4929 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4933 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4934 AllocSpan,
nullptr,
"arr");
4935 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
4953 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
4962Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4968 Value *PrivateVar = RedInfo.PrivateVariable;
4969 Value *OrigVar = RedInfo.Variable;
4973 Type *SrcTy = RedInfo.ElementType;
4978 Builder.CreateStore(Src, OrigVar);
5001 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5026 Builder.GetInsertBlock()->getModule(),
5033 Builder.GetInsertBlock()->getModule(),
5039 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5040 Builder.SetInsertPoint(InputBB);
5043 Builder.SetInsertPoint(LoopBB);
5059 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5061 Builder.SetInsertPoint(InnerLoopBB);
5065 Value *ReductionVal = RedInfo.PrivateVariable;
5068 Type *DestTy = RedInfo.ElementType;
5071 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5074 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5079 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5082 Builder.CreateStore(Result, LHSPtr);
5085 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5087 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5088 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5091 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5097 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5118 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5125Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5137 Error Err = InputLoopGen();
5148 Error Err = ScanLoopGen(Builder.saveIP());
5155void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5192 Builder.SetInsertPoint(Preheader);
5195 Builder.SetInsertPoint(Header);
5196 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5197 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5202 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5203 Builder.CreateCondBr(Cmp, Body, Exit);
5208 Builder.SetInsertPoint(Latch);
5210 "omp_" + Name +
".next",
true);
5221 CL->Header = Header;
5240 NextBB, NextBB, Name);
5272 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5281 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5282 ScanRedInfo->
Span = TripCount;
5288 ScanRedInfo->
IV =
IV;
5289 createScanBBs(ScanRedInfo);
5292 assert(Terminator->getNumSuccessors() == 1);
5293 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5296 Builder.GetInsertBlock()->getParent());
5299 Builder.GetInsertBlock()->getParent());
5300 Builder.CreateBr(ContinueBlock);
5306 const auto &&InputLoopGen = [&]() ->
Error {
5308 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5309 ComputeIP, Name,
true, ScanRedInfo);
5313 Builder.restoreIP((*LoopInfo)->getAfterIP());
5319 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5323 Builder.restoreIP((*LoopInfo)->getAfterIP());
5327 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5335 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5345 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5346 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5350 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5366 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5369 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5373 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5378 Value *CountIfLooping;
5379 if (InclusiveStop) {
5380 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5386 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5389 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5390 "omp_" + Name +
".tripcount");
5395 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5402 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5409 ScanRedInfo->
IV = IndVar;
5410 return BodyGenCB(
Builder.saveIP(), IndVar);
5416 Builder.getCurrentDebugLocation());
5427 unsigned Bitwidth = Ty->getIntegerBitWidth();
5430 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5433 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5443 unsigned Bitwidth = Ty->getIntegerBitWidth();
5446 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5449 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5457 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5459 "Require dedicated allocate IP");
5465 uint32_t SrcLocStrSize;
5471 Type *IVTy =
IV->getType();
5472 FunctionCallee StaticInit =
5473 LoopType == WorksharingLoopType::DistributeForStaticLoop
5476 FunctionCallee StaticFini =
5480 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5483 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5484 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5485 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5486 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5495 Constant *One = ConstantInt::get(IVTy, 1);
5496 Builder.CreateStore(Zero, PLowerBound);
5498 Builder.CreateStore(UpperBound, PUpperBound);
5499 Builder.CreateStore(One, PStride);
5504 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5505 ? OMPScheduleType::OrderedDistribute
5508 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5512 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5513 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5516 PLowerBound, PUpperBound});
5517 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5518 Value *PDistUpperBound =
5519 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5520 Args.push_back(PDistUpperBound);
5525 BuildInitCall(SchedulingType,
Builder);
5526 if (HasDistSchedule &&
5527 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5528 Constant *DistScheduleSchedType = ConstantInt::get(
5533 BuildInitCall(DistScheduleSchedType,
Builder);
5535 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5536 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5537 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5538 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5539 CLI->setTripCount(TripCount);
5545 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5549 return Builder.CreateAdd(OldIV, LowerBound);
5561 omp::Directive::OMPD_for,
false,
5564 return BarrierIP.takeError();
5591 Reachable.insert(
Block);
5601 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5605OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5609 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5610 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5615 Type *IVTy =
IV->getType();
5617 "Max supported tripcount bitwidth is 64 bits");
5619 :
Type::getInt64Ty(Ctx);
5622 Constant *One = ConstantInt::get(InternalIVTy, 1);
5628 for (BasicBlock &BB : *
F)
5629 if (!BB.hasTerminator())
5630 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5635 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5636 for (Instruction *
I : UIs)
5637 I->eraseFromParent();
5640 if (ChunkSize || DistScheduleChunkSize)
5645 FunctionCallee StaticInit =
5647 FunctionCallee StaticFini =
5653 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5654 Value *PLowerBound =
5655 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5656 Value *PUpperBound =
5657 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5658 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5667 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5668 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5669 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5670 "distschedulechunksize");
5671 Value *CastedTripCount =
5672 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5675 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5677 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5678 Builder.CreateStore(Zero, PLowerBound);
5679 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5680 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5682 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5683 Builder.CreateStore(UpperBound, PUpperBound);
5684 Builder.CreateStore(One, PStride);
5688 uint32_t SrcLocStrSize;
5692 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5693 PUpperBound, PStride, One,
5694 this](
Value *SchedulingType,
Value *ChunkSize,
5697 StaticInit, {SrcLoc, ThreadNum,
5698 SchedulingType, PLastIter,
5699 PLowerBound, PUpperBound,
5703 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5704 if (DistScheduleSchedType != OMPScheduleType::None &&
5705 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5706 SchedType != OMPScheduleType::OrderedDistribute) {
5710 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5714 Value *FirstChunkStart =
5715 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5716 Value *FirstChunkStop =
5717 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5718 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5720 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5721 Value *NextChunkStride =
5722 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5726 Value *DispatchCounter;
5734 DispatchCounter = Counter;
5737 FirstChunkStart, CastedTripCount, NextChunkStride,
5760 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
5761 Value *IsLastChunk =
5762 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5763 Value *CountUntilOrigTripCount =
5764 Builder.CreateSub(CastedTripCount, DispatchCounter);
5766 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5767 Value *BackcastedChunkTC =
5768 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5769 CLI->setTripCount(BackcastedChunkTC);
5774 Value *BackcastedDispatchCounter =
5775 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5776 CLI->mapIndVar([&](Instruction *) ->
Value * {
5778 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
5791 return AfterIP.takeError();
5806static FunctionCallee
5809 unsigned Bitwidth = Ty->getIntegerBitWidth();
5812 case WorksharingLoopType::ForStaticLoop:
5815 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5818 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5820 case WorksharingLoopType::DistributeStaticLoop:
5823 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5826 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5828 case WorksharingLoopType::DistributeForStaticLoop:
5831 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5834 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5837 if (Bitwidth != 32 && Bitwidth != 64) {
5849 Function &LoopBodyFn,
bool NoLoop) {
5860 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5861 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5862 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5863 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5868 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5869 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5873 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5874 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5875 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5876 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5877 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5879 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5903 Builder.restoreIP({Preheader, Preheader->
end()});
5906 Builder.CreateBr(CLI->
getExit());
5914 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5922 "Expected unique undroppable user of outlined function");
5924 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5926 "Expected outlined function call to be located in loop preheader");
5928 if (OutlinedFnCallInstruction->
arg_size() > 1)
5935 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5937 for (
auto &ToBeDeletedItem : ToBeDeleted)
5938 ToBeDeletedItem->eraseFromParent();
5945 uint32_t SrcLocStrSize;
5954 SmallVector<Instruction *, 4> ToBeDeleted;
5956 OI.OuterAllocaBB = AllocaIP.getBlock();
5979 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
5981 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5983 CodeExtractorAnalysisCache CEAC(*OuterFn);
5984 CodeExtractor Extractor(Blocks,
5997 SetVector<Value *> SinkingCands, HoistingCands;
6001 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
6008 for (
auto Use :
Users) {
6010 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
6011 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
6017 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6024 OI.PostOutlineCB = [=, ToBeDeletedVec =
6025 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6035 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6036 bool HasSimdModifier,
bool HasMonotonicModifier,
6037 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6039 Value *DistScheduleChunkSize) {
6040 if (
Config.isTargetDevice())
6041 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6043 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6044 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6046 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6047 OMPScheduleType::ModifierOrdered;
6049 if (HasDistSchedule) {
6050 DistScheduleSchedType = DistScheduleChunkSize
6051 ? OMPScheduleType::OrderedDistributeChunked
6052 : OMPScheduleType::OrderedDistribute;
6054 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6055 case OMPScheduleType::BaseStatic:
6056 case OMPScheduleType::BaseDistribute:
6057 assert((!ChunkSize || !DistScheduleChunkSize) &&
6058 "No chunk size with static-chunked schedule");
6059 if (IsOrdered && !HasDistSchedule)
6060 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6061 NeedsBarrier, ChunkSize);
6063 if (DistScheduleChunkSize)
6064 return applyStaticChunkedWorkshareLoop(
6065 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6066 DistScheduleChunkSize, DistScheduleSchedType);
6067 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6070 case OMPScheduleType::BaseStaticChunked:
6071 case OMPScheduleType::BaseDistributeChunked:
6072 if (IsOrdered && !HasDistSchedule)
6073 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6074 NeedsBarrier, ChunkSize);
6076 return applyStaticChunkedWorkshareLoop(
6077 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6078 DistScheduleChunkSize, DistScheduleSchedType);
6080 case OMPScheduleType::BaseRuntime:
6081 case OMPScheduleType::BaseAuto:
6082 case OMPScheduleType::BaseGreedy:
6083 case OMPScheduleType::BaseBalanced:
6084 case OMPScheduleType::BaseSteal:
6085 case OMPScheduleType::BaseRuntimeSimd:
6087 "schedule type does not support user-defined chunk sizes");
6089 case OMPScheduleType::BaseGuidedSimd:
6090 case OMPScheduleType::BaseDynamicChunked:
6091 case OMPScheduleType::BaseGuidedChunked:
6092 case OMPScheduleType::BaseGuidedIterativeChunked:
6093 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6094 case OMPScheduleType::BaseStaticBalancedChunked:
6095 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6096 NeedsBarrier, ChunkSize);
6109 unsigned Bitwidth = Ty->getIntegerBitWidth();
6112 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6115 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6123static FunctionCallee
6125 unsigned Bitwidth = Ty->getIntegerBitWidth();
6128 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6131 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6138static FunctionCallee
6140 unsigned Bitwidth = Ty->getIntegerBitWidth();
6143 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6146 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6151OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6154 bool NeedsBarrier,
Value *Chunk) {
6155 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6157 "Require dedicated allocate IP");
6159 "Require valid schedule type");
6161 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6162 OMPScheduleType::ModifierOrdered;
6167 uint32_t SrcLocStrSize;
6173 Type *IVTy =
IV->getType();
6178 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6180 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6181 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6182 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6183 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6192 Constant *One = ConstantInt::get(IVTy, 1);
6193 Builder.CreateStore(One, PLowerBound);
6195 Builder.CreateStore(UpperBound, PUpperBound);
6196 Builder.CreateStore(One, PStride);
6213 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6225 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6228 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6229 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6232 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6233 Builder.CreateCondBr(MoreWork, Header, Exit);
6239 PI->setIncomingBlock(0, OuterCond);
6240 PI->setIncomingValue(0, LowerBound);
6245 Br->setSuccessor(OuterCond);
6251 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6254 CI->setOperand(1, UpperBound);
6258 assert(BI->getSuccessor(1) == Exit);
6259 BI->setSuccessor(1, OuterCond);
6273 omp::Directive::OMPD_for,
false,
6276 return BarrierIP.takeError();
6295 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6300 if (BBsToErase.
count(UseInst->getParent()))
6307 while (BBsToErase.
remove_if(HasRemainingUses)) {
6318 assert(
Loops.size() >= 1 &&
"At least one loop required");
6319 size_t NumLoops =
Loops.size();
6323 return Loops.front();
6335 Loop->collectControlBlocks(OldControlBBs);
6339 if (ComputeIP.
isSet())
6346 Value *CollapsedTripCount =
nullptr;
6349 "All loops to collapse must be valid canonical loops");
6350 Value *OrigTripCount = L->getTripCount();
6351 if (!CollapsedTripCount) {
6352 CollapsedTripCount = OrigTripCount;
6357 CollapsedTripCount =
6358 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6364 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6370 Builder.restoreIP(Result->getBodyIP());
6372 Value *Leftover = Result->getIndVar();
6374 NewIndVars.
resize(NumLoops);
6375 for (
int i = NumLoops - 1; i >= 1; --i) {
6376 Value *OrigTripCount =
Loops[i]->getTripCount();
6378 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6379 NewIndVars[i] = NewIndVar;
6381 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6384 NewIndVars[0] = Leftover;
6393 BasicBlock *ContinueBlock = Result->getBody();
6395 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6402 ContinueBlock =
nullptr;
6403 ContinuePred = NextSrc;
6410 for (
size_t i = 0; i < NumLoops - 1; ++i)
6411 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6417 for (
size_t i = NumLoops - 1; i > 0; --i)
6418 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6421 ContinueWith(Result->getLatch(),
nullptr);
6428 for (
size_t i = 0; i < NumLoops; ++i)
6429 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6443std::vector<CanonicalLoopInfo *>
6447 "Must pass as many tile sizes as there are loops");
6448 int NumLoops =
Loops.size();
6449 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6461 Loop->collectControlBlocks(OldControlBBs);
6469 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6470 OrigTripCounts.
push_back(L->getTripCount());
6481 for (
int i = 0; i < NumLoops - 1; ++i) {
6494 for (
int i = 0; i < NumLoops; ++i) {
6496 Value *OrigTripCount = OrigTripCounts[i];
6509 Value *FloorTripOverflow =
6510 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6512 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6513 Value *FloorTripCount =
6514 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6515 "omp_floor" +
Twine(i) +
".tripcount",
true);
6518 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6524 std::vector<CanonicalLoopInfo *> Result;
6525 Result.reserve(NumLoops * 2);
6538 auto EmbeddNewLoop =
6539 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6542 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6547 Enter = EmbeddedLoop->
getBody();
6549 OutroInsertBefore = EmbeddedLoop->
getLatch();
6550 return EmbeddedLoop;
6554 const Twine &NameBase) {
6557 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6558 Result.push_back(EmbeddedLoop);
6562 EmbeddNewLoops(FloorCount,
"floor");
6568 for (
int i = 0; i < NumLoops; ++i) {
6572 Value *FloorIsEpilogue =
6574 Value *TileTripCount =
6581 EmbeddNewLoops(TileCounts,
"tile");
6586 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6595 BodyEnter =
nullptr;
6596 BodyEntered = ExitBB;
6608 Builder.restoreIP(Result.back()->getBodyIP());
6609 for (
int i = 0; i < NumLoops; ++i) {
6612 Value *OrigIndVar = OrigIndVars[i];
6640 if (Properties.
empty())
6663 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6667 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6675 if (
I.mayReadOrWriteMemory()) {
6679 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6693 Loop->collectControlBlocks(oldControlBBs);
6698 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6699 origTripCounts.
push_back(L->getTripCount());
6708 Builder.SetInsertPoint(TCBlock);
6709 Value *fusedTripCount =
nullptr;
6711 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6712 Value *origTripCount = L->getTripCount();
6713 if (!fusedTripCount) {
6714 fusedTripCount = origTripCount;
6717 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
6718 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
6732 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6733 Loops[i]->getPreheader()->moveBefore(TCBlock);
6734 Loops[i]->getAfter()->moveBefore(TCBlock);
6738 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6750 for (
size_t i = 0; i <
Loops.size(); ++i) {
6752 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
6753 Builder.SetInsertPoint(condBlock);
6761 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6762 Builder.SetInsertPoint(condBBs[i]);
6763 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
6779 "omp.fused.pre_latch");
6812 const Twine &NamePrefix) {
6841 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6843 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
6846 Builder.SetInsertPoint(SplitBeforeIt);
6848 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6851 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6854 Builder.SetInsertPoint(ElseBlock);
6860 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
6862 ExistingBlocks.
append(L->block_begin(), L->block_end());
6868 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6870 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
6877 if (
Block == ThenBlock)
6878 NewBB->
setName(NamePrefix +
".if.else");
6881 VMap[
Block] = NewBB;
6889 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
6890 NamePrefix +
".pre_latch");
6894 L->addBasicBlockToLoop(ThenBlock, LI);
6900 if (TargetTriple.
isX86()) {
6901 if (Features.
lookup(
"avx512f"))
6903 else if (Features.
lookup(
"avx"))
6907 if (TargetTriple.
isPPC())
6909 if (TargetTriple.
isWasm())
6916 Value *IfCond, OrderKind Order,
6926 if (!BB.hasTerminator())
6942 I->eraseFromParent();
6945 if (AlignedVars.
size()) {
6947 for (
auto &AlignedItem : AlignedVars) {
6948 Value *AlignedPtr = AlignedItem.first;
6949 Value *Alignment = AlignedItem.second;
6952 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
6960 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
6973 Reachable.insert(
Block);
6983 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
6999 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
7001 if (Simdlen || Safelen) {
7005 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7031static std::unique_ptr<TargetMachine>
7035 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7036 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7047 std::nullopt, OptLevel));
7065 if (!BB.hasTerminator())
7078 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7079 FAM.registerPass([&]() {
return TIRA; });
7093 I->eraseFromParent();
7096 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7101 nullptr, ORE,
static_cast<int>(OptLevel),
7122 <<
" Threshold=" << UP.
Threshold <<
"\n"
7125 <<
" PartialOptSizeThreshold="
7145 Ptr = Load->getPointerOperand();
7147 Ptr = Store->getPointerOperand();
7154 if (Alloca->getParent() == &
F->getEntryBlock())
7174 int MaxTripCount = 0;
7175 bool MaxOrZero =
false;
7176 unsigned TripMultiple = 0;
7179 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7180 unsigned Factor = UP.
Count;
7181 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7192 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7208 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7221 *UnrolledCLI =
Loop;
7226 "unrolling only makes sense with a factor of 2 or larger");
7228 Type *IndVarTy =
Loop->getIndVarType();
7235 std::vector<CanonicalLoopInfo *>
LoopNest =
7250 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7253 (*UnrolledCLI)->assertOK();
7271 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7290 if (!CPVars.
empty()) {
7295 Directive OMPD = Directive::OMPD_single;
7300 Value *Args[] = {Ident, ThreadId};
7309 if (
Error Err = FiniCB(IP))
7330 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7337 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7340 ConstantInt::get(Int64, 0), CPVars[
I],
7343 }
else if (!IsNowait) {
7346 omp::Directive::OMPD_unknown,
false,
7361 Directive OMPD = Directive::OMPD_critical;
7366 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7367 Value *Args[] = {Ident, ThreadId, LockVar};
7384 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7392 const Twine &Name,
bool IsDependSource) {
7396 "OpenMP runtime requires depend vec with i64 type");
7409 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7423 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7441 Directive OMPD = Directive::OMPD_ordered;
7450 Value *Args[] = {Ident, ThreadId};
7460 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7467 bool HasFinalize,
bool IsCancellable) {
7474 BasicBlock *EntryBB = Builder.GetInsertBlock();
7483 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7494 "Unexpected control flow graph state!!");
7496 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7498 return AfterIP.takeError();
7503 "Unexpected Insertion point location!");
7506 auto InsertBB = merged ? ExitPredBB : ExitBB;
7509 Builder.SetInsertPoint(InsertBB);
7511 return Builder.saveIP();
7515 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7517 if (!Conditional || !EntryCall)
7523 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7533 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7537 UI->eraseFromParent();
7545 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7553 "Unexpected finalization stack state!");
7556 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7558 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7559 return std::move(Err);
7563 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7573 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7607 "copyin.not.master.end");
7614 Builder.SetInsertPoint(OMP_Entry);
7615 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7616 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7617 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7618 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7620 Builder.SetInsertPoint(CopyBegin);
7637 Value *Args[] = {ThreadId,
Size, Allocator};
7654 Value *Args[] = {ThreadId, Addr, Allocator};
7662 Value *DependenceAddress,
bool HaveNowaitClause) {
7670 if (Device ==
nullptr)
7672 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
7673 if (NumDependences ==
nullptr) {
7674 NumDependences = ConstantInt::get(Int32, 0);
7678 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7680 Ident, ThreadId, InteropVar, InteropTypeVal,
7681 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
7690 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
7698 if (Device ==
nullptr)
7700 if (NumDependences ==
nullptr) {
7701 NumDependences = ConstantInt::get(Int32, 0);
7705 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7707 Ident, ThreadId, InteropVar, Device,
7708 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7717 Value *NumDependences,
7718 Value *DependenceAddress,
7719 bool HaveNowaitClause) {
7726 if (Device ==
nullptr)
7728 if (NumDependences ==
nullptr) {
7729 NumDependences = ConstantInt::get(Int32, 0);
7733 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7735 Ident, ThreadId, InteropVar, Device,
7736 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7766 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
7767 "expected num_threads and num_teams to be specified");
7786 const std::string DebugPrefix =
"_debug__";
7787 if (KernelName.
ends_with(DebugPrefix)) {
7788 KernelName = KernelName.
drop_back(DebugPrefix.length());
7789 Kernel =
M.getFunction(KernelName);
7795 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
7800 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
7801 if (MaxThreadsVal < 0) {
7807 MaxThreadsVal = Attrs.MinThreads;
7811 if (MaxThreadsVal > 0)
7824 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7827 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7828 Constant *DynamicEnvironmentInitializer =
7832 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7834 DL.getDefaultGlobalsAddressSpace());
7838 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7839 ? DynamicEnvironmentGV
7841 DynamicEnvironmentPtr);
7844 ConfigurationEnvironment, {
7845 UseGenericStateMachineVal,
7846 MayUseNestedParallelismVal,
7853 ReductionBufferLength,
7856 KernelEnvironment, {
7857 ConfigurationEnvironmentInitializer,
7861 std::string KernelEnvironmentName =
7862 (KernelName +
"_kernel_environment").str();
7865 KernelEnvironmentInitializer, KernelEnvironmentName,
7867 DL.getDefaultGlobalsAddressSpace());
7871 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7872 ? KernelEnvironmentGV
7874 KernelEnvironmentPtr);
7875 Value *KernelLaunchEnvironment =
7878 KernelLaunchEnvironment =
7879 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7880 ? KernelLaunchEnvironment
7881 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7882 KernelLaunchEnvParamTy);
7884 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7896 auto *UI =
Builder.CreateUnreachable();
7902 Builder.SetInsertPoint(WorkerExitBB);
7906 Builder.SetInsertPoint(CheckBBTI);
7907 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7909 CheckBBTI->eraseFromParent();
7910 UI->eraseFromParent();
7918 int32_t TeamsReductionDataSize,
7919 int32_t TeamsReductionBufferLength) {
7924 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7928 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7934 const std::string DebugPrefix =
"_debug__";
7936 KernelName = KernelName.
drop_back(DebugPrefix.length());
7937 auto *KernelEnvironmentGV =
7938 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7939 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7940 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
7942 KernelEnvironmentInitializer,
7943 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7945 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7947 KernelEnvironmentGV->setInitializer(NewInitializer);
7952 if (
Kernel.hasFnAttribute(Name)) {
7953 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7959std::pair<int32_t, int32_t>
7961 int32_t ThreadLimit =
7962 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7965 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7966 if (!Attr.isValid() || !Attr.isStringAttribute())
7967 return {0, ThreadLimit};
7968 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7971 return {0, ThreadLimit};
7972 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7980 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7982 return {0, ThreadLimit};
7988 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7991 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7999std::pair<int32_t, int32_t>
8002 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
8006 int32_t LB, int32_t UB) {
8013 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
8016void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8025 else if (
T.isNVPTX())
8027 else if (
T.isSPIRV())
8032Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8033 StringRef EntryFnIDName) {
8034 if (
Config.isTargetDevice()) {
8035 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8039 return new GlobalVariable(
8044Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8045 StringRef EntryFnName) {
8049 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8050 "Named kernel already exists?");
8051 return new GlobalVariable(
8064 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8068 OutlinedFn = *CBResult;
8070 OutlinedFn =
nullptr;
8076 if (!IsOffloadEntry)
8079 std::string EntryFnIDName =
8081 ? std::string(EntryFnName)
8085 EntryFnName, EntryFnIDName);
8093 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8094 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8095 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8097 EntryInfo, EntryAddr, OutlinedFnID,
8099 return OutlinedFnID;
8116 bool IsStandAlone = !BodyGenCB;
8123 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8125 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8126 true, DeviceAddrCB))
8133 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8143 SrcLocInfo, DeviceID,
8150 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8154 if (Info.HasNoWait) {
8164 if (Info.HasNoWait) {
8168 emitBlock(OffloadContBlock, CurFn,
true);
8174 bool RequiresOuterTargetTask = Info.HasNoWait;
8175 if (!RequiresOuterTargetTask)
8176 cantFail(TaskBodyCB(
nullptr,
nullptr,
8180 {}, RTArgs, Info.HasNoWait));
8183 omp::OMPRTL___tgt_target_data_begin_mapper);
8187 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8191 Builder.CreateStore(LI, DeviceMap.second.second);
8227 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8236 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8258 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8259 return BeginThenGen(AllocaIP,
Builder.saveIP());
8274 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8275 return EndThenGen(AllocaIP,
Builder.saveIP());
8278 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8279 return BeginThenGen(AllocaIP,
Builder.saveIP());
8290 bool IsGPUDistribute) {
8291 assert((IVSize == 32 || IVSize == 64) &&
8292 "IV size is not compatible with the omp runtime");
8294 if (IsGPUDistribute)
8296 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8297 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8298 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8299 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8301 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8302 : omp::OMPRTL___kmpc_for_static_init_4u)
8303 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8304 : omp::OMPRTL___kmpc_for_static_init_8u);
8311 assert((IVSize == 32 || IVSize == 64) &&
8312 "IV size is not compatible with the omp runtime");
8314 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8315 : omp::OMPRTL___kmpc_dispatch_init_4u)
8316 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8317 : omp::OMPRTL___kmpc_dispatch_init_8u);
8324 assert((IVSize == 32 || IVSize == 64) &&
8325 "IV size is not compatible with the omp runtime");
8327 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8328 : omp::OMPRTL___kmpc_dispatch_next_4u)
8329 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8330 : omp::OMPRTL___kmpc_dispatch_next_8u);
8337 assert((IVSize == 32 || IVSize == 64) &&
8338 "IV size is not compatible with the omp runtime");
8340 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8341 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8342 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8343 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8354 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8362 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8366 if (NewVar && (arg == NewVar->
getArg()))
8376 auto UpdateDebugRecord = [&](
auto *DR) {
8379 for (
auto Loc : DR->location_ops()) {
8380 auto Iter = ValueReplacementMap.find(
Loc);
8381 if (Iter != ValueReplacementMap.end()) {
8382 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8383 ArgNo = std::get<1>(Iter->second) + 1;
8387 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8392 if (DVR->getNumVariableLocationOps() != 1u) {
8393 DVR->setKillLocation();
8396 Value *
Loc = DVR->getVariableLocationOp(0u);
8403 RequiredBB = &DVR->getFunction()->getEntryBlock();
8405 if (RequiredBB && RequiredBB != CurBB) {
8417 "Unexpected debug intrinsic");
8419 UpdateDebugRecord(&DVR);
8420 MoveDebugRecordToCorrectBlock(&DVR);
8423 for (
auto *DVR : DVRsToDelete)
8424 DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
8428 Module *M = Func->getParent();
8431 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8432 unsigned ArgNo = Func->arg_size();
8434 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8435 false, DINode::DIFlags::FlagArtificial);
8437 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8438 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8459 for (
auto &Arg : Inputs)
8460 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8464 for (
auto &Arg : Inputs)
8465 ParameterTypes.
push_back(Arg->getType());
8473 auto BB = Builder.GetInsertBlock();
8474 auto M = BB->getModule();
8485 if (TargetCpuAttr.isStringAttribute())
8486 Func->addFnAttr(TargetCpuAttr);
8488 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8489 if (TargetFeaturesAttr.isStringAttribute())
8490 Func->addFnAttr(TargetFeaturesAttr);
8495 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8506 Builder.SetInsertPoint(EntryBB);
8512 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8522 splitBB(Builder,
true,
"outlined.body");
8528 Builder.restoreIP(*AfterIP);
8533 Builder.CreateRetVoid();
8537 auto AllocaIP = Builder.saveIP();
8542 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8574 if (Instr->getFunction() == Func)
8575 Instr->replaceUsesOfWith(
Input, InputCopy);
8581 for (
auto InArg :
zip(Inputs, ArgRange)) {
8583 Argument &Arg = std::get<1>(InArg);
8584 Value *InputCopy =
nullptr;
8587 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
8590 Builder.restoreIP(*AfterIP);
8591 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8611 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8618 ReplaceValue(
Input, InputCopy, Func);
8622 for (
auto Deferred : DeferredReplacement)
8623 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8626 ValueReplacementMap);
8634 Value *TaskWithPrivates,
8635 Type *TaskWithPrivatesTy) {
8637 Type *TaskTy = OMPIRBuilder.Task;
8640 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
8641 Value *Shareds = TaskT;
8651 if (TaskWithPrivatesTy != TaskTy)
8652 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
8669 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
8674 assert((!NumOffloadingArrays || PrivatesTy) &&
8675 "PrivatesTy cannot be nullptr when there are offloadingArrays"
8708 Type *TaskPtrTy = OMPBuilder.TaskPtr;
8709 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
8715 ".omp_target_task_proxy_func",
8716 Builder.GetInsertBlock()->getModule());
8717 Value *ThreadId = ProxyFn->getArg(0);
8718 Value *TaskWithPrivates = ProxyFn->getArg(1);
8719 ThreadId->
setName(
"thread.id");
8720 TaskWithPrivates->
setName(
"task");
8722 bool HasShareds = SharedArgsOperandNo > 0;
8723 bool HasOffloadingArrays = NumOffloadingArrays > 0;
8726 Builder.SetInsertPoint(EntryBB);
8732 if (HasOffloadingArrays) {
8733 assert(TaskTy != TaskWithPrivatesTy &&
8734 "If there are offloading arrays to pass to the target"
8735 "TaskTy cannot be the same as TaskWithPrivatesTy");
8738 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
8739 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
8741 Builder.CreateStructGEP(PrivatesTy, Privates, i));
8745 auto *ArgStructAlloca =
8747 assert(ArgStructAlloca &&
8748 "Unable to find the alloca instruction corresponding to arguments "
8749 "for extracted function");
8751 std::optional<TypeSize> ArgAllocSize =
8753 assert(ArgStructType && ArgAllocSize &&
8754 "Unable to determine size of arguments for extracted function");
8755 uint64_t StructSize = ArgAllocSize->getFixedValue();
8758 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
8760 Value *SharedsSize = Builder.getInt64(StructSize);
8763 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
8765 Builder.CreateMemCpy(
8766 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
8768 KernelLaunchArgs.
push_back(NewArgStructAlloca);
8771 Builder.CreateRetVoid();
8777 return GEP->getSourceElementType();
8779 return Alloca->getAllocatedType();
8802 if (OffloadingArraysToPrivatize.
empty())
8803 return OMPIRBuilder.Task;
8806 for (
Value *V : OffloadingArraysToPrivatize) {
8807 assert(V->getType()->isPointerTy() &&
8808 "Expected pointer to array to privatize. Got a non-pointer value "
8811 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8817 "struct.task_with_privates");
8831 EntryFnName, Inputs, CBFunc,
8836 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8973 TargetTaskAllocaBB->
begin());
8977 OI.
EntryBB = TargetTaskAllocaBB;
8983 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8986 Builder.restoreIP(TargetTaskBodyIP);
8987 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
9005 bool NeedsTargetTask = HasNoWait && DeviceID;
9006 if (NeedsTargetTask) {
9012 OffloadingArraysToPrivatize.
push_back(V);
9017 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
9018 DeviceID, OffloadingArraysToPrivatize](
9021 "there must be a single user for the outlined function");
9035 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
9036 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
9038 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
9039 "Wrong number of arguments for StaleCI when shareds are present");
9040 int SharedArgOperandNo =
9041 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
9047 if (!OffloadingArraysToPrivatize.
empty())
9052 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9053 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9055 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9058 Builder.SetInsertPoint(StaleCI);
9075 OMPRTL___kmpc_omp_target_task_alloc);
9087 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9094 auto *ArgStructAlloca =
9096 assert(ArgStructAlloca &&
9097 "Unable to find the alloca instruction corresponding to arguments "
9098 "for extracted function");
9099 std::optional<TypeSize> ArgAllocSize =
9102 "Unable to determine size of arguments for extracted function");
9103 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9122 TaskSize, SharedsSize,
9125 if (NeedsTargetTask) {
9126 assert(DeviceID &&
"Expected non-empty device ID.");
9136 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9137 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9140 if (!OffloadingArraysToPrivatize.
empty()) {
9142 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9143 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9144 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9151 "ElementType should match ArrayType");
9154 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9156 Dst, Alignment, PtrToPrivatize, Alignment,
9157 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9171 if (!NeedsTargetTask) {
9180 ConstantInt::get(
Builder.getInt32Ty(), 0),
9193 }
else if (DepArray) {
9201 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
9202 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
9212 I->eraseFromParent();
9217 << *(
Builder.GetInsertBlock()) <<
"\n");
9219 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9231 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9248 bool HasNoWait,
Value *DynCGroupMem,
9255 Builder.restoreIP(IP);
9261 return Builder.saveIP();
9264 bool HasDependencies = Dependencies.
size() > 0;
9265 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9282 if (OutlinedFnID && DeviceID)
9284 EmitTargetCallFallbackCB, KArgs,
9285 DeviceID, RTLoc, TargetTaskAllocaIP);
9293 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9300 auto &&EmitTargetCallElse =
9306 if (RequiresOuterTargetTask) {
9313 Dependencies, EmptyRTArgs, HasNoWait);
9315 return EmitTargetCallFallbackCB(Builder.saveIP());
9318 Builder.restoreIP(AfterIP);
9322 auto &&EmitTargetCallThen =
9325 Info.HasNoWait = HasNoWait;
9330 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9336 for (
auto [DefaultVal, RuntimeVal] :
9338 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9339 : Builder.getInt32(DefaultVal));
9343 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9345 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9349 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9352 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9360 Value *MaxThreadsClause =
9362 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9365 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9367 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9368 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9370 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9371 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9373 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9376 unsigned NumTargetItems = Info.NumberOfPtrs;
9384 Builder.getInt64Ty(),
9386 : Builder.getInt64(0);
9390 DynCGroupMem = Builder.getInt32(0);
9393 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9394 HasNoWait, DynCGroupMemFallback);
9401 if (RequiresOuterTargetTask)
9403 RTLoc, AllocaIP, Dependencies,
9404 KArgs.
RTArgs, Info.HasNoWait);
9407 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9408 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9411 Builder.restoreIP(AfterIP);
9418 if (!OutlinedFnID) {
9419 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
9425 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
9430 EmitTargetCallElse, AllocaIP));
9457 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9458 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9464 if (!
Config.isTargetDevice())
9466 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
9467 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
9468 DynCGroupMemFallback);
9482 return OS.
str().str();
9487 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9493 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9495 assert(Elem.second->getValueType() == Ty &&
9496 "OMP internal variable has different type than requested");
9509 :
M.getTargetTriple().isAMDGPU()
9511 :
DL.getDefaultGlobalsAddressSpace();
9520 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9521 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9528Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9529 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9530 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9541 return SizePtrToInt;
9546 std::string VarName) {
9554 return MaptypesArrayGlobal;
9559 unsigned NumOperands,
9568 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9572 ArrI64Ty,
nullptr,
".offload_sizes");
9583 int64_t DeviceID,
unsigned NumOperands) {
9589 Value *ArgsBaseGEP =
9591 {Builder.getInt32(0), Builder.getInt32(0)});
9594 {Builder.getInt32(0), Builder.getInt32(0)});
9595 Value *ArgSizesGEP =
9597 {Builder.getInt32(0), Builder.getInt32(0)});
9601 Builder.getInt32(NumOperands),
9602 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9603 MaptypesArg, MapnamesArg, NullPtr});
9610 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9611 "expected region end call to runtime only when end call is separate");
9613 auto VoidPtrTy = UnqualPtrTy;
9614 auto VoidPtrPtrTy = UnqualPtrTy;
9616 auto Int64PtrTy = UnqualPtrTy;
9618 if (!Info.NumberOfPtrs) {
9630 Info.RTArgs.BasePointersArray,
9633 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
9637 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
9641 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
9642 : Info.RTArgs.MapTypesArray,
9648 if (!Info.EmitDebug)
9652 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
9657 if (!Info.HasMapper)
9661 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
9682 "struct.descriptor_dim");
9684 enum { OffsetFD = 0, CountFD, StrideFD };
9688 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
9691 if (NonContigInfo.
Dims[
I] == 1)
9696 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9698 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
9699 unsigned RevIdx = EE -
II - 1;
9703 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9705 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
9706 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9708 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9710 NonContigInfo.
Counts[L][RevIdx], CountLVal,
9711 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9713 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9715 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
9716 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9720 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
9721 DimsAddr,
Builder.getPtrTy());
9724 Info.RTArgs.PointersArray, 0,
I);
9726 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
9731void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9735 StringRef Prefix = IsInit ?
".init" :
".del";
9741 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
9742 Value *DeleteBit = Builder.CreateAnd(
9745 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9746 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9751 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
9752 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
9753 DeleteCond = Builder.CreateIsNull(
9758 DeleteCond =
Builder.CreateIsNotNull(
9774 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9775 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9776 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9777 MapTypeArg =
Builder.CreateOr(
9780 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9781 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9785 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
9786 ArraySize, MapTypeArg, MapName};
9812 MapperFn->
addFnAttr(Attribute::NoInline);
9813 MapperFn->
addFnAttr(Attribute::NoUnwind);
9823 auto SavedIP =
Builder.saveIP();
9824 Builder.SetInsertPoint(EntryBB);
9836 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
9838 Value *PtrBegin = BeginIn;
9844 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9845 MapType, MapName, ElementSize, HeadBB,
9856 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9857 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9863 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9864 PtrPHI->addIncoming(PtrBegin, HeadBB);
9869 return Info.takeError();
9873 Value *OffloadingArgs[] = {MapperHandle};
9877 Value *ShiftedPreviousSize =
9881 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
9882 Value *CurBaseArg = Info->BasePointers[
I];
9883 Value *CurBeginArg = Info->Pointers[
I];
9884 Value *CurSizeArg = Info->Sizes[
I];
9885 Value *CurNameArg = Info->Names.size()
9891 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9893 Value *MemberMapType =
9894 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9911 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9912 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9913 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9923 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9929 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9930 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9931 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9937 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9938 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9939 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9945 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9946 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9952 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9953 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9954 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9960 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9961 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9972 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9973 CurSizeArg, CurMapType, CurNameArg};
9975 auto ChildMapperFn = CustomMapperCB(
I);
9977 return ChildMapperFn.takeError();
9978 if (*ChildMapperFn) {
9993 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9994 "omp.arraymap.next");
9995 PtrPHI->addIncoming(PtrNext, LastBB);
9996 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9998 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10003 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10004 MapType, MapName, ElementSize, DoneBB,
10018 bool IsNonContiguous,
10022 Info.clearArrayInfo();
10025 if (Info.NumberOfPtrs == 0)
10034 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
10035 PointerArrayType,
nullptr,
".offload_baseptrs");
10037 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
10038 PointerArrayType,
nullptr,
".offload_ptrs");
10040 PointerArrayType,
nullptr,
".offload_mappers");
10041 Info.RTArgs.MappersArray = MappersArray;
10048 ConstantInt::get(Int64Ty, 0));
10050 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10051 bool IsNonContigEntry =
10053 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10055 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10058 if (IsNonContigEntry) {
10060 "Index must be in-bounds for NON_CONTIG Dims array");
10062 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10063 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10068 ConstSizes[
I] = CI;
10072 RuntimeSizes.
set(
I);
10075 if (RuntimeSizes.
all()) {
10077 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10078 SizeArrayType,
nullptr,
".offload_sizes");
10084 auto *SizesArrayGbl =
10089 if (!RuntimeSizes.
any()) {
10090 Info.RTArgs.SizesArray = SizesArrayGbl;
10092 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10093 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10096 SizeArrayType,
nullptr,
".offload_sizes");
10100 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10101 SizesArrayGbl, OffloadSizeAlign,
10106 Info.RTArgs.SizesArray = Buffer;
10114 for (
auto mapFlag : CombinedInfo.
Types)
10116 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10120 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10126 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10127 Info.EmitDebug =
true;
10129 Info.RTArgs.MapNamesArray =
10131 Info.EmitDebug =
false;
10136 if (Info.separateBeginEndCalls()) {
10137 bool EndMapTypesDiffer =
false;
10139 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10140 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10141 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10142 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10143 EndMapTypesDiffer =
true;
10146 if (EndMapTypesDiffer) {
10148 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10153 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10156 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10158 Builder.CreateAlignedStore(BPVal, BP,
10159 M.getDataLayout().getPrefTypeAlign(PtrTy));
10161 if (Info.requiresDevicePointerInfo()) {
10163 CodeGenIP =
Builder.saveIP();
10165 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10166 Builder.restoreIP(CodeGenIP);
10168 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10170 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10172 DeviceAddrCB(
I, BP);
10178 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10181 Builder.CreateAlignedStore(PVal,
P,
10182 M.getDataLayout().getPrefTypeAlign(PtrTy));
10184 if (RuntimeSizes.
test(
I)) {
10186 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10192 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10195 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10198 auto CustomMFunc = CustomMapperCB(
I);
10200 return CustomMFunc.takeError();
10202 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10205 PointerArrayType, MappersArray,
10208 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10212 Info.NumberOfPtrs == 0)
10229 Builder.ClearInsertionPoint();
10259 auto CondConstant = CI->getSExtValue();
10261 return ThenGen(AllocaIP,
Builder.saveIP());
10263 return ElseGen(AllocaIP,
Builder.saveIP());
10273 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10291bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10295 "Unexpected Atomic Ordering.");
10297 bool Flush =
false;
10359 assert(
X.Var->getType()->isPointerTy() &&
10360 "OMP Atomic expects a pointer to target memory");
10361 Type *XElemTy =
X.ElemTy;
10364 "OMP atomic read expected a scalar type");
10366 Value *XRead =
nullptr;
10370 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10379 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10382 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10384 XRead = AtomicLoadRes.first;
10391 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10394 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10396 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10399 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10400 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10411 assert(
X.Var->getType()->isPointerTy() &&
10412 "OMP Atomic expects a pointer to target memory");
10413 Type *XElemTy =
X.ElemTy;
10416 "OMP atomic write expected a scalar type");
10424 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10427 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10435 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10440 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10447 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10448 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10454 Type *XTy =
X.Var->getType();
10456 "OMP Atomic expects a pointer to target memory");
10457 Type *XElemTy =
X.ElemTy;
10460 "OMP atomic update expected a scalar type");
10463 "OpenMP atomic does not support LT or GT operations");
10467 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10468 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10470 return AtomicResult.takeError();
10471 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10476Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10480 return Builder.CreateAdd(Src1, Src2);
10482 return Builder.CreateSub(Src1, Src2);
10484 return Builder.CreateAnd(Src1, Src2);
10486 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10488 return Builder.CreateOr(Src1, Src2);
10490 return Builder.CreateXor(Src1, Src2);
10514Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10517 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10518 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10521 bool emitRMWOp =
false;
10529 emitRMWOp = XElemTy;
10532 emitRMWOp = (IsXBinopExpr && XElemTy);
10539 std::pair<Value *, Value *> Res;
10541 AtomicRMWInst *RMWInst =
10542 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10543 if (
T.isAMDGPU()) {
10544 if (IsIgnoreDenormalMode)
10545 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10547 if (!IsFineGrainedMemory)
10548 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10550 if (!IsRemoteMemory)
10554 Res.first = RMWInst;
10559 Res.second = Res.first;
10561 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10565 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10568 unsigned LoadSize =
10571 OpenMPIRBuilder::AtomicInfo atomicInfo(
10573 OldVal->
getAlign(),
true , AllocaIP,
X);
10574 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10577 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10584 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10585 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10586 Builder.SetInsertPoint(ContBB);
10588 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10590 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10593 Value *Upd = *CBResult;
10594 Builder.CreateStore(Upd, NewAtomicAddr);
10597 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10598 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10599 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10600 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10603 Res.first = OldExprVal;
10606 if (UnreachableInst *ExitTI =
10609 Builder.SetInsertPoint(ExitBB);
10611 Builder.SetInsertPoint(ExitTI);
10614 IntegerType *IntCastTy =
10617 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
10626 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10633 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10634 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10635 Builder.SetInsertPoint(ContBB);
10637 PHI->addIncoming(OldVal, CurBB);
10642 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
10643 X->getName() +
".atomic.fltCast");
10645 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
10646 X->getName() +
".atomic.ptrCast");
10650 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10653 Value *Upd = *CBResult;
10654 Builder.CreateStore(Upd, NewAtomicAddr);
10655 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
10659 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
10660 Result->setVolatile(VolatileX);
10661 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
10662 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10663 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
10664 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
10666 Res.first = OldExprVal;
10670 if (UnreachableInst *ExitTI =
10673 Builder.SetInsertPoint(ExitBB);
10675 Builder.SetInsertPoint(ExitTI);
10686 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
10687 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10692 Type *XTy =
X.Var->getType();
10694 "OMP Atomic expects a pointer to target memory");
10695 Type *XElemTy =
X.ElemTy;
10698 "OMP atomic capture expected a scalar type");
10700 "OpenMP atomic does not support LT or GT operations");
10707 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
10708 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10711 Value *CapturedVal =
10712 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10713 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10715 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
10727 IsPostfixUpdate, IsFailOnly, Failure);
10739 assert(
X.Var->getType()->isPointerTy() &&
10740 "OMP atomic expects a pointer to target memory");
10743 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
10744 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
10747 bool IsInteger = E->getType()->isIntegerTy();
10749 if (
Op == OMPAtomicCompareOp::EQ) {
10764 Value *OldValue =
Builder.CreateExtractValue(Result, 0);
10766 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
10768 "OldValue and V must be of same type");
10769 if (IsPostfixUpdate) {
10770 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10772 Value *SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
10785 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10787 CurBBTI,
X.Var->getName() +
".atomic.exit");
10793 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10795 Builder.SetInsertPoint(ContBB);
10796 Builder.CreateStore(OldValue, V.Var);
10802 Builder.SetInsertPoint(ExitBB);
10804 Builder.SetInsertPoint(ExitTI);
10807 Value *CapturedValue =
10808 Builder.CreateSelect(SuccessOrFail, E, OldValue);
10809 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10815 assert(R.Var->getType()->isPointerTy() &&
10816 "r.var must be of pointer type");
10817 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10819 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10820 Value *ResultCast = R.IsSigned
10821 ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
10822 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
10823 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
10826 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10827 "Op should be either max or min at this point");
10828 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10839 if (IsXBinopExpr) {
10868 Value *CapturedValue =
nullptr;
10869 if (IsPostfixUpdate) {
10870 CapturedValue = OldValue;
10895 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
10896 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
10898 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10902 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10922 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
10949 bool SubClausesPresent =
10950 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10952 if (!
Config.isTargetDevice() && SubClausesPresent) {
10953 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10954 "if lowerbound is non-null, then upperbound must also be non-null "
10955 "for bounds on num_teams");
10957 if (NumTeamsUpper ==
nullptr)
10958 NumTeamsUpper =
Builder.getInt32(0);
10960 if (NumTeamsLower ==
nullptr)
10961 NumTeamsLower = NumTeamsUpper;
10965 "argument to if clause must be an integer value");
10969 IfExpr =
Builder.CreateICmpNE(IfExpr,
10970 ConstantInt::get(IfExpr->
getType(), 0));
10971 NumTeamsUpper =
Builder.CreateSelect(
10972 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
10975 NumTeamsLower =
Builder.CreateSelect(
10976 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
10979 if (ThreadLimit ==
nullptr)
10980 ThreadLimit =
Builder.getInt32(0);
10984 Value *NumTeamsLowerInt32 =
10986 Value *NumTeamsUpperInt32 =
10988 Value *ThreadLimitInt32 =
10995 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
10996 ThreadLimitInt32});
11001 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
11013 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
11015 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
11017 auto HostPostOutlineCB = [
this, Ident,
11018 ToBeDeleted](
Function &OutlinedFn)
mutable {
11023 "there must be a single user for the outlined function");
11028 "Outlined function must have two or three arguments only");
11030 bool HasShared = OutlinedFn.
arg_size() == 3;
11038 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
11039 "outlined function.");
11040 Builder.SetInsertPoint(StaleCI);
11047 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11051 I->eraseFromParent();
11054 if (!
Config.isTargetDevice())
11073 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11088 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
11093 if (
Config.isTargetDevice()) {
11108 std::string VarName) {
11117 return MapNamesArrayGlobal;
11122void OpenMPIRBuilder::initializeTypes(
Module &M) {
11126 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11127#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11128#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11129 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11130 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11131#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11132 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11133 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11134#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11135 T = StructType::getTypeByName(Ctx, StructName); \
11137 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11139 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11140#include "llvm/Frontend/OpenMP/OMPKinds.def"
11151 while (!Worklist.
empty()) {
11155 if (
BlockSet.insert(SuccBB).second)
11167 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11179 Fn->
addFnAttr(
"uniform-work-group-size");
11180 Fn->
addFnAttr(Attribute::MustProgress);
11198 auto &&GetMDInt = [
this](
unsigned V) {
11205 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11206 auto &&TargetRegionMetadataEmitter =
11207 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11222 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11223 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11224 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11225 GetMDInt(E.getOrder())};
11228 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11237 auto &&DeviceGlobalVarMetadataEmitter =
11238 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11248 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11249 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11253 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11260 DeviceGlobalVarMetadataEmitter);
11262 for (
const auto &E : OrderedEntries) {
11263 assert(E.first &&
"All ordered entries must exist!");
11264 if (
const auto *CE =
11267 if (!CE->getID() || !CE->getAddress()) {
11271 if (!
M.getNamedValue(FnName))
11279 }
else if (
const auto *CE =
dyn_cast<
11288 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11290 if (!CE->getAddress()) {
11295 if (CE->getVarSize() == 0)
11299 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11300 (!
Config.isTargetDevice() && CE->getAddress())) &&
11301 "Declaret target link address is set.");
11302 if (
Config.isTargetDevice())
11304 if (!CE->getAddress()) {
11311 if (!CE->getAddress()) {
11324 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11328 OMPTargetGlobalVarEntryIndirectVTable))
11337 Flags, CE->getLinkage(), CE->getVarName());
11340 Flags, CE->getLinkage());
11351 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11357 Config.getRequiresFlags());
11367 OS <<
"_" <<
Count;
11372 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11375 EntryInfo.
Line, NewCount);
11383 auto FileIDInfo = CallBack();
11387 FileID =
Status->getUniqueID().getFile();
11391 FileID =
hash_value(std::get<0>(FileIDInfo));
11395 std::get<1>(FileIDInfo));
11401 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11403 !(Remain & 1); Remain = Remain >> 1)
11421 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11423 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11430 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11436 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11437 Flags |= MemberOfFlag;
11443 bool IsDeclaration,
bool IsExternallyVisible,
11445 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11446 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11447 std::function<
Constant *()> GlobalInitializer,
11458 Config.hasRequiresUnifiedSharedMemory())) {
11463 if (!IsExternallyVisible)
11465 OS <<
"_decl_tgt_ref_ptr";
11468 Value *Ptr =
M.getNamedValue(PtrName);
11477 if (!
Config.isTargetDevice()) {
11478 if (GlobalInitializer)
11479 GV->setInitializer(GlobalInitializer());
11485 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11486 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11487 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11499 bool IsDeclaration,
bool IsExternallyVisible,
11501 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11502 std::vector<Triple> TargetTriple,
11503 std::function<
Constant *()> GlobalInitializer,
11507 (TargetTriple.empty() && !
Config.isTargetDevice()))
11518 !
Config.hasRequiresUnifiedSharedMemory()) {
11520 VarName = MangledName;
11523 if (!IsDeclaration)
11525 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11528 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
11532 if (
Config.isTargetDevice() &&
11541 if (!
M.getNamedValue(RefName)) {
11545 GvAddrRef->setConstant(
true);
11547 GvAddrRef->setInitializer(Addr);
11548 GeneratedRefs.push_back(GvAddrRef);
11557 if (
Config.isTargetDevice()) {
11558 VarName = (Addr) ? Addr->
getName() :
"";
11562 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11563 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11564 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11565 VarName = (Addr) ? Addr->
getName() :
"";
11567 VarSize =
M.getDataLayout().getPointerSize();
11586 auto &&GetMDInt = [MN](
unsigned Idx) {
11591 auto &&GetMDString = [MN](
unsigned Idx) {
11593 return V->getString();
11596 switch (GetMDInt(0)) {
11600 case OffloadEntriesInfoManager::OffloadEntryInfo::
11601 OffloadingEntryInfoTargetRegion: {
11611 case OffloadEntriesInfoManager::OffloadEntryInfo::
11612 OffloadingEntryInfoDeviceGlobalVar:
11625 if (HostFilePath.
empty())
11629 if (std::error_code Err = Buf.getError()) {
11631 "OpenMPIRBuilder: " +
11639 if (std::error_code Err =
M.getError()) {
11641 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11655 "expected a valid insertion block for creating an iterator loop");
11665 Builder.getCurrentDebugLocation(),
"omp.it.cont");
11677 T->eraseFromParent();
11686 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
11688 "iterator bodygen must terminate the canonical body with an "
11689 "unconditional branch to the loop latch",
11713 for (
const auto &
ParamAttr : ParamAttrs) {
11756 return std::string(Out.
str());
11764 unsigned VecRegSize;
11766 ISADataTy ISAData[] = {
11785 for (
char Mask :
Masked) {
11786 for (
const ISADataTy &
Data : ISAData) {
11789 Out <<
"_ZGV" <<
Data.ISA << Mask;
11791 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
11805template <
typename T>
11808 StringRef MangledName,
bool OutputBecomesInput,
11812 Out << Prefix << ISA << LMask << VLEN;
11813 if (OutputBecomesInput)
11815 Out << ParSeq <<
'_' << MangledName;
11824 bool OutputBecomesInput,
11829 OutputBecomesInput, Fn);
11831 OutputBecomesInput, Fn);
11835 OutputBecomesInput, Fn);
11837 OutputBecomesInput, Fn);
11841 OutputBecomesInput, Fn);
11843 OutputBecomesInput, Fn);
11848 OutputBecomesInput, Fn);
11859 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
11860 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
11872 OutputBecomesInput, Fn);
11879 OutputBecomesInput, Fn);
11881 OutputBecomesInput, Fn);
11885 OutputBecomesInput, Fn);
11889 OutputBecomesInput, Fn);
11898 OutputBecomesInput, Fn);
11905 MangledName, OutputBecomesInput, Fn);
11907 MangledName, OutputBecomesInput, Fn);
11911 MangledName, OutputBecomesInput, Fn);
11915 MangledName, OutputBecomesInput, Fn);
11925 return OffloadEntriesTargetRegion.empty() &&
11926 OffloadEntriesDeviceGlobalVar.empty();
11929unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
11931 auto It = OffloadEntriesTargetRegionCount.find(
11932 getTargetRegionEntryCountKey(EntryInfo));
11933 if (It == OffloadEntriesTargetRegionCount.end())
11938void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
11940 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
11941 EntryInfo.
Count + 1;
11947 OffloadEntriesTargetRegion[EntryInfo] =
11950 ++OffloadingEntriesNum;
11956 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
11959 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11963 if (OMPBuilder->Config.isTargetDevice()) {
11968 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
11969 Entry.setAddress(Addr);
11971 Entry.setFlags(Flags);
11977 "Target region entry already registered!");
11979 OffloadEntriesTargetRegion[EntryInfo] = Entry;
11980 ++OffloadingEntriesNum;
11982 incrementTargetRegionEntryInfoCount(EntryInfo);
11989 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11991 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
11992 if (It == OffloadEntriesTargetRegion.end()) {
11996 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
12004 for (
const auto &It : OffloadEntriesTargetRegion) {
12005 Action(It.first, It.second);
12011 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
12012 ++OffloadingEntriesNum;
12018 if (OMPBuilder->Config.isTargetDevice()) {
12022 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12024 if (Entry.getVarSize() == 0) {
12025 Entry.setVarSize(VarSize);
12026 Entry.setLinkage(Linkage);
12030 Entry.setVarSize(VarSize);
12031 Entry.setLinkage(Linkage);
12032 Entry.setAddress(Addr);
12035 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12036 assert(Entry.isValid() && Entry.getFlags() == Flags &&
12037 "Entry not initialized!");
12038 if (Entry.getVarSize() == 0) {
12039 Entry.setVarSize(VarSize);
12040 Entry.setLinkage(Linkage);
12047 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
12048 Addr, VarSize, Flags, Linkage,
12051 OffloadEntriesDeviceGlobalVar.try_emplace(
12052 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12053 ++OffloadingEntriesNum;
12060 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12061 Action(E.getKey(), E.getValue());
12068void CanonicalLoopInfo::collectControlBlocks(
12075 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12087void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12099void CanonicalLoopInfo::mapIndVar(
12109 for (
Use &U : OldIV->
uses()) {
12113 if (
User->getParent() == getCond())
12115 if (
User->getParent() == getLatch())
12121 Value *NewIV = Updater(OldIV);
12124 for (Use *U : ReplacableUses)
12145 "Preheader must terminate with unconditional branch");
12147 "Preheader must jump to header");
12151 "Header must terminate with unconditional branch");
12152 assert(Header->getSingleSuccessor() == Cond &&
12153 "Header must jump to exiting block");
12156 assert(Cond->getSinglePredecessor() == Header &&
12157 "Exiting block only reachable from header");
12160 "Exiting block must terminate with conditional branch");
12162 "Exiting block's first successor jump to the body");
12164 "Exiting block's second successor must exit the loop");
12168 "Body only reachable from exiting block");
12173 "Latch must terminate with unconditional branch");
12174 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12177 assert(Latch->getSinglePredecessor() !=
nullptr);
12182 "Exit block must terminate with unconditional branch");
12183 assert(Exit->getSingleSuccessor() == After &&
12184 "Exit block must jump to after block");
12188 "After block only reachable from exit block");
12192 assert(IndVar &&
"Canonical induction variable not found?");
12194 "Induction variable must be an integer");
12196 "Induction variable must be a PHI in the loop header");
12202 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12210 assert(TripCount &&
"Loop trip count not found?");
12212 "Trip count and induction variable must have the same type");
12216 "Exit condition must be a signed less-than comparison");
12218 "Exit condition must compare the induction variable");
12220 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
ReductionGenCBKind
Enum class for the RedctionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
Read/write bounds on threads for Kernel.
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetGenArgAccessorsCallbackTy
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, AffinityData Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetBodyGenCallbackTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
a struct to pack relevant information while generating atomic Ops
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
bool FixUpNonEntryAllocas
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...