68#define DEBUG_TYPE "openmp-ir-builder"
75 cl::desc(
"Use optimistic attributes describing "
76 "'as-if' properties of runtime calls."),
80 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
81 cl::desc(
"Factor for the unroll threshold to account for code "
82 "simplifications still taking place"),
93 if (!IP1.isSet() || !IP2.isSet())
95 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
100 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
101 case OMPScheduleType::UnorderedStaticChunked:
102 case OMPScheduleType::UnorderedStatic:
103 case OMPScheduleType::UnorderedDynamicChunked:
104 case OMPScheduleType::UnorderedGuidedChunked:
105 case OMPScheduleType::UnorderedRuntime:
106 case OMPScheduleType::UnorderedAuto:
107 case OMPScheduleType::UnorderedTrapezoidal:
108 case OMPScheduleType::UnorderedGreedy:
109 case OMPScheduleType::UnorderedBalanced:
110 case OMPScheduleType::UnorderedGuidedIterativeChunked:
111 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
112 case OMPScheduleType::UnorderedSteal:
113 case OMPScheduleType::UnorderedStaticBalancedChunked:
114 case OMPScheduleType::UnorderedGuidedSimd:
115 case OMPScheduleType::UnorderedRuntimeSimd:
116 case OMPScheduleType::OrderedStaticChunked:
117 case OMPScheduleType::OrderedStatic:
118 case OMPScheduleType::OrderedDynamicChunked:
119 case OMPScheduleType::OrderedGuidedChunked:
120 case OMPScheduleType::OrderedRuntime:
121 case OMPScheduleType::OrderedAuto:
122 case OMPScheduleType::OrderdTrapezoidal:
123 case OMPScheduleType::NomergeUnorderedStaticChunked:
124 case OMPScheduleType::NomergeUnorderedStatic:
125 case OMPScheduleType::NomergeUnorderedDynamicChunked:
126 case OMPScheduleType::NomergeUnorderedGuidedChunked:
127 case OMPScheduleType::NomergeUnorderedRuntime:
128 case OMPScheduleType::NomergeUnorderedAuto:
129 case OMPScheduleType::NomergeUnorderedTrapezoidal:
130 case OMPScheduleType::NomergeUnorderedGreedy:
131 case OMPScheduleType::NomergeUnorderedBalanced:
132 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
133 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
134 case OMPScheduleType::NomergeUnorderedSteal:
135 case OMPScheduleType::NomergeOrderedStaticChunked:
136 case OMPScheduleType::NomergeOrderedStatic:
137 case OMPScheduleType::NomergeOrderedDynamicChunked:
138 case OMPScheduleType::NomergeOrderedGuidedChunked:
139 case OMPScheduleType::NomergeOrderedRuntime:
140 case OMPScheduleType::NomergeOrderedAuto:
141 case OMPScheduleType::NomergeOrderedTrapezoidal:
142 case OMPScheduleType::OrderedDistributeChunked:
143 case OMPScheduleType::OrderedDistribute:
151 SchedType & OMPScheduleType::MonotonicityMask;
152 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
164 Builder.restoreIP(IP);
174 Kernel->getFnAttribute(
"target-features").getValueAsString();
175 if (Features.
count(
"+wavefrontsize64"))
190 bool HasSimdModifier,
bool HasDistScheduleChunks) {
192 switch (ClauseKind) {
193 case OMP_SCHEDULE_Default:
194 case OMP_SCHEDULE_Static:
195 return HasChunks ? OMPScheduleType::BaseStaticChunked
196 : OMPScheduleType::BaseStatic;
197 case OMP_SCHEDULE_Dynamic:
198 return OMPScheduleType::BaseDynamicChunked;
199 case OMP_SCHEDULE_Guided:
200 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
201 : OMPScheduleType::BaseGuidedChunked;
202 case OMP_SCHEDULE_Auto:
204 case OMP_SCHEDULE_Runtime:
205 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
206 : OMPScheduleType::BaseRuntime;
207 case OMP_SCHEDULE_Distribute:
208 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
209 : OMPScheduleType::BaseDistribute;
217 bool HasOrderedClause) {
218 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
219 OMPScheduleType::None &&
220 "Must not have ordering nor monotonicity flags already set");
223 ? OMPScheduleType::ModifierOrdered
224 : OMPScheduleType::ModifierUnordered;
225 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
228 if (OrderingScheduleType ==
229 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
230 return OMPScheduleType::OrderedGuidedChunked;
231 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
232 OMPScheduleType::ModifierOrdered))
233 return OMPScheduleType::OrderedRuntime;
235 return OrderingScheduleType;
241 bool HasSimdModifier,
bool HasMonotonic,
242 bool HasNonmonotonic,
bool HasOrderedClause) {
243 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
244 OMPScheduleType::None &&
245 "Must not have monotonicity flags already set");
246 assert((!HasMonotonic || !HasNonmonotonic) &&
247 "Monotonic and Nonmonotonic are contradicting each other");
250 return ScheduleType | OMPScheduleType::ModifierMonotonic;
251 }
else if (HasNonmonotonic) {
252 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
262 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
263 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
269 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
277 bool HasSimdModifier,
bool HasMonotonicModifier,
278 bool HasNonmonotonicModifier,
bool HasOrderedClause,
279 bool HasDistScheduleChunks) {
281 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
285 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
286 HasNonmonotonicModifier, HasOrderedClause);
308 NewBr->setDebugLoc(
DL);
313 assert(New->getFirstInsertionPt() == New->begin() &&
314 "Target BB must not have PHI nodes");
330 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
334 NewBr->setDebugLoc(
DL);
346 Builder.SetInsertPoint(Old);
350 Builder.SetCurrentDebugLocation(
DebugLoc);
360 New->replaceSuccessorsPhiUsesWith(Old, New);
369 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
371 Builder.SetInsertPoint(Builder.GetInsertBlock());
374 Builder.SetCurrentDebugLocation(
DebugLoc);
383 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
385 Builder.SetInsertPoint(Builder.GetInsertBlock());
388 Builder.SetCurrentDebugLocation(
DebugLoc);
405 const Twine &Name =
"",
bool AsPtr =
true,
406 bool Is64Bit =
false) {
407 Builder.restoreIP(OuterAllocaIP);
411 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
415 FakeVal = FakeValAddr;
417 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
422 Builder.restoreIP(InnerAllocaIP);
425 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
428 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
441enum OpenMPOffloadingRequiresDirFlags {
443 OMP_REQ_UNDEFINED = 0x000,
445 OMP_REQ_NONE = 0x001,
447 OMP_REQ_REVERSE_OFFLOAD = 0x002,
449 OMP_REQ_UNIFIED_ADDRESS = 0x004,
451 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
453 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
460 : RequiresFlags(OMP_REQ_UNDEFINED) {}
464 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
465 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
468 RequiresFlags(OMP_REQ_UNDEFINED) {
469 if (HasRequiresReverseOffload)
470 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
471 if (HasRequiresUnifiedAddress)
472 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
473 if (HasRequiresUnifiedSharedMemory)
474 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
475 if (HasRequiresDynamicAllocators)
476 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
480 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
484 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
488 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
492 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
497 :
static_cast<int64_t
>(OMP_REQ_NONE);
502 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
504 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
509 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
516 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
518 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
523 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
525 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
538 constexpr size_t MaxDim = 3;
543 Value *DynCGroupMemFallbackFlag =
545 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
546 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
552 Value *NumThreads3D =
583 auto FnAttrs = Attrs.getFnAttrs();
584 auto RetAttrs = Attrs.getRetAttrs();
586 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
591 bool Param =
true) ->
void {
592 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
593 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
594 if (HasSignExt || HasZeroExt) {
595 assert(AS.getNumAttributes() == 1 &&
596 "Currently not handling extension attr combined with others.");
598 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
601 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
608#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
609#include "llvm/Frontend/OpenMP/OMPKinds.def"
613#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
615 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
616 addAttrSet(RetAttrs, RetAttrSet, false); \
617 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
618 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
619 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
621#include "llvm/Frontend/OpenMP/OMPKinds.def"
635#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
637 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
639 Fn = M.getFunction(Str); \
641#include "llvm/Frontend/OpenMP/OMPKinds.def"
647#define OMP_RTL(Enum, Str, ...) \
649 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
651#include "llvm/Frontend/OpenMP/OMPKinds.def"
655 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
665 LLVMContext::MD_callback,
667 2, {-1, -1},
true)}));
680 assert(Fn &&
"Failed to create OpenMP runtime function");
691 Builder.SetInsertPoint(FiniBB);
703 FiniBB = OtherFiniBB;
705 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
713 auto EndIt = FiniBB->end();
714 if (FiniBB->size() >= 1)
715 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
720 FiniBB->replaceAllUsesWith(OtherFiniBB);
721 FiniBB->eraseFromParent();
722 FiniBB = OtherFiniBB;
729 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
752 for (
auto Inst =
Block->getReverseIterator()->begin();
753 Inst !=
Block->getReverseIterator()->end();) {
782 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
807 ParallelRegionBlockSet.
clear();
809 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
819 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
828 ".omp_par", ArgsInZeroAddressSpace);
832 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
833 assert(Extractor.isEligible() &&
834 "Expected OpenMP outlining to be possible!");
836 for (
auto *V : OI.ExcludeArgsFromAggregate)
837 Extractor.excludeArgFromAggregate(V);
840 Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
844 if (TargetCpuAttr.isStringAttribute())
847 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
848 if (TargetFeaturesAttr.isStringAttribute())
849 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
852 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
854 "OpenMP outlined functions should not return a value!");
859 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
866 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
873 "Expected instructions to add in the outlined region entry");
875 End = ArtificialEntry.
rend();
880 if (
I.isTerminator()) {
882 if (OI.EntryBB->getTerminator())
883 OI.EntryBB->getTerminator()->adoptDbgRecords(
884 &ArtificialEntry,
I.getIterator(),
false);
888 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
891 OI.EntryBB->moveBefore(&ArtificialEntry);
898 if (OI.PostOutlineCB)
899 OI.PostOutlineCB(*OutlinedFn);
901 if (OI.FixUpNonEntryAllocas)
933 errs() <<
"Error of kind: " << Kind
934 <<
" when emitting offload entries and metadata during "
935 "OMPIRBuilder finalization \n";
941 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
942 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
943 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
944 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
961 ConstantInt::get(I32Ty,
Value), Name);
974 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
978 if (UsedArray.
empty())
985 GV->setSection(
"llvm.metadata");
991 auto *Int8Ty =
Builder.getInt8Ty();
994 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1002 unsigned Reserve2Flags) {
1004 LocFlags |= OMP_IDENT_FLAG_KMPC;
1011 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1012 ConstantInt::get(Int32, Reserve2Flags),
1013 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1015 size_t SrcLocStrArgIdx = 4;
1016 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1020 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1027 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1028 if (
GV.getInitializer() == Initializer)
1033 M, OpenMPIRBuilder::Ident,
1036 M.getDataLayout().getDefaultGlobalsAddressSpace());
1048 SrcLocStrSize = LocStr.
size();
1057 if (
GV.isConstant() &&
GV.hasInitializer() &&
1058 GV.getInitializer() == Initializer)
1061 SrcLocStr =
Builder.CreateGlobalString(
1062 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1070 unsigned Line,
unsigned Column,
1076 Buffer.
append(FunctionName);
1078 Buffer.
append(std::to_string(Line));
1080 Buffer.
append(std::to_string(Column));
1088 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1099 if (
DIFile *DIF = DIL->getFile())
1100 if (std::optional<StringRef> Source = DIF->getSource())
1106 DIL->getColumn(), SrcLocStrSize);
1112 Loc.IP.getBlock()->getParent());
1118 "omp_global_thread_num");
1123 bool ForceSimpleCall,
bool CheckCancelFlag) {
1133 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1136 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1139 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1142 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1145 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1158 bool UseCancelBarrier =
1163 ? OMPRTL___kmpc_cancel_barrier
1164 : OMPRTL___kmpc_barrier),
1167 if (UseCancelBarrier && CheckCancelFlag)
1177 omp::Directive CanceledDirective) {
1182 auto *UI =
Builder.CreateUnreachable();
1190 Builder.SetInsertPoint(ElseTI);
1191 auto ElseIP =
Builder.saveIP();
1199 Builder.SetInsertPoint(ThenTI);
1201 Value *CancelKind =
nullptr;
1202 switch (CanceledDirective) {
1203#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1204 case DirectiveEnum: \
1205 CancelKind = Builder.getInt32(Value); \
1207#include "llvm/Frontend/OpenMP/OMPKinds.def"
1224 Builder.SetInsertPoint(UI->getParent());
1225 UI->eraseFromParent();
1232 omp::Directive CanceledDirective) {
1237 auto *UI =
Builder.CreateUnreachable();
1240 Value *CancelKind =
nullptr;
1241 switch (CanceledDirective) {
1242#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1243 case DirectiveEnum: \
1244 CancelKind = Builder.getInt32(Value); \
1246#include "llvm/Frontend/OpenMP/OMPKinds.def"
1263 Builder.SetInsertPoint(UI->getParent());
1264 UI->eraseFromParent();
1277 auto *KernelArgsPtr =
1278 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1283 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1286 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1290 NumThreads, HostPtr, KernelArgsPtr};
1317 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1321 Value *Return =
nullptr;
1341 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1342 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1349 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1351 auto CurFn =
Builder.GetInsertBlock()->getParent();
1358 emitBlock(OffloadContBlock, CurFn,
true);
1363 Value *CancelFlag, omp::Directive CanceledDirective) {
1365 "Unexpected cancellation!");
1385 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1394 Builder.SetInsertPoint(CancellationBlock);
1395 Builder.CreateBr(*FiniBBOrErr);
1398 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1417 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1420 "Expected at least tid and bounded tid as arguments");
1421 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1424 assert(CI &&
"Expected call instruction to outlined function");
1425 CI->
getParent()->setName(
"omp_parallel");
1427 Builder.SetInsertPoint(CI);
1428 Type *PtrTy = OMPIRBuilder->VoidPtr;
1432 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1436 Value *Args = ArgsAlloca;
1440 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1441 Builder.restoreIP(CurrentIP);
1444 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1446 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1448 Builder.CreateStore(V, StoreAddress);
1452 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1453 : Builder.getInt32(1);
1456 Value *Parallel60CallArgs[] = {
1460 NumThreads ? NumThreads : Builder.getInt32(-1),
1461 Builder.getInt32(-1),
1465 Builder.getInt64(NumCapturedVars),
1466 Builder.getInt32(0)};
1474 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1477 Builder.SetInsertPoint(PrivTID);
1479 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1486 I->eraseFromParent();
1509 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1517 F->addMetadata(LLVMContext::MD_callback,
1526 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1529 "Expected at least tid and bounded tid as arguments");
1530 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1533 CI->
getParent()->setName(
"omp_parallel");
1534 Builder.SetInsertPoint(CI);
1537 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1541 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1543 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1550 auto PtrTy = OMPIRBuilder->VoidPtr;
1551 if (IfCondition && NumCapturedVars == 0) {
1559 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1562 Builder.SetInsertPoint(PrivTID);
1564 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1571 I->eraseFromParent();
1579 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1588 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1589 (ProcBind != OMP_PROC_BIND_default);
1596 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1600 if (NumThreads && !
Config.isTargetDevice()) {
1603 Builder.CreateIntCast(NumThreads, Int32,
false)};
1608 if (ProcBind != OMP_PROC_BIND_default) {
1612 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1634 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1637 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1640 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1644 PointerType ::get(
M.getContext(), 0),
1645 "zero.addr.ascast");
1669 if (IP.getBlock()->end() == IP.getPoint()) {
1675 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1676 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1677 "Unexpected insertion point for finalization call!");
1689 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1695 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1713 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1716 assert(BodyGenCB &&
"Expected body generation callback!");
1718 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1721 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1724 if (
Config.isTargetDevice()) {
1727 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1729 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1730 ThreadID, ToBeDeletedVec);
1736 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1738 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1760 ".omp_par", ArgsInZeroAddressSpace);
1765 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1767 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1772 return GV->getValueType() == OpenMPIRBuilder::Ident;
1777 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1783 if (&V == TIDAddr || &V == ZeroAddr) {
1789 for (
Use &U : V.uses())
1791 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1801 if (!V.getType()->isPointerTy()) {
1805 Builder.restoreIP(OuterAllocaIP);
1807 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1811 Builder.SetInsertPoint(InsertBB,
1816 Builder.restoreIP(InnerAllocaIP);
1817 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1820 Value *ReplacementValue =
nullptr;
1823 ReplacementValue = PrivTID;
1826 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
1834 assert(ReplacementValue &&
1835 "Expected copy/create callback to set replacement value!");
1836 if (ReplacementValue == &V)
1841 UPtr->set(ReplacementValue);
1866 for (
Value *Output : Outputs)
1870 "OpenMP outlining should not produce live-out values!");
1872 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1874 for (
auto *BB : Blocks)
1875 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1883 assert(FiniInfo.DK == OMPD_parallel &&
1884 "Unexpected finalization stack state!");
1895 Builder.CreateBr(*FiniBBOrErr);
1899 Term->eraseFromParent();
1905 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1906 UI->eraseFromParent();
1973 if (Dependencies.
empty())
1993 Type *DependInfo = OMPBuilder.DependInfo;
1996 Value *DepArray =
nullptr;
1998 Builder.SetInsertPoint(
2002 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2004 Builder.restoreIP(OldIP);
2006 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2008 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2010 Value *Addr = Builder.CreateStructGEP(
2012 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2013 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
2014 Builder.CreateStore(DepValPtr, Addr);
2017 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2018 Builder.CreateStore(
2019 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
2022 Value *Flags = Builder.CreateStructGEP(
2024 static_cast<unsigned int>(RTLDependInfoFields::Flags));
2025 Builder.CreateStore(
2026 ConstantInt::get(Builder.getInt8Ty(),
2027 static_cast<unsigned int>(Dep.DepKind)),
2034Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2036 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2051 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2055 "omp_taskloop_dup",
M);
2058 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2059 DestTaskArg->
setName(
"dest_task");
2060 SrcTaskArg->
setName(
"src_task");
2061 LastprivateFlagArg->
setName(
"lastprivate_flag");
2063 IRBuilderBase::InsertPointGuard Guard(
Builder);
2067 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2068 Type *TaskWithPrivatesTy =
2071 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2073 PrivatesTy, TaskPrivates,
2078 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2079 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2081 DestTaskContextPtr->
setName(
"destPtr");
2082 SrcTaskContextPtr->
setName(
"srcPtr");
2087 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2088 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2089 if (!AfterIPOrError)
2091 Builder.restoreIP(*AfterIPOrError);
2101 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2103 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2105 Value *TaskContextStructPtrVal) {
2110 uint32_t SrcLocStrSize;
2126 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
2129 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2134 llvm::CanonicalLoopInfo *CLI = result.
get();
2136 OI.
EntryBB = TaskloopAllocaBB;
2137 OI.OuterAllocaBB = AllocaIP.getBlock();
2138 OI.ExitBB = TaskloopExitBB;
2144 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2146 TaskloopAllocaIP,
"lb",
false,
true);
2148 TaskloopAllocaIP,
"ub",
false,
true);
2150 TaskloopAllocaIP,
"step",
false,
true);
2153 OI.Inputs.insert(FakeLB);
2154 OI.Inputs.insert(FakeUB);
2155 OI.Inputs.insert(FakeStep);
2156 if (TaskContextStructPtrVal)
2157 OI.Inputs.insert(TaskContextStructPtrVal);
2158 assert(((TaskContextStructPtrVal && DupCB) ||
2159 (!TaskContextStructPtrVal && !DupCB)) &&
2160 "Task context struct ptr and duplication callback must be both set "
2166 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2170 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2171 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2174 if (!TaskDupFnOrErr) {
2177 Value *TaskDupFn = *TaskDupFnOrErr;
2179 OI.PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2180 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2181 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2182 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2183 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2185 assert(OutlinedFn.hasOneUse() &&
2186 "there must be a single user for the outlined function");
2192 IRBuilderBase::InsertPoint CurrentIp =
Builder.saveIP();
2194 Value *CastedLBVal =
2195 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2196 Value *CastedUBVal =
2197 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2198 Value *CastedStepVal =
2199 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2202 Builder.SetInsertPoint(StaleCI);
2215 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2236 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2238 AllocaInst *ArgStructAlloca =
2240 assert(ArgStructAlloca &&
2241 "Unable to find the alloca instruction corresponding to arguments "
2242 "for extracted function");
2243 std::optional<TypeSize> ArgAllocSize =
2246 "Unable to determine size of arguments for extracted function");
2247 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2252 CallInst *TaskData =
Builder.CreateCall(
2253 TaskAllocFn, {Ident, ThreadID,
Flags,
2254 TaskSize, SharedsSize,
2259 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2260 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2265 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2268 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2271 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2277 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2283 Value *GrainSizeVal =
2284 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2286 Value *TaskDup = TaskDupFn;
2288 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2289 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2294 Builder.CreateCall(TaskloopFn, Args);
2301 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2306 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2308 LoadInst *SharedsOutlined =
2309 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2310 OutlinedFn.getArg(1)->replaceUsesWithIf(
2312 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2315 Type *IVTy =
IV->getType();
2321 Value *TaskLB =
nullptr;
2322 Value *TaskUB =
nullptr;
2323 Value *LoadTaskLB =
nullptr;
2324 Value *LoadTaskUB =
nullptr;
2325 for (Instruction &
I : *TaskloopAllocaBB) {
2326 if (
I.getOpcode() == Instruction::GetElementPtr) {
2329 switch (CI->getZExtValue()) {
2338 }
else if (
I.getOpcode() == Instruction::Load) {
2340 if (
Load.getPointerOperand() == TaskLB) {
2341 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2343 }
else if (
Load.getPointerOperand() == TaskUB) {
2344 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2350 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2352 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2353 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2354 Value *TripCountMinusOne =
2355 Builder.CreateSDiv(
Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
2356 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2357 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2358 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2360 CLI->setTripCount(CastedTripCount);
2362 Builder.SetInsertPoint(CLI->getBody(),
2363 CLI->getBody()->getFirstInsertionPt());
2365 if (NumOfCollapseLoops > 1) {
2371 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2374 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2375 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2376 User *IVUser = IVUse->getUser();
2378 if (
Op->getOpcode() == Instruction::URem ||
2379 Op->getOpcode() == Instruction::UDiv) {
2384 for (User *User : UsersToReplace) {
2385 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2402 assert(CLI->getIndVar()->getNumUses() == 3 &&
2403 "Canonical loop should have exactly three uses of the ind var");
2404 for (User *IVUser : CLI->getIndVar()->users()) {
2406 if (
Mul->getOpcode() == Instruction::Mul) {
2407 for (User *MulUser :
Mul->users()) {
2409 if (
Add->getOpcode() == Instruction::Add) {
2410 Add->setOperand(1, CastedTaskLB);
2419 FakeLB->replaceAllUsesWith(CastedLBVal);
2420 FakeUB->replaceAllUsesWith(CastedUBVal);
2421 FakeStep->replaceAllUsesWith(CastedStepVal);
2423 I->eraseFromParent();
2428 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2434 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2443 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2475 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
2486 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2488 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2489 Affinities, Mergeable, Priority, EventHandle,
2490 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
2492 assert(OutlinedFn.hasOneUse() &&
2493 "there must be a single user for the outlined function");
2498 bool HasShareds = StaleCI->
arg_size() > 1;
2499 Builder.SetInsertPoint(StaleCI);
2524 Flags =
Builder.CreateOr(FinalFlag, Flags);
2537 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2546 assert(ArgStructAlloca &&
2547 "Unable to find the alloca instruction corresponding to arguments "
2548 "for extracted function");
2549 std::optional<TypeSize> ArgAllocSize =
2552 "Unable to determine size of arguments for extracted function");
2553 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2559 TaskAllocFn, {Ident, ThreadID, Flags,
2560 TaskSize, SharedsSize,
2563 if (Affinities.
Count && Affinities.
Info) {
2565 OMPRTL___kmpc_omp_reg_task_with_affinity);
2576 OMPRTL___kmpc_task_allow_completion_event);
2580 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2582 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2583 Builder.CreateStore(EventVal, EventHandleAddr);
2589 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2590 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2608 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2611 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2613 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2616 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2617 PriorityData, {Zero, Zero});
2618 Builder.CreateStore(Priority, CmplrData);
2645 Builder.GetInsertPoint()->getParent()->getTerminator();
2646 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2647 Builder.SetInsertPoint(IfTerminator);
2650 Builder.SetInsertPoint(ElseTI);
2652 if (Dependencies.
size()) {
2657 {Ident, ThreadID,
Builder.getInt32(Dependencies.
size()), DepArray,
2658 ConstantInt::get(
Builder.getInt32Ty(), 0),
2673 Builder.SetInsertPoint(ThenTI);
2676 if (Dependencies.
size()) {
2681 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
2682 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
2693 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2695 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2696 OutlinedFn.getArg(1)->replaceUsesWithIf(
2697 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2701 I->eraseFromParent();
2705 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2731 Builder.SetInsertPoint(TaskgroupExitBB);
2774 unsigned CaseNumber = 0;
2775 for (
auto SectionCB : SectionCBs) {
2777 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2779 Builder.SetInsertPoint(CaseBB);
2782 CaseEndBr->getIterator()}))
2793 Value *LB = ConstantInt::get(I32Ty, 0);
2794 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2795 Value *ST = ConstantInt::get(I32Ty, 1);
2797 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2802 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2803 WorksharingLoopType::ForStaticLoop, !IsNowait);
2809 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2813 assert(FiniInfo.DK == OMPD_sections &&
2814 "Unexpected finalization stack state!");
2815 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
2829 if (IP.getBlock()->end() != IP.getPoint())
2840 auto *CaseBB =
Loc.IP.getBlock();
2841 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2842 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2848 Directive OMPD = Directive::OMPD_sections;
2851 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2862Value *OpenMPIRBuilder::getGPUThreadID() {
2865 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2869Value *OpenMPIRBuilder::getGPUWarpSize() {
2874Value *OpenMPIRBuilder::getNVPTXWarpID() {
2875 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2876 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2879Value *OpenMPIRBuilder::getNVPTXLaneID() {
2880 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2881 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2882 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2883 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
2890 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
2891 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
2892 assert(FromSize > 0 &&
"From size must be greater than zero");
2893 assert(ToSize > 0 &&
"To size must be greater than zero");
2894 if (FromType == ToType)
2896 if (FromSize == ToSize)
2897 return Builder.CreateBitCast(From, ToType);
2899 return Builder.CreateIntCast(From, ToType,
true);
2905 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2906 CastItem,
Builder.getPtrTy(0));
2907 Builder.CreateStore(From, ValCastItem);
2908 return Builder.CreateLoad(ToType, CastItem);
2915 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
2916 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2920 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2922 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
2924 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2925 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2926 Value *WarpSizeCast =
2928 Value *ShuffleCall =
2930 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2937 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
2949 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
2950 Value *ElemPtr = DstAddr;
2951 Value *Ptr = SrcAddr;
2952 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2956 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2959 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2960 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2964 if ((
Size / IntSize) > 1) {
2965 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2966 SrcAddrGEP,
Builder.getPtrTy());
2983 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
2985 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
2988 Value *Res = createRuntimeShuffleFunction(
2991 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
2993 Builder.CreateAlignedStore(Res, ElemPtr,
2994 M.getDataLayout().getPrefTypeAlign(ElemType));
2996 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2997 Value *LocalElemPtr =
2998 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3004 Value *Res = createRuntimeShuffleFunction(
3005 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3008 Res =
Builder.CreateTrunc(Res, ElemType);
3009 Builder.CreateStore(Res, ElemPtr);
3010 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3012 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3018Error OpenMPIRBuilder::emitReductionListCopy(
3023 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3024 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3028 for (
auto En :
enumerate(ReductionInfos)) {
3030 Value *SrcElementAddr =
nullptr;
3031 AllocaInst *DestAlloca =
nullptr;
3032 Value *DestElementAddr =
nullptr;
3033 Value *DestElementPtrAddr =
nullptr;
3035 bool ShuffleInElement =
false;
3038 bool UpdateDestListPtr =
false;
3042 ReductionArrayTy, SrcBase,
3043 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3044 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3048 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3049 ReductionArrayTy, DestBase,
3050 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3051 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3057 Type *DestAllocaType =
3058 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3059 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3060 ".omp.reduction.element");
3062 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3063 DestElementAddr = DestAlloca;
3066 DestElementAddr->
getName() +
".ascast");
3068 ShuffleInElement =
true;
3069 UpdateDestListPtr =
true;
3081 if (ShuffleInElement) {
3082 Type *ShuffleType = RI.ElementType;
3083 Value *ShuffleSrcAddr = SrcElementAddr;
3084 Value *ShuffleDestAddr = DestElementAddr;
3085 AllocaInst *LocalStorage =
nullptr;
3088 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3089 assert(RI.ByRefAllocatedType &&
3090 "Expected by-ref allocated type to be set");
3095 ShuffleType = RI.ByRefElementType;
3098 RI.DataPtrPtrGen(
Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3101 return GenResult.takeError();
3103 ShuffleSrcAddr =
Builder.CreateLoad(
Builder.getPtrTy(), ShuffleSrcAddr);
3109 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3111 ShuffleDestAddr = LocalStorage;
3115 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3116 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3120 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3121 DestAlloca,
Builder.getPtrTy(),
".ascast");
3124 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3125 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3128 return GenResult.takeError();
3131 switch (RI.EvaluationKind) {
3133 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3135 Builder.CreateStore(Elem, DestElementAddr);
3139 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3140 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3142 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3144 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3146 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3148 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3149 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3150 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3151 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3152 Builder.CreateStore(SrcReal, DestRealPtr);
3153 Builder.CreateStore(SrcImg, DestImgPtr);
3158 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3160 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3161 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3173 if (UpdateDestListPtr) {
3174 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3175 DestElementAddr,
Builder.getPtrTy(),
3176 DestElementAddr->
getName() +
".ascast");
3177 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3184Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3188 LLVMContext &Ctx =
M.getContext();
3190 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3194 "_omp_reduction_inter_warp_copy_func", &
M);
3199 Builder.SetInsertPoint(EntryBB);
3216 StringRef TransferMediumName =
3217 "__openmp_nvptx_data_transfer_temporary_storage";
3218 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3219 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3221 if (!TransferMedium) {
3222 TransferMedium =
new GlobalVariable(
3230 Value *GPUThreadID = getGPUThreadID();
3232 Value *LaneID = getNVPTXLaneID();
3234 Value *WarpID = getNVPTXWarpID();
3238 Builder.GetInsertBlock()->getFirstInsertionPt());
3242 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3243 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3244 AllocaInst *NumWarpsAlloca =
3245 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3246 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3247 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3248 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3249 NumWarpsAlloca,
Builder.getPtrTy(0),
3250 NumWarpsAlloca->
getName() +
".ascast");
3251 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3252 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3261 for (
auto En :
enumerate(ReductionInfos)) {
3267 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3268 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3269 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3270 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3273 unsigned NumIters = RealTySize / TySize;
3276 Value *Cnt =
nullptr;
3277 Value *CntAddr =
nullptr;
3284 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3286 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3287 CntAddr->
getName() +
".ascast");
3299 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3300 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3307 omp::Directive::OMPD_unknown,
3311 return BarrierIP1.takeError();
3317 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3318 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3322 auto *RedListArrayTy =
3325 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3327 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3328 {ConstantInt::get(IndexTy, 0),
3329 ConstantInt::get(IndexTy, En.index())});
3335 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3338 return GenRes.takeError();
3349 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3354 Builder.CreateStore(Elem, MediumPtr,
3366 omp::Directive::OMPD_unknown,
3370 return BarrierIP2.takeError();
3377 Value *NumWarpsVal =
3380 Value *IsActiveThread =
3381 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3382 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3389 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3391 Value *TargetElemPtrPtr =
3392 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3393 {ConstantInt::get(IndexTy, 0),
3394 ConstantInt::get(IndexTy, En.index())});
3395 Value *TargetElemPtrVal =
3397 Value *TargetElemPtr = TargetElemPtrVal;
3401 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3404 return GenRes.takeError();
3406 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3414 Value *SrcMediumValue =
3415 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3416 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3426 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3427 Builder.CreateStore(Cnt, CntAddr,
false);
3429 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3433 RealTySize %= TySize;
3443Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3446 LLVMContext &Ctx =
M.getContext();
3447 FunctionType *FuncTy =
3449 {Builder.getPtrTy(), Builder.getInt16Ty(),
3450 Builder.getInt16Ty(), Builder.getInt16Ty()},
3454 "_omp_reduction_shuffle_and_reduce_func", &
M);
3464 Builder.SetInsertPoint(EntryBB);
3475 Type *ReduceListArgType = ReduceListArg->
getType();
3479 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3480 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3481 LaneIDArg->
getName() +
".addr");
3483 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3484 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3485 AlgoVerArg->
getName() +
".addr");
3492 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3494 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3495 ReduceListAlloca, ReduceListArgType,
3496 ReduceListAlloca->
getName() +
".ascast");
3497 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3498 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3499 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3500 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3501 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3502 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3503 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3504 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3505 RemoteReductionListAlloca,
Builder.getPtrTy(),
3506 RemoteReductionListAlloca->
getName() +
".ascast");
3508 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3509 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3510 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3511 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3513 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3514 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3515 Value *RemoteLaneOffset =
3516 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3517 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3524 Error EmitRedLsCpRes = emitReductionListCopy(
3526 ReduceList, RemoteListAddrCast, IsByRef,
3527 {RemoteLaneOffset,
nullptr,
nullptr});
3530 return EmitRedLsCpRes;
3555 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3560 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3561 Value *RemoteOffsetComp =
3563 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3564 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3565 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3571 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3573 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3574 ReduceList,
Builder.getPtrTy());
3575 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3576 RemoteListAddrCast,
Builder.getPtrTy());
3578 ->addFnAttr(Attribute::NoUnwind);
3589 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3590 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3595 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3599 EmitRedLsCpRes = emitReductionListCopy(
3601 RemoteListAddrCast, ReduceList, IsByRef);
3604 return EmitRedLsCpRes;
3619OpenMPIRBuilder::generateReductionDescriptor(
3621 Type *DescriptorType,
3627 Value *DescriptorSize =
3628 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3630 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3631 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3635 Value *DataPtrField;
3637 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3640 return GenResult.takeError();
3643 DataPtr,
Builder.getPtrTy(),
".ascast"),
3649Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3653 LLVMContext &Ctx =
M.getContext();
3656 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3660 "_omp_reduction_list_to_global_copy_func", &
M);
3667 Builder.SetInsertPoint(EntryBlock);
3677 BufferArg->
getName() +
".addr");
3681 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3682 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3683 BufferArgAlloca,
Builder.getPtrTy(),
3684 BufferArgAlloca->
getName() +
".ascast");
3685 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3686 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3687 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3688 ReduceListArgAlloca,
Builder.getPtrTy(),
3689 ReduceListArgAlloca->
getName() +
".ascast");
3691 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3692 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3693 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3695 Value *LocalReduceList =
3697 Value *BufferArgVal =
3701 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3702 for (
auto En :
enumerate(ReductionInfos)) {
3704 auto *RedListArrayTy =
3708 RedListArrayTy, LocalReduceList,
3709 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3715 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3717 ReductionsBufferTy, BufferVD, 0, En.index());
3719 switch (RI.EvaluationKind) {
3721 Value *TargetElement;
3723 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3724 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3727 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3730 return GenResult.takeError();
3733 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3736 Builder.CreateStore(TargetElement, GlobVal);
3740 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3741 RI.ElementType, ElemPtr, 0, 0,
".realp");
3743 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3745 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3747 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3749 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3750 RI.ElementType, GlobVal, 0, 0,
".realp");
3751 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3752 RI.ElementType, GlobVal, 0, 1,
".imagp");
3753 Builder.CreateStore(SrcReal, DestRealPtr);
3754 Builder.CreateStore(SrcImg, DestImgPtr);
3759 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
3761 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3762 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3773Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
3777 LLVMContext &Ctx =
M.getContext();
3780 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3784 "_omp_reduction_list_to_global_reduce_func", &
M);
3791 Builder.SetInsertPoint(EntryBlock);
3801 BufferArg->
getName() +
".addr");
3805 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3806 auto *RedListArrayTy =
3811 Value *LocalReduceList =
3812 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3816 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3817 BufferArgAlloca,
Builder.getPtrTy(),
3818 BufferArgAlloca->
getName() +
".ascast");
3819 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3820 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3821 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3822 ReduceListArgAlloca,
Builder.getPtrTy(),
3823 ReduceListArgAlloca->
getName() +
".ascast");
3824 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3825 LocalReduceList,
Builder.getPtrTy(),
3826 LocalReduceList->
getName() +
".ascast");
3828 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3829 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3830 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3835 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3836 for (
auto En :
enumerate(ReductionInfos)) {
3840 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3844 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
3845 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3846 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3851 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
3852 RedListArrayTy, LocalReduceListAddrCast,
3853 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3855 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3857 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3858 ReductionsBufferTy, BufferVD, 0, En.index());
3860 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3864 Value *SrcElementPtrPtr =
3865 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3866 {ConstantInt::get(IndexTy, 0),
3867 ConstantInt::get(IndexTy, En.index())});
3868 Value *SrcDescriptorAddr =
3873 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
3874 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3877 return GenResult.takeError();
3879 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3881 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3889 ->addFnAttr(Attribute::NoUnwind);
3895Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
3899 LLVMContext &Ctx =
M.getContext();
3902 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3906 "_omp_reduction_global_to_list_copy_func", &
M);
3913 Builder.SetInsertPoint(EntryBlock);
3923 BufferArg->
getName() +
".addr");
3927 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3928 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3929 BufferArgAlloca,
Builder.getPtrTy(),
3930 BufferArgAlloca->
getName() +
".ascast");
3931 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3932 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3933 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3934 ReduceListArgAlloca,
Builder.getPtrTy(),
3935 ReduceListArgAlloca->
getName() +
".ascast");
3936 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3937 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3938 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3940 Value *LocalReduceList =
3945 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3946 for (
auto En :
enumerate(ReductionInfos)) {
3947 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3948 auto *RedListArrayTy =
3952 RedListArrayTy, LocalReduceList,
3953 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3958 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3959 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3960 ReductionsBufferTy, BufferVD, 0, En.index());
3966 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3972 return GenResult.takeError();
3977 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
3978 Builder.CreateStore(TargetElement, ElemPtr);
3982 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3991 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3993 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3995 Builder.CreateStore(SrcReal, DestRealPtr);
3996 Builder.CreateStore(SrcImg, DestImgPtr);
4003 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4004 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4016Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4020 LLVMContext &Ctx =
M.getContext();
4023 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4027 "_omp_reduction_global_to_list_reduce_func", &
M);
4034 Builder.SetInsertPoint(EntryBlock);
4044 BufferArg->
getName() +
".addr");
4048 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4054 Value *LocalReduceList =
4055 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4059 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4060 BufferArgAlloca,
Builder.getPtrTy(),
4061 BufferArgAlloca->
getName() +
".ascast");
4062 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4063 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4064 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4065 ReduceListArgAlloca,
Builder.getPtrTy(),
4066 ReduceListArgAlloca->
getName() +
".ascast");
4067 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4068 LocalReduceList,
Builder.getPtrTy(),
4069 LocalReduceList->
getName() +
".ascast");
4071 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4072 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4073 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4078 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4079 for (
auto En :
enumerate(ReductionInfos)) {
4083 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4087 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4088 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4089 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4094 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4095 RedListArrayTy, ReductionList,
4096 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4099 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4100 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4101 ReductionsBufferTy, BufferVD, 0, En.index());
4103 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4105 Value *ReduceListVal =
4107 Value *SrcElementPtrPtr =
4108 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4109 {ConstantInt::get(IndexTy, 0),
4110 ConstantInt::get(IndexTy, En.index())});
4111 Value *SrcDescriptorAddr =
4116 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4117 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4119 return GenResult.takeError();
4121 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4123 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4131 ->addFnAttr(Attribute::NoUnwind);
4137std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4138 std::string Suffix =
4140 return (Name + Suffix).str();
4143Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4146 AttributeList FuncAttrs) {
4148 {Builder.getPtrTy(), Builder.getPtrTy()},
4150 std::string
Name = getReductionFuncName(ReducerName);
4158 Builder.SetInsertPoint(EntryBB);
4162 Value *LHSArrayPtr =
nullptr;
4163 Value *RHSArrayPtr =
nullptr;
4170 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4172 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4173 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4174 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4175 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4176 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4177 Builder.CreateStore(Arg0, LHSAddrCast);
4178 Builder.CreateStore(Arg1, RHSAddrCast);
4179 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4180 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4184 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4186 for (
auto En :
enumerate(ReductionInfos)) {
4189 RedArrayTy, RHSArrayPtr,
4190 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4192 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4193 RHSI8Ptr, RI.PrivateVariable->getType(),
4194 RHSI8Ptr->
getName() +
".ascast");
4197 RedArrayTy, LHSArrayPtr,
4198 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4200 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4201 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4210 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4211 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4212 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4219 return AfterIP.takeError();
4220 if (!
Builder.GetInsertBlock())
4221 return ReductionFunc;
4225 if (!IsByRef.
empty() && !IsByRef[En.index()])
4226 Builder.CreateStore(Reduced, LHSPtr);
4231 for (
auto En :
enumerate(ReductionInfos)) {
4232 unsigned Index = En.index();
4234 Value *LHSFixupPtr, *RHSFixupPtr;
4235 Builder.restoreIP(RI.ReductionGenClang(
4236 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4241 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4246 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4260 return ReductionFunc;
4268 assert(RI.Variable &&
"expected non-null variable");
4269 assert(RI.PrivateVariable &&
"expected non-null private variable");
4270 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4271 "expected non-null reduction generator callback");
4274 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4275 "expected variables and their private equivalents to have the same "
4278 assert(RI.Variable->getType()->isPointerTy() &&
4279 "expected variables to be pointers");
4288 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4302 if (ReductionInfos.
size() == 0)
4312 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4316 AttributeList FuncAttrs;
4317 AttrBuilder AttrBldr(Ctx);
4319 AttrBldr.addAttribute(Attr);
4320 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4321 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4325 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4327 if (!ReductionResult)
4329 Function *ReductionFunc = *ReductionResult;
4333 if (GridValue.has_value())
4334 Config.setGridValue(GridValue.value());
4349 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4353 Value *ReductionListAlloca =
4354 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4355 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4356 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4359 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4360 for (
auto En :
enumerate(ReductionInfos)) {
4363 RedArrayTy, ReductionList,
4364 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4367 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4372 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4373 Builder.CreateStore(CastElem, ElemPtr);
4377 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4383 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4389 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4391 unsigned MaxDataSize = 0;
4393 for (
auto En :
enumerate(ReductionInfos)) {
4394 auto Size =
M.getDataLayout().getTypeStoreSize(En.value().ElementType);
4395 if (
Size > MaxDataSize)
4397 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4398 ? En.value().ByRefElementType
4399 : En.value().ElementType;
4402 Value *ReductionDataSize =
4403 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4404 if (!IsTeamsReduction) {
4405 Value *SarFuncCast =
4406 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4408 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4409 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4412 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4417 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4419 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4422 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4427 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4432 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4437 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4444 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4446 Value *Args3[] = {SrcLocInfo,
4447 KernelTeamsReductionPtr,
4448 Builder.getInt32(ReductionBufNum),
4459 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4476 for (
auto En :
enumerate(ReductionInfos)) {
4484 Value *LHSPtr, *RHSPtr;
4486 &LHSPtr, &RHSPtr, CurFunc));
4499 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4501 "red.value." +
Twine(En.index()));
4512 if (!IsByRef.
empty() && !IsByRef[En.index()])
4517 if (ContinuationBlock) {
4518 Builder.CreateBr(ContinuationBlock);
4519 Builder.SetInsertPoint(ContinuationBlock);
4521 Config.setEmitLLVMUsed();
4532 ".omp.reduction.func", &M);
4542 Builder.SetInsertPoint(ReductionFuncBlock);
4543 Value *LHSArrayPtr =
nullptr;
4544 Value *RHSArrayPtr =
nullptr;
4555 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4557 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4558 Value *LHSAddrCast =
4559 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4560 Value *RHSAddrCast =
4561 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4562 Builder.CreateStore(Arg0, LHSAddrCast);
4563 Builder.CreateStore(Arg1, RHSAddrCast);
4564 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4565 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4567 LHSArrayPtr = ReductionFunc->
getArg(0);
4568 RHSArrayPtr = ReductionFunc->
getArg(1);
4571 unsigned NumReductions = ReductionInfos.
size();
4574 for (
auto En :
enumerate(ReductionInfos)) {
4576 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4577 RedArrayTy, LHSArrayPtr, 0, En.index());
4578 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4579 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4582 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4583 RedArrayTy, RHSArrayPtr, 0, En.index());
4584 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4585 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4594 Builder.restoreIP(*AfterIP);
4596 if (!Builder.GetInsertBlock())
4600 if (!IsByRef[En.index()])
4601 Builder.CreateStore(Reduced, LHSPtr);
4603 Builder.CreateRetVoid();
4610 bool IsNoWait,
bool IsTeamsReduction) {
4614 IsByRef, IsNoWait, IsTeamsReduction);
4621 if (ReductionInfos.
size() == 0)
4631 unsigned NumReductions = ReductionInfos.
size();
4634 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4636 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4638 for (
auto En :
enumerate(ReductionInfos)) {
4639 unsigned Index = En.index();
4641 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4642 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4649 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4659 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4664 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4665 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4667 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4669 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4670 : RuntimeFunction::OMPRTL___kmpc_reduce);
4673 {Ident, ThreadId, NumVariables, RedArraySize,
4674 RedArray, ReductionFunc, Lock},
4685 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4686 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4687 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4692 Builder.SetInsertPoint(NonAtomicRedBlock);
4693 for (
auto En :
enumerate(ReductionInfos)) {
4699 if (!IsByRef[En.index()]) {
4701 "red.value." +
Twine(En.index()));
4703 Value *PrivateRedValue =
4705 "red.private.value." +
Twine(En.index()));
4713 if (!
Builder.GetInsertBlock())
4716 if (!IsByRef[En.index()])
4720 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4721 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4723 Builder.CreateBr(ContinuationBlock);
4728 Builder.SetInsertPoint(AtomicRedBlock);
4729 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4736 if (!
Builder.GetInsertBlock())
4739 Builder.CreateBr(ContinuationBlock);
4752 if (!
Builder.GetInsertBlock())
4755 Builder.SetInsertPoint(ContinuationBlock);
4766 Directive OMPD = Directive::OMPD_master;
4771 Value *Args[] = {Ident, ThreadId};
4779 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4790 Directive OMPD = Directive::OMPD_masked;
4796 Value *ArgsEnd[] = {Ident, ThreadId};
4804 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4814 Call->setDoesNotThrow();
4829 bool IsInclusive,
ScanInfo *ScanRedInfo) {
4831 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4832 ScanVarsType, ScanRedInfo);
4843 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4846 Type *DestTy = ScanVarsType[i];
4847 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4850 Builder.CreateStore(Src, Val);
4855 Builder.GetInsertBlock()->getParent());
4858 IV = ScanRedInfo->
IV;
4861 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4864 Type *DestTy = ScanVarsType[i];
4866 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4868 Builder.CreateStore(Src, ScanVars[i]);
4882 Builder.GetInsertBlock()->getParent());
4887Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4891 Builder.restoreIP(AllocaIP);
4893 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4895 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4902 Builder.restoreIP(CodeGenIP);
4904 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
4905 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4909 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4910 AllocSpan,
nullptr,
"arr");
4911 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
4929 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
4938Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4944 Value *PrivateVar = RedInfo.PrivateVariable;
4945 Value *OrigVar = RedInfo.Variable;
4949 Type *SrcTy = RedInfo.ElementType;
4954 Builder.CreateStore(Src, OrigVar);
4977 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5002 Builder.GetInsertBlock()->getModule(),
5009 Builder.GetInsertBlock()->getModule(),
5015 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5016 Builder.SetInsertPoint(InputBB);
5019 Builder.SetInsertPoint(LoopBB);
5035 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5037 Builder.SetInsertPoint(InnerLoopBB);
5041 Value *ReductionVal = RedInfo.PrivateVariable;
5044 Type *DestTy = RedInfo.ElementType;
5047 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5050 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5055 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5058 Builder.CreateStore(Result, LHSPtr);
5061 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5063 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5064 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5067 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5073 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5094 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5101Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5113 Error Err = InputLoopGen();
5124 Error Err = ScanLoopGen(Builder.saveIP());
5131void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5168 Builder.SetInsertPoint(Preheader);
5171 Builder.SetInsertPoint(Header);
5172 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5173 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5178 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5179 Builder.CreateCondBr(Cmp, Body, Exit);
5184 Builder.SetInsertPoint(Latch);
5186 "omp_" + Name +
".next",
true);
5197 CL->Header = Header;
5216 NextBB, NextBB, Name);
5248 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5257 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5258 ScanRedInfo->
Span = TripCount;
5264 ScanRedInfo->
IV =
IV;
5265 createScanBBs(ScanRedInfo);
5268 assert(Terminator->getNumSuccessors() == 1);
5269 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5272 Builder.GetInsertBlock()->getParent());
5275 Builder.GetInsertBlock()->getParent());
5276 Builder.CreateBr(ContinueBlock);
5282 const auto &&InputLoopGen = [&]() ->
Error {
5284 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5285 ComputeIP, Name,
true, ScanRedInfo);
5289 Builder.restoreIP((*LoopInfo)->getAfterIP());
5295 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5299 Builder.restoreIP((*LoopInfo)->getAfterIP());
5303 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5311 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5321 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5322 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5326 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5342 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5345 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5349 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5354 Value *CountIfLooping;
5355 if (InclusiveStop) {
5356 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5362 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5365 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5366 "omp_" + Name +
".tripcount");
5371 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5378 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5385 ScanRedInfo->
IV = IndVar;
5386 return BodyGenCB(
Builder.saveIP(), IndVar);
5392 Builder.getCurrentDebugLocation());
5403 unsigned Bitwidth = Ty->getIntegerBitWidth();
5406 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5409 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5419 unsigned Bitwidth = Ty->getIntegerBitWidth();
5422 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5425 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5433 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5435 "Require dedicated allocate IP");
5441 uint32_t SrcLocStrSize;
5447 Type *IVTy =
IV->getType();
5448 FunctionCallee StaticInit =
5449 LoopType == WorksharingLoopType::DistributeForStaticLoop
5452 FunctionCallee StaticFini =
5456 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5459 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5460 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5461 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5462 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5471 Constant *One = ConstantInt::get(IVTy, 1);
5472 Builder.CreateStore(Zero, PLowerBound);
5474 Builder.CreateStore(UpperBound, PUpperBound);
5475 Builder.CreateStore(One, PStride);
5480 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5481 ? OMPScheduleType::OrderedDistribute
5484 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5488 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5489 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5492 PLowerBound, PUpperBound});
5493 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5494 Value *PDistUpperBound =
5495 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5496 Args.push_back(PDistUpperBound);
5501 BuildInitCall(SchedulingType,
Builder);
5502 if (HasDistSchedule &&
5503 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5504 Constant *DistScheduleSchedType = ConstantInt::get(
5509 BuildInitCall(DistScheduleSchedType,
Builder);
5511 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5512 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5513 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5514 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5515 CLI->setTripCount(TripCount);
5521 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5525 return Builder.CreateAdd(OldIV, LowerBound);
5537 omp::Directive::OMPD_for,
false,
5540 return BarrierIP.takeError();
5567 Reachable.insert(
Block);
5577 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5581OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5585 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5586 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5591 Type *IVTy =
IV->getType();
5593 "Max supported tripcount bitwidth is 64 bits");
5595 :
Type::getInt64Ty(Ctx);
5598 Constant *One = ConstantInt::get(InternalIVTy, 1);
5604 for (BasicBlock &BB : *
F)
5605 if (!BB.getTerminator())
5606 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5611 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5612 for (Instruction *
I : UIs)
5613 I->eraseFromParent();
5616 if (ChunkSize || DistScheduleChunkSize)
5621 FunctionCallee StaticInit =
5623 FunctionCallee StaticFini =
5629 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5630 Value *PLowerBound =
5631 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5632 Value *PUpperBound =
5633 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5634 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5643 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5644 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5645 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5646 "distschedulechunksize");
5647 Value *CastedTripCount =
5648 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5651 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5653 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5654 Builder.CreateStore(Zero, PLowerBound);
5655 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5656 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5658 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5659 Builder.CreateStore(UpperBound, PUpperBound);
5660 Builder.CreateStore(One, PStride);
5664 uint32_t SrcLocStrSize;
5668 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5669 PUpperBound, PStride, One,
5670 this](
Value *SchedulingType,
Value *ChunkSize,
5673 StaticInit, {SrcLoc, ThreadNum,
5674 SchedulingType, PLastIter,
5675 PLowerBound, PUpperBound,
5679 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5680 if (DistScheduleSchedType != OMPScheduleType::None &&
5681 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5682 SchedType != OMPScheduleType::OrderedDistribute) {
5686 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5690 Value *FirstChunkStart =
5691 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5692 Value *FirstChunkStop =
5693 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5694 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5696 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5697 Value *NextChunkStride =
5698 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5702 Value *DispatchCounter;
5710 DispatchCounter = Counter;
5713 FirstChunkStart, CastedTripCount, NextChunkStride,
5736 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
5737 Value *IsLastChunk =
5738 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5739 Value *CountUntilOrigTripCount =
5740 Builder.CreateSub(CastedTripCount, DispatchCounter);
5742 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5743 Value *BackcastedChunkTC =
5744 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5745 CLI->setTripCount(BackcastedChunkTC);
5750 Value *BackcastedDispatchCounter =
5751 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5752 CLI->mapIndVar([&](Instruction *) ->
Value * {
5754 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
5767 return AfterIP.takeError();
5782static FunctionCallee
5785 unsigned Bitwidth = Ty->getIntegerBitWidth();
5788 case WorksharingLoopType::ForStaticLoop:
5791 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5794 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5796 case WorksharingLoopType::DistributeStaticLoop:
5799 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5802 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5804 case WorksharingLoopType::DistributeForStaticLoop:
5807 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5810 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5813 if (Bitwidth != 32 && Bitwidth != 64) {
5825 Function &LoopBodyFn,
bool NoLoop) {
5836 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5837 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5838 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5839 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5844 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5845 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5849 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5850 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5851 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5852 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5853 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5855 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5879 Builder.restoreIP({Preheader, Preheader->
end()});
5882 Builder.CreateBr(CLI->
getExit());
5890 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5898 "Expected unique undroppable user of outlined function");
5900 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5902 "Expected outlined function call to be located in loop preheader");
5904 if (OutlinedFnCallInstruction->
arg_size() > 1)
5911 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5913 for (
auto &ToBeDeletedItem : ToBeDeleted)
5914 ToBeDeletedItem->eraseFromParent();
5921 uint32_t SrcLocStrSize;
5930 SmallVector<Instruction *, 4> ToBeDeleted;
5932 OI.OuterAllocaBB = AllocaIP.getBlock();
5955 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
5957 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5959 CodeExtractorAnalysisCache CEAC(*OuterFn);
5960 CodeExtractor Extractor(Blocks,
5973 SetVector<Value *> SinkingCands, HoistingCands;
5977 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5984 for (
auto Use :
Users) {
5986 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5987 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
5993 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6000 OI.PostOutlineCB = [=, ToBeDeletedVec =
6001 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6011 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6012 bool HasSimdModifier,
bool HasMonotonicModifier,
6013 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6015 Value *DistScheduleChunkSize) {
6016 if (
Config.isTargetDevice())
6017 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6019 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6020 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6022 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6023 OMPScheduleType::ModifierOrdered;
6025 if (HasDistSchedule) {
6026 DistScheduleSchedType = DistScheduleChunkSize
6027 ? OMPScheduleType::OrderedDistributeChunked
6028 : OMPScheduleType::OrderedDistribute;
6030 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6031 case OMPScheduleType::BaseStatic:
6032 case OMPScheduleType::BaseDistribute:
6033 assert((!ChunkSize || !DistScheduleChunkSize) &&
6034 "No chunk size with static-chunked schedule");
6035 if (IsOrdered && !HasDistSchedule)
6036 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6037 NeedsBarrier, ChunkSize);
6039 if (DistScheduleChunkSize)
6040 return applyStaticChunkedWorkshareLoop(
6041 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6042 DistScheduleChunkSize, DistScheduleSchedType);
6043 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6046 case OMPScheduleType::BaseStaticChunked:
6047 case OMPScheduleType::BaseDistributeChunked:
6048 if (IsOrdered && !HasDistSchedule)
6049 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6050 NeedsBarrier, ChunkSize);
6052 return applyStaticChunkedWorkshareLoop(
6053 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6054 DistScheduleChunkSize, DistScheduleSchedType);
6056 case OMPScheduleType::BaseRuntime:
6057 case OMPScheduleType::BaseAuto:
6058 case OMPScheduleType::BaseGreedy:
6059 case OMPScheduleType::BaseBalanced:
6060 case OMPScheduleType::BaseSteal:
6061 case OMPScheduleType::BaseRuntimeSimd:
6063 "schedule type does not support user-defined chunk sizes");
6065 case OMPScheduleType::BaseGuidedSimd:
6066 case OMPScheduleType::BaseDynamicChunked:
6067 case OMPScheduleType::BaseGuidedChunked:
6068 case OMPScheduleType::BaseGuidedIterativeChunked:
6069 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6070 case OMPScheduleType::BaseStaticBalancedChunked:
6071 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6072 NeedsBarrier, ChunkSize);
6085 unsigned Bitwidth = Ty->getIntegerBitWidth();
6088 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6091 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6099static FunctionCallee
6101 unsigned Bitwidth = Ty->getIntegerBitWidth();
6104 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6107 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6114static FunctionCallee
6116 unsigned Bitwidth = Ty->getIntegerBitWidth();
6119 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6122 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6127OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6130 bool NeedsBarrier,
Value *Chunk) {
6131 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6133 "Require dedicated allocate IP");
6135 "Require valid schedule type");
6137 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6138 OMPScheduleType::ModifierOrdered;
6143 uint32_t SrcLocStrSize;
6149 Type *IVTy =
IV->getType();
6154 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6156 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6157 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6158 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6159 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6168 Constant *One = ConstantInt::get(IVTy, 1);
6169 Builder.CreateStore(One, PLowerBound);
6171 Builder.CreateStore(UpperBound, PUpperBound);
6172 Builder.CreateStore(One, PStride);
6189 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6201 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6204 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6205 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6208 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6209 Builder.CreateCondBr(MoreWork, Header, Exit);
6215 PI->setIncomingBlock(0, OuterCond);
6216 PI->setIncomingValue(0, LowerBound);
6221 Br->setSuccessor(OuterCond);
6227 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6230 CI->setOperand(1, UpperBound);
6234 assert(BI->getSuccessor(1) == Exit);
6235 BI->setSuccessor(1, OuterCond);
6249 omp::Directive::OMPD_for,
false,
6252 return BarrierIP.takeError();
6271 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6276 if (BBsToErase.
count(UseInst->getParent()))
6283 while (BBsToErase.
remove_if(HasRemainingUses)) {
6294 assert(
Loops.size() >= 1 &&
"At least one loop required");
6295 size_t NumLoops =
Loops.size();
6299 return Loops.front();
6311 Loop->collectControlBlocks(OldControlBBs);
6315 if (ComputeIP.
isSet())
6322 Value *CollapsedTripCount =
nullptr;
6325 "All loops to collapse must be valid canonical loops");
6326 Value *OrigTripCount = L->getTripCount();
6327 if (!CollapsedTripCount) {
6328 CollapsedTripCount = OrigTripCount;
6333 CollapsedTripCount =
6334 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6340 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6346 Builder.restoreIP(Result->getBodyIP());
6348 Value *Leftover = Result->getIndVar();
6350 NewIndVars.
resize(NumLoops);
6351 for (
int i = NumLoops - 1; i >= 1; --i) {
6352 Value *OrigTripCount =
Loops[i]->getTripCount();
6354 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6355 NewIndVars[i] = NewIndVar;
6357 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6360 NewIndVars[0] = Leftover;
6369 BasicBlock *ContinueBlock = Result->getBody();
6371 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6378 ContinueBlock =
nullptr;
6379 ContinuePred = NextSrc;
6386 for (
size_t i = 0; i < NumLoops - 1; ++i)
6387 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6393 for (
size_t i = NumLoops - 1; i > 0; --i)
6394 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6397 ContinueWith(Result->getLatch(),
nullptr);
6404 for (
size_t i = 0; i < NumLoops; ++i)
6405 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6419std::vector<CanonicalLoopInfo *>
6423 "Must pass as many tile sizes as there are loops");
6424 int NumLoops =
Loops.size();
6425 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6437 Loop->collectControlBlocks(OldControlBBs);
6445 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6446 OrigTripCounts.
push_back(L->getTripCount());
6457 for (
int i = 0; i < NumLoops - 1; ++i) {
6470 for (
int i = 0; i < NumLoops; ++i) {
6472 Value *OrigTripCount = OrigTripCounts[i];
6485 Value *FloorTripOverflow =
6486 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6488 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6489 Value *FloorTripCount =
6490 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6491 "omp_floor" +
Twine(i) +
".tripcount",
true);
6494 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6500 std::vector<CanonicalLoopInfo *> Result;
6501 Result.reserve(NumLoops * 2);
6514 auto EmbeddNewLoop =
6515 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6518 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6523 Enter = EmbeddedLoop->
getBody();
6525 OutroInsertBefore = EmbeddedLoop->
getLatch();
6526 return EmbeddedLoop;
6530 const Twine &NameBase) {
6533 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6534 Result.push_back(EmbeddedLoop);
6538 EmbeddNewLoops(FloorCount,
"floor");
6544 for (
int i = 0; i < NumLoops; ++i) {
6548 Value *FloorIsEpilogue =
6550 Value *TileTripCount =
6557 EmbeddNewLoops(TileCounts,
"tile");
6562 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6571 BodyEnter =
nullptr;
6572 BodyEntered = ExitBB;
6584 Builder.restoreIP(Result.back()->getBodyIP());
6585 for (
int i = 0; i < NumLoops; ++i) {
6588 Value *OrigIndVar = OrigIndVars[i];
6616 if (Properties.
empty())
6639 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6643 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6651 if (
I.mayReadOrWriteMemory()) {
6655 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6669 Loop->collectControlBlocks(oldControlBBs);
6674 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6675 origTripCounts.
push_back(L->getTripCount());
6684 Builder.SetInsertPoint(TCBlock);
6685 Value *fusedTripCount =
nullptr;
6687 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6688 Value *origTripCount = L->getTripCount();
6689 if (!fusedTripCount) {
6690 fusedTripCount = origTripCount;
6693 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
6694 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
6708 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6709 Loops[i]->getPreheader()->moveBefore(TCBlock);
6710 Loops[i]->getAfter()->moveBefore(TCBlock);
6714 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6726 for (
size_t i = 0; i <
Loops.size(); ++i) {
6728 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
6729 Builder.SetInsertPoint(condBlock);
6737 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6738 Builder.SetInsertPoint(condBBs[i]);
6739 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
6755 "omp.fused.pre_latch");
6788 const Twine &NamePrefix) {
6817 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6819 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
6822 Builder.SetInsertPoint(SplitBeforeIt);
6824 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6827 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6830 Builder.SetInsertPoint(ElseBlock);
6836 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
6838 ExistingBlocks.
append(L->block_begin(), L->block_end());
6844 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6846 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
6853 if (
Block == ThenBlock)
6854 NewBB->
setName(NamePrefix +
".if.else");
6857 VMap[
Block] = NewBB;
6865 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
6866 NamePrefix +
".pre_latch");
6870 L->addBasicBlockToLoop(ThenBlock, LI);
6876 if (TargetTriple.
isX86()) {
6877 if (Features.
lookup(
"avx512f"))
6879 else if (Features.
lookup(
"avx"))
6883 if (TargetTriple.
isPPC())
6885 if (TargetTriple.
isWasm())
6892 Value *IfCond, OrderKind Order,
6902 if (!BB.getTerminator())
6918 I->eraseFromParent();
6921 if (AlignedVars.
size()) {
6923 for (
auto &AlignedItem : AlignedVars) {
6924 Value *AlignedPtr = AlignedItem.first;
6925 Value *Alignment = AlignedItem.second;
6928 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
6936 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
6949 Reachable.insert(
Block);
6959 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
6975 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6977 if (Simdlen || Safelen) {
6981 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7007static std::unique_ptr<TargetMachine>
7011 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7012 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7023 std::nullopt, OptLevel));
7041 if (!BB.getTerminator())
7054 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7055 FAM.registerPass([&]() {
return TIRA; });
7069 I->eraseFromParent();
7072 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7077 nullptr, ORE,
static_cast<int>(OptLevel),
7098 <<
" Threshold=" << UP.
Threshold <<
"\n"
7101 <<
" PartialOptSizeThreshold="
7121 Ptr = Load->getPointerOperand();
7123 Ptr = Store->getPointerOperand();
7130 if (Alloca->getParent() == &
F->getEntryBlock())
7150 int MaxTripCount = 0;
7151 bool MaxOrZero =
false;
7152 unsigned TripMultiple = 0;
7155 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7156 unsigned Factor = UP.
Count;
7157 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7168 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7184 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7197 *UnrolledCLI =
Loop;
7202 "unrolling only makes sense with a factor of 2 or larger");
7204 Type *IndVarTy =
Loop->getIndVarType();
7211 std::vector<CanonicalLoopInfo *>
LoopNest =
7226 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7229 (*UnrolledCLI)->assertOK();
7247 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7266 if (!CPVars.
empty()) {
7271 Directive OMPD = Directive::OMPD_single;
7276 Value *Args[] = {Ident, ThreadId};
7285 if (
Error Err = FiniCB(IP))
7306 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7313 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7316 ConstantInt::get(Int64, 0), CPVars[
I],
7319 }
else if (!IsNowait) {
7322 omp::Directive::OMPD_unknown,
false,
7337 Directive OMPD = Directive::OMPD_critical;
7342 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7343 Value *Args[] = {Ident, ThreadId, LockVar};
7360 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7368 const Twine &Name,
bool IsDependSource) {
7372 "OpenMP runtime requires depend vec with i64 type");
7385 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7399 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7417 Directive OMPD = Directive::OMPD_ordered;
7426 Value *Args[] = {Ident, ThreadId};
7436 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7443 bool HasFinalize,
bool IsCancellable) {
7450 BasicBlock *EntryBB = Builder.GetInsertBlock();
7459 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7470 "Unexpected control flow graph state!!");
7472 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7474 return AfterIP.takeError();
7479 "Unexpected Insertion point location!");
7482 auto InsertBB = merged ? ExitPredBB : ExitBB;
7485 Builder.SetInsertPoint(InsertBB);
7487 return Builder.saveIP();
7491 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7493 if (!Conditional || !EntryCall)
7499 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7509 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7513 UI->eraseFromParent();
7521 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7529 "Unexpected finalization stack state!");
7532 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7534 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7535 return std::move(Err);
7539 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7549 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7583 "copyin.not.master.end");
7590 Builder.SetInsertPoint(OMP_Entry);
7591 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7592 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7593 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7594 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7596 Builder.SetInsertPoint(CopyBegin);
7613 Value *Args[] = {ThreadId,
Size, Allocator};
7630 Value *Args[] = {ThreadId, Addr, Allocator};
7638 Value *DependenceAddress,
bool HaveNowaitClause) {
7646 if (Device ==
nullptr)
7648 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
7649 if (NumDependences ==
nullptr) {
7650 NumDependences = ConstantInt::get(Int32, 0);
7654 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7656 Ident, ThreadId, InteropVar, InteropTypeVal,
7657 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
7666 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
7674 if (Device ==
nullptr)
7676 if (NumDependences ==
nullptr) {
7677 NumDependences = ConstantInt::get(Int32, 0);
7681 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7683 Ident, ThreadId, InteropVar, Device,
7684 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7693 Value *NumDependences,
7694 Value *DependenceAddress,
7695 bool HaveNowaitClause) {
7702 if (Device ==
nullptr)
7704 if (NumDependences ==
nullptr) {
7705 NumDependences = ConstantInt::get(Int32, 0);
7709 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7711 Ident, ThreadId, InteropVar, Device,
7712 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7742 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
7743 "expected num_threads and num_teams to be specified");
7762 const std::string DebugPrefix =
"_debug__";
7763 if (KernelName.
ends_with(DebugPrefix)) {
7764 KernelName = KernelName.
drop_back(DebugPrefix.length());
7765 Kernel =
M.getFunction(KernelName);
7771 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
7776 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
7777 if (MaxThreadsVal < 0)
7778 MaxThreadsVal = std::max(
7781 if (MaxThreadsVal > 0)
7794 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7797 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7798 Constant *DynamicEnvironmentInitializer =
7802 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7804 DL.getDefaultGlobalsAddressSpace());
7808 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7809 ? DynamicEnvironmentGV
7811 DynamicEnvironmentPtr);
7814 ConfigurationEnvironment, {
7815 UseGenericStateMachineVal,
7816 MayUseNestedParallelismVal,
7823 ReductionBufferLength,
7826 KernelEnvironment, {
7827 ConfigurationEnvironmentInitializer,
7831 std::string KernelEnvironmentName =
7832 (KernelName +
"_kernel_environment").str();
7835 KernelEnvironmentInitializer, KernelEnvironmentName,
7837 DL.getDefaultGlobalsAddressSpace());
7841 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7842 ? KernelEnvironmentGV
7844 KernelEnvironmentPtr);
7845 Value *KernelLaunchEnvironment =
7848 KernelLaunchEnvironment =
7849 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7850 ? KernelLaunchEnvironment
7851 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7852 KernelLaunchEnvParamTy);
7854 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7866 auto *UI =
Builder.CreateUnreachable();
7872 Builder.SetInsertPoint(WorkerExitBB);
7876 Builder.SetInsertPoint(CheckBBTI);
7877 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7879 CheckBBTI->eraseFromParent();
7880 UI->eraseFromParent();
7888 int32_t TeamsReductionDataSize,
7889 int32_t TeamsReductionBufferLength) {
7894 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7898 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7904 const std::string DebugPrefix =
"_debug__";
7906 KernelName = KernelName.
drop_back(DebugPrefix.length());
7907 auto *KernelEnvironmentGV =
7908 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7909 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7910 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
7912 KernelEnvironmentInitializer,
7913 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7915 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7917 KernelEnvironmentGV->setInitializer(NewInitializer);
7922 if (
Kernel.hasFnAttribute(Name)) {
7923 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7929std::pair<int32_t, int32_t>
7931 int32_t ThreadLimit =
7932 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7935 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7936 if (!Attr.isValid() || !Attr.isStringAttribute())
7937 return {0, ThreadLimit};
7938 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7941 return {0, ThreadLimit};
7942 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7948 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
7949 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
7950 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7952 return {0, ThreadLimit};
7958 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7961 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7969std::pair<int32_t, int32_t>
7972 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7976 int32_t LB, int32_t UB) {
7983 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7986void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7995 else if (
T.isNVPTX())
7997 else if (
T.isSPIRV())
8002Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8003 StringRef EntryFnIDName) {
8004 if (
Config.isTargetDevice()) {
8005 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8009 return new GlobalVariable(
8014Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8015 StringRef EntryFnName) {
8019 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8020 "Named kernel already exists?");
8021 return new GlobalVariable(
8034 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8038 OutlinedFn = *CBResult;
8040 OutlinedFn =
nullptr;
8046 if (!IsOffloadEntry)
8049 std::string EntryFnIDName =
8051 ? std::string(EntryFnName)
8055 EntryFnName, EntryFnIDName);
8063 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8064 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8065 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8067 EntryInfo, EntryAddr, OutlinedFnID,
8069 return OutlinedFnID;
8086 bool IsStandAlone = !BodyGenCB;
8093 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8095 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8096 true, DeviceAddrCB))
8103 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8113 SrcLocInfo, DeviceID,
8120 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8124 if (Info.HasNoWait) {
8134 if (Info.HasNoWait) {
8138 emitBlock(OffloadContBlock, CurFn,
true);
8144 bool RequiresOuterTargetTask = Info.HasNoWait;
8145 if (!RequiresOuterTargetTask)
8146 cantFail(TaskBodyCB(
nullptr,
nullptr,
8150 {}, RTArgs, Info.HasNoWait));
8153 omp::OMPRTL___tgt_target_data_begin_mapper);
8157 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8161 Builder.CreateStore(LI, DeviceMap.second.second);
8197 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8206 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8228 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8229 return BeginThenGen(AllocaIP,
Builder.saveIP());
8244 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8245 return EndThenGen(AllocaIP,
Builder.saveIP());
8248 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8249 return BeginThenGen(AllocaIP,
Builder.saveIP());
8260 bool IsGPUDistribute) {
8261 assert((IVSize == 32 || IVSize == 64) &&
8262 "IV size is not compatible with the omp runtime");
8264 if (IsGPUDistribute)
8266 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8267 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8268 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8269 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8271 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8272 : omp::OMPRTL___kmpc_for_static_init_4u)
8273 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8274 : omp::OMPRTL___kmpc_for_static_init_8u);
8281 assert((IVSize == 32 || IVSize == 64) &&
8282 "IV size is not compatible with the omp runtime");
8284 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8285 : omp::OMPRTL___kmpc_dispatch_init_4u)
8286 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8287 : omp::OMPRTL___kmpc_dispatch_init_8u);
8294 assert((IVSize == 32 || IVSize == 64) &&
8295 "IV size is not compatible with the omp runtime");
8297 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8298 : omp::OMPRTL___kmpc_dispatch_next_4u)
8299 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8300 : omp::OMPRTL___kmpc_dispatch_next_8u);
8307 assert((IVSize == 32 || IVSize == 64) &&
8308 "IV size is not compatible with the omp runtime");
8310 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8311 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8312 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8313 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8324 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8332 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8336 if (NewVar && (arg == NewVar->
getArg()))
8346 auto UpdateDebugRecord = [&](
auto *DR) {
8349 for (
auto Loc : DR->location_ops()) {
8350 auto Iter = ValueReplacementMap.find(
Loc);
8351 if (Iter != ValueReplacementMap.end()) {
8352 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8353 ArgNo = std::get<1>(Iter->second) + 1;
8357 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8364 "Unexpected debug intrinsic");
8366 UpdateDebugRecord(&DVR);
8371 Module *M = Func->getParent();
8374 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8375 unsigned ArgNo = Func->arg_size();
8377 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8378 false, DINode::DIFlags::FlagArtificial);
8380 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8381 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8402 for (
auto &Arg : Inputs)
8403 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8407 for (
auto &Arg : Inputs)
8408 ParameterTypes.
push_back(Arg->getType());
8416 auto BB = Builder.GetInsertBlock();
8417 auto M = BB->getModule();
8428 if (TargetCpuAttr.isStringAttribute())
8429 Func->addFnAttr(TargetCpuAttr);
8431 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8432 if (TargetFeaturesAttr.isStringAttribute())
8433 Func->addFnAttr(TargetFeaturesAttr);
8438 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8449 Builder.SetInsertPoint(EntryBB);
8455 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8465 splitBB(Builder,
true,
"outlined.body");
8471 Builder.restoreIP(*AfterIP);
8476 Builder.CreateRetVoid();
8480 auto AllocaIP = Builder.saveIP();
8485 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8517 if (Instr->getFunction() == Func)
8518 Instr->replaceUsesOfWith(
Input, InputCopy);
8524 for (
auto InArg :
zip(Inputs, ArgRange)) {
8526 Argument &Arg = std::get<1>(InArg);
8527 Value *InputCopy =
nullptr;
8530 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
8533 Builder.restoreIP(*AfterIP);
8534 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8554 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8561 ReplaceValue(
Input, InputCopy, Func);
8565 for (
auto Deferred : DeferredReplacement)
8566 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8569 ValueReplacementMap);
8577 Value *TaskWithPrivates,
8578 Type *TaskWithPrivatesTy) {
8580 Type *TaskTy = OMPIRBuilder.Task;
8583 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
8584 Value *Shareds = TaskT;
8594 if (TaskWithPrivatesTy != TaskTy)
8595 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
8612 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
8617 assert((!NumOffloadingArrays || PrivatesTy) &&
8618 "PrivatesTy cannot be nullptr when there are offloadingArrays"
8651 Type *TaskPtrTy = OMPBuilder.TaskPtr;
8652 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
8658 ".omp_target_task_proxy_func",
8659 Builder.GetInsertBlock()->getModule());
8660 Value *ThreadId = ProxyFn->getArg(0);
8661 Value *TaskWithPrivates = ProxyFn->getArg(1);
8662 ThreadId->
setName(
"thread.id");
8663 TaskWithPrivates->
setName(
"task");
8665 bool HasShareds = SharedArgsOperandNo > 0;
8666 bool HasOffloadingArrays = NumOffloadingArrays > 0;
8669 Builder.SetInsertPoint(EntryBB);
8675 if (HasOffloadingArrays) {
8676 assert(TaskTy != TaskWithPrivatesTy &&
8677 "If there are offloading arrays to pass to the target"
8678 "TaskTy cannot be the same as TaskWithPrivatesTy");
8681 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
8682 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
8684 Builder.CreateStructGEP(PrivatesTy, Privates, i));
8688 auto *ArgStructAlloca =
8690 assert(ArgStructAlloca &&
8691 "Unable to find the alloca instruction corresponding to arguments "
8692 "for extracted function");
8694 std::optional<TypeSize> ArgAllocSize =
8696 assert(ArgStructType && ArgAllocSize &&
8697 "Unable to determine size of arguments for extracted function");
8698 uint64_t StructSize = ArgAllocSize->getFixedValue();
8701 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
8703 Value *SharedsSize = Builder.getInt64(StructSize);
8706 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
8708 Builder.CreateMemCpy(
8709 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
8711 KernelLaunchArgs.
push_back(NewArgStructAlloca);
8714 Builder.CreateRetVoid();
8720 return GEP->getSourceElementType();
8722 return Alloca->getAllocatedType();
8745 if (OffloadingArraysToPrivatize.
empty())
8746 return OMPIRBuilder.Task;
8749 for (
Value *V : OffloadingArraysToPrivatize) {
8750 assert(V->getType()->isPointerTy() &&
8751 "Expected pointer to array to privatize. Got a non-pointer value "
8754 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8760 "struct.task_with_privates");
8774 EntryFnName, Inputs, CBFunc,
8779 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8916 TargetTaskAllocaBB->
begin());
8920 OI.
EntryBB = TargetTaskAllocaBB;
8926 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8929 Builder.restoreIP(TargetTaskBodyIP);
8930 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8948 bool NeedsTargetTask = HasNoWait && DeviceID;
8949 if (NeedsTargetTask) {
8955 OffloadingArraysToPrivatize.
push_back(V);
8960 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8961 DeviceID, OffloadingArraysToPrivatize](
8964 "there must be a single user for the outlined function");
8978 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8979 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8981 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8982 "Wrong number of arguments for StaleCI when shareds are present");
8983 int SharedArgOperandNo =
8984 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8990 if (!OffloadingArraysToPrivatize.
empty())
8995 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8996 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8998 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9001 Builder.SetInsertPoint(StaleCI);
9018 OMPRTL___kmpc_omp_target_task_alloc);
9030 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9037 auto *ArgStructAlloca =
9039 assert(ArgStructAlloca &&
9040 "Unable to find the alloca instruction corresponding to arguments "
9041 "for extracted function");
9042 std::optional<TypeSize> ArgAllocSize =
9045 "Unable to determine size of arguments for extracted function");
9046 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9065 TaskSize, SharedsSize,
9068 if (NeedsTargetTask) {
9069 assert(DeviceID &&
"Expected non-empty device ID.");
9079 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9080 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9083 if (!OffloadingArraysToPrivatize.
empty()) {
9085 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9086 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9087 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9094 "ElementType should match ArrayType");
9097 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9099 Dst, Alignment, PtrToPrivatize, Alignment,
9100 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9114 if (!NeedsTargetTask) {
9123 ConstantInt::get(
Builder.getInt32Ty(), 0),
9136 }
else if (DepArray) {
9144 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
9145 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
9155 I->eraseFromParent();
9160 << *(
Builder.GetInsertBlock()) <<
"\n");
9162 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9174 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9191 bool HasNoWait,
Value *DynCGroupMem,
9198 Builder.restoreIP(IP);
9204 return Builder.saveIP();
9207 bool HasDependencies = Dependencies.
size() > 0;
9208 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9225 if (OutlinedFnID && DeviceID)
9227 EmitTargetCallFallbackCB, KArgs,
9228 DeviceID, RTLoc, TargetTaskAllocaIP);
9236 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9243 auto &&EmitTargetCallElse =
9249 if (RequiresOuterTargetTask) {
9256 Dependencies, EmptyRTArgs, HasNoWait);
9258 return EmitTargetCallFallbackCB(Builder.saveIP());
9261 Builder.restoreIP(AfterIP);
9265 auto &&EmitTargetCallThen =
9268 Info.HasNoWait = HasNoWait;
9273 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9279 for (
auto [DefaultVal, RuntimeVal] :
9281 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9282 : Builder.getInt32(DefaultVal));
9286 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9288 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9292 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9295 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9303 Value *MaxThreadsClause =
9305 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9308 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9310 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9311 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9313 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9314 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9316 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9319 unsigned NumTargetItems = Info.NumberOfPtrs;
9327 Builder.getInt64Ty(),
9329 : Builder.getInt64(0);
9333 DynCGroupMem = Builder.getInt32(0);
9336 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9337 HasNoWait, DynCGroupMemFallback);
9344 if (RequiresOuterTargetTask)
9346 RTLoc, AllocaIP, Dependencies,
9347 KArgs.
RTArgs, Info.HasNoWait);
9350 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9351 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9354 Builder.restoreIP(AfterIP);
9361 if (!OutlinedFnID) {
9362 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
9368 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
9373 EmitTargetCallElse, AllocaIP));
9400 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9401 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9407 if (!
Config.isTargetDevice())
9409 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
9410 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
9411 DynCGroupMemFallback);
9425 return OS.
str().str();
9430 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9436 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9438 assert(Elem.second->getValueType() == Ty &&
9439 "OMP internal variable has different type than requested");
9452 :
M.getTargetTriple().isAMDGPU()
9454 :
DL.getDefaultGlobalsAddressSpace();
9463 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9464 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9471Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9472 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9473 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9484 return SizePtrToInt;
9489 std::string VarName) {
9497 return MaptypesArrayGlobal;
9502 unsigned NumOperands,
9511 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9515 ArrI64Ty,
nullptr,
".offload_sizes");
9526 int64_t DeviceID,
unsigned NumOperands) {
9532 Value *ArgsBaseGEP =
9534 {Builder.getInt32(0), Builder.getInt32(0)});
9537 {Builder.getInt32(0), Builder.getInt32(0)});
9538 Value *ArgSizesGEP =
9540 {Builder.getInt32(0), Builder.getInt32(0)});
9544 Builder.getInt32(NumOperands),
9545 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9546 MaptypesArg, MapnamesArg, NullPtr});
9553 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9554 "expected region end call to runtime only when end call is separate");
9556 auto VoidPtrTy = UnqualPtrTy;
9557 auto VoidPtrPtrTy = UnqualPtrTy;
9559 auto Int64PtrTy = UnqualPtrTy;
9561 if (!Info.NumberOfPtrs) {
9573 Info.RTArgs.BasePointersArray,
9576 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
9580 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
9584 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
9585 : Info.RTArgs.MapTypesArray,
9591 if (!Info.EmitDebug)
9595 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
9600 if (!Info.HasMapper)
9604 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
9625 "struct.descriptor_dim");
9627 enum { OffsetFD = 0, CountFD, StrideFD };
9631 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
9634 if (NonContigInfo.
Dims[
I] == 1)
9639 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9641 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
9642 unsigned RevIdx = EE -
II - 1;
9646 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9648 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
9649 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9651 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9653 NonContigInfo.
Counts[L][RevIdx], CountLVal,
9654 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9656 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9658 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
9659 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9663 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
9664 DimsAddr,
Builder.getPtrTy());
9667 Info.RTArgs.PointersArray, 0,
I);
9669 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
9674void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9678 StringRef Prefix = IsInit ?
".init" :
".del";
9684 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
9685 Value *DeleteBit = Builder.CreateAnd(
9688 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9689 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9694 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
9695 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
9696 DeleteCond = Builder.CreateIsNull(
9701 DeleteCond =
Builder.CreateIsNotNull(
9717 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9718 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9719 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9720 MapTypeArg =
Builder.CreateOr(
9723 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9724 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9728 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
9729 ArraySize, MapTypeArg, MapName};
9755 MapperFn->
addFnAttr(Attribute::NoInline);
9756 MapperFn->
addFnAttr(Attribute::NoUnwind);
9766 auto SavedIP =
Builder.saveIP();
9767 Builder.SetInsertPoint(EntryBB);
9779 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
9781 Value *PtrBegin = BeginIn;
9787 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9788 MapType, MapName, ElementSize, HeadBB,
9799 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9800 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9806 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9807 PtrPHI->addIncoming(PtrBegin, HeadBB);
9812 return Info.takeError();
9816 Value *OffloadingArgs[] = {MapperHandle};
9820 Value *ShiftedPreviousSize =
9824 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
9825 Value *CurBaseArg = Info->BasePointers[
I];
9826 Value *CurBeginArg = Info->Pointers[
I];
9827 Value *CurSizeArg = Info->Sizes[
I];
9828 Value *CurNameArg = Info->Names.size()
9834 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9836 Value *MemberMapType =
9837 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9854 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9855 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9856 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9866 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9872 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9873 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9874 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9880 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9881 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9882 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9888 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9889 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9895 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9896 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9897 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9903 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9904 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9915 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9916 CurSizeArg, CurMapType, CurNameArg};
9918 auto ChildMapperFn = CustomMapperCB(
I);
9920 return ChildMapperFn.takeError();
9921 if (*ChildMapperFn) {
9936 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9937 "omp.arraymap.next");
9938 PtrPHI->addIncoming(PtrNext, LastBB);
9939 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9941 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9946 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9947 MapType, MapName, ElementSize, DoneBB,
9961 bool IsNonContiguous,
9965 Info.clearArrayInfo();
9968 if (Info.NumberOfPtrs == 0)
9977 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
9978 PointerArrayType,
nullptr,
".offload_baseptrs");
9980 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
9981 PointerArrayType,
nullptr,
".offload_ptrs");
9983 PointerArrayType,
nullptr,
".offload_mappers");
9984 Info.RTArgs.MappersArray = MappersArray;
9991 ConstantInt::get(Int64Ty, 0));
9993 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
9994 bool IsNonContigEntry =
9996 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9998 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10001 if (IsNonContigEntry) {
10003 "Index must be in-bounds for NON_CONTIG Dims array");
10005 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10006 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10011 ConstSizes[
I] = CI;
10015 RuntimeSizes.
set(
I);
10018 if (RuntimeSizes.
all()) {
10020 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10021 SizeArrayType,
nullptr,
".offload_sizes");
10027 auto *SizesArrayGbl =
10032 if (!RuntimeSizes.
any()) {
10033 Info.RTArgs.SizesArray = SizesArrayGbl;
10035 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10036 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10039 SizeArrayType,
nullptr,
".offload_sizes");
10043 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10044 SizesArrayGbl, OffloadSizeAlign,
10049 Info.RTArgs.SizesArray = Buffer;
10057 for (
auto mapFlag : CombinedInfo.
Types)
10059 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10063 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10069 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10070 Info.EmitDebug =
true;
10072 Info.RTArgs.MapNamesArray =
10074 Info.EmitDebug =
false;
10079 if (Info.separateBeginEndCalls()) {
10080 bool EndMapTypesDiffer =
false;
10082 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10083 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10084 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10085 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10086 EndMapTypesDiffer =
true;
10089 if (EndMapTypesDiffer) {
10091 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10096 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10099 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10101 Builder.CreateAlignedStore(BPVal, BP,
10102 M.getDataLayout().getPrefTypeAlign(PtrTy));
10104 if (Info.requiresDevicePointerInfo()) {
10106 CodeGenIP =
Builder.saveIP();
10108 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10109 Builder.restoreIP(CodeGenIP);
10111 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10113 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10115 DeviceAddrCB(
I, BP);
10121 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10124 Builder.CreateAlignedStore(PVal,
P,
10125 M.getDataLayout().getPrefTypeAlign(PtrTy));
10127 if (RuntimeSizes.
test(
I)) {
10129 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10135 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10138 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10141 auto CustomMFunc = CustomMapperCB(
I);
10143 return CustomMFunc.takeError();
10145 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10148 PointerArrayType, MappersArray,
10151 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10155 Info.NumberOfPtrs == 0)
10172 Builder.ClearInsertionPoint();
10202 auto CondConstant = CI->getSExtValue();
10204 return ThenGen(AllocaIP,
Builder.saveIP());
10206 return ElseGen(AllocaIP,
Builder.saveIP());
10216 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10234bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10238 "Unexpected Atomic Ordering.");
10240 bool Flush =
false;
10302 assert(
X.Var->getType()->isPointerTy() &&
10303 "OMP Atomic expects a pointer to target memory");
10304 Type *XElemTy =
X.ElemTy;
10307 "OMP atomic read expected a scalar type");
10309 Value *XRead =
nullptr;
10313 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10322 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10325 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10327 XRead = AtomicLoadRes.first;
10334 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10337 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10339 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10342 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10343 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10354 assert(
X.Var->getType()->isPointerTy() &&
10355 "OMP Atomic expects a pointer to target memory");
10356 Type *XElemTy =
X.ElemTy;
10359 "OMP atomic write expected a scalar type");
10367 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10370 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10378 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10383 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10390 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10391 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10397 Type *XTy =
X.Var->getType();
10399 "OMP Atomic expects a pointer to target memory");
10400 Type *XElemTy =
X.ElemTy;
10403 "OMP atomic update expected a scalar type");
10406 "OpenMP atomic does not support LT or GT operations");
10410 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10411 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10413 return AtomicResult.takeError();
10414 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10419Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10423 return Builder.CreateAdd(Src1, Src2);
10425 return Builder.CreateSub(Src1, Src2);
10427 return Builder.CreateAnd(Src1, Src2);
10429 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10431 return Builder.CreateOr(Src1, Src2);
10433 return Builder.CreateXor(Src1, Src2);
10457Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10460 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10461 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10464 bool emitRMWOp =
false;
10472 emitRMWOp = XElemTy;
10475 emitRMWOp = (IsXBinopExpr && XElemTy);
10482 std::pair<Value *, Value *> Res;
10484 AtomicRMWInst *RMWInst =
10485 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10486 if (
T.isAMDGPU()) {
10487 if (IsIgnoreDenormalMode)
10488 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10490 if (!IsFineGrainedMemory)
10491 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10493 if (!IsRemoteMemory)
10497 Res.first = RMWInst;
10502 Res.second = Res.first;
10504 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10508 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10511 unsigned LoadSize =
10514 OpenMPIRBuilder::AtomicInfo atomicInfo(
10516 OldVal->
getAlign(),
true , AllocaIP,
X);
10517 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10520 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10527 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10528 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10529 Builder.SetInsertPoint(ContBB);
10531 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10533 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10536 Value *Upd = *CBResult;
10537 Builder.CreateStore(Upd, NewAtomicAddr);
10540 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10541 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10542 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10543 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10546 Res.first = OldExprVal;
10549 if (UnreachableInst *ExitTI =
10552 Builder.SetInsertPoint(ExitBB);
10554 Builder.SetInsertPoint(ExitTI);
10557 IntegerType *IntCastTy =
10560 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
10569 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10576 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10577 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10578 Builder.SetInsertPoint(ContBB);
10580 PHI->addIncoming(OldVal, CurBB);
10585 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
10586 X->getName() +
".atomic.fltCast");
10588 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
10589 X->getName() +
".atomic.ptrCast");
10593 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10596 Value *Upd = *CBResult;
10597 Builder.CreateStore(Upd, NewAtomicAddr);
10598 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
10602 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
10603 Result->setVolatile(VolatileX);
10604 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
10605 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10606 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
10607 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
10609 Res.first = OldExprVal;
10613 if (UnreachableInst *ExitTI =
10616 Builder.SetInsertPoint(ExitBB);
10618 Builder.SetInsertPoint(ExitTI);
10629 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
10630 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10635 Type *XTy =
X.Var->getType();
10637 "OMP Atomic expects a pointer to target memory");
10638 Type *XElemTy =
X.ElemTy;
10641 "OMP atomic capture expected a scalar type");
10643 "OpenMP atomic does not support LT or GT operations");
10650 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
10651 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10654 Value *CapturedVal =
10655 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10656 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10658 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
10670 IsPostfixUpdate, IsFailOnly, Failure);
10682 assert(
X.Var->getType()->isPointerTy() &&
10683 "OMP atomic expects a pointer to target memory");
10686 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
10687 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
10690 bool IsInteger = E->getType()->isIntegerTy();
10692 if (
Op == OMPAtomicCompareOp::EQ) {
10707 Value *OldValue =
Builder.CreateExtractValue(Result, 0);
10709 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
10711 "OldValue and V must be of same type");
10712 if (IsPostfixUpdate) {
10713 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10715 Value *SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
10728 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10730 CurBBTI,
X.Var->getName() +
".atomic.exit");
10736 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10738 Builder.SetInsertPoint(ContBB);
10739 Builder.CreateStore(OldValue, V.Var);
10745 Builder.SetInsertPoint(ExitBB);
10747 Builder.SetInsertPoint(ExitTI);
10750 Value *CapturedValue =
10751 Builder.CreateSelect(SuccessOrFail, E, OldValue);
10752 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10758 assert(R.Var->getType()->isPointerTy() &&
10759 "r.var must be of pointer type");
10760 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10762 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10763 Value *ResultCast = R.IsSigned
10764 ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
10765 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
10766 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
10769 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10770 "Op should be either max or min at this point");
10771 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10782 if (IsXBinopExpr) {
10811 Value *CapturedValue =
nullptr;
10812 if (IsPostfixUpdate) {
10813 CapturedValue = OldValue;
10838 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
10839 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
10841 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10845 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10865 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
10892 bool SubClausesPresent =
10893 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10895 if (!
Config.isTargetDevice() && SubClausesPresent) {
10896 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10897 "if lowerbound is non-null, then upperbound must also be non-null "
10898 "for bounds on num_teams");
10900 if (NumTeamsUpper ==
nullptr)
10901 NumTeamsUpper =
Builder.getInt32(0);
10903 if (NumTeamsLower ==
nullptr)
10904 NumTeamsLower = NumTeamsUpper;
10908 "argument to if clause must be an integer value");
10912 IfExpr =
Builder.CreateICmpNE(IfExpr,
10913 ConstantInt::get(IfExpr->
getType(), 0));
10914 NumTeamsUpper =
Builder.CreateSelect(
10915 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
10918 NumTeamsLower =
Builder.CreateSelect(
10919 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
10922 if (ThreadLimit ==
nullptr)
10923 ThreadLimit =
Builder.getInt32(0);
10927 Value *NumTeamsLowerInt32 =
10929 Value *NumTeamsUpperInt32 =
10931 Value *ThreadLimitInt32 =
10938 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
10939 ThreadLimitInt32});
10944 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10956 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
10958 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
10960 auto HostPostOutlineCB = [
this, Ident,
10961 ToBeDeleted](
Function &OutlinedFn)
mutable {
10966 "there must be a single user for the outlined function");
10971 "Outlined function must have two or three arguments only");
10973 bool HasShared = OutlinedFn.
arg_size() == 3;
10981 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10982 "outlined function.");
10983 Builder.SetInsertPoint(StaleCI);
10990 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10994 I->eraseFromParent();
10997 if (!
Config.isTargetDevice())
11016 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11031 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
11036 if (
Config.isTargetDevice()) {
11051 std::string VarName) {
11060 return MapNamesArrayGlobal;
11065void OpenMPIRBuilder::initializeTypes(
Module &M) {
11069 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11070#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11071#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11072 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11073 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11074#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11075 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11076 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11077#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11078 T = StructType::getTypeByName(Ctx, StructName); \
11080 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11082 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11083#include "llvm/Frontend/OpenMP/OMPKinds.def"
11094 while (!Worklist.
empty()) {
11098 if (
BlockSet.insert(SuccBB).second)
11110 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11122 Fn->
addFnAttr(
"uniform-work-group-size");
11123 Fn->
addFnAttr(Attribute::MustProgress);
11141 auto &&GetMDInt = [
this](
unsigned V) {
11148 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11149 auto &&TargetRegionMetadataEmitter =
11150 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11165 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11166 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11167 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11168 GetMDInt(E.getOrder())};
11171 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11180 auto &&DeviceGlobalVarMetadataEmitter =
11181 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11191 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11192 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11196 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11203 DeviceGlobalVarMetadataEmitter);
11205 for (
const auto &E : OrderedEntries) {
11206 assert(E.first &&
"All ordered entries must exist!");
11207 if (
const auto *CE =
11210 if (!CE->getID() || !CE->getAddress()) {
11214 if (!
M.getNamedValue(FnName))
11222 }
else if (
const auto *CE =
dyn_cast<
11231 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11233 if (!CE->getAddress()) {
11238 if (CE->getVarSize() == 0)
11242 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11243 (!
Config.isTargetDevice() && CE->getAddress())) &&
11244 "Declaret target link address is set.");
11245 if (
Config.isTargetDevice())
11247 if (!CE->getAddress()) {
11254 if (!CE->getAddress()) {
11267 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11271 OMPTargetGlobalVarEntryIndirectVTable))
11280 Flags, CE->getLinkage(), CE->getVarName());
11283 Flags, CE->getLinkage());
11294 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11300 Config.getRequiresFlags());
11310 OS <<
"_" <<
Count;
11315 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11318 EntryInfo.
Line, NewCount);
11326 auto FileIDInfo = CallBack();
11330 FileID =
Status->getUniqueID().getFile();
11334 FileID =
hash_value(std::get<0>(FileIDInfo));
11338 std::get<1>(FileIDInfo));
11344 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11346 !(Remain & 1); Remain = Remain >> 1)
11364 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11366 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11373 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11379 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11380 Flags |= MemberOfFlag;
11386 bool IsDeclaration,
bool IsExternallyVisible,
11388 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11389 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11390 std::function<
Constant *()> GlobalInitializer,
11401 Config.hasRequiresUnifiedSharedMemory())) {
11406 if (!IsExternallyVisible)
11408 OS <<
"_decl_tgt_ref_ptr";
11411 Value *Ptr =
M.getNamedValue(PtrName);
11420 if (!
Config.isTargetDevice()) {
11421 if (GlobalInitializer)
11422 GV->setInitializer(GlobalInitializer());
11428 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11429 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11430 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11442 bool IsDeclaration,
bool IsExternallyVisible,
11444 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11445 std::vector<Triple> TargetTriple,
11446 std::function<
Constant *()> GlobalInitializer,
11450 (TargetTriple.empty() && !
Config.isTargetDevice()))
11461 !
Config.hasRequiresUnifiedSharedMemory()) {
11463 VarName = MangledName;
11466 if (!IsDeclaration)
11468 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11471 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
11475 if (
Config.isTargetDevice() &&
11484 if (!
M.getNamedValue(RefName)) {
11488 GvAddrRef->setConstant(
true);
11490 GvAddrRef->setInitializer(Addr);
11491 GeneratedRefs.push_back(GvAddrRef);
11500 if (
Config.isTargetDevice()) {
11501 VarName = (Addr) ? Addr->
getName() :
"";
11505 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11506 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11507 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11508 VarName = (Addr) ? Addr->
getName() :
"";
11510 VarSize =
M.getDataLayout().getPointerSize();
11529 auto &&GetMDInt = [MN](
unsigned Idx) {
11534 auto &&GetMDString = [MN](
unsigned Idx) {
11536 return V->getString();
11539 switch (GetMDInt(0)) {
11543 case OffloadEntriesInfoManager::OffloadEntryInfo::
11544 OffloadingEntryInfoTargetRegion: {
11554 case OffloadEntriesInfoManager::OffloadEntryInfo::
11555 OffloadingEntryInfoDeviceGlobalVar:
11568 if (HostFilePath.
empty())
11572 if (std::error_code Err = Buf.getError()) {
11574 "OpenMPIRBuilder: " +
11582 if (std::error_code Err =
M.getError()) {
11584 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11598 "expected a valid insertion block for creating an iterator loop");
11608 Builder.getCurrentDebugLocation(),
"omp.it.cont");
11620 T->eraseFromParent();
11629 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
11631 "iterator bodygen must terminate the canonical body with an "
11632 "unconditional branch to the loop latch",
11656 for (
const auto &
ParamAttr : ParamAttrs) {
11699 return std::string(Out.
str());
11707 unsigned VecRegSize;
11709 ISADataTy ISAData[] = {
11728 for (
char Mask :
Masked) {
11729 for (
const ISADataTy &
Data : ISAData) {
11732 Out <<
"_ZGV" <<
Data.ISA << Mask;
11734 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
11748template <
typename T>
11751 StringRef MangledName,
bool OutputBecomesInput,
11755 Out << Prefix << ISA << LMask << VLEN;
11756 if (OutputBecomesInput)
11758 Out << ParSeq <<
'_' << MangledName;
11767 bool OutputBecomesInput,
11772 OutputBecomesInput, Fn);
11774 OutputBecomesInput, Fn);
11778 OutputBecomesInput, Fn);
11780 OutputBecomesInput, Fn);
11784 OutputBecomesInput, Fn);
11786 OutputBecomesInput, Fn);
11791 OutputBecomesInput, Fn);
11802 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
11803 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
11815 OutputBecomesInput, Fn);
11822 OutputBecomesInput, Fn);
11824 OutputBecomesInput, Fn);
11828 OutputBecomesInput, Fn);
11832 OutputBecomesInput, Fn);
11841 OutputBecomesInput, Fn);
11848 MangledName, OutputBecomesInput, Fn);
11850 MangledName, OutputBecomesInput, Fn);
11854 MangledName, OutputBecomesInput, Fn);
11858 MangledName, OutputBecomesInput, Fn);
11868 return OffloadEntriesTargetRegion.empty() &&
11869 OffloadEntriesDeviceGlobalVar.empty();
11872unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
11874 auto It = OffloadEntriesTargetRegionCount.find(
11875 getTargetRegionEntryCountKey(EntryInfo));
11876 if (It == OffloadEntriesTargetRegionCount.end())
11881void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
11883 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
11884 EntryInfo.
Count + 1;
11890 OffloadEntriesTargetRegion[EntryInfo] =
11893 ++OffloadingEntriesNum;
11899 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
11902 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11906 if (OMPBuilder->Config.isTargetDevice()) {
11911 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
11912 Entry.setAddress(Addr);
11914 Entry.setFlags(Flags);
11920 "Target region entry already registered!");
11922 OffloadEntriesTargetRegion[EntryInfo] = Entry;
11923 ++OffloadingEntriesNum;
11925 incrementTargetRegionEntryInfoCount(EntryInfo);
11932 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11934 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
11935 if (It == OffloadEntriesTargetRegion.end()) {
11939 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
11947 for (
const auto &It : OffloadEntriesTargetRegion) {
11948 Action(It.first, It.second);
11954 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
11955 ++OffloadingEntriesNum;
11961 if (OMPBuilder->Config.isTargetDevice()) {
11965 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11967 if (Entry.getVarSize() == 0) {
11968 Entry.setVarSize(VarSize);
11969 Entry.setLinkage(Linkage);
11973 Entry.setVarSize(VarSize);
11974 Entry.setLinkage(Linkage);
11975 Entry.setAddress(Addr);
11978 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11979 assert(Entry.isValid() && Entry.getFlags() == Flags &&
11980 "Entry not initialized!");
11981 if (Entry.getVarSize() == 0) {
11982 Entry.setVarSize(VarSize);
11983 Entry.setLinkage(Linkage);
11990 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
11991 Addr, VarSize, Flags, Linkage,
11994 OffloadEntriesDeviceGlobalVar.try_emplace(
11995 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
11996 ++OffloadingEntriesNum;
12003 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12004 Action(E.getKey(), E.getValue());
12011void CanonicalLoopInfo::collectControlBlocks(
12018 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12030void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12042void CanonicalLoopInfo::mapIndVar(
12052 for (
Use &U : OldIV->
uses()) {
12056 if (
User->getParent() == getCond())
12058 if (
User->getParent() == getLatch())
12064 Value *NewIV = Updater(OldIV);
12067 for (Use *U : ReplacableUses)
12088 "Preheader must terminate with unconditional branch");
12090 "Preheader must jump to header");
12094 "Header must terminate with unconditional branch");
12095 assert(Header->getSingleSuccessor() == Cond &&
12096 "Header must jump to exiting block");
12099 assert(Cond->getSinglePredecessor() == Header &&
12100 "Exiting block only reachable from header");
12103 "Exiting block must terminate with conditional branch");
12105 "Exiting block's first successor jump to the body");
12107 "Exiting block's second successor must exit the loop");
12111 "Body only reachable from exiting block");
12116 "Latch must terminate with unconditional branch");
12117 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12120 assert(Latch->getSinglePredecessor() !=
nullptr);
12125 "Exit block must terminate with unconditional branch");
12126 assert(Exit->getSingleSuccessor() == After &&
12127 "Exit block must jump to after block");
12131 "After block only reachable from exit block");
12135 assert(IndVar &&
"Canonical induction variable not found?");
12137 "Induction variable must be an integer");
12139 "Induction variable must be a PHI in the loop header");
12145 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12153 assert(TripCount &&
"Loop trip count not found?");
12155 "Trip count and induction variable must have the same type");
12159 "Exit condition must be a signed less-than comparison");
12161 "Exit condition must compare the induction variable");
12163 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a 'declare target link'.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a 'declare target to'.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
ReductionGenCBKind
Enum class for the RedctionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetGenArgAccessorsCallbackTy
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, AffinityData Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetBodyGenCallbackTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number by the given callback.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive splits and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks BBs from their parent function.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
a struct to pack relevant information while generating atomic Ops
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
bool FixUpNonEntryAllocas
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...