70#define DEBUG_TYPE "openmp-ir-builder"
77 cl::desc(
"Use optimistic attributes describing "
78 "'as-if' properties of runtime calls."),
82 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
83 cl::desc(
"Factor for the unroll threshold to account for code "
84 "simplifications still taking place"),
88 "openmp-ir-builder-use-default-max-threads",
cl::Hidden,
99 if (!IP1.isSet() || !IP2.isSet())
101 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
106 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
107 case OMPScheduleType::UnorderedStaticChunked:
108 case OMPScheduleType::UnorderedStatic:
109 case OMPScheduleType::UnorderedDynamicChunked:
110 case OMPScheduleType::UnorderedGuidedChunked:
111 case OMPScheduleType::UnorderedRuntime:
112 case OMPScheduleType::UnorderedAuto:
113 case OMPScheduleType::UnorderedTrapezoidal:
114 case OMPScheduleType::UnorderedGreedy:
115 case OMPScheduleType::UnorderedBalanced:
116 case OMPScheduleType::UnorderedGuidedIterativeChunked:
117 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
118 case OMPScheduleType::UnorderedSteal:
119 case OMPScheduleType::UnorderedStaticBalancedChunked:
120 case OMPScheduleType::UnorderedGuidedSimd:
121 case OMPScheduleType::UnorderedRuntimeSimd:
122 case OMPScheduleType::OrderedStaticChunked:
123 case OMPScheduleType::OrderedStatic:
124 case OMPScheduleType::OrderedDynamicChunked:
125 case OMPScheduleType::OrderedGuidedChunked:
126 case OMPScheduleType::OrderedRuntime:
127 case OMPScheduleType::OrderedAuto:
128 case OMPScheduleType::OrderdTrapezoidal:
129 case OMPScheduleType::NomergeUnorderedStaticChunked:
130 case OMPScheduleType::NomergeUnorderedStatic:
131 case OMPScheduleType::NomergeUnorderedDynamicChunked:
132 case OMPScheduleType::NomergeUnorderedGuidedChunked:
133 case OMPScheduleType::NomergeUnorderedRuntime:
134 case OMPScheduleType::NomergeUnorderedAuto:
135 case OMPScheduleType::NomergeUnorderedTrapezoidal:
136 case OMPScheduleType::NomergeUnorderedGreedy:
137 case OMPScheduleType::NomergeUnorderedBalanced:
138 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
139 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
140 case OMPScheduleType::NomergeUnorderedSteal:
141 case OMPScheduleType::NomergeOrderedStaticChunked:
142 case OMPScheduleType::NomergeOrderedStatic:
143 case OMPScheduleType::NomergeOrderedDynamicChunked:
144 case OMPScheduleType::NomergeOrderedGuidedChunked:
145 case OMPScheduleType::NomergeOrderedRuntime:
146 case OMPScheduleType::NomergeOrderedAuto:
147 case OMPScheduleType::NomergeOrderedTrapezoidal:
148 case OMPScheduleType::OrderedDistributeChunked:
149 case OMPScheduleType::OrderedDistribute:
157 SchedType & OMPScheduleType::MonotonicityMask;
158 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
170 Builder.restoreIP(IP);
178 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
184 Kernel->getFnAttribute(
"target-features").getValueAsString();
185 if (Features.
count(
"+wavefrontsize64"))
200 bool HasSimdModifier,
bool HasDistScheduleChunks) {
202 switch (ClauseKind) {
203 case OMP_SCHEDULE_Default:
204 case OMP_SCHEDULE_Static:
205 return HasChunks ? OMPScheduleType::BaseStaticChunked
206 : OMPScheduleType::BaseStatic;
207 case OMP_SCHEDULE_Dynamic:
208 return OMPScheduleType::BaseDynamicChunked;
209 case OMP_SCHEDULE_Guided:
210 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
211 : OMPScheduleType::BaseGuidedChunked;
212 case OMP_SCHEDULE_Auto:
214 case OMP_SCHEDULE_Runtime:
215 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
216 : OMPScheduleType::BaseRuntime;
217 case OMP_SCHEDULE_Distribute:
218 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
219 : OMPScheduleType::BaseDistribute;
227 bool HasOrderedClause) {
228 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
229 OMPScheduleType::None &&
230 "Must not have ordering nor monotonicity flags already set");
233 ? OMPScheduleType::ModifierOrdered
234 : OMPScheduleType::ModifierUnordered;
235 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
238 if (OrderingScheduleType ==
239 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
240 return OMPScheduleType::OrderedGuidedChunked;
241 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
242 OMPScheduleType::ModifierOrdered))
243 return OMPScheduleType::OrderedRuntime;
245 return OrderingScheduleType;
251 bool HasSimdModifier,
bool HasMonotonic,
252 bool HasNonmonotonic,
bool HasOrderedClause) {
253 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
254 OMPScheduleType::None &&
255 "Must not have monotonicity flags already set");
256 assert((!HasMonotonic || !HasNonmonotonic) &&
257 "Monotonic and Nonmonotonic are contradicting each other");
260 return ScheduleType | OMPScheduleType::ModifierMonotonic;
261 }
else if (HasNonmonotonic) {
262 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
272 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
273 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
279 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
287 bool HasSimdModifier,
bool HasMonotonicModifier,
288 bool HasNonmonotonicModifier,
bool HasOrderedClause,
289 bool HasDistScheduleChunks) {
291 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
295 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
296 HasNonmonotonicModifier, HasOrderedClause);
304static std::optional<omp::OMPTgtExecModeFlags>
309 if (
Call->getCalledFunction()->getName() ==
"__kmpc_target_init") {
310 TargetInitCall =
Call;
335 std::optional<omp::OMPTgtExecModeFlags> ExecMode =
347 if (
Instruction *Term = Source->getTerminatorOrNull()) {
356 NewBr->setDebugLoc(
DL);
361 assert(New->getFirstInsertionPt() == New->begin() &&
362 "Target BB must not have PHI nodes");
378 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
382 NewBr->setDebugLoc(
DL);
394 Builder.SetInsertPoint(Old);
398 Builder.SetCurrentDebugLocation(
DebugLoc);
408 New->replaceSuccessorsPhiUsesWith(Old, New);
417 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
419 Builder.SetInsertPoint(Builder.GetInsertBlock());
422 Builder.SetCurrentDebugLocation(
DebugLoc);
431 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
433 Builder.SetInsertPoint(Builder.GetInsertBlock());
436 Builder.SetCurrentDebugLocation(
DebugLoc);
453 const Twine &Name =
"",
bool AsPtr =
true,
454 bool Is64Bit =
false) {
455 Builder.restoreIP(OuterAllocaIP);
459 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
463 FakeVal = FakeValAddr;
465 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
470 Builder.restoreIP(InnerAllocaIP);
473 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
476 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
489enum OpenMPOffloadingRequiresDirFlags {
491 OMP_REQ_UNDEFINED = 0x000,
493 OMP_REQ_NONE = 0x001,
495 OMP_REQ_REVERSE_OFFLOAD = 0x002,
497 OMP_REQ_UNIFIED_ADDRESS = 0x004,
499 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
501 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
508 DominatorTree *DT =
nullptr,
bool AggregateArgs =
false,
509 BlockFrequencyInfo *BFI =
nullptr,
510 BranchProbabilityInfo *BPI =
nullptr,
511 AssumptionCache *AC =
nullptr,
bool AllowVarArgs =
false,
512 bool AllowAlloca =
false,
513 BasicBlock *AllocationBlock =
nullptr,
515 std::string Suffix =
"",
bool ArgsInZeroAddressSpace =
false)
516 : CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs,
517 AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix,
518 ArgsInZeroAddressSpace),
519 OMPBuilder(OMPBuilder) {}
521 virtual ~OMPCodeExtractor() =
default;
524 OpenMPIRBuilder &OMPBuilder;
527class DeviceSharedMemCodeExtractor :
public OMPCodeExtractor {
529 using OMPCodeExtractor::OMPCodeExtractor;
530 virtual ~DeviceSharedMemCodeExtractor() =
default;
534 allocateVar(IRBuilder<>::InsertPoint AllocaIP,
Type *VarType,
535 const Twine &Name = Twine(
""),
536 AddrSpaceCastInst **CastedAlloc =
nullptr)
override {
537 return OMPBuilder.createOMPAllocShared(AllocaIP, VarType, Name);
540 virtual Instruction *deallocateVar(IRBuilder<>::InsertPoint DeallocIP,
542 return OMPBuilder.createOMPFreeShared(DeallocIP, Var, VarType);
549 OpenMPIRBuilder &OMPBuilder;
551 DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder)
552 : OMPBuilder(OMPBuilder) {}
553 virtual ~DeviceSharedMemOutlineInfo() =
default;
555 virtual std::unique_ptr<CodeExtractor>
557 bool ArgsInZeroAddressSpace,
558 Twine Suffix = Twine(
""))
override;
564 : RequiresFlags(OMP_REQ_UNDEFINED) {}
568 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
569 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
572 RequiresFlags(OMP_REQ_UNDEFINED) {
573 if (HasRequiresReverseOffload)
574 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
575 if (HasRequiresUnifiedAddress)
576 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
577 if (HasRequiresUnifiedSharedMemory)
578 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
579 if (HasRequiresDynamicAllocators)
580 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
584 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
588 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
592 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
596 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
601 :
static_cast<int64_t
>(OMP_REQ_NONE);
606 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
608 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
613 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
615 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
620 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
622 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
627 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
629 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
642 constexpr size_t MaxDim = 3;
647 Value *DynCGroupMemFallbackFlag =
649 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
650 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
656 Value *NumThreads3D =
687 auto FnAttrs = Attrs.getFnAttrs();
688 auto RetAttrs = Attrs.getRetAttrs();
690 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
695 bool Param =
true) ->
void {
696 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
697 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
698 if (HasSignExt || HasZeroExt) {
699 assert(AS.getNumAttributes() == 1 &&
700 "Currently not handling extension attr combined with others.");
702 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
705 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
712#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
713#include "llvm/Frontend/OpenMP/OMPKinds.def"
717#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
719 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
720 addAttrSet(RetAttrs, RetAttrSet, false); \
721 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
722 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
723 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
725#include "llvm/Frontend/OpenMP/OMPKinds.def"
739#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
741 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
743 Fn = M.getFunction(Str); \
745#include "llvm/Frontend/OpenMP/OMPKinds.def"
751#define OMP_RTL(Enum, Str, ...) \
753 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
755#include "llvm/Frontend/OpenMP/OMPKinds.def"
759 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
769 LLVMContext::MD_callback,
771 2, {-1, -1},
true)}));
784 assert(Fn &&
"Failed to create OpenMP runtime function");
795 Builder.SetInsertPoint(FiniBB);
807 FiniBB = OtherFiniBB;
809 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
817 auto EndIt = FiniBB->end();
818 if (FiniBB->size() >= 1)
819 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
824 FiniBB->replaceAllUsesWith(OtherFiniBB);
825 FiniBB->eraseFromParent();
826 FiniBB = OtherFiniBB;
833 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
856 for (
auto Inst =
Block->getReverseIterator()->begin();
857 Inst !=
Block->getReverseIterator()->end();) {
886 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
907 DeferredOutlines.
push_back(std::move(OI));
911 ParallelRegionBlockSet.
clear();
913 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
923 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
924 std::unique_ptr<CodeExtractor> Extractor =
925 OI->createCodeExtractor(Blocks, ArgsInZeroAddressSpace,
".omp_par");
929 <<
" Exit: " << OI->ExitBB->getName() <<
"\n");
930 assert(Extractor->isEligible() &&
931 "Expected OpenMP outlining to be possible!");
933 for (
auto *V : OI->ExcludeArgsFromAggregate)
934 Extractor->excludeArgFromAggregate(V);
937 Extractor->extractCodeRegion(CEAC, OI->Inputs, OI->Outputs);
941 if (TargetCpuAttr.isStringAttribute())
944 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
945 if (TargetFeaturesAttr.isStringAttribute())
946 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
949 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
951 "OpenMP outlined functions should not return a value!");
956 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
963 assert(OI->EntryBB->getUniquePredecessor() == &ArtificialEntry);
970 "Expected instructions to add in the outlined region entry");
972 End = ArtificialEntry.
rend();
977 if (
I.isTerminator()) {
979 if (
Instruction *TI = OI->EntryBB->getTerminatorOrNull())
980 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
984 I.moveBeforePreserving(*OI->EntryBB,
985 OI->EntryBB->getFirstInsertionPt());
988 OI->EntryBB->moveBefore(&ArtificialEntry);
995 if (OI->PostOutlineCB)
996 OI->PostOutlineCB(*OutlinedFn);
998 if (OI->FixUpNonEntryAllocas)
1030 errs() <<
"Error of kind: " << Kind
1031 <<
" when emitting offload entries and metadata during "
1032 "OMPIRBuilder finalization \n";
1038 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
1039 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
1040 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
1041 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
1058 ConstantInt::get(I32Ty,
Value), Name);
1071 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
1075 if (UsedArray.
empty())
1082 GV->setSection(
"llvm.metadata");
1088 auto *Int8Ty =
Builder.getInt8Ty();
1091 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1099 unsigned Reserve2Flags) {
1101 LocFlags |= OMP_IDENT_FLAG_KMPC;
1108 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1109 ConstantInt::get(Int32, Reserve2Flags),
1110 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1112 size_t SrcLocStrArgIdx = 4;
1113 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1117 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1124 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1125 if (
GV.getInitializer() == Initializer)
1130 M, OpenMPIRBuilder::Ident,
1133 M.getDataLayout().getDefaultGlobalsAddressSpace());
1145 SrcLocStrSize = LocStr.
size();
1154 if (
GV.isConstant() &&
GV.hasInitializer() &&
1155 GV.getInitializer() == Initializer)
1158 SrcLocStr =
Builder.CreateGlobalString(
1159 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1167 unsigned Line,
unsigned Column,
1173 Buffer.
append(FunctionName);
1175 Buffer.
append(std::to_string(Line));
1177 Buffer.
append(std::to_string(Column));
1185 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1196 !DIL->getFilename().empty() ? DIL->getFilename() :
M.getName();
1201 DIL->getColumn(), SrcLocStrSize);
1207 Loc.IP.getBlock()->getParent());
1213 "omp_global_thread_num");
1218 bool ForceSimpleCall,
bool CheckCancelFlag) {
1228 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1231 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1234 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1237 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1240 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1253 bool UseCancelBarrier =
1258 ? OMPRTL___kmpc_cancel_barrier
1259 : OMPRTL___kmpc_barrier),
1262 if (UseCancelBarrier && CheckCancelFlag)
1272 omp::Directive CanceledDirective) {
1277 auto *UI =
Builder.CreateUnreachable();
1285 Builder.SetInsertPoint(ElseTI);
1286 auto ElseIP =
Builder.saveIP();
1294 Builder.SetInsertPoint(ThenTI);
1296 Value *CancelKind =
nullptr;
1297 switch (CanceledDirective) {
1298#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1299 case DirectiveEnum: \
1300 CancelKind = Builder.getInt32(Value); \
1302#include "llvm/Frontend/OpenMP/OMPKinds.def"
1319 Builder.SetInsertPoint(UI->getParent());
1320 UI->eraseFromParent();
1327 omp::Directive CanceledDirective) {
1332 auto *UI =
Builder.CreateUnreachable();
1335 Value *CancelKind =
nullptr;
1336 switch (CanceledDirective) {
1337#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1338 case DirectiveEnum: \
1339 CancelKind = Builder.getInt32(Value); \
1341#include "llvm/Frontend/OpenMP/OMPKinds.def"
1358 Builder.SetInsertPoint(UI->getParent());
1359 UI->eraseFromParent();
1372 auto *KernelArgsPtr =
1373 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1378 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1381 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1385 NumThreads, HostPtr, KernelArgsPtr};
1412 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1416 Value *Return =
nullptr;
1436 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1437 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1444 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1446 auto CurFn =
Builder.GetInsertBlock()->getParent();
1453 emitBlock(OffloadContBlock, CurFn,
true);
1458 Value *CancelFlag, omp::Directive CanceledDirective) {
1460 "Unexpected cancellation!");
1480 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1489 Builder.SetInsertPoint(CancellationBlock);
1490 Builder.CreateBr(*FiniBBOrErr);
1493 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1505 size_t NumArgs = OutlinedFn.
arg_size();
1506 assert((NumArgs == 2 || NumArgs == 3) &&
1507 "expected a 2-3 argument parallel outlined function");
1508 bool UseArgStruct = NumArgs == 3;
1513 {Builder.getInt16Ty(), Builder.getInt32Ty()},
1517 OutlinedFn.
getName() +
".wrapper", OMPIRBuilder->
M);
1519 WrapperFn->addParamAttr(0, Attribute::NoUndef);
1520 WrapperFn->addParamAttr(0, Attribute::ZExt);
1521 WrapperFn->addParamAttr(1, Attribute::NoUndef);
1525 Builder.SetInsertPoint(EntryBB);
1528 Value *AddrAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1530 AddrAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1531 AddrAlloca, Builder.getPtrTy(0),
1532 AddrAlloca->
getName() +
".ascast");
1534 Value *ZeroAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1536 ZeroAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1537 ZeroAlloca, Builder.getPtrTy(0),
1538 ZeroAlloca->
getName() +
".ascast");
1540 Value *ArgsAlloca =
nullptr;
1542 ArgsAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
1543 nullptr,
"global_args");
1544 ArgsAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1545 ArgsAlloca, Builder.getPtrTy(0),
1546 ArgsAlloca->
getName() +
".ascast");
1550 Builder.CreateStore(WrapperFn->getArg(1), AddrAlloca);
1551 Builder.CreateStore(Builder.getInt32(0), ZeroAlloca);
1555 llvm::omp::RuntimeFunction::OMPRTL___kmpc_get_shared_variables),
1563 Value *StructArg = Builder.CreateLoad(Builder.getPtrTy(), ArgsAlloca);
1564 StructArg = Builder.CreateInBoundsGEP(Builder.getPtrTy(), StructArg,
1565 {Builder.getInt64(0)});
1566 StructArg = Builder.CreateLoad(Builder.getPtrTy(), StructArg,
"structArg");
1567 Args.push_back(StructArg);
1571 Builder.CreateCall(&OutlinedFn, Args);
1572 Builder.CreateRetVoid();
1587 "Expected at least tid and bounded tid as arguments");
1588 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1596 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1599 assert(CI &&
"Expected call instruction to outlined function");
1600 CI->
getParent()->setName(
"omp_parallel");
1602 Builder.SetInsertPoint(CI);
1603 Type *PtrTy = OMPIRBuilder->VoidPtr;
1606 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1610 Value *Args = ArgsAlloca;
1614 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1615 Builder.restoreIP(CurrentIP);
1618 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1620 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1622 Builder.CreateStore(V, StoreAddress);
1626 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1627 : Builder.getInt32(1);
1628 Value *NumThreadsArg =
1629 NumThreads ? Builder.CreateZExtOrTrunc(NumThreads, OMPIRBuilder->Int32)
1630 : Builder.getInt32(-1);
1640 Value *Parallel60CallArgs[] = {
1645 Builder.getInt32(-1),
1649 Builder.getInt64(NumCapturedVars),
1650 Builder.getInt32(0)};
1658 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1661 Builder.SetInsertPoint(PrivTID);
1663 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1670 I->eraseFromParent();
1693 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1701 F->addMetadata(LLVMContext::MD_callback,
1710 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1713 "Expected at least tid and bounded tid as arguments");
1714 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1717 CI->
getParent()->setName(
"omp_parallel");
1718 Builder.SetInsertPoint(CI);
1721 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1725 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1727 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1734 auto PtrTy = OMPIRBuilder->VoidPtr;
1735 if (IfCondition && NumCapturedVars == 0) {
1743 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1746 Builder.SetInsertPoint(PrivTID);
1748 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1755 I->eraseFromParent();
1763 Value *NumThreads, omp::ProcBindKind ProcBind,
bool IsCancellable) {
1772 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1773 (ProcBind != OMP_PROC_BIND_default);
1780 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1784 if (NumThreads && !
Config.isTargetDevice()) {
1787 Builder.CreateIntCast(NumThreads, Int32,
false)};
1792 if (ProcBind != OMP_PROC_BIND_default) {
1796 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1818 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1821 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1824 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1828 PointerType ::get(
M.getContext(), 0),
1829 "zero.addr.ascast");
1853 if (IP.getBlock()->end() == IP.getPoint()) {
1859 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1860 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1861 "Unexpected insertion point for finalization call!");
1873 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1879 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1897 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1900 assert(BodyGenCB &&
"Expected body generation callback!");
1902 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, PRegExitBB))
1905 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1909 bool UsesDeviceSharedMemory =
1911 std::unique_ptr<OutlineInfo> OI =
1912 UsesDeviceSharedMemory
1913 ? std::make_unique<DeviceSharedMemOutlineInfo>(*
this)
1914 : std::make_unique<OutlineInfo>();
1916 if (
Config.isTargetDevice()) {
1918 OI->PostOutlineCB = [=, ToBeDeletedVec =
1919 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1921 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1922 ThreadID, ToBeDeletedVec);
1926 OI->PostOutlineCB = [=, ToBeDeletedVec =
1927 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1929 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1933 OI->FixUpNonEntryAllocas =
true;
1934 OI->OuterAllocBB = OuterAllocaBlock;
1935 OI->EntryBB = PRegEntryBB;
1936 OI->ExitBB = PRegExitBB;
1937 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
1938 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
1942 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
1954 ".omp_par", ArgsInZeroAddressSpace);
1959 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1961 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1966 return GV->getValueType() == OpenMPIRBuilder::Ident;
1971 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1977 if (&V == TIDAddr || &V == ZeroAddr) {
1978 OI->ExcludeArgsFromAggregate.push_back(&V);
1983 for (
Use &U : V.uses())
1985 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1995 if (!V.getType()->isPointerTy()) {
1999 Builder.restoreIP(OuterAllocIP);
2001 if (UsesDeviceSharedMemory) {
2004 V.getName() +
".reloaded");
2005 for (
BasicBlock *DeallocBlock : OuterDeallocBlocks)
2007 InsertPointTy(DeallocBlock, DeallocBlock->getFirstInsertionPt()),
2010 Ptr =
Builder.CreateAlloca(V.getType(),
nullptr,
2011 V.getName() +
".reloaded");
2016 Builder.SetInsertPoint(InsertBB,
2021 Builder.restoreIP(InnerAllocaIP);
2022 Inner =
Builder.CreateLoad(V.getType(), Ptr);
2025 Value *ReplacementValue =
nullptr;
2028 ReplacementValue = PrivTID;
2031 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
2039 assert(ReplacementValue &&
2040 "Expected copy/create callback to set replacement value!");
2041 if (ReplacementValue == &V)
2046 UPtr->set(ReplacementValue);
2071 for (
Value *Output : Outputs)
2075 "OpenMP outlining should not produce live-out values!");
2077 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
2079 for (
auto *BB : Blocks)
2080 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
2088 assert(FiniInfo.DK == OMPD_parallel &&
2089 "Unexpected finalization stack state!");
2100 Builder.CreateBr(*FiniBBOrErr);
2104 Term->eraseFromParent();
2110 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
2111 UI->eraseFromParent();
2174 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2176 Builder.CreateStore(DepValPtr, Addr);
2179 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2181 ConstantInt::get(SizeTy,
2186 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Flags));
2188 static_cast<unsigned int>(Dep.
DepKind)),
2201 if (Dependencies.
empty())
2221 Type *DependInfo = OMPBuilder.DependInfo;
2223 Value *DepArray =
nullptr;
2225 Builder.SetInsertPoint(
2229 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2231 Builder.restoreIP(OldIP);
2233 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2235 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2242Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2244 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2259 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2263 "omp_taskloop_dup",
M);
2266 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2267 DestTaskArg->
setName(
"dest_task");
2268 SrcTaskArg->
setName(
"src_task");
2269 LastprivateFlagArg->
setName(
"lastprivate_flag");
2271 IRBuilderBase::InsertPointGuard Guard(
Builder);
2275 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2276 Type *TaskWithPrivatesTy =
2279 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2281 PrivatesTy, TaskPrivates,
2286 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2287 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2289 DestTaskContextPtr->
setName(
"destPtr");
2290 SrcTaskContextPtr->
setName(
"srcPtr");
2295 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2296 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2297 if (!AfterIPOrError)
2299 Builder.restoreIP(*AfterIPOrError);
2309 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2311 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2313 Value *TaskContextStructPtrVal) {
2318 uint32_t SrcLocStrSize;
2334 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP, TaskloopExitBB))
2337 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2342 llvm::CanonicalLoopInfo *CLI = result.
get();
2343 auto OI = std::make_unique<OutlineInfo>();
2344 OI->EntryBB = TaskloopAllocaBB;
2345 OI->OuterAllocBB = AllocaIP.getBlock();
2346 OI->ExitBB = TaskloopExitBB;
2347 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2348 copy(DeallocBlocks, OI->OuterDeallocBBs.end());
2354 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2356 TaskloopAllocaIP,
"lb",
false,
true);
2358 TaskloopAllocaIP,
"ub",
false,
true);
2360 TaskloopAllocaIP,
"step",
false,
true);
2363 OI->Inputs.insert(FakeLB);
2364 OI->Inputs.insert(FakeUB);
2365 OI->Inputs.insert(FakeStep);
2366 if (TaskContextStructPtrVal)
2367 OI->Inputs.insert(TaskContextStructPtrVal);
2368 assert(((TaskContextStructPtrVal && DupCB) ||
2369 (!TaskContextStructPtrVal && !DupCB)) &&
2370 "Task context struct ptr and duplication callback must be both set "
2376 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2380 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2381 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2384 if (!TaskDupFnOrErr) {
2387 Value *TaskDupFn = *TaskDupFnOrErr;
2389 OI->PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2390 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2391 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2392 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2393 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2395 assert(OutlinedFn.hasOneUse() &&
2396 "there must be a single user for the outlined function");
2403 Value *CastedLBVal =
2404 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2405 Value *CastedUBVal =
2406 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2407 Value *CastedStepVal =
2408 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2410 Builder.SetInsertPoint(StaleCI);
2423 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2444 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2446 AllocaInst *ArgStructAlloca =
2448 assert(ArgStructAlloca &&
2449 "Unable to find the alloca instruction corresponding to arguments "
2450 "for extracted function");
2451 std::optional<TypeSize> ArgAllocSize =
2454 "Unable to determine size of arguments for extracted function");
2455 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2460 CallInst *TaskData =
Builder.CreateCall(
2461 TaskAllocFn, {Ident, ThreadID,
Flags,
2462 TaskSize, SharedsSize,
2467 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2468 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2473 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2476 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2479 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2485 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2491 Value *GrainSizeVal =
2492 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2494 Value *TaskDup = TaskDupFn;
2496 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2497 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2502 Builder.CreateCall(TaskloopFn, Args);
2509 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2514 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2516 LoadInst *SharedsOutlined =
2517 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2518 OutlinedFn.getArg(1)->replaceUsesWithIf(
2520 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2523 Type *IVTy =
IV->getType();
2529 Value *TaskLB =
nullptr;
2530 Value *TaskUB =
nullptr;
2531 Value *TaskStep =
nullptr;
2532 Value *LoadTaskLB =
nullptr;
2533 Value *LoadTaskUB =
nullptr;
2534 Value *LoadTaskStep =
nullptr;
2535 for (Instruction &
I : *TaskloopAllocaBB) {
2536 if (
I.getOpcode() == Instruction::GetElementPtr) {
2539 switch (CI->getZExtValue()) {
2551 }
else if (
I.getOpcode() == Instruction::Load) {
2553 if (
Load.getPointerOperand() == TaskLB) {
2554 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2556 }
else if (
Load.getPointerOperand() == TaskUB) {
2557 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2559 }
else if (
Load.getPointerOperand() == TaskStep) {
2560 assert(TaskStep !=
nullptr &&
"Expected value for TaskStep");
2566 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2568 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2569 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2570 assert(LoadTaskStep !=
nullptr &&
"Expected value for LoadTaskStep");
2572 Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
2573 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2574 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2575 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2577 CLI->setTripCount(CastedTripCount);
2579 Builder.SetInsertPoint(CLI->getBody(),
2580 CLI->getBody()->getFirstInsertionPt());
2582 if (NumOfCollapseLoops > 1) {
2588 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2591 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2592 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2593 User *IVUser = IVUse->getUser();
2595 if (
Op->getOpcode() == Instruction::URem ||
2596 Op->getOpcode() == Instruction::UDiv) {
2601 for (User *User : UsersToReplace) {
2602 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2619 assert(CLI->getIndVar()->getNumUses() == 3 &&
2620 "Canonical loop should have exactly three uses of the ind var");
2621 for (User *IVUser : CLI->getIndVar()->users()) {
2623 if (
Mul->getOpcode() == Instruction::Mul) {
2624 for (User *MulUser :
Mul->users()) {
2626 if (
Add->getOpcode() == Instruction::Add) {
2627 Add->setOperand(1, CastedTaskLB);
2636 FakeLB->replaceAllUsesWith(CastedLBVal);
2637 FakeUB->replaceAllUsesWith(CastedUBVal);
2638 FakeStep->replaceAllUsesWith(CastedStepVal);
2640 I->eraseFromParent();
2645 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2651 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2661 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2693 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskExitBB))
2696 auto OI = std::make_unique<OutlineInfo>();
2697 OI->EntryBB = TaskAllocaBB;
2698 OI->OuterAllocBB = AllocaIP.
getBlock();
2699 OI->ExitBB = TaskExitBB;
2700 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2701 copy(DeallocBlocks, OI->OuterDeallocBBs.
end());
2706 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2708 OI->PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2709 Affinities, Mergeable, Priority, EventHandle,
2711 ToBeDeleted](
Function &OutlinedFn)
mutable {
2713 assert(OutlinedFn.hasOneUse() &&
2714 "there must be a single user for the outlined function");
2719 bool HasShareds = StaleCI->
arg_size() > 1;
2720 Builder.SetInsertPoint(StaleCI);
2745 bool UseMergedIf0Path = ConstIfCondition && ConstIfCondition->isZero();
2749 Flags =
Builder.CreateOr(FinalFlag, Flags);
2752 if (Mergeable || UseMergedIf0Path)
2764 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2773 assert(ArgStructAlloca &&
2774 "Unable to find the alloca instruction corresponding to arguments "
2775 "for extracted function");
2776 std::optional<TypeSize> ArgAllocSize =
2779 "Unable to determine size of arguments for extracted function");
2780 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2786 TaskAllocFn, {Ident, ThreadID, Flags,
2787 TaskSize, SharedsSize,
2790 if (Affinities.
Count && Affinities.
Info) {
2792 OMPRTL___kmpc_omp_reg_task_with_affinity);
2803 OMPRTL___kmpc_task_allow_completion_event);
2807 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2809 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2810 Builder.CreateStore(EventVal, EventHandleAddr);
2816 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2817 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2835 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2838 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2840 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2843 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2844 PriorityData, {Zero, Zero});
2845 Builder.CreateStore(Priority, CmplrData);
2848 Value *DepArray =
nullptr;
2849 Value *NumDeps =
nullptr;
2852 NumDeps = Dependencies.
NumDeps;
2853 }
else if (!Dependencies.
Deps.empty()) {
2855 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
2875 if (IfCondition && !UseMergedIf0Path) {
2880 Builder.GetInsertPoint()->getParent()->getTerminator();
2881 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2882 Builder.SetInsertPoint(IfTerminator);
2885 Builder.SetInsertPoint(ElseTI);
2892 {Ident, ThreadID, NumDeps, DepArray,
2893 ConstantInt::get(
Builder.getInt32Ty(), 0),
2908 Builder.SetInsertPoint(ThenTI);
2916 {Ident, ThreadID, TaskData, NumDeps, DepArray,
2917 ConstantInt::get(
Builder.getInt32Ty(), 0),
2928 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2930 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2931 OutlinedFn.getArg(1)->replaceUsesWithIf(
2932 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2936 I->eraseFromParent();
2940 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2962 if (
Error Err = BodyGenCB(AllocaIP,
Builder.saveIP(), DeallocBlocks))
2965 Builder.SetInsertPoint(TaskgroupExitBB);
3008 unsigned CaseNumber = 0;
3009 for (
auto SectionCB : SectionCBs) {
3011 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
3013 Builder.SetInsertPoint(CaseBB);
3017 {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {}))
3028 Value *LB = ConstantInt::get(I32Ty, 0);
3029 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
3030 Value *ST = ConstantInt::get(I32Ty, 1);
3032 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
3037 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
3038 WorksharingLoopType::ForStaticLoop, !IsNowait);
3044 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
3048 assert(FiniInfo.DK == OMPD_sections &&
3049 "Unexpected finalization stack state!");
3050 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
3064 if (IP.getBlock()->end() != IP.getPoint())
3075 auto *CaseBB =
Loc.IP.getBlock();
3076 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
3077 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
3083 Directive OMPD = Directive::OMPD_sections;
3086 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
/// Produces the value used throughout this file as the GPU thread ID (it is
/// consumed by getNVPTXWarpID / getNVPTXLaneID below).
// NOTE(review): the only part of the body visible here is the runtime entry
// enumerator OMPRTL___kmpc_get_hardware_thread_id_in_block; presumably a call
// to that device-runtime function is emitted and its result returned - confirm.
3097Value *OpenMPIRBuilder::getGPUThreadID() {
3100 OMPRTL___kmpc_get_hardware_thread_id_in_block),
3104Value *OpenMPIRBuilder::getGPUWarpSize() {
/// Emits IR computing the warp index of the current thread: the GPU thread ID
/// shifted right by log2 of the configured warp size (GV_Warp_Size). The
/// resulting value is named "nvptx_warp_id".
3109Value *OpenMPIRBuilder::getNVPTXWarpID() {
// Number of low thread-ID bits that select the lane inside a warp.
3110 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
// Discarding the lane bits leaves the warp index. Note this is an arithmetic
// (sign-propagating) shift of the thread ID.
3111 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
/// Emits IR computing the lane index of the current thread inside its warp by
/// AND-ing the GPU thread ID with a mask of the low log2(GV_Warp_Size) bits.
3114Value *OpenMPIRBuilder::getNVPTXLaneID() {
// Number of low thread-ID bits that select the lane inside a warp.
3115 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3116 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
// Build a mask with the low LaneIDBits bits set.
// NOTE(review): the assert above only bounds LaneIDBits from above. If it were
// 0 (warp size 1) the shift amount would be 32, which is undefined for a 32-bit
// unsigned operand - confirm the warp size is always greater than 1, or use
// (1u << LaneIDBits) - 1u.
3117 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
3118 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
3125 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
3126 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
3127 assert(FromSize > 0 &&
"From size must be greater than zero");
3128 assert(ToSize > 0 &&
"To size must be greater than zero");
3129 if (FromType == ToType)
3131 if (FromSize == ToSize)
3132 return Builder.CreateBitCast(From, ToType);
3134 return Builder.CreateIntCast(From, ToType,
true);
3140 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3141 CastItem,
Builder.getPtrTy(0));
3142 Builder.CreateStore(From, ValCastItem);
3143 return Builder.CreateLoad(ToType, CastItem);
3150 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
3151 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
3155 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
3157 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
3159 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
3160 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
3161 Value *WarpSizeCast =
3163 Value *ShuffleCall =
3165 return castValueToType(AllocaIP, ShuffleCall, CastTy);
3172 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
3184 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3185 Value *ElemPtr = DstAddr;
3186 Value *Ptr = SrcAddr;
3187 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
3191 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3194 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
3195 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3199 if ((
Size / IntSize) > 1) {
3200 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3201 SrcAddrGEP,
Builder.getPtrTy());
3218 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
3220 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
3223 Value *Res = createRuntimeShuffleFunction(
3226 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
3228 Builder.CreateAlignedStore(Res, ElemPtr,
3229 M.getDataLayout().getPrefTypeAlign(ElemType));
3231 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3232 Value *LocalElemPtr =
3233 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3239 Value *Res = createRuntimeShuffleFunction(
3240 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3243 Res =
Builder.CreateTrunc(Res, ElemType);
3244 Builder.CreateStore(Res, ElemPtr);
3245 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3247 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
/// Emits IR that walks ReductionInfos and, for every entry, transfers the
/// element referenced by slot En.index() of the list at SrcBase to the element
/// referenced by the same slot of the list at DestBase. Depending on the flags
/// computed per element, the value is either routed through shuffleAndStore
/// (using CopyOptions.RemoteLaneOffset) or copied directly according to
/// RI.EvaluationKind. Failures reported through GenResult are returned to the
/// caller as an Error.
3253Error OpenMPIRBuilder::emitReductionListCopy(
3258 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3259 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
// One iteration per reduction variable; En.index() is the slot in both lists.
3263 for (
auto En :
enumerate(ReductionInfos)) {
3265 Value *SrcElementAddr =
nullptr;
3266 AllocaInst *DestAlloca =
nullptr;
3267 Value *DestElementAddr =
nullptr;
3268 Value *DestElementPtrAddr =
nullptr;
// When set, the element is transferred with shuffleAndStore instead of a
// plain copy (see the branch on this flag below).
3270 bool ShuffleInElement =
false;
// When set, the destination list slot is overwritten with the address of the
// destination element at the end of the iteration.
3273 bool UpdateDestListPtr =
false;
// Read the source element's address out of slot En.index() of the source list.
3277 ReductionArrayTy, SrcBase,
3278 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3279 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
// Address of the matching slot in the destination list.
3283 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3284 ReductionArrayTy, DestBase,
3285 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// An empty IsByRef means no element is treated as by-reference.
3286 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
// Fresh storage for the incoming element; by-ref elements are allocated with
// RI.ByRefAllocatedType rather than RI.ElementType.
3292 Type *DestAllocaType =
3293 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3294 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3295 ".omp.reduction.element");
3297 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3298 DestElementAddr = DestAlloca;
3301 DestElementAddr->
getName() +
".ascast");
3303 ShuffleInElement =
true;
3304 UpdateDestListPtr =
true;
3316 if (ShuffleInElement) {
// Defaults: shuffle the element type straight from source to destination.
3317 Type *ShuffleType = RI.ElementType;
3318 Value *ShuffleSrcAddr = SrcElementAddr;
3319 Value *ShuffleDestAddr = DestElementAddr;
3320 AllocaInst *LocalStorage =
nullptr;
3323 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3324 assert(RI.ByRefAllocatedType &&
3325 "Expected by-ref allocated type to be set");
// By-ref elements shuffle the underlying element type instead.
3330 ShuffleType = RI.ByRefElementType;
3332 if (RI.DataPtrPtrGen) {
3335 Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3338 return GenResult.takeError();
// The shuffled data lands in a separate local allocation in this case.
3347 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3349 ShuffleDestAddr = LocalStorage;
3354 ShuffleDestAddr = DestElementAddr;
3358 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3359 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
// NOTE(review): the arguments below (destination, LocalStorage, source,
// allocated type, DataPtrPtrGen) look like a generateReductionDescriptor call
// that rebuilds the descriptor around the shuffled data - confirm.
3361 if (IsByRefElem && RI.DataPtrPtrGen) {
3363 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3364 DestAlloca,
Builder.getPtrTy(),
".ascast");
3367 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3368 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3371 return GenResult.takeError();
// Direct copy path: the copy strategy is chosen by the element's evaluation kind.
3374 switch (RI.EvaluationKind) {
// Whole-value copy: one load and one store of RI.ElementType.
3376 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3378 Builder.CreateStore(Elem, DestElementAddr);
// Two-field copy: struct fields 0 (".real") and 1 (".imag") are moved one by one.
3382 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3383 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3385 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3387 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3389 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3391 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3392 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3393 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3394 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3395 Builder.CreateStore(SrcReal, DestRealPtr);
3396 Builder.CreateStore(SrcImg, DestImgPtr);
// NOTE(review): size and alignment arguments of what is presumably a memcpy of
// the whole element (store size of RI.ElementType) - confirm.
3401 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3403 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3404 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Publish the destination element's address in the destination list slot.
3416 if (UpdateDestListPtr) {
3417 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3418 DestElementAddr,
Builder.getPtrTy(),
3419 DestElementAddr->
getName() +
".ascast");
3420 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
/// Builds the module-level helper "_omp_reduction_inter_warp_copy_func". The
/// visible signature pieces are a void return type and (ptr, i32) parameters,
/// referred to in the body as ReduceListArg and NumWarpsArg. For every
/// reduction element the generated code moves the data in chunks of 4, 2 and
/// 1 bytes: lane 0 of each warp stores its chunk into the global
/// "__openmp_nvptx_data_transfer_temporary_storage" at index WarpID, and
/// threads whose ID is below the number of warps read the chunk at their own
/// thread ID back into their reduce list element. Errors from the barrier and
/// generator steps are returned to the caller.
3427Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3431 LLVMContext &Ctx =
M.getContext();
3433 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3437 "_omp_reduction_inter_warp_copy_func", &
M);
3443 Builder.SetInsertPoint(EntryBB);
// The staging global is looked up by name and only created when the module
// does not have it yet.
3460 StringRef TransferMediumName =
3461 "__openmp_nvptx_data_transfer_temporary_storage";
3462 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3463 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3465 if (!TransferMedium) {
3466 TransferMedium =
new GlobalVariable(
3474 Value *GPUThreadID = getGPUThreadID();
3476 Value *LaneID = getNVPTXLaneID();
3478 Value *WarpID = getNVPTXWarpID();
3482 Builder.GetInsertBlock()->getFirstInsertionPt());
// Spill both arguments to stack slots ("<arg>.addr") and store them.
3486 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3487 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3488 AllocaInst *NumWarpsAlloca =
3489 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3490 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3491 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3492 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3493 NumWarpsAlloca,
Builder.getPtrTy(0),
3494 NumWarpsAlloca->
getName() +
".ascast");
3495 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3496 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
// One pass per reduction variable.
3505 for (
auto En :
enumerate(ReductionInfos)) {
3511 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
// Bytes still to be transferred for this element (alloc size of the by-ref
// element type or of the plain element type).
3512 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3513 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
// Try chunk widths 4, 2, 1; each width handles as many whole chunks as fit and
// leaves the remainder (RealTySize %= TySize below) for the next width.
3514 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3517 unsigned NumIters = RealTySize / TySize;
3520 Value *Cnt =
nullptr;
3521 Value *CntAddr =
nullptr;
// Counter slot (".cnt.addr") for the emitted chunk loop.
3528 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3530 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3531 CntAddr->
getName() +
".ascast");
// Emitted loop condition compares the counter against NumIters.
3543 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3544 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
// NOTE(review): presumably a barrier is emitted here (OMPD_unknown directive,
// result checked through BarrierIP1) - confirm.
3551 omp::Directive::OMPD_unknown,
3555 return BarrierIP1.takeError();
// Only the thread whose lane ID is zero writes to the staging buffer.
3561 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3562 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3566 auto *RedListArrayTy =
3569 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
// Slot En.index() of the thread's reduce list.
3571 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3572 {ConstantInt::get(IndexTy, 0),
3573 ConstantInt::get(IndexTy, En.index())});
3577 if (IsByRefElem && RI.DataPtrPtrGen) {
3579 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3582 return GenRes.takeError();
// Staging slot for this warp: TransferMedium[0][WarpID].
3593 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3598 Builder.CreateStore(Elem, MediumPtr,
// NOTE(review): presumably a second barrier separating the writes above from
// the reads below (checked through BarrierIP2) - confirm.
3610 omp::Directive::OMPD_unknown,
3614 return BarrierIP2.takeError();
3621 Value *NumWarpsVal =
// Only threads with ID < number of warps read back from the staging buffer.
3624 Value *IsActiveThread =
3625 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3626 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
// Staging slot read by this thread: TransferMedium[0][GPUThreadID].
3633 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3635 Value *TargetElemPtrPtr =
3636 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3637 {ConstantInt::get(IndexTy, 0),
3638 ConstantInt::get(IndexTy, En.index())});
3639 Value *TargetElemPtrVal =
3641 Value *TargetElemPtr = TargetElemPtrVal;
3643 if (IsByRefElem && RI.DataPtrPtrGen) {
3645 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3648 return GenRes.takeError();
3650 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
// The trailing `true` selects the volatile form of the load from the staging slot.
3658 Value *SrcMediumValue =
3659 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3660 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
// Advance the emitted chunk counter by one and store it back (non-volatile).
3670 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3671 Builder.CreateStore(Cnt, CntAddr,
false);
3673 auto *CurFn =
Builder.GetInsertBlock()->getParent();
// Bytes not covered by this chunk width carry over to the next, smaller width.
3677 RealTySize %= TySize;
/// Builds the module-level helper "_omp_reduction_shuffle_and_reduce_func"
/// with parameter types (ptr, i16, i16, i16); the body refers to the arguments
/// as ReduceListArg, LaneIDArg, RemoteLaneOffsetArg and AlgoVerArg. The
/// generated code (1) copies the reduce list into a local
/// ".omp.reduction.remote_reduce_list" via emitReductionListCopy using the
/// remote lane offset, (2) branches on CondReduce, an OR of three
/// per-algorithm conditions, and (3) branches on CondCopy to copy the remote
/// list back over the local one. Errors from emitReductionListCopy are
/// returned to the caller.
3687Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3690 LLVMContext &Ctx =
M.getContext();
3691 FunctionType *FuncTy =
3693 {Builder.getPtrTy(), Builder.getInt16Ty(),
3694 Builder.getInt16Ty(), Builder.getInt16Ty()},
3698 "_omp_reduction_shuffle_and_reduce_func", &
M);
3709 Builder.SetInsertPoint(EntryBB);
3720 Type *ReduceListArgType = ReduceListArg->
getType();
// Stack slots ("<arg>.addr") for the arguments; the three i16 arguments all
// use LaneIDArgType.
3724 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3725 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3726 LaneIDArg->
getName() +
".addr");
3728 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3729 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3730 AlgoVerArg->
getName() +
".addr");
// Local list that receives the elements fetched from the remote lane.
3737 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
// Cast every slot address before use (".ascast" suffix on each result).
3739 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3740 ReduceListAlloca, ReduceListArgType,
3741 ReduceListAlloca->
getName() +
".ascast");
3742 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3743 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3744 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3745 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3746 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3747 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3748 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3749 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3750 RemoteReductionListAlloca,
Builder.getPtrTy(),
3751 RemoteReductionListAlloca->
getName() +
".ascast");
// Store the incoming arguments into their slots, then reload them as values.
3753 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3754 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3755 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3756 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3758 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3759 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3760 Value *RemoteLaneOffset =
3761 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3762 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
// First list copy, parameterised with the remote lane offset.
// NOTE(review): argument order looks like source = ReduceList, destination =
// RemoteListAddrCast - confirm against the emitReductionListCopy signature.
3769 Error EmitRedLsCpRes = emitReductionListCopy(
3771 ReduceList, RemoteListAddrCast, IsByRef,
3772 {RemoteLaneOffset,
nullptr,
nullptr});
3775 return EmitRedLsCpRes;
// Build the "should this lane reduce" predicate from the per-algorithm terms.
3800 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3805 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3806 Value *RemoteOffsetComp =
3808 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3809 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3810 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3816 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3818 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3819 ReduceList,
Builder.getPtrTy());
3820 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3821 RemoteListAddrCast,
Builder.getPtrTy());
// NOTE(review): presumably the call on the two list pointers above is what is
// marked nounwind here - confirm.
3823 ->addFnAttr(Attribute::NoUnwind);
// Copy-back predicate. Despite the "Gt" in the name, the comparison emitted is
// unsigned greater-or-equal.
3834 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3835 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3840 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
// Second list copy in the opposite direction, without copy options.
3844 EmitRedLsCpRes = emitReductionListCopy(
3846 RemoteListAddrCast, ReduceList, IsByRef);
3849 return EmitRedLsCpRes;
3864OpenMPIRBuilder::generateReductionDescriptor(
3866 Type *DescriptorType,
3872 Value *DescriptorSize =
3873 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3875 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3876 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3880 Value *DataPtrField;
3882 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3885 return GenResult.takeError();
3888 DataPtr,
Builder.getPtrTy(),
".ascast"),
/// Builds the module-level helper "_omp_reduction_list_to_global_copy_func"
/// with parameter types (ptr, i32, ptr); the body refers to the arguments as
/// BufferArg, IdxArg and ReduceListArg. For every reduction element the
/// generated code copies the value reachable from the local reduce list slot
/// (ElemPtr) into the corresponding field of the global buffer record
/// (GlobVal), using a copy strategy selected by RI.EvaluationKind.
3894Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3898 LLVMContext &Ctx =
M.getContext();
3901 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3905 "_omp_reduction_list_to_global_copy_func", &
M);
3912 Builder.SetInsertPoint(EntryBlock);
// Stack slots ("<arg>.addr") for the three arguments.
3922 BufferArg->
getName() +
".addr");
3926 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3927 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3928 BufferArgAlloca,
Builder.getPtrTy(),
3929 BufferArgAlloca->
getName() +
".ascast");
3930 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3931 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3932 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3933 ReduceListArgAlloca,
Builder.getPtrTy(),
3934 ReduceListArgAlloca->
getName() +
".ascast");
3936 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3937 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3938 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3940 Value *LocalReduceList =
3942 Value *BufferArgVal =
3946 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
// One copy per reduction variable.
3947 for (
auto En :
enumerate(ReductionInfos)) {
3949 auto *RedListArrayTy =
// Slot En.index() of the local reduce list.
3953 RedListArrayTy, LocalReduceList,
3954 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// Record selected by Idxs inside the global buffer, then field En.index() of it.
3960 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3962 ReductionsBufferTy, BufferVD, 0, En.index());
3964 switch (RI.EvaluationKind) {
3966 Value *TargetElement;
// An empty IsByRef is treated as "every element is by value".
3968 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3969 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
// By-ref element: optionally let the callback rewrite ElemPtr, then load the
// by-ref element type.
3971 if (RI.DataPtrPtrGen) {
3973 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3976 return GenResult.takeError();
3980 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3983 Builder.CreateStore(TargetElement, GlobVal);
// Two-field copy: struct fields 0 (".real") and 1 (".imag") are moved one by one.
3987 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3988 RI.ElementType, ElemPtr, 0, 0,
".realp");
3990 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3992 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3994 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3996 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3997 RI.ElementType, GlobVal, 0, 0,
".realp");
3998 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3999 RI.ElementType, GlobVal, 0, 1,
".imagp");
4000 Builder.CreateStore(SrcReal, DestRealPtr);
4001 Builder.CreateStore(SrcImg, DestImgPtr);
// NOTE(review): size/alignment arguments of what is presumably a memcpy of the
// whole element from ElemPtr to GlobVal - confirm.
4006 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
4008 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
4009 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
/// Builds the module-level helper "_omp_reduction_list_to_global_reduce_func"
/// with parameter types (ptr, i32, ptr); the body refers to the arguments as
/// BufferArg, IdxArg and ReduceListArg. The generated code fills a local
/// ".omp.reduction.red_list" whose slots point either directly at the fields
/// of the selected global buffer record or, for by-ref elements that have a
/// DataPtrPtrGen callback, at a freshly built descriptor.
4020Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
4024 LLVMContext &Ctx =
M.getContext();
4027 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4031 "_omp_reduction_list_to_global_reduce_func", &
M);
4038 Builder.SetInsertPoint(EntryBlock);
// Stack slots ("<arg>.addr") for the three arguments.
4048 BufferArg->
getName() +
".addr");
4052 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4053 auto *RedListArrayTy =
// Local list of element pointers that stands in for the global buffer record.
4058 Value *LocalReduceList =
4059 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4063 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4064 BufferArgAlloca,
Builder.getPtrTy(),
4065 BufferArgAlloca->
getName() +
".ascast");
4066 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4067 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4068 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4069 ReduceListArgAlloca,
Builder.getPtrTy(),
4070 ReduceListArgAlloca->
getName() +
".ascast");
4071 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4072 LocalReduceList,
Builder.getPtrTy(),
4073 LocalReduceList->
getName() +
".ascast");
4075 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4076 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4077 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4082 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
// One list slot per reduction variable.
4083 for (
auto En :
enumerate(ReductionInfos)) {
// Slot En.index() of the local list.
4086 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4087 RedListArrayTy, LocalReduceListAddrCast,
4088 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// Field En.index() of the buffer record selected by Idxs.
4090 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4092 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4093 ReductionsBufferTy, BufferVD, 0, En.index());
// By-ref elements with a data-pointer callback get a new descriptor allocation
// that generateReductionDescriptor fills from GlobValPtr and the source
// descriptor; the list slot then points at that descriptor.
4095 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4099 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4100 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4101 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4108 Value *SrcElementPtrPtr =
4109 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
4110 {ConstantInt::get(IndexTy, 0),
4111 ConstantInt::get(IndexTy, En.index())});
4112 Value *SrcDescriptorAddr =
4117 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4118 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4121 return GenResult.takeError();
4123 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
// All other elements: the list slot points straight at the global field.
4125 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// NOTE(review): presumably the emitted call to the reduction function is what
// is marked nounwind here - confirm.
4133 ->addFnAttr(Attribute::NoUnwind);
/// Builds the module-level helper "_omp_reduction_global_to_list_copy_func"
/// with parameter types (ptr, i32, ptr); the body refers to the arguments as
/// BufferArg, IdxArg and ReduceListArg. For every reduction element the
/// generated code copies field En.index() of the selected global buffer
/// record (GlobValPtr) into the element reachable from the local reduce list
/// (ElemPtr) - the opposite direction of the list-to-global copy helper.
4139Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
4143 LLVMContext &Ctx =
M.getContext();
4146 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4150 "_omp_reduction_global_to_list_copy_func", &
M);
4157 Builder.SetInsertPoint(EntryBlock);
// Stack slots ("<arg>.addr") for the three arguments.
4167 BufferArg->
getName() +
".addr");
4171 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4172 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4173 BufferArgAlloca,
Builder.getPtrTy(),
4174 BufferArgAlloca->
getName() +
".ascast");
4175 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4176 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4177 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4178 ReduceListArgAlloca,
Builder.getPtrTy(),
4179 ReduceListArgAlloca->
getName() +
".ascast");
4180 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4181 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4182 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4184 Value *LocalReduceList =
4189 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
// One copy per reduction variable.
4190 for (
auto En :
enumerate(ReductionInfos)) {
4191 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
4192 auto *RedListArrayTy =
// Slot En.index() of the local reduce list.
4196 RedListArrayTy, LocalReduceList,
4197 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// Field En.index() of the buffer record selected by Idxs.
4202 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4203 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4204 ReductionsBufferTy, BufferVD, 0, En.index());
// By-ref elements take a separate path whose generator result can fail.
4210 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4217 return GenResult.takeError();
// Whole-value copy from the global field into the list element.
4223 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
4224 Builder.CreateStore(TargetElement, ElemPtr);
// Two-part copy: SrcReal and SrcImg are stored through separate pointers.
4228 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4237 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4239 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4241 Builder.CreateStore(SrcReal, DestRealPtr);
4242 Builder.CreateStore(SrcImg, DestImgPtr);
// NOTE(review): destination/source and alignment arguments of what is
// presumably a memcpy from GlobValPtr to ElemPtr - confirm.
4249 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4250 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
/// Builds the module-level helper "_omp_reduction_global_to_list_reduce_func"
/// with parameter types (ptr, i32, ptr); the body refers to the arguments as
/// BufferArg, IdxArg and ReduceListArg. The generated code fills a local
/// ".omp.reduction.red_list" whose slots point either directly at the fields
/// of the selected global buffer record or, for by-ref elements that have a
/// DataPtrPtrGen callback, at a freshly built descriptor.
4262Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4266 LLVMContext &Ctx =
M.getContext();
4269 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4273 "_omp_reduction_global_to_list_reduce_func", &
M);
4280 Builder.SetInsertPoint(EntryBlock);
// Stack slots ("<arg>.addr") for the three arguments.
4290 BufferArg->
getName() +
".addr");
4294 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
// Local list of element pointers that stands in for the global buffer record.
4300 Value *LocalReduceList =
4301 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4305 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4306 BufferArgAlloca,
Builder.getPtrTy(),
4307 BufferArgAlloca->
getName() +
".ascast");
4308 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4309 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4310 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4311 ReduceListArgAlloca,
Builder.getPtrTy(),
4312 ReduceListArgAlloca->
getName() +
".ascast");
4313 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4314 LocalReduceList,
Builder.getPtrTy(),
4315 LocalReduceList->
getName() +
".ascast");
4317 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4318 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4319 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4324 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
// One list slot per reduction variable.
4325 for (
auto En :
enumerate(ReductionInfos)) {
// Slot En.index() of the local list.
4328 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4329 RedListArrayTy, ReductionList,
4330 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
// Field En.index() of the buffer record selected by Idxs.
4333 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4334 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4335 ReductionsBufferTy, BufferVD, 0, En.index());
// By-ref elements with a data-pointer callback get a new descriptor allocation
// that generateReductionDescriptor fills from GlobValPtr and the source
// descriptor; the list slot then points at that descriptor.
4337 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4341 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4342 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4343 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4348 Value *ReduceListVal =
4350 Value *SrcElementPtrPtr =
4351 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4352 {ConstantInt::get(IndexTy, 0),
4353 ConstantInt::get(IndexTy, En.index())});
4354 Value *SrcDescriptorAddr =
4359 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4360 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4362 return GenResult.takeError();
4364 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
// All other elements: the list slot points straight at the global field.
4366 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// NOTE(review): presumably the emitted call to the reduction function is what
// is marked nounwind here - confirm.
4374 ->addFnAttr(Attribute::NoUnwind);
/// Returns the name to use for a reduction function: \p Name followed by a
/// suffix string.
// NOTE(review): how Suffix is built is not visible in these lines - confirm
// its contents before relying on the exact resulting name.
4380std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4381 std::string Suffix =
4383 return (Name + Suffix).str();
/// Builds the reduction function named getReductionFuncName(ReducerName) with
/// two pointer parameters (the LHS and RHS reduction lists) and returns it.
/// For each entry of ReductionInfos the generated body addresses slot
/// En.index() of both lists, casts the slot pointers to the types of
/// RI.Variable / RI.PrivateVariable, and stores the combined value back
/// through the LHS pointer for by-value elements. A second loop drives
/// RI.ReductionGenClang and rewires uses of the collected LHS/RHS pointers
/// inside the new function. Errors reported through AfterIP are returned.
4386Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4389 AttributeList FuncAttrs) {
4391 {Builder.getPtrTy(), Builder.getPtrTy()},
4393 std::string
Name = getReductionFuncName(ReducerName);
4402 Builder.SetInsertPoint(EntryBB);
4406 Value *LHSArrayPtr =
nullptr;
4407 Value *RHSArrayPtr =
nullptr;
// Spill both list arguments to stack slots ("<arg>.addr"), then reload them
// as the array pointers used below.
4414 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4416 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4417 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4418 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4419 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4420 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4421 Builder.CreateStore(Arg0, LHSAddrCast);
4422 Builder.CreateStore(Arg1, RHSAddrCast);
4423 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4424 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4428 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
// One reduction step per variable.
4430 for (
auto En :
enumerate(ReductionInfos)) {
// RHS element: slot En.index() of the RHS list, cast to the private
// variable's pointer type.
4433 RedArrayTy, RHSArrayPtr,
4434 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4436 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4437 RHSI8Ptr, RI.PrivateVariable->getType(),
4438 RHSI8Ptr->
getName() +
".ascast");
// LHS element: slot En.index() of the LHS list, cast to the original
// variable's pointer type.
4441 RedArrayTy, LHSArrayPtr,
4442 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4444 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4445 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
// By-value elements are loaded before being combined.
// NOTE(review): with an empty IsByRef this condition is false, so no load
// happens; emitListToGlobalCopyFunction instead uses
// `IsByRef.empty() || !IsByRef[i]`, which treats an empty IsByRef as by-value.
// Confirm which behavior is intended here.
4454 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4455 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4456 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4463 return AfterIP.takeError();
// The generator may leave the builder without an insertion block; in that
// case the function is returned as is.
4464 if (!
Builder.GetInsertBlock())
4465 return ReductionFunc;
// Only by-value elements need the combined value written back (same
// empty-IsByRef caveat as above).
4469 if (!IsByRef.
empty() && !IsByRef[En.index()])
4470 Builder.CreateStore(Reduced, LHSPtr);
// Second pass: let the Clang-style generator emit its code and obtain the
// placeholder LHS/RHS pointers it used.
4475 for (
auto En :
enumerate(ReductionInfos)) {
4476 unsigned Index = En.index();
4478 Value *LHSFixupPtr, *RHSFixupPtr;
4479 Builder.restoreIP(RI.ReductionGenClang(
4480 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
// NOTE(review): the lambdas capturing ReductionFunc presumably restrict the
// replacement of the placeholders to uses inside the new function - confirm.
4485 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4490 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4504 return ReductionFunc;
4512 assert(RI.Variable &&
"expected non-null variable");
4513 assert(RI.PrivateVariable &&
"expected non-null private variable");
4514 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4515 "expected non-null reduction generator callback");
4518 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4519 "expected variables and their private equivalents to have the same "
4522 assert(RI.Variable->getType()->isPointerTy() &&
4523 "expected variables to be pointers");
4532 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4546 if (ReductionInfos.
size() == 0)
4556 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4560 AttributeList FuncAttrs;
4561 AttrBuilder AttrBldr(Ctx);
4563 AttrBldr.addAttribute(Attr);
4564 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4565 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4569 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4571 if (!ReductionResult)
4573 Function *ReductionFunc = *ReductionResult;
4577 if (GridValue.has_value())
4578 Config.setGridValue(GridValue.value());
4593 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4597 Value *ReductionListAlloca =
4598 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4599 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4600 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4603 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4604 for (
auto En :
enumerate(ReductionInfos)) {
4607 RedArrayTy, ReductionList,
4608 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4611 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4616 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4617 Builder.CreateStore(CastElem, ElemPtr);
4621 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4627 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4633 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4645 unsigned MaxDataSize = 0;
4647 for (
auto En :
enumerate(ReductionInfos)) {
4651 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4652 ? En.value().ByRefElementType
4653 : En.value().ElementType;
4654 auto Size =
M.getDataLayout().getTypeStoreSize(RedTypeArg);
4655 if (
Size > MaxDataSize)
4659 Value *ReductionDataSize =
4660 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4661 if (!IsTeamsReduction) {
4662 Value *SarFuncCast =
4663 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4665 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4666 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4669 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4674 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4676 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4679 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4684 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4689 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4694 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4701 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4703 Value *Args3[] = {SrcLocInfo,
4704 KernelTeamsReductionPtr,
4705 Builder.getInt32(ReductionBufNum),
4716 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4733 for (
auto En :
enumerate(ReductionInfos)) {
4741 Value *LHSPtr, *RHSPtr;
4743 &LHSPtr, &RHSPtr, CurFunc));
4749 RedValue =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4751 if (RHSPtr->
getType() != RHS->getType())
4753 Builder.CreatePointerBitCastOrAddrSpaceCast(RHS, RHSPtr->
getType());
4764 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4766 "red.value." +
Twine(En.index()));
4777 if (!IsByRef.
empty() && !IsByRef[En.index()])
4782 if (ContinuationBlock) {
4783 Builder.CreateBr(ContinuationBlock);
4784 Builder.SetInsertPoint(ContinuationBlock);
4786 Config.setEmitLLVMUsed();
4797 ".omp.reduction.func", &M);
4807 Builder.SetInsertPoint(ReductionFuncBlock);
4808 Value *LHSArrayPtr =
nullptr;
4809 Value *RHSArrayPtr =
nullptr;
4820 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4822 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4823 Value *LHSAddrCast =
4824 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4825 Value *RHSAddrCast =
4826 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4827 Builder.CreateStore(Arg0, LHSAddrCast);
4828 Builder.CreateStore(Arg1, RHSAddrCast);
4829 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4830 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4832 LHSArrayPtr = ReductionFunc->
getArg(0);
4833 RHSArrayPtr = ReductionFunc->
getArg(1);
4836 unsigned NumReductions = ReductionInfos.
size();
4839 for (
auto En :
enumerate(ReductionInfos)) {
4841 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4842 RedArrayTy, LHSArrayPtr, 0, En.index());
4843 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4844 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4847 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4848 RedArrayTy, RHSArrayPtr, 0, En.index());
4849 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4850 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4859 Builder.restoreIP(*AfterIP);
4861 if (!Builder.GetInsertBlock())
4865 if (!IsByRef[En.index()])
4866 Builder.CreateStore(Reduced, LHSPtr);
4868 Builder.CreateRetVoid();
4875 bool IsNoWait,
bool IsTeamsReduction) {
4879 IsByRef, IsNoWait, IsTeamsReduction);
4886 if (ReductionInfos.
size() == 0)
4896 unsigned NumReductions = ReductionInfos.
size();
4899 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4901 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4903 for (
auto En :
enumerate(ReductionInfos)) {
4904 unsigned Index = En.index();
4906 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4907 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4914 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4924 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4929 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4930 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4932 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4934 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4935 : RuntimeFunction::OMPRTL___kmpc_reduce);
4938 {Ident, ThreadId, NumVariables, RedArraySize,
4939 RedArray, ReductionFunc, Lock},
4950 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4951 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4952 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4957 Builder.SetInsertPoint(NonAtomicRedBlock);
4958 for (
auto En :
enumerate(ReductionInfos)) {
4964 if (!IsByRef[En.index()]) {
4966 "red.value." +
Twine(En.index()));
4968 Value *PrivateRedValue =
4970 "red.private.value." +
Twine(En.index()));
4978 if (!
Builder.GetInsertBlock())
4981 if (!IsByRef[En.index()])
4985 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4986 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4988 Builder.CreateBr(ContinuationBlock);
4993 Builder.SetInsertPoint(AtomicRedBlock);
4994 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
5001 if (!
Builder.GetInsertBlock())
5004 Builder.CreateBr(ContinuationBlock);
5017 if (!
Builder.GetInsertBlock())
5020 Builder.SetInsertPoint(ContinuationBlock);
5031 Directive OMPD = Directive::OMPD_master;
5036 Value *Args[] = {Ident, ThreadId};
5044 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5055 Directive OMPD = Directive::OMPD_masked;
5061 Value *ArgsEnd[] = {Ident, ThreadId};
5069 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5079 Call->setDoesNotThrow();
5094 bool IsInclusive,
ScanInfo *ScanRedInfo) {
5096 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
5097 ScanVarsType, ScanRedInfo);
5108 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5111 Type *DestTy = ScanVarsType[i];
5112 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5115 Builder.CreateStore(Src, Val);
5120 Builder.GetInsertBlock()->getParent());
5123 IV = ScanRedInfo->
IV;
5126 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5129 Type *DestTy = ScanVarsType[i];
5131 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5133 Builder.CreateStore(Src, ScanVars[i]);
5147 Builder.GetInsertBlock()->getParent());
5152Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
5156 Builder.restoreIP(AllocaIP);
5158 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5160 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
5167 Builder.restoreIP(CodeGenIP);
5169 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
5170 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5174 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
5175 AllocSpan,
nullptr,
"arr");
5176 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
5194 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5203Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
5209 Value *PrivateVar = RedInfo.PrivateVariable;
5210 Value *OrigVar = RedInfo.Variable;
5214 Type *SrcTy = RedInfo.ElementType;
5219 Builder.CreateStore(Src, OrigVar);
5242 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5267 Builder.GetInsertBlock()->getModule(),
5274 Builder.GetInsertBlock()->getModule(),
5280 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5281 Builder.SetInsertPoint(InputBB);
5284 Builder.SetInsertPoint(LoopBB);
5300 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5302 Builder.SetInsertPoint(InnerLoopBB);
5306 Value *ReductionVal = RedInfo.PrivateVariable;
5309 Type *DestTy = RedInfo.ElementType;
5312 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5315 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5320 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5323 Builder.CreateStore(Result, LHSPtr);
5326 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5328 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5329 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5332 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5338 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5359 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5366Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5378 Error Err = InputLoopGen();
5389 Error Err = ScanLoopGen(Builder.saveIP());
5396void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5433 Builder.SetInsertPoint(Preheader);
5436 Builder.SetInsertPoint(Header);
5437 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5438 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5443 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5444 Builder.CreateCondBr(Cmp, Body, Exit);
5449 Builder.SetInsertPoint(Latch);
5451 "omp_" + Name +
".next",
true);
5462 CL->Header = Header;
5481 NextBB, NextBB, Name);
5513 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5522 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5523 ScanRedInfo->
Span = TripCount;
5529 ScanRedInfo->
IV =
IV;
5530 createScanBBs(ScanRedInfo);
5533 assert(Terminator->getNumSuccessors() == 1);
5534 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5537 Builder.GetInsertBlock()->getParent());
5540 Builder.GetInsertBlock()->getParent());
5541 Builder.CreateBr(ContinueBlock);
5547 const auto &&InputLoopGen = [&]() ->
Error {
5549 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5550 ComputeIP, Name,
true, ScanRedInfo);
5554 Builder.restoreIP((*LoopInfo)->getAfterIP());
5560 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5564 Builder.restoreIP((*LoopInfo)->getAfterIP());
5568 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5576 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5586 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5587 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5591 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5607 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5610 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5614 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5619 Value *CountIfLooping;
5620 if (InclusiveStop) {
5621 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5627 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5630 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5631 "omp_" + Name +
".tripcount");
5636 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5643 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5650 ScanRedInfo->
IV = IndVar;
5651 return BodyGenCB(
Builder.saveIP(), IndVar);
5657 Builder.getCurrentDebugLocation());
5668 unsigned Bitwidth = Ty->getIntegerBitWidth();
5671 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5674 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5684 unsigned Bitwidth = Ty->getIntegerBitWidth();
5687 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5690 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5698 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5700 "Require dedicated allocate IP");
5706 uint32_t SrcLocStrSize;
5710 case WorksharingLoopType::ForStaticLoop:
5711 Flag = OMP_IDENT_FLAG_WORK_LOOP;
5713 case WorksharingLoopType::DistributeStaticLoop:
5714 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5716 case WorksharingLoopType::DistributeForStaticLoop:
5717 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
5724 Type *IVTy =
IV->getType();
5725 FunctionCallee StaticInit =
5726 LoopType == WorksharingLoopType::DistributeForStaticLoop
5729 FunctionCallee StaticFini =
5733 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5736 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5737 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5738 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5739 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5748 Constant *One = ConstantInt::get(IVTy, 1);
5749 Builder.CreateStore(Zero, PLowerBound);
5751 Builder.CreateStore(UpperBound, PUpperBound);
5752 Builder.CreateStore(One, PStride);
5758 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5759 ? OMPScheduleType::OrderedDistribute
5762 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5766 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5767 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5770 PLowerBound, PUpperBound});
5771 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5772 Value *PDistUpperBound =
5773 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5774 Args.push_back(PDistUpperBound);
5779 BuildInitCall(SchedulingType,
Builder);
5780 if (HasDistSchedule &&
5781 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5782 Constant *DistScheduleSchedType = ConstantInt::get(
5787 BuildInitCall(DistScheduleSchedType,
Builder);
5789 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5790 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5791 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5792 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5793 CLI->setTripCount(TripCount);
5799 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5803 return Builder.CreateAdd(OldIV, LowerBound);
5815 omp::Directive::OMPD_for,
false,
5818 return BarrierIP.takeError();
5845 Reachable.insert(
Block);
5855 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5859OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5863 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5864 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5869 Type *IVTy =
IV->getType();
5871 "Max supported tripcount bitwidth is 64 bits");
5873 :
Type::getInt64Ty(Ctx);
5876 Constant *One = ConstantInt::get(InternalIVTy, 1);
5882 for (BasicBlock &BB : *
F)
5883 if (!BB.hasTerminator())
5884 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5889 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5890 for (Instruction *
I : UIs)
5891 I->eraseFromParent();
5894 if (ChunkSize || DistScheduleChunkSize)
5899 FunctionCallee StaticInit =
5901 FunctionCallee StaticFini =
5907 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5908 Value *PLowerBound =
5909 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5910 Value *PUpperBound =
5911 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5912 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5921 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5922 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5923 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5924 "distschedulechunksize");
5925 Value *CastedTripCount =
5926 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5929 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5931 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5932 Builder.CreateStore(Zero, PLowerBound);
5933 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5934 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5936 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5937 Builder.CreateStore(UpperBound, PUpperBound);
5938 Builder.CreateStore(One, PStride);
5942 uint32_t SrcLocStrSize;
5945 if (DistScheduleSchedType != OMPScheduleType::None) {
5946 Flag |= OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5951 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5952 PUpperBound, PStride, One,
5953 this](
Value *SchedulingType,
Value *ChunkSize,
5956 StaticInit, {SrcLoc, ThreadNum,
5957 SchedulingType, PLastIter,
5958 PLowerBound, PUpperBound,
5962 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5963 if (DistScheduleSchedType != OMPScheduleType::None &&
5964 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5965 SchedType != OMPScheduleType::OrderedDistribute) {
5969 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5973 Value *FirstChunkStart =
5974 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5975 Value *FirstChunkStop =
5976 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5977 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5979 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5980 Value *NextChunkStride =
5981 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5985 Value *DispatchCounter;
5993 DispatchCounter = Counter;
5996 FirstChunkStart, CastedTripCount, NextChunkStride,
6019 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
6020 Value *IsLastChunk =
6021 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
6022 Value *CountUntilOrigTripCount =
6023 Builder.CreateSub(CastedTripCount, DispatchCounter);
6025 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
6026 Value *BackcastedChunkTC =
6027 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
6028 CLI->setTripCount(BackcastedChunkTC);
6033 Value *BackcastedDispatchCounter =
6034 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
6035 CLI->mapIndVar([&](Instruction *) ->
Value * {
6037 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
6050 return AfterIP.takeError();
6065static FunctionCallee
6068 unsigned Bitwidth = Ty->getIntegerBitWidth();
6071 case WorksharingLoopType::ForStaticLoop:
6074 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
6077 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
6079 case WorksharingLoopType::DistributeStaticLoop:
6082 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
6085 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
6087 case WorksharingLoopType::DistributeForStaticLoop:
6090 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
6093 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
6096 if (Bitwidth != 32 && Bitwidth != 64) {
6108 Function &LoopBodyFn,
bool NoLoop) {
6119 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
6120 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6121 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6122 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6127 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
6128 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6132 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
6133 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6134 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
6135 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6136 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
6138 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6162 Builder.restoreIP({Preheader, Preheader->
end()});
6165 Builder.CreateBr(CLI->
getExit());
6173 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
6181 "Expected unique undroppable user of outlined function");
6183 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
6185 "Expected outlined function call to be located in loop preheader");
6187 if (OutlinedFnCallInstruction->
arg_size() > 1)
6194 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
6196 for (
auto &ToBeDeletedItem : ToBeDeleted)
6197 ToBeDeletedItem->eraseFromParent();
6204 uint32_t SrcLocStrSize;
6208 case WorksharingLoopType::ForStaticLoop:
6209 Flag = OMP_IDENT_FLAG_WORK_LOOP;
6211 case WorksharingLoopType::DistributeStaticLoop:
6212 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
6214 case WorksharingLoopType::DistributeForStaticLoop:
6215 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
6220 auto OI = std::make_unique<OutlineInfo>();
6225 SmallVector<Instruction *, 4> ToBeDeleted;
6227 OI->OuterAllocBB = AllocaIP.getBlock();
6250 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
6252 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
6254 CodeExtractorAnalysisCache CEAC(*OuterFn);
6255 CodeExtractor Extractor(Blocks,
6269 SetVector<Value *> SinkingCands, HoistingCands;
6273 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
6280 for (
auto Use :
Users) {
6282 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
6283 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
6289 OI->ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6296 OI->PostOutlineCB = [=, ToBeDeletedVec =
6297 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6307 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6308 bool HasSimdModifier,
bool HasMonotonicModifier,
6309 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6311 Value *DistScheduleChunkSize) {
6312 if (
Config.isTargetDevice())
6313 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6315 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6316 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6318 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6319 OMPScheduleType::ModifierOrdered;
6321 if (HasDistSchedule) {
6322 DistScheduleSchedType = DistScheduleChunkSize
6323 ? OMPScheduleType::OrderedDistributeChunked
6324 : OMPScheduleType::OrderedDistribute;
6326 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6327 case OMPScheduleType::BaseStatic:
6328 case OMPScheduleType::BaseDistribute:
6329 assert((!ChunkSize || !DistScheduleChunkSize) &&
6330 "No chunk size with static-chunked schedule");
6331 if (IsOrdered && !HasDistSchedule)
6332 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6333 NeedsBarrier, ChunkSize);
6335 if (DistScheduleChunkSize)
6336 return applyStaticChunkedWorkshareLoop(
6337 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6338 DistScheduleChunkSize, DistScheduleSchedType);
6339 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6342 case OMPScheduleType::BaseStaticChunked:
6343 case OMPScheduleType::BaseDistributeChunked:
6344 if (IsOrdered && !HasDistSchedule)
6345 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6346 NeedsBarrier, ChunkSize);
6348 return applyStaticChunkedWorkshareLoop(
6349 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6350 DistScheduleChunkSize, DistScheduleSchedType);
6352 case OMPScheduleType::BaseRuntime:
6353 case OMPScheduleType::BaseAuto:
6354 case OMPScheduleType::BaseGreedy:
6355 case OMPScheduleType::BaseBalanced:
6356 case OMPScheduleType::BaseSteal:
6357 case OMPScheduleType::BaseRuntimeSimd:
6359 "schedule type does not support user-defined chunk sizes");
6361 case OMPScheduleType::BaseGuidedSimd:
6362 case OMPScheduleType::BaseDynamicChunked:
6363 case OMPScheduleType::BaseGuidedChunked:
6364 case OMPScheduleType::BaseGuidedIterativeChunked:
6365 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6366 case OMPScheduleType::BaseStaticBalancedChunked:
6367 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6368 NeedsBarrier, ChunkSize);
6381 unsigned Bitwidth = Ty->getIntegerBitWidth();
6384 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6387 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6395static FunctionCallee
6397 unsigned Bitwidth = Ty->getIntegerBitWidth();
6400 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6403 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6410static FunctionCallee
6412 unsigned Bitwidth = Ty->getIntegerBitWidth();
6415 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6418 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6423OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6426 bool NeedsBarrier,
Value *Chunk) {
6427 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6429 "Require dedicated allocate IP");
6431 "Require valid schedule type");
6433 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6434 OMPScheduleType::ModifierOrdered;
6439 uint32_t SrcLocStrSize;
6446 Type *IVTy =
IV->getType();
6451 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6453 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6454 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6455 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6456 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6465 Constant *One = ConstantInt::get(IVTy, 1);
6466 Builder.CreateStore(One, PLowerBound);
6468 Builder.CreateStore(UpperBound, PUpperBound);
6469 Builder.CreateStore(One, PStride);
6487 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6499 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6502 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6503 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6506 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6507 Builder.CreateCondBr(MoreWork, Header, Exit);
6513 PI->setIncomingBlock(0, OuterCond);
6514 PI->setIncomingValue(0, LowerBound);
6519 Br->setSuccessor(OuterCond);
6525 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6528 CI->setOperand(1, UpperBound);
6532 assert(BI->getSuccessor(1) == Exit);
6533 BI->setSuccessor(1, OuterCond);
6547 omp::Directive::OMPD_for,
false,
6550 return BarrierIP.takeError();
6602 assert(
Loops.size() >= 1 &&
"At least one loop required");
6603 size_t NumLoops =
Loops.size();
6607 return Loops.front();
6619 Loop->collectControlBlocks(OldControlBBs);
6623 if (ComputeIP.
isSet())
6630 Value *CollapsedTripCount =
nullptr;
6633 "All loops to collapse must be valid canonical loops");
6634 Value *OrigTripCount = L->getTripCount();
6635 if (!CollapsedTripCount) {
6636 CollapsedTripCount = OrigTripCount;
6641 CollapsedTripCount =
6642 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6648 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6654 Builder.restoreIP(Result->getBodyIP());
6656 Value *Leftover = Result->getIndVar();
6658 NewIndVars.
resize(NumLoops);
6659 for (
int i = NumLoops - 1; i >= 1; --i) {
6660 Value *OrigTripCount =
Loops[i]->getTripCount();
6662 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6663 NewIndVars[i] = NewIndVar;
6665 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6668 NewIndVars[0] = Leftover;
6677 BasicBlock *ContinueBlock = Result->getBody();
6679 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6686 ContinueBlock =
nullptr;
6687 ContinuePred = NextSrc;
6694 for (
size_t i = 0; i < NumLoops - 1; ++i)
6695 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6701 for (
size_t i = NumLoops - 1; i > 0; --i)
6702 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6705 ContinueWith(Result->getLatch(),
nullptr);
6712 for (
size_t i = 0; i < NumLoops; ++i)
6713 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6727std::vector<CanonicalLoopInfo *>
6731 "Must pass as many tile sizes as there are loops");
6732 int NumLoops =
Loops.size();
6733 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6745 Loop->collectControlBlocks(OldControlBBs);
6753 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6754 OrigTripCounts.
push_back(L->getTripCount());
6765 for (
int i = 0; i < NumLoops - 1; ++i) {
6778 for (
int i = 0; i < NumLoops; ++i) {
6780 Value *OrigTripCount = OrigTripCounts[i];
6793 Value *FloorTripOverflow =
6794 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6796 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6797 Value *FloorTripCount =
6798 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6799 "omp_floor" +
Twine(i) +
".tripcount",
true);
6802 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6808 std::vector<CanonicalLoopInfo *> Result;
6809 Result.reserve(NumLoops * 2);
6822 auto EmbeddNewLoop =
6823 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6826 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6831 Enter = EmbeddedLoop->
getBody();
6833 OutroInsertBefore = EmbeddedLoop->
getLatch();
6834 return EmbeddedLoop;
6838 const Twine &NameBase) {
6841 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6842 Result.push_back(EmbeddedLoop);
6846 EmbeddNewLoops(FloorCount,
"floor");
6852 for (
int i = 0; i < NumLoops; ++i) {
6856 Value *FloorIsEpilogue =
6858 Value *TileTripCount =
6865 EmbeddNewLoops(TileCounts,
"tile");
6870 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6879 BodyEnter =
nullptr;
6880 BodyEntered = ExitBB;
6892 Builder.restoreIP(Result.back()->getBodyIP());
6893 for (
int i = 0; i < NumLoops; ++i) {
6896 Value *OrigIndVar = OrigIndVars[i];
6924 if (Properties.
empty())
6947 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6951 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6959 if (
I.mayReadOrWriteMemory()) {
6963 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6977 Loop->collectControlBlocks(oldControlBBs);
6982 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6983 origTripCounts.
push_back(L->getTripCount());
6992 Builder.SetInsertPoint(TCBlock);
6993 Value *fusedTripCount =
nullptr;
6995 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6996 Value *origTripCount = L->getTripCount();
6997 if (!fusedTripCount) {
6998 fusedTripCount = origTripCount;
7001 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
7002 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
7016 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7017 Loops[i]->getPreheader()->moveBefore(TCBlock);
7018 Loops[i]->getAfter()->moveBefore(TCBlock);
7022 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7034 for (
size_t i = 0; i <
Loops.size(); ++i) {
7036 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
7037 Builder.SetInsertPoint(condBlock);
7045 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7046 Builder.SetInsertPoint(condBBs[i]);
7047 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
7063 "omp.fused.pre_latch");
7096 const Twine &NamePrefix) {
7125 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
7127 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
7130 Builder.SetInsertPoint(SplitBeforeIt);
7132 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
7135 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
7138 Builder.SetInsertPoint(ElseBlock);
7144 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
7146 ExistingBlocks.
append(L->block_begin(), L->block_end());
7152 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
7154 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
7161 if (
Block == ThenBlock)
7162 NewBB->
setName(NamePrefix +
".if.else");
7165 VMap[
Block] = NewBB;
7173 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
7174 NamePrefix +
".pre_latch");
7178 L->addBasicBlockToLoop(ThenBlock, LI);
7184 if (TargetTriple.
isX86()) {
7185 if (Features.
lookup(
"avx512f"))
7187 else if (Features.
lookup(
"avx"))
7191 if (TargetTriple.
isPPC())
7193 if (TargetTriple.
isWasm())
7200 Value *IfCond, OrderKind Order,
7210 if (!BB.hasTerminator())
7226 I->eraseFromParent();
7229 if (AlignedVars.
size()) {
7231 for (
auto &AlignedItem : AlignedVars) {
7232 Value *AlignedPtr = AlignedItem.first;
7233 Value *Alignment = AlignedItem.second;
7236 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
7244 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
7257 Reachable.insert(
Block);
7267 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
7283 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
7285 if (Simdlen || Safelen) {
7289 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7315static std::unique_ptr<TargetMachine>
7319 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7320 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7331 std::nullopt, OptLevel));
7349 if (!BB.hasTerminator())
7362 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7363 FAM.registerPass([&]() {
return TIRA; });
7377 I->eraseFromParent();
7380 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7385 nullptr, ORE,
static_cast<int>(OptLevel),
7406 <<
" Threshold=" << UP.
Threshold <<
"\n"
7409 <<
" PartialOptSizeThreshold="
7429 Ptr = Load->getPointerOperand();
7431 Ptr = Store->getPointerOperand();
7438 if (Alloca->getParent() == &
F->getEntryBlock())
7458 int MaxTripCount = 0;
7459 bool MaxOrZero =
false;
7460 unsigned TripMultiple = 0;
7463 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7464 unsigned Factor = UP.
Count;
7465 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7476 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7492 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7505 *UnrolledCLI =
Loop;
7510 "unrolling only makes sense with a factor of 2 or larger");
7512 Type *IndVarTy =
Loop->getIndVarType();
7519 std::vector<CanonicalLoopInfo *>
LoopNest =
7534 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7537 (*UnrolledCLI)->assertOK();
7555 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7574 if (!CPVars.
empty()) {
7579 Directive OMPD = Directive::OMPD_single;
7584 Value *Args[] = {Ident, ThreadId};
7593 if (
Error Err = FiniCB(IP))
7614 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7621 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7624 ConstantInt::get(Int64, 0), CPVars[
I],
7627 }
else if (!IsNowait) {
7630 omp::Directive::OMPD_unknown,
false,
7648 Directive::OMPD_scope,
nullptr,
nullptr,
7649 BodyGenCB, FiniCB,
false,
true,
7657 omp::Directive::OMPD_unknown,
7673 Directive OMPD = Directive::OMPD_critical;
7678 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7679 Value *Args[] = {Ident, ThreadId, LockVar};
7696 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7704 const Twine &Name,
bool IsDependSource) {
7708 "OpenMP runtime requires depend vec with i64 type");
7721 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7735 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7753 Directive OMPD = Directive::OMPD_ordered;
7762 Value *Args[] = {Ident, ThreadId};
7772 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7779 bool HasFinalize,
bool IsCancellable) {
7786 BasicBlock *EntryBB = Builder.GetInsertBlock();
7795 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7807 "Unexpected control flow graph state!!");
7809 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7811 return AfterIP.takeError();
7816 "Unexpected Insertion point location!");
7819 auto InsertBB = merged ? ExitPredBB : ExitBB;
7822 Builder.SetInsertPoint(InsertBB);
7824 return Builder.saveIP();
7828 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7830 if (!Conditional || !EntryCall)
7836 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7846 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7850 UI->eraseFromParent();
7858 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7866 "Unexpected finalization stack state!");
7869 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7871 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7872 return std::move(Err);
7876 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7886 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7920 "copyin.not.master.end");
7927 Builder.SetInsertPoint(OMP_Entry);
7928 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7929 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7930 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7931 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7933 Builder.SetInsertPoint(CopyBegin);
7951 Value *Args[] = {ThreadId,
Size, Allocator};
7974 return Builder.CreateCall(Fn, Args, Name);
7988 Value *Args[] = {ThreadId, Addr, Allocator};
7995 const Twine &Name) {
8003 M.getContext(),
M.getDataLayout().getPrefTypeAlign(Int64)));
8009 const Twine &Name) {
8011 Loc,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)), Name);
8016 const Twine &Name) {
8022 return Builder.CreateCall(Fn, Args, Name);
8027 const Twine &Name) {
8029 Loc, Addr,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)),
8036 Value *DependenceAddress,
bool HaveNowaitClause) {
8044 if (Device ==
nullptr)
8046 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
8047 if (NumDependences ==
nullptr) {
8048 NumDependences = ConstantInt::get(Int32, 0);
8052 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8054 Ident, ThreadId, InteropVar, InteropTypeVal,
8055 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
8064 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
8072 if (Device ==
nullptr)
8074 if (NumDependences ==
nullptr) {
8075 NumDependences = ConstantInt::get(Int32, 0);
8079 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8081 Ident, ThreadId, InteropVar, Device,
8082 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8091 Value *NumDependences,
8092 Value *DependenceAddress,
8093 bool HaveNowaitClause) {
8100 if (Device ==
nullptr)
8102 if (NumDependences ==
nullptr) {
8103 NumDependences = ConstantInt::get(Int32, 0);
8107 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8109 Ident, ThreadId, InteropVar, Device,
8110 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8140 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
8141 "expected num_threads and num_teams to be specified");
8161 const std::string DebugPrefix =
"_debug__";
8162 if (KernelName.
ends_with(DebugPrefix)) {
8163 KernelName = KernelName.
drop_back(DebugPrefix.length());
8164 Kernel =
M.getFunction(KernelName);
8170 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
8175 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
8182 MaxThreadsVal = Attrs.MinThreads;
8186 if (MaxThreadsVal > 0)
8199 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
8202 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
8203 Constant *DynamicEnvironmentInitializer =
8207 DynamicEnvironmentInitializer, DynamicEnvironmentName,
8209 DL.getDefaultGlobalsAddressSpace());
8213 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
8214 ? DynamicEnvironmentGV
8216 DynamicEnvironmentPtr);
8219 ConfigurationEnvironment, {
8220 UseGenericStateMachineVal,
8221 MayUseNestedParallelismVal,
8228 ReductionBufferLength,
8231 KernelEnvironment, {
8232 ConfigurationEnvironmentInitializer,
8236 std::string KernelEnvironmentName =
8237 (KernelName +
"_kernel_environment").str();
8240 KernelEnvironmentInitializer, KernelEnvironmentName,
8242 DL.getDefaultGlobalsAddressSpace());
8246 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
8247 ? KernelEnvironmentGV
8249 KernelEnvironmentPtr);
8250 Value *KernelLaunchEnvironment =
8253 KernelLaunchEnvironment =
8254 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
8255 ? KernelLaunchEnvironment
8256 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
8257 KernelLaunchEnvParamTy);
8259 Fn, {KernelEnvironment, KernelLaunchEnvironment});
8271 auto *UI =
Builder.CreateUnreachable();
8277 Builder.SetInsertPoint(WorkerExitBB);
8281 Builder.SetInsertPoint(CheckBBTI);
8282 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
8284 CheckBBTI->eraseFromParent();
8285 UI->eraseFromParent();
8293 int32_t TeamsReductionDataSize,
8294 int32_t TeamsReductionBufferLength) {
8299 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
8303 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
8309 const std::string DebugPrefix =
"_debug__";
8311 KernelName = KernelName.
drop_back(DebugPrefix.length());
8312 auto *KernelEnvironmentGV =
8313 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
8314 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
8315 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
8317 KernelEnvironmentInitializer,
8318 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
8320 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
8322 KernelEnvironmentGV->setInitializer(NewInitializer);
8327 if (
Kernel.hasFnAttribute(Name)) {
8328 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
8334std::pair<int32_t, int32_t>
8336 int32_t ThreadLimit =
8337 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
8340 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
8341 if (!Attr.isValid() || !Attr.isStringAttribute())
8342 return {0, ThreadLimit};
8343 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
8346 return {0, ThreadLimit};
8347 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
8355 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
8357 return {0, ThreadLimit};
8363 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
8366 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
8374std::pair<int32_t, int32_t>
8377 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
8381 int32_t LB, int32_t UB) {
8389 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
8392void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8401 else if (
T.isNVPTX())
8403 else if (
T.isSPIRV())
8408Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8409 StringRef EntryFnIDName) {
8410 if (
Config.isTargetDevice()) {
8411 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8415 return new GlobalVariable(
8420Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8421 StringRef EntryFnName) {
8425 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8426 "Named kernel already exists?");
8427 return new GlobalVariable(
8440 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8444 OutlinedFn = *CBResult;
8446 OutlinedFn =
nullptr;
8452 if (!IsOffloadEntry)
8455 std::string EntryFnIDName =
8457 ? std::string(EntryFnName)
8461 EntryFnName, EntryFnIDName);
8469 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8470 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8471 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8473 EntryInfo, EntryAddr, OutlinedFnID,
8475 return OutlinedFnID;
8493 bool IsStandAlone = !BodyGenCB;
8500 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8502 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8503 true, DeviceAddrCB))
8510 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8520 SrcLocInfo, DeviceID,
8527 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8531 if (Info.HasNoWait) {
8541 if (Info.HasNoWait) {
8545 emitBlock(OffloadContBlock, CurFn,
true);
8551 bool RequiresOuterTargetTask = Info.HasNoWait;
8552 if (!RequiresOuterTargetTask)
8553 cantFail(TaskBodyCB(
nullptr,
nullptr,
8557 {}, RTArgs, Info.HasNoWait));
8560 omp::OMPRTL___tgt_target_data_begin_mapper);
8564 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8568 Builder.CreateStore(LI, DeviceMap.second.second);
8605 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8614 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8637 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8638 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8653 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8654 return EndThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8657 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8658 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8669 bool IsGPUDistribute) {
8670 assert((IVSize == 32 || IVSize == 64) &&
8671 "IV size is not compatible with the omp runtime");
8673 if (IsGPUDistribute)
8675 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8676 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8677 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8678 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8680 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8681 : omp::OMPRTL___kmpc_for_static_init_4u)
8682 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8683 : omp::OMPRTL___kmpc_for_static_init_8u);
8690 assert((IVSize == 32 || IVSize == 64) &&
8691 "IV size is not compatible with the omp runtime");
8693 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8694 : omp::OMPRTL___kmpc_dispatch_init_4u)
8695 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8696 : omp::OMPRTL___kmpc_dispatch_init_8u);
8703 assert((IVSize == 32 || IVSize == 64) &&
8704 "IV size is not compatible with the omp runtime");
8706 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8707 : omp::OMPRTL___kmpc_dispatch_next_4u)
8708 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8709 : omp::OMPRTL___kmpc_dispatch_next_8u);
8716 assert((IVSize == 32 || IVSize == 64) &&
8717 "IV size is not compatible with the omp runtime");
8719 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8720 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8721 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8722 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8733 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8741 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8745 if (NewVar && (arg == NewVar->
getArg()))
8755 auto UpdateDebugRecord = [&](
auto *DR) {
8758 for (
auto Loc : DR->location_ops()) {
8759 auto Iter = ValueReplacementMap.find(
Loc);
8760 if (Iter != ValueReplacementMap.end()) {
8761 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8762 ArgNo = std::get<1>(Iter->second) + 1;
8766 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8771 if (DVR->getNumVariableLocationOps() != 1u) {
8772 DVR->setKillLocation();
8775 Value *
Loc = DVR->getVariableLocationOp(0u);
8782 RequiredBB = &DVR->getFunction()->getEntryBlock();
8784 if (RequiredBB && RequiredBB != CurBB) {
8796 "Unexpected debug intrinsic");
8798 UpdateDebugRecord(&DVR);
8799 MoveDebugRecordToCorrectBlock(&DVR);
8802 for (
auto *DVR : DVRsToDelete)
8803 DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
8807 Module *M = Func->getParent();
8810 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8811 unsigned ArgNo = Func->arg_size();
8813 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8814 false, DINode::DIFlags::FlagArtificial);
8816 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8817 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8838 for (
auto &Arg : Inputs)
8839 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8843 for (
auto &Arg : Inputs)
8844 ParameterTypes.
push_back(Arg->getType());
8852 auto BB = Builder.GetInsertBlock();
8853 auto M = BB->getModule();
8864 if (TargetCpuAttr.isStringAttribute())
8865 Func->addFnAttr(TargetCpuAttr);
8867 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8868 if (TargetFeaturesAttr.isStringAttribute())
8869 Func->addFnAttr(TargetFeaturesAttr);
8874 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8885 Builder.SetInsertPoint(EntryBB);
8891 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8901 splitBB(Builder,
true,
"outlined.body");
8908 Builder.SetInsertPoint(ExitBB);
8915 Builder.CreateRetVoid();
8919 auto AllocaIP = Builder.saveIP();
8924 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8956 if (Instr->getFunction() == Func)
8957 Instr->replaceUsesOfWith(
Input, InputCopy);
8963 for (
auto InArg :
zip(Inputs, ArgRange)) {
8965 Argument &Arg = std::get<1>(InArg);
8966 Value *InputCopy =
nullptr;
8969 Arg,
Input, InputCopy, AllocaIP, Builder.saveIP(),
8973 Builder.restoreIP(*AfterIP);
8974 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8994 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
9001 ReplaceValue(
Input, InputCopy, Func);
9005 for (
auto Deferred : DeferredReplacement)
9006 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
9009 ValueReplacementMap);
9017 Value *TaskWithPrivates,
9018 Type *TaskWithPrivatesTy) {
9020 Type *TaskTy = OMPIRBuilder.Task;
9023 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
9024 Value *Shareds = TaskT;
9034 if (TaskWithPrivatesTy != TaskTy)
9035 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
9052 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
9057 assert((!NumOffloadingArrays || PrivatesTy) &&
9058 "PrivatesTy cannot be nullptr when there are offloadingArrays"
9091 Type *TaskPtrTy = OMPBuilder.TaskPtr;
9092 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
9098 ".omp_target_task_proxy_func",
9099 Builder.GetInsertBlock()->getModule());
9100 Value *ThreadId = ProxyFn->getArg(0);
9101 Value *TaskWithPrivates = ProxyFn->getArg(1);
9102 ThreadId->
setName(
"thread.id");
9103 TaskWithPrivates->
setName(
"task");
9105 bool HasShareds = SharedArgsOperandNo > 0;
9106 bool HasOffloadingArrays = NumOffloadingArrays > 0;
9109 Builder.SetInsertPoint(EntryBB);
9115 if (HasOffloadingArrays) {
9116 assert(TaskTy != TaskWithPrivatesTy &&
9117 "If there are offloading arrays to pass to the target"
9118 "TaskTy cannot be the same as TaskWithPrivatesTy");
9121 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
9122 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
9124 Builder.CreateStructGEP(PrivatesTy, Privates, i));
9128 auto *ArgStructAlloca =
9130 assert(ArgStructAlloca &&
9131 "Unable to find the alloca instruction corresponding to arguments "
9132 "for extracted function");
9134 std::optional<TypeSize> ArgAllocSize =
9136 assert(ArgStructType && ArgAllocSize &&
9137 "Unable to determine size of arguments for extracted function");
9138 uint64_t StructSize = ArgAllocSize->getFixedValue();
9141 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
9143 Value *SharedsSize = Builder.getInt64(StructSize);
9146 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
9148 Builder.CreateMemCpy(
9149 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
9151 KernelLaunchArgs.
push_back(NewArgStructAlloca);
9154 Builder.CreateRetVoid();
9160 return GEP->getSourceElementType();
9162 return Alloca->getAllocatedType();
9185 if (OffloadingArraysToPrivatize.
empty())
9186 return OMPIRBuilder.Task;
9189 for (
Value *V : OffloadingArraysToPrivatize) {
9190 assert(V->getType()->isPointerTy() &&
9191 "Expected pointer to array to privatize. Got a non-pointer value "
9194 assert(ArrayTy &&
"ArrayType cannot be nullptr");
9200 "struct.task_with_privates");
9214 EntryFnName, Inputs, CBFunc,
9219 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
9356 TargetTaskAllocaBB->
begin());
9359 auto OI = std::make_unique<OutlineInfo>();
9360 OI->EntryBB = TargetTaskAllocaBB;
9361 OI->OuterAllocBB = AllocaIP.
getBlock();
9366 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
9369 Builder.restoreIP(TargetTaskBodyIP);
9370 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
9388 bool NeedsTargetTask = HasNoWait && DeviceID;
9389 if (NeedsTargetTask) {
9395 OffloadingArraysToPrivatize.
push_back(V);
9396 OI->ExcludeArgsFromAggregate.push_back(V);
9400 OI->PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
9401 DeviceID, OffloadingArraysToPrivatize](
9404 "there must be a single user for the outlined function");
9418 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
9419 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
9421 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
9422 "Wrong number of arguments for StaleCI when shareds are present");
9423 int SharedArgOperandNo =
9424 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
9430 if (!OffloadingArraysToPrivatize.
empty())
9435 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9436 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9438 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9441 Builder.SetInsertPoint(StaleCI);
9458 OMPRTL___kmpc_omp_target_task_alloc);
9470 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9477 auto *ArgStructAlloca =
9479 assert(ArgStructAlloca &&
9480 "Unable to find the alloca instruction corresponding to arguments "
9481 "for extracted function");
9482 std::optional<TypeSize> ArgAllocSize =
9485 "Unable to determine size of arguments for extracted function");
9486 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9505 TaskSize, SharedsSize,
9508 if (NeedsTargetTask) {
9509 assert(DeviceID &&
"Expected non-empty device ID.");
9519 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9520 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9523 if (!OffloadingArraysToPrivatize.
empty()) {
9525 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9526 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9527 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9534 "ElementType should match ArrayType");
9537 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9539 Dst, Alignment, PtrToPrivatize, Alignment,
9540 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9544 Value *DepArray =
nullptr;
9545 Value *NumDeps =
nullptr;
9548 NumDeps = Dependencies.
NumDeps;
9549 }
else if (!Dependencies.
Deps.empty()) {
9551 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
9562 if (!NeedsTargetTask) {
9571 ConstantInt::get(
Builder.getInt32Ty(), 0),
9584 }
else if (DepArray) {
9592 {Ident, ThreadID, TaskData, NumDeps, DepArray,
9593 ConstantInt::get(
Builder.getInt32Ty(), 0),
9603 I->eraseFromParent();
9608 << *(
Builder.GetInsertBlock()) <<
"\n");
9610 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9622 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9645 Builder.restoreIP(IP);
9651 return Builder.saveIP();
9654 bool HasDependencies = !Dependencies.
empty();
9655 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9672 if (OutlinedFnID && DeviceID)
9674 EmitTargetCallFallbackCB, KArgs,
9675 DeviceID, RTLoc, TargetTaskAllocaIP);
9683 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9690 auto &&EmitTargetCallElse =
9697 if (RequiresOuterTargetTask) {
9704 Dependencies, EmptyRTArgs, HasNoWait);
9706 return EmitTargetCallFallbackCB(Builder.saveIP());
9709 Builder.restoreIP(AfterIP);
9713 auto &&EmitTargetCallThen =
9717 Info.HasNoWait = HasNoWait;
9722 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9728 for (
auto [DefaultVal, RuntimeVal] :
9730 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9731 : Builder.getInt32(DefaultVal));
9735 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9737 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9741 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9744 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9752 Value *MaxThreadsClause =
9754 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9757 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9759 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9760 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9762 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9763 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9765 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9768 unsigned NumTargetItems = Info.NumberOfPtrs;
9776 Builder.getInt64Ty(),
9778 : Builder.getInt64(0);
9782 DynCGroupMem = Builder.getInt32(0);
9785 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9786 HasNoWait, DynCGroupMemFallback);
9793 if (RequiresOuterTargetTask)
9795 RTLoc, AllocaIP, Dependencies,
9796 KArgs.
RTArgs, Info.HasNoWait);
9799 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9800 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9803 Builder.restoreIP(AfterIP);
9810 if (!OutlinedFnID) {
9811 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP(), DeallocBlocks));
9817 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP(), DeallocBlocks));
9822 EmitTargetCallElse, AllocaIP));
9835 bool HasNowait,
Value *DynCGroupMem,
9849 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9850 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9856 if (!
Config.isTargetDevice())
9858 RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs,
9859 GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait,
9860 DynCGroupMem, DynCGroupMemFallback);
9874 return OS.
str().str();
9879 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9885 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9887 assert(Elem.second->getValueType() == Ty &&
9888 "OMP internal variable has different type than requested");
9901 :
M.getTargetTriple().isAMDGPU()
9903 :
DL.getDefaultGlobalsAddressSpace();
9912 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9913 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9920Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9921 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9922 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9933 return SizePtrToInt;
9938 std::string VarName) {
9946 return MaptypesArrayGlobal;
9951 unsigned NumOperands,
9960 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9964 ArrI64Ty,
nullptr,
".offload_sizes");
9975 int64_t DeviceID,
unsigned NumOperands) {
9981 Value *ArgsBaseGEP =
9983 {Builder.getInt32(0), Builder.getInt32(0)});
9986 {Builder.getInt32(0), Builder.getInt32(0)});
9987 Value *ArgSizesGEP =
9989 {Builder.getInt32(0), Builder.getInt32(0)});
9993 Builder.getInt32(NumOperands),
9994 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9995 MaptypesArg, MapnamesArg, NullPtr});
10002 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
10003 "expected region end call to runtime only when end call is separate");
10005 auto VoidPtrTy = UnqualPtrTy;
10006 auto VoidPtrPtrTy = UnqualPtrTy;
10008 auto Int64PtrTy = UnqualPtrTy;
10010 if (!Info.NumberOfPtrs) {
10022 Info.RTArgs.BasePointersArray,
10025 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
10029 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10033 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
10034 : Info.RTArgs.MapTypesArray,
10040 if (!Info.EmitDebug)
10044 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
10049 if (!Info.HasMapper)
10053 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
10074 "struct.descriptor_dim");
10076 enum { OffsetFD = 0, CountFD, StrideFD };
10080 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
10083 if (NonContigInfo.
Dims[
I] == 1)
10088 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
10089 Builder.restoreIP(CodeGenIP);
10090 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
10091 unsigned RevIdx = EE -
II - 1;
10095 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
10097 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
10098 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
10100 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
10102 NonContigInfo.
Counts[L][RevIdx], CountLVal,
10103 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10105 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
10107 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
10108 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10111 Builder.restoreIP(CodeGenIP);
10112 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
10113 DimsAddr,
Builder.getPtrTy());
10116 Info.RTArgs.PointersArray, 0,
I);
10118 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
10123void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
10127 StringRef Prefix = IsInit ?
".init" :
".del";
10133 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
10134 Value *DeleteBit = Builder.CreateAnd(
10137 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10138 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
10143 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
10144 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
10145 DeleteCond = Builder.CreateIsNull(
10150 DeleteCond =
Builder.CreateIsNotNull(
10166 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10167 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10168 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10169 MapTypeArg =
Builder.CreateOr(
10172 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10173 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
10177 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
10178 ArraySize, MapTypeArg, MapName};
10189 bool PreserveMemberOfFlags) {
10205 MapperFn->
addFnAttr(Attribute::NoInline);
10206 MapperFn->
addFnAttr(Attribute::NoUnwind);
10216 auto SavedIP =
Builder.saveIP();
10217 Builder.SetInsertPoint(EntryBB);
10229 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
10231 Value *PtrBegin = BeginIn;
10237 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10238 MapType, MapName, ElementSize, HeadBB,
10249 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
10250 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10256 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
10257 PtrPHI->addIncoming(PtrBegin, HeadBB);
10262 return Info.takeError();
10266 Value *OffloadingArgs[] = {MapperHandle};
10270 Value *ShiftedPreviousSize =
10274 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
10275 Value *CurBaseArg = Info->BasePointers[
I];
10276 Value *CurBeginArg = Info->Pointers[
I];
10277 Value *CurSizeArg = Info->Sizes[
I];
10278 Value *CurNameArg = Info->Names.size()
10284 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10286 Value *MemberMapType;
10287 if (PreserveMemberOfFlags) {
10289 static_cast<uint64_t>(OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
10291 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10293 bool HasMemberOf = (OrigFlags & MemberOfMask) != 0;
10295 MemberMapType =
Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10297 MemberMapType = OriMapType;
10299 MemberMapType =
Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10317 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10318 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10319 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10329 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10335 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10336 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10337 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10343 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10344 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10345 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10351 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10352 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10358 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10359 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10360 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10366 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10367 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10376 CurMapType->
addIncoming(MemberMapType, ToElseBB);
10378 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
10379 CurSizeArg, CurMapType, CurNameArg};
10381 auto ChildMapperFn = CustomMapperCB(
I);
10382 if (!ChildMapperFn)
10383 return ChildMapperFn.takeError();
10384 if (*ChildMapperFn) {
10399 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
10400 "omp.arraymap.next");
10401 PtrPHI->addIncoming(PtrNext, LastBB);
10402 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
10404 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10409 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10410 MapType, MapName, ElementSize, DoneBB,
10424 bool IsNonContiguous,
10428 Info.clearArrayInfo();
10431 if (Info.NumberOfPtrs == 0)
10440 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
10441 PointerArrayType,
nullptr,
".offload_baseptrs");
10443 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
10444 PointerArrayType,
nullptr,
".offload_ptrs");
10446 PointerArrayType,
nullptr,
".offload_mappers");
10447 Info.RTArgs.MappersArray = MappersArray;
10454 ConstantInt::get(Int64Ty, 0));
10456 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10457 bool IsNonContigEntry =
10459 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10461 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10464 if (IsNonContigEntry) {
10466 "Index must be in-bounds for NON_CONTIG Dims array");
10468 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10469 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10474 ConstSizes[
I] = CI;
10478 RuntimeSizes.
set(
I);
10481 if (RuntimeSizes.
all()) {
10483 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10484 SizeArrayType,
nullptr,
".offload_sizes");
10490 auto *SizesArrayGbl =
10495 if (!RuntimeSizes.
any()) {
10496 Info.RTArgs.SizesArray = SizesArrayGbl;
10498 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10499 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10502 SizeArrayType,
nullptr,
".offload_sizes");
10506 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10507 SizesArrayGbl, OffloadSizeAlign,
10512 Info.RTArgs.SizesArray = Buffer;
10520 for (
auto mapFlag : CombinedInfo.
Types)
10522 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10526 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10532 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10533 Info.EmitDebug =
true;
10535 Info.RTArgs.MapNamesArray =
10537 Info.EmitDebug =
false;
10542 if (Info.separateBeginEndCalls()) {
10543 bool EndMapTypesDiffer =
false;
10545 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10546 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10547 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10548 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10549 EndMapTypesDiffer =
true;
10552 if (EndMapTypesDiffer) {
10554 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10559 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10562 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10564 Builder.CreateAlignedStore(BPVal, BP,
10565 M.getDataLayout().getPrefTypeAlign(PtrTy));
10567 if (Info.requiresDevicePointerInfo()) {
10569 CodeGenIP =
Builder.saveIP();
10571 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10572 Builder.restoreIP(CodeGenIP);
10574 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10576 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10578 DeviceAddrCB(
I, BP);
10584 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10587 Builder.CreateAlignedStore(PVal,
P,
10588 M.getDataLayout().getPrefTypeAlign(PtrTy));
10590 if (RuntimeSizes.
test(
I)) {
10592 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10598 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10601 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10604 auto CustomMFunc = CustomMapperCB(
I);
10606 return CustomMFunc.takeError();
10608 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10611 PointerArrayType, MappersArray,
10614 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10618 Info.NumberOfPtrs == 0)
10635 Builder.ClearInsertionPoint();
10666 auto CondConstant = CI->getSExtValue();
10668 return ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10670 return ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10680 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10683 if (
Error Err = ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10689 if (
Error Err = ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10698bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10702 "Unexpected Atomic Ordering.");
10704 bool Flush =
false;
10766 assert(
X.Var->getType()->isPointerTy() &&
10767 "OMP Atomic expects a pointer to target memory");
10768 Type *XElemTy =
X.ElemTy;
10771 "OMP atomic read expected a scalar type");
10773 Value *XRead =
nullptr;
10777 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10786 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10789 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10791 XRead = AtomicLoadRes.first;
10798 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10801 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10803 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10806 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10807 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10818 assert(
X.Var->getType()->isPointerTy() &&
10819 "OMP Atomic expects a pointer to target memory");
10820 Type *XElemTy =
X.ElemTy;
10823 "OMP atomic write expected a scalar type");
10831 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10834 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10842 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10847 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10854 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10855 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10861 Type *XTy =
X.Var->getType();
10863 "OMP Atomic expects a pointer to target memory");
10864 Type *XElemTy =
X.ElemTy;
10867 "OMP atomic update expected a scalar or struct type");
10870 "OpenMP atomic does not support LT or GT operations");
10874 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10875 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10877 return AtomicResult.takeError();
10878 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10883Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10887 return Builder.CreateAdd(Src1, Src2);
10889 return Builder.CreateSub(Src1, Src2);
10891 return Builder.CreateAnd(Src1, Src2);
10893 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10895 return Builder.CreateOr(Src1, Src2);
10897 return Builder.CreateXor(Src1, Src2);
10921Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10924 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10925 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10927 bool emitRMWOp =
false;
10935 emitRMWOp = XElemTy;
10938 emitRMWOp = (IsXBinopExpr && XElemTy);
10945 std::pair<Value *, Value *> Res;
10947 AtomicRMWInst *RMWInst =
10948 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10949 if (
T.isAMDGPU()) {
10950 if (IsIgnoreDenormalMode)
10951 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10953 if (!IsFineGrainedMemory)
10954 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10956 if (!IsRemoteMemory)
10960 Res.first = RMWInst;
10965 Res.second = Res.first;
10967 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10970 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10975 OpenMPIRBuilder::AtomicInfo atomicInfo(
10977 OldVal->
getAlign(),
true , AllocaIP,
X);
10978 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10981 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10988 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10989 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10990 Builder.SetInsertPoint(ContBB);
10992 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10994 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10997 Value *Upd = *CBResult;
10998 Builder.CreateStore(Upd, NewAtomicAddr);
11001 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
11002 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
11003 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
11004 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
11007 Res.first = OldExprVal;
11010 if (UnreachableInst *ExitTI =
11013 Builder.SetInsertPoint(ExitBB);
11015 Builder.SetInsertPoint(ExitTI);
11018 IntegerType *IntCastTy =
11021 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
11030 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11037 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
11038 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
11039 Builder.SetInsertPoint(ContBB);
11041 PHI->addIncoming(OldVal, CurBB);
11046 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
11047 X->getName() +
".atomic.fltCast");
11049 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
11050 X->getName() +
".atomic.ptrCast");
11054 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
11057 Value *Upd = *CBResult;
11058 Builder.CreateStore(Upd, NewAtomicAddr);
11059 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
11063 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
11064 Result->setVolatile(VolatileX);
11065 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
11066 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
11067 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
11068 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
11070 Res.first = OldExprVal;
11074 if (UnreachableInst *ExitTI =
11077 Builder.SetInsertPoint(ExitBB);
11079 Builder.SetInsertPoint(ExitTI);
11090 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
11091 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
11096 Type *XTy =
X.Var->getType();
11098 "OMP Atomic expects a pointer to target memory");
11099 Type *XElemTy =
X.ElemTy;
11102 "OMP atomic capture expected a scalar or struct type");
11104 "OpenMP atomic does not support LT or GT operations");
11111 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
11112 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
11115 Value *CapturedVal =
11116 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
11117 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
11119 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
11131 IsPostfixUpdate, IsFailOnly, Failure);
11143 assert(
X.Var->getType()->isPointerTy() &&
11144 "OMP atomic expects a pointer to target memory");
11147 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
11148 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
11151 bool IsInteger = E->getType()->isIntegerTy();
11153 if (
Op == OMPAtomicCompareOp::EQ) {
11156 Value *OldValue =
nullptr;
11157 Value *SuccessOrFail =
nullptr;
11195 X.Var->getName() +
".atomic.load");
11201 Value *EIsNaN =
Builder.CreateFCmpUNO(E, E,
"atomic.e.isnan");
11202 Value *XIsNaN =
Builder.CreateFCmpUNO(XFP, XFP,
"atomic.x.isnan");
11203 Value *EitherNaN =
Builder.CreateOr(EIsNaN, XIsNaN,
"atomic.either.nan");
11208 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11212 M.getContext(),
X.Var->getName() +
".atomic.nan",
F, ExitBB);
11214 M.getContext(),
X.Var->getName() +
".atomic.notnan",
F, ExitBB);
11216 M.getContext(),
X.Var->getName() +
".atomic.zero",
F, ExitBB);
11218 M.getContext(),
X.Var->getName() +
".atomic.normal",
F, ExitBB);
11222 Builder.SetInsertPoint(CurBB);
11223 Builder.CreateCondBr(EitherNaN, NaNBB, NotNaNBB);
11226 Builder.SetInsertPoint(NaNBB);
11230 Builder.SetInsertPoint(NotNaNBB);
11233 X.Var->getName() +
".atomic.xiszero");
11235 "atomic.e.iszero");
11236 Value *BothZero =
Builder.CreateAnd(XIsZero, EIsZero,
"atomic.both.zero");
11237 Builder.CreateCondBr(BothZero, ZeroBB, NormalBB);
11240 Builder.SetInsertPoint(ZeroBB);
11242 X.Var, XCurr, DBCast,
MaybeAlign(), AO, Failure);
11243 Value *OldZero =
Builder.CreateExtractValue(ResZero, 0);
11244 Value *OkZero =
Builder.CreateExtractValue(ResZero, 1);
11248 Builder.SetInsertPoint(NormalBB);
11250 X.Var, EBCast, DBCast,
MaybeAlign(), AO, Failure);
11251 Value *OldNormal =
Builder.CreateExtractValue(ResNormal, 0);
11252 Value *OkNormal =
Builder.CreateExtractValue(ResNormal, 1);
11258 Builder.CreatePHI(IntCastTy, 3,
X.Var->getName() +
".atomic.old");
11263 X.Var->getName() +
".atomic.ok");
11270 Builder.SetInsertPoint(ExitBB);
11275 OldValue =
Builder.CreateBitCast(OldIntPHI,
X.ElemTy,
11276 X.Var->getName() +
".atomic.old.fp");
11277 SuccessOrFail = SuccessPHI;
11285 Result =
Builder.CreateAtomicCmpXchg(
X.Var, EBCast, DBCast,
11293 OldValue =
Builder.CreateExtractValue(Result, 0);
11295 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
11297 "OldValue and V must be of same type");
11298 if (IsPostfixUpdate) {
11299 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11301 SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
11305 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11307 CurBBTI,
X.Var->getName() +
".atomic.exit");
11313 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11315 Builder.SetInsertPoint(ContBB);
11316 Builder.CreateStore(OldValue, V.Var);
11322 Builder.SetInsertPoint(ExitBB);
11324 Builder.SetInsertPoint(ExitTI);
11327 Value *CapturedValue =
11328 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11329 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11335 assert(R.Var->getType()->isPointerTy() &&
11336 "r.var must be of pointer type");
11337 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11339 Value *SuccessFailureVal =
11340 Builder.CreateExtractValue(Result, 1);
11341 Value *ResultCast =
11342 R.IsSigned ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
11343 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
11344 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11353 "OldValue and V must be of same type");
11354 if (IsPostfixUpdate) {
11355 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11360 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11362 CurBBTI,
X.Var->getName() +
".atomic.exit");
11368 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11370 Builder.SetInsertPoint(ContBB);
11371 Builder.CreateStore(OldValue, V.Var);
11377 Builder.SetInsertPoint(ExitBB);
11379 Builder.SetInsertPoint(ExitTI);
11382 Value *CapturedValue =
11383 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11384 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11390 assert(R.Var->getType()->isPointerTy() &&
11391 "r.var must be of pointer type");
11392 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11394 Value *ResultCast = R.IsSigned
11395 ?
Builder.CreateSExt(SuccessOrFail, R.ElemTy)
11396 :
Builder.CreateZExt(SuccessOrFail, R.ElemTy);
11397 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11401 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
11402 "Op should be either max or min at this point");
11403 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
11414 if (IsXBinopExpr) {
11443 Value *CapturedValue =
nullptr;
11444 if (IsPostfixUpdate) {
11445 CapturedValue = OldValue;
11470 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
11471 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
11473 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11477 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
11497 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
11524 bool SubClausesPresent =
11525 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
11527 if (!
Config.isTargetDevice() && SubClausesPresent) {
11528 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
11529 "if lowerbound is non-null, then upperbound must also be non-null "
11530 "for bounds on num_teams");
11532 if (NumTeamsUpper ==
nullptr)
11533 NumTeamsUpper =
Builder.getInt32(0);
11535 if (NumTeamsLower ==
nullptr)
11536 NumTeamsLower = NumTeamsUpper;
11540 "argument to if clause must be an integer value");
11544 IfExpr =
Builder.CreateICmpNE(IfExpr,
11545 ConstantInt::get(IfExpr->
getType(), 0));
11546 NumTeamsUpper =
Builder.CreateSelect(
11547 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
11550 NumTeamsLower =
Builder.CreateSelect(
11551 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
11554 if (ThreadLimit ==
nullptr)
11555 ThreadLimit =
Builder.getInt32(0);
11559 Value *NumTeamsLowerInt32 =
11561 Value *NumTeamsUpperInt32 =
11563 Value *ThreadLimitInt32 =
11570 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
11571 ThreadLimitInt32});
11576 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11579 auto OI = std::make_unique<OutlineInfo>();
11580 OI->EntryBB = AllocaBB;
11581 OI->ExitBB = ExitBB;
11582 OI->OuterAllocBB = &OuterAllocaBB;
11588 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
11590 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
11592 auto HostPostOutlineCB = [
this, Ident,
11593 ToBeDeleted](
Function &OutlinedFn)
mutable {
11598 "there must be a single user for the outlined function");
11603 "Outlined function must have two or three arguments only");
11605 bool HasShared = OutlinedFn.
arg_size() == 3;
11613 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
11614 "outlined function.");
11615 Builder.SetInsertPoint(StaleCI);
11622 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11626 I->eraseFromParent();
11629 if (!
Config.isTargetDevice())
11630 OI->PostOutlineCB = HostPostOutlineCB;
11634 Builder.SetInsertPoint(ExitBB);
11647 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11662 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11667 if (
Config.isTargetDevice()) {
11668 auto OI = std::make_unique<OutlineInfo>();
11669 OI->OuterAllocBB = OuterAllocIP.
getBlock();
11670 OI->EntryBB = AllocaBB;
11671 OI->ExitBB = ExitBB;
11672 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
11673 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
11677 Builder.SetInsertPoint(ExitBB);
11684 std::string VarName) {
11693 return MapNamesArrayGlobal;
11698void OpenMPIRBuilder::initializeTypes(
Module &M) {
11702 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11703#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11704#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11705 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11706 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11707#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11708 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11709 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11710#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11711 T = StructType::getTypeByName(Ctx, StructName); \
11713 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11715 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11716#include "llvm/Frontend/OpenMP/OMPKinds.def"
11727 while (!Worklist.
empty()) {
11731 if (
BlockSet.insert(SuccBB).second)
11736std::unique_ptr<CodeExtractor>
11738 bool ArgsInZeroAddressSpace,
11740 return std::make_unique<CodeExtractor>(
11750 Suffix.
str(), ArgsInZeroAddressSpace);
11753std::unique_ptr<CodeExtractor> DeviceSharedMemOutlineInfo::createCodeExtractor(
11755 return std::make_unique<DeviceSharedMemCodeExtractor>(
11756 OMPBuilder, Blocks,
nullptr,
11764 OuterDeallocBBs.empty()
11767 Suffix.
str(), ArgsInZeroAddressSpace);
11777 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11789 Fn->
addFnAttr(
"uniform-work-group-size");
11790 Fn->
addFnAttr(Attribute::MustProgress);
11808 auto &&GetMDInt = [
this](
unsigned V) {
11815 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11816 auto &&TargetRegionMetadataEmitter =
11817 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11832 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11833 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11834 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11835 GetMDInt(E.getOrder())};
11838 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11847 auto &&DeviceGlobalVarMetadataEmitter =
11848 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11858 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11859 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11863 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11870 DeviceGlobalVarMetadataEmitter);
11872 for (
const auto &E : OrderedEntries) {
11873 assert(E.first &&
"All ordered entries must exist!");
11874 if (
const auto *CE =
11877 if (!CE->getID() || !CE->getAddress()) {
11881 if (!
M.getNamedValue(FnName))
11889 }
else if (
const auto *CE =
dyn_cast<
11898 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11900 if (!CE->getAddress()) {
11905 if (CE->getVarSize() == 0)
11909 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11910 (!
Config.isTargetDevice() && CE->getAddress())) &&
11911 "Declaret target link address is set.");
11912 if (
Config.isTargetDevice())
11914 if (!CE->getAddress()) {
11921 if (!CE->getAddress()) {
11934 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11938 OMPTargetGlobalVarEntryIndirectVTable))
11947 Flags, CE->getLinkage(), CE->getVarName());
11950 Flags, CE->getLinkage());
11961 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11967 Config.getRequiresFlags());
11977 OS <<
"_" <<
Count;
11982 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11985 EntryInfo.
Line, NewCount);
11993 auto FileIDInfo = CallBack();
11997 FileID =
Status->getUniqueID().getFile();
12001 FileID =
hash_value(std::get<0>(FileIDInfo));
12005 std::get<1>(FileIDInfo));
12011 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12013 !(Remain & 1); Remain = Remain >> 1)
12031 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12033 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12040 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12046 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
12047 Flags |= MemberOfFlag;
12053 bool IsDeclaration,
bool IsExternallyVisible,
12055 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
12056 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
12057 std::function<
Constant *()> GlobalInitializer,
12068 Config.hasRequiresUnifiedSharedMemory())) {
12073 if (!IsExternallyVisible)
12075 OS <<
"_decl_tgt_ref_ptr";
12078 Value *Ptr =
M.getNamedValue(PtrName);
12087 if (!
Config.isTargetDevice()) {
12088 if (GlobalInitializer)
12089 GV->setInitializer(GlobalInitializer());
12095 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
12096 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
12097 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
12109 bool IsDeclaration,
bool IsExternallyVisible,
12111 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
12112 std::vector<Triple> TargetTriple,
12113 std::function<
Constant *()> GlobalInitializer,
12117 (TargetTriple.empty() && !
Config.isTargetDevice()))
12128 !
Config.hasRequiresUnifiedSharedMemory()) {
12130 VarName = MangledName;
12133 if (!IsDeclaration)
12135 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
12138 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
12142 if (
Config.isTargetDevice() &&
12151 if (!
M.getNamedValue(RefName)) {
12155 GvAddrRef->setConstant(
true);
12157 GvAddrRef->setInitializer(Addr);
12158 GeneratedRefs.push_back(GvAddrRef);
12167 if (
Config.isTargetDevice()) {
12168 VarName = (Addr) ? Addr->
getName() :
"";
12172 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
12173 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
12174 LlvmPtrTy, GlobalInitializer, VariableLinkage);
12175 VarName = (Addr) ? Addr->
getName() :
"";
12177 VarSize =
M.getDataLayout().getPointerSize();
12196 auto &&GetMDInt = [MN](
unsigned Idx) {
12201 auto &&GetMDString = [MN](
unsigned Idx) {
12203 return V->getString();
12206 switch (GetMDInt(0)) {
12210 case OffloadEntriesInfoManager::OffloadEntryInfo::
12211 OffloadingEntryInfoTargetRegion: {
12221 case OffloadEntriesInfoManager::OffloadEntryInfo::
12222 OffloadingEntryInfoDeviceGlobalVar:
12235 if (HostFilePath.
empty())
12239 if (std::error_code Err = Buf.getError()) {
12241 "OpenMPIRBuilder: " +
12249 if (std::error_code Err =
M.getError()) {
12251 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
12265 "expected a valid insertion block for creating an iterator loop");
12275 Builder.getCurrentDebugLocation(),
"omp.it.cont");
12287 T->eraseFromParent();
12296 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
12298 "iterator bodygen must terminate the canonical body with an "
12299 "unconditional branch to the loop latch",
12323 for (
const auto &
ParamAttr : ParamAttrs) {
12366 return std::string(Out.
str());
12374 unsigned VecRegSize;
12376 ISADataTy ISAData[] = {
12395 for (
char Mask :
Masked) {
12396 for (
const ISADataTy &
Data : ISAData) {
12399 Out <<
"_ZGV" <<
Data.ISA << Mask;
12401 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
12415template <
typename T>
12418 StringRef MangledName,
bool OutputBecomesInput,
12422 Out << Prefix << ISA << LMask << VLEN;
12423 if (OutputBecomesInput)
12425 Out << ParSeq <<
'_' << MangledName;
12434 bool OutputBecomesInput,
12439 OutputBecomesInput, Fn);
12441 OutputBecomesInput, Fn);
12445 OutputBecomesInput, Fn);
12447 OutputBecomesInput, Fn);
12451 OutputBecomesInput, Fn);
12453 OutputBecomesInput, Fn);
12458 OutputBecomesInput, Fn);
12469 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
12470 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
12482 OutputBecomesInput, Fn);
12489 OutputBecomesInput, Fn);
12491 OutputBecomesInput, Fn);
12495 OutputBecomesInput, Fn);
12499 OutputBecomesInput, Fn);
12508 OutputBecomesInput, Fn);
12515 MangledName, OutputBecomesInput, Fn);
12517 MangledName, OutputBecomesInput, Fn);
12521 MangledName, OutputBecomesInput, Fn);
12525 MangledName, OutputBecomesInput, Fn);
12535 return OffloadEntriesTargetRegion.empty() &&
12536 OffloadEntriesDeviceGlobalVar.empty();
12539unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
12541 auto It = OffloadEntriesTargetRegionCount.find(
12542 getTargetRegionEntryCountKey(EntryInfo));
12543 if (It == OffloadEntriesTargetRegionCount.end())
12548void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
12550 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
12551 EntryInfo.
Count + 1;
12557 OffloadEntriesTargetRegion[EntryInfo] =
12560 ++OffloadingEntriesNum;
12566 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
12569 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12573 if (OMPBuilder->Config.isTargetDevice()) {
12578 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
12579 Entry.setAddress(Addr);
12581 Entry.setFlags(Flags);
12587 "Target region entry already registered!");
12589 OffloadEntriesTargetRegion[EntryInfo] = Entry;
12590 ++OffloadingEntriesNum;
12592 incrementTargetRegionEntryInfoCount(EntryInfo);
12599 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12601 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
12602 if (It == OffloadEntriesTargetRegion.end()) {
12606 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
12614 for (
const auto &It : OffloadEntriesTargetRegion) {
12615 Action(It.first, It.second);
12621 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
12622 ++OffloadingEntriesNum;
12628 if (OMPBuilder->Config.isTargetDevice()) {
12632 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12634 if (Entry.getVarSize() == 0) {
12635 Entry.setVarSize(VarSize);
12636 Entry.setLinkage(Linkage);
12640 Entry.setVarSize(VarSize);
12641 Entry.setLinkage(Linkage);
12642 Entry.setAddress(Addr);
12645 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12646 assert(Entry.isValid() && Entry.getFlags() == Flags &&
12647 "Entry not initialized!");
12648 if (Entry.getVarSize() == 0) {
12649 Entry.setVarSize(VarSize);
12650 Entry.setLinkage(Linkage);
12657 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
12658 Addr, VarSize, Flags, Linkage,
12661 OffloadEntriesDeviceGlobalVar.try_emplace(
12662 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12663 ++OffloadingEntriesNum;
12670 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12671 Action(E.getKey(), E.getValue());
12678void CanonicalLoopInfo::collectControlBlocks(
12685 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12697void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12709void CanonicalLoopInfo::mapIndVar(
12719 for (
Use &U : OldIV->
uses()) {
12723 if (
User->getParent() == getCond())
12725 if (
User->getParent() == getLatch())
12731 Value *NewIV = Updater(OldIV);
12734 for (Use *U : ReplacableUses)
12755 "Preheader must terminate with unconditional branch");
12757 "Preheader must jump to header");
12761 "Header must terminate with unconditional branch");
12762 assert(Header->getSingleSuccessor() == Cond &&
12763 "Header must jump to exiting block");
12766 assert(Cond->getSinglePredecessor() == Header &&
12767 "Exiting block only reachable from header");
12770 "Exiting block must terminate with conditional branch");
12772 "Exiting block's first successor jump to the body");
12774 "Exiting block's second successor must exit the loop");
12778 "Body only reachable from exiting block");
12783 "Latch must terminate with unconditional branch");
12784 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12787 assert(Latch->getSinglePredecessor() !=
nullptr);
12792 "Exit block must terminate with unconditional branch");
12793 assert(Exit->getSingleSuccessor() == After &&
12794 "Exit block must jump to after block");
12798 "After block only reachable from exit block");
12802 assert(IndVar &&
"Canonical induction variable not found?");
12804 "Induction variable must be an integer");
12806 "Induction variable must be a PHI in the loop header");
12812 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12820 assert(TripCount &&
"Loop trip count not found?");
12822 "Trip count and induction variable must have the same type");
12826 "Exit condition must be a signed less-than comparison");
12828 "Exit condition must compare the induction variable");
12830 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static Function * createTargetParallelWrapper(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn)
Create wrapper function used to gather the outlined function's argument structure from a shared buffe...
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop, depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static bool isGenericKernel(Function &Fn)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static std::optional< omp::OMPTgtExecModeFlags > getTargetKernelExecMode(Function &Kernel)
Given a function, if it represents the entry point of a target kernel, this returns the execution mod...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static cl::opt< bool > UseDefaultMaxThreads("openmp-ir-builder-use-default-max-threads", cl::Hidden, cl::desc("Use a default max threads if none is provided."), cl::init(true))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const OpenMPIRBuilder::DependenciesInfo &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is a wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns an iterator to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string, and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions. NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a declare target to.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
LLVM_ABI CallInst * createOMPAllocShared(const LocationDescription &Loc, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_alloc_shared.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
LLVM_ABI InsertPointOrErrorTy createScope(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait)
Generator for 'omp scope'.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for 'omp teams'.
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be freed.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const DependenciesInfo &Dependencies={}, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> TargetBodyGenCallbackTy
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
bool HandleFPNegZero
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for 'omp distribute'.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB, bool PreserveMemberOfFlags=false)
Emit the user-defined mapper function.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, const DependenciesInfo &Dependencies={}, const AffinityData &Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for 'omp task'.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={}, ArrayRef< BasicBlock * > DeallocBlocks={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
void addOutlineInfo(std::unique_ptr< OutlineInfo > &&OI)
Add a new region that will be outlined later.
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number.
LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry, const DependData &Dep)
Store one kmp_depend_info entry at the given Entry pointer.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI CallInst * createOMPAlignedAlloc(const LocationDescription &Loc, Value *Align, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_align_alloc.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive splits and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPFreeShared(const LocationDescription &Loc, Value *Addr, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_free_shared.
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be freed.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
SmallVector< std::unique_ptr< OutlineInfo >, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X. For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< InsertPointTy > DeallocIPs)> TargetGenArgAccessorsCallbackTy
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const DependenciesInfo &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Represent a constant reference to a string, i.e.
std::string str() const
Get the contents as an std::string.
constexpr bool empty() const
Check if the string is empty.
constexpr size_t size() const
Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return the user if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to a SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr)
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_SPMD_NO_LOOP
@ OMP_TGT_EXEC_MODE_GENERIC
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
auto filter_to_vector(ContainerTy &&C, PredicateFn &&Pred)
Filter a range to a SmallVector with the element types deduced.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
A struct to pack relevant information while generating atomic Ops.
A struct to pack the relevant information for an OpenMP depend clause.
omp::RTLDependenceKindTy DepKind
A struct to pack static and dynamic dependency information for a task.
SmallVector< DependData > Deps
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
BasicBlock * OuterAllocBB
virtual LLVM_ABI std::unique_ptr< CodeExtractor > createCodeExtractor(ArrayRef< BasicBlock * > Blocks, bool ArgsInZeroAddressSpace, Twine Suffix=Twine(""))
Create a CodeExtractor instance based on the information stored in this structure,...
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static LLVM_ABI const Target * lookupTarget(const Triple &TheTriple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...