69#define DEBUG_TYPE "openmp-ir-builder"
76 cl::desc(
"Use optimistic attributes describing "
77 "'as-if' properties of runtime calls."),
81 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
82 cl::desc(
"Factor for the unroll threshold to account for code "
83 "simplifications still taking place"),
94 if (!IP1.isSet() || !IP2.isSet())
96 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
101 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
102 case OMPScheduleType::UnorderedStaticChunked:
103 case OMPScheduleType::UnorderedStatic:
104 case OMPScheduleType::UnorderedDynamicChunked:
105 case OMPScheduleType::UnorderedGuidedChunked:
106 case OMPScheduleType::UnorderedRuntime:
107 case OMPScheduleType::UnorderedAuto:
108 case OMPScheduleType::UnorderedTrapezoidal:
109 case OMPScheduleType::UnorderedGreedy:
110 case OMPScheduleType::UnorderedBalanced:
111 case OMPScheduleType::UnorderedGuidedIterativeChunked:
112 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
113 case OMPScheduleType::UnorderedSteal:
114 case OMPScheduleType::UnorderedStaticBalancedChunked:
115 case OMPScheduleType::UnorderedGuidedSimd:
116 case OMPScheduleType::UnorderedRuntimeSimd:
117 case OMPScheduleType::OrderedStaticChunked:
118 case OMPScheduleType::OrderedStatic:
119 case OMPScheduleType::OrderedDynamicChunked:
120 case OMPScheduleType::OrderedGuidedChunked:
121 case OMPScheduleType::OrderedRuntime:
122 case OMPScheduleType::OrderedAuto:
123 case OMPScheduleType::OrderdTrapezoidal:
124 case OMPScheduleType::NomergeUnorderedStaticChunked:
125 case OMPScheduleType::NomergeUnorderedStatic:
126 case OMPScheduleType::NomergeUnorderedDynamicChunked:
127 case OMPScheduleType::NomergeUnorderedGuidedChunked:
128 case OMPScheduleType::NomergeUnorderedRuntime:
129 case OMPScheduleType::NomergeUnorderedAuto:
130 case OMPScheduleType::NomergeUnorderedTrapezoidal:
131 case OMPScheduleType::NomergeUnorderedGreedy:
132 case OMPScheduleType::NomergeUnorderedBalanced:
133 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
134 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
135 case OMPScheduleType::NomergeUnorderedSteal:
136 case OMPScheduleType::NomergeOrderedStaticChunked:
137 case OMPScheduleType::NomergeOrderedStatic:
138 case OMPScheduleType::NomergeOrderedDynamicChunked:
139 case OMPScheduleType::NomergeOrderedGuidedChunked:
140 case OMPScheduleType::NomergeOrderedRuntime:
141 case OMPScheduleType::NomergeOrderedAuto:
142 case OMPScheduleType::NomergeOrderedTrapezoidal:
143 case OMPScheduleType::OrderedDistributeChunked:
144 case OMPScheduleType::OrderedDistribute:
152 SchedType & OMPScheduleType::MonotonicityMask;
153 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
165 Builder.restoreIP(IP);
173 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
179 Kernel->getFnAttribute(
"target-features").getValueAsString();
180 if (Features.
count(
"+wavefrontsize64"))
195 bool HasSimdModifier,
bool HasDistScheduleChunks) {
197 switch (ClauseKind) {
198 case OMP_SCHEDULE_Default:
199 case OMP_SCHEDULE_Static:
200 return HasChunks ? OMPScheduleType::BaseStaticChunked
201 : OMPScheduleType::BaseStatic;
202 case OMP_SCHEDULE_Dynamic:
203 return OMPScheduleType::BaseDynamicChunked;
204 case OMP_SCHEDULE_Guided:
205 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
206 : OMPScheduleType::BaseGuidedChunked;
207 case OMP_SCHEDULE_Auto:
209 case OMP_SCHEDULE_Runtime:
210 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
211 : OMPScheduleType::BaseRuntime;
212 case OMP_SCHEDULE_Distribute:
213 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
214 : OMPScheduleType::BaseDistribute;
222 bool HasOrderedClause) {
223 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
224 OMPScheduleType::None &&
225 "Must not have ordering nor monotonicity flags already set");
228 ? OMPScheduleType::ModifierOrdered
229 : OMPScheduleType::ModifierUnordered;
230 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
233 if (OrderingScheduleType ==
234 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
235 return OMPScheduleType::OrderedGuidedChunked;
236 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
237 OMPScheduleType::ModifierOrdered))
238 return OMPScheduleType::OrderedRuntime;
240 return OrderingScheduleType;
246 bool HasSimdModifier,
bool HasMonotonic,
247 bool HasNonmonotonic,
bool HasOrderedClause) {
248 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
249 OMPScheduleType::None &&
250 "Must not have monotonicity flags already set");
251 assert((!HasMonotonic || !HasNonmonotonic) &&
252 "Monotonic and Nonmonotonic are contradicting each other");
255 return ScheduleType | OMPScheduleType::ModifierMonotonic;
256 }
else if (HasNonmonotonic) {
257 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
267 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
268 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
274 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
282 bool HasSimdModifier,
bool HasMonotonicModifier,
283 bool HasNonmonotonicModifier,
bool HasOrderedClause,
284 bool HasDistScheduleChunks) {
286 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
290 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
291 HasNonmonotonicModifier, HasOrderedClause);
299static std::optional<omp::OMPTgtExecModeFlags>
304 if (
Call->getCalledFunction()->getName() ==
"__kmpc_target_init") {
305 TargetInitCall =
Call;
330 std::optional<omp::OMPTgtExecModeFlags> ExecMode =
342 if (
Instruction *Term = Source->getTerminatorOrNull()) {
351 NewBr->setDebugLoc(
DL);
356 assert(New->getFirstInsertionPt() == New->begin() &&
357 "Target BB must not have PHI nodes");
373 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
377 NewBr->setDebugLoc(
DL);
389 Builder.SetInsertPoint(Old);
393 Builder.SetCurrentDebugLocation(
DebugLoc);
403 New->replaceSuccessorsPhiUsesWith(Old, New);
412 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
414 Builder.SetInsertPoint(Builder.GetInsertBlock());
417 Builder.SetCurrentDebugLocation(
DebugLoc);
426 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
428 Builder.SetInsertPoint(Builder.GetInsertBlock());
431 Builder.SetCurrentDebugLocation(
DebugLoc);
448 const Twine &Name =
"",
bool AsPtr =
true,
449 bool Is64Bit =
false) {
450 Builder.restoreIP(OuterAllocaIP);
454 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
458 FakeVal = FakeValAddr;
460 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
465 Builder.restoreIP(InnerAllocaIP);
468 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
471 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
484enum OpenMPOffloadingRequiresDirFlags {
486 OMP_REQ_UNDEFINED = 0x000,
488 OMP_REQ_NONE = 0x001,
490 OMP_REQ_REVERSE_OFFLOAD = 0x002,
492 OMP_REQ_UNIFIED_ADDRESS = 0x004,
494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
503 DominatorTree *DT =
nullptr,
bool AggregateArgs =
false,
504 BlockFrequencyInfo *BFI =
nullptr,
505 BranchProbabilityInfo *BPI =
nullptr,
506 AssumptionCache *AC =
nullptr,
bool AllowVarArgs =
false,
507 bool AllowAlloca =
false,
508 BasicBlock *AllocationBlock =
nullptr,
510 std::string Suffix =
"",
bool ArgsInZeroAddressSpace =
false)
511 : CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs,
512 AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix,
513 ArgsInZeroAddressSpace),
514 OMPBuilder(OMPBuilder) {}
516 virtual ~OMPCodeExtractor() =
default;
519 OpenMPIRBuilder &OMPBuilder;
522class DeviceSharedMemCodeExtractor :
public OMPCodeExtractor {
524 using OMPCodeExtractor::OMPCodeExtractor;
525 virtual ~DeviceSharedMemCodeExtractor() =
default;
529 allocateVar(IRBuilder<>::InsertPoint AllocaIP,
Type *VarType,
530 const Twine &Name = Twine(
""),
531 AddrSpaceCastInst **CastedAlloc =
nullptr)
override {
532 return OMPBuilder.createOMPAllocShared(AllocaIP, VarType, Name);
535 virtual Instruction *deallocateVar(IRBuilder<>::InsertPoint DeallocIP,
537 return OMPBuilder.createOMPFreeShared(DeallocIP, Var, VarType);
544 OpenMPIRBuilder &OMPBuilder;
546 DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder)
547 : OMPBuilder(OMPBuilder) {}
548 virtual ~DeviceSharedMemOutlineInfo() =
default;
550 virtual std::unique_ptr<CodeExtractor>
552 bool ArgsInZeroAddressSpace,
553 Twine Suffix = Twine(
""))
override;
559 : RequiresFlags(OMP_REQ_UNDEFINED) {}
563 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
564 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
567 RequiresFlags(OMP_REQ_UNDEFINED) {
568 if (HasRequiresReverseOffload)
569 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
570 if (HasRequiresUnifiedAddress)
571 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
572 if (HasRequiresUnifiedSharedMemory)
573 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
574 if (HasRequiresDynamicAllocators)
575 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
579 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
583 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
587 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
591 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
596 :
static_cast<int64_t
>(OMP_REQ_NONE);
601 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
603 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
608 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
610 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
615 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
617 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
622 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
624 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
637 constexpr size_t MaxDim = 3;
642 Value *DynCGroupMemFallbackFlag =
644 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
645 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
651 Value *NumThreads3D =
682 auto FnAttrs = Attrs.getFnAttrs();
683 auto RetAttrs = Attrs.getRetAttrs();
685 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
690 bool Param =
true) ->
void {
691 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
692 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
693 if (HasSignExt || HasZeroExt) {
694 assert(AS.getNumAttributes() == 1 &&
695 "Currently not handling extension attr combined with others.");
697 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
700 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
707#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
708#include "llvm/Frontend/OpenMP/OMPKinds.def"
712#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
714 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
715 addAttrSet(RetAttrs, RetAttrSet, false); \
716 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
717 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
718 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
720#include "llvm/Frontend/OpenMP/OMPKinds.def"
734#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
736 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
738 Fn = M.getFunction(Str); \
740#include "llvm/Frontend/OpenMP/OMPKinds.def"
746#define OMP_RTL(Enum, Str, ...) \
748 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
750#include "llvm/Frontend/OpenMP/OMPKinds.def"
754 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
764 LLVMContext::MD_callback,
766 2, {-1, -1},
true)}));
779 assert(Fn &&
"Failed to create OpenMP runtime function");
790 Builder.SetInsertPoint(FiniBB);
802 FiniBB = OtherFiniBB;
804 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
812 auto EndIt = FiniBB->end();
813 if (FiniBB->size() >= 1)
814 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
819 FiniBB->replaceAllUsesWith(OtherFiniBB);
820 FiniBB->eraseFromParent();
821 FiniBB = OtherFiniBB;
828 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
851 for (
auto Inst =
Block->getReverseIterator()->begin();
852 Inst !=
Block->getReverseIterator()->end();) {
881 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
902 DeferredOutlines.
push_back(std::move(OI));
906 ParallelRegionBlockSet.
clear();
908 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
918 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
919 std::unique_ptr<CodeExtractor> Extractor =
920 OI->createCodeExtractor(Blocks, ArgsInZeroAddressSpace,
".omp_par");
924 <<
" Exit: " << OI->ExitBB->getName() <<
"\n");
925 assert(Extractor->isEligible() &&
926 "Expected OpenMP outlining to be possible!");
928 for (
auto *V : OI->ExcludeArgsFromAggregate)
929 Extractor->excludeArgFromAggregate(V);
932 Extractor->extractCodeRegion(CEAC, OI->Inputs, OI->Outputs);
936 if (TargetCpuAttr.isStringAttribute())
939 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
940 if (TargetFeaturesAttr.isStringAttribute())
941 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
944 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
946 "OpenMP outlined functions should not return a value!");
951 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
958 assert(OI->EntryBB->getUniquePredecessor() == &ArtificialEntry);
965 "Expected instructions to add in the outlined region entry");
967 End = ArtificialEntry.
rend();
972 if (
I.isTerminator()) {
974 if (
Instruction *TI = OI->EntryBB->getTerminatorOrNull())
975 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
979 I.moveBeforePreserving(*OI->EntryBB,
980 OI->EntryBB->getFirstInsertionPt());
983 OI->EntryBB->moveBefore(&ArtificialEntry);
990 if (OI->PostOutlineCB)
991 OI->PostOutlineCB(*OutlinedFn);
993 if (OI->FixUpNonEntryAllocas)
1025 errs() <<
"Error of kind: " << Kind
1026 <<
" when emitting offload entries and metadata during "
1027 "OMPIRBuilder finalization \n";
1033 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
1034 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
1035 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
1036 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
1053 ConstantInt::get(I32Ty,
Value), Name);
1066 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
1070 if (UsedArray.
empty())
1077 GV->setSection(
"llvm.metadata");
1083 auto *Int8Ty =
Builder.getInt8Ty();
1086 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1094 unsigned Reserve2Flags) {
1096 LocFlags |= OMP_IDENT_FLAG_KMPC;
1103 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1104 ConstantInt::get(Int32, Reserve2Flags),
1105 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1107 size_t SrcLocStrArgIdx = 4;
1108 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1112 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1119 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1120 if (
GV.getInitializer() == Initializer)
1125 M, OpenMPIRBuilder::Ident,
1128 M.getDataLayout().getDefaultGlobalsAddressSpace());
1140 SrcLocStrSize = LocStr.
size();
1149 if (
GV.isConstant() &&
GV.hasInitializer() &&
1150 GV.getInitializer() == Initializer)
1153 SrcLocStr =
Builder.CreateGlobalString(
1154 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1162 unsigned Line,
unsigned Column,
1168 Buffer.
append(FunctionName);
1170 Buffer.
append(std::to_string(Line));
1172 Buffer.
append(std::to_string(Column));
1180 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1191 if (
DIFile *DIF = DIL->getFile())
1192 if (std::optional<StringRef> Source = DIF->getSource())
1198 DIL->getColumn(), SrcLocStrSize);
1204 Loc.IP.getBlock()->getParent());
1210 "omp_global_thread_num");
1215 bool ForceSimpleCall,
bool CheckCancelFlag) {
1225 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1228 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1231 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1234 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1237 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1250 bool UseCancelBarrier =
1255 ? OMPRTL___kmpc_cancel_barrier
1256 : OMPRTL___kmpc_barrier),
1259 if (UseCancelBarrier && CheckCancelFlag)
1269 omp::Directive CanceledDirective) {
1274 auto *UI =
Builder.CreateUnreachable();
1282 Builder.SetInsertPoint(ElseTI);
1283 auto ElseIP =
Builder.saveIP();
1291 Builder.SetInsertPoint(ThenTI);
1293 Value *CancelKind =
nullptr;
1294 switch (CanceledDirective) {
1295#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1296 case DirectiveEnum: \
1297 CancelKind = Builder.getInt32(Value); \
1299#include "llvm/Frontend/OpenMP/OMPKinds.def"
1316 Builder.SetInsertPoint(UI->getParent());
1317 UI->eraseFromParent();
1324 omp::Directive CanceledDirective) {
1329 auto *UI =
Builder.CreateUnreachable();
1332 Value *CancelKind =
nullptr;
1333 switch (CanceledDirective) {
1334#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1335 case DirectiveEnum: \
1336 CancelKind = Builder.getInt32(Value); \
1338#include "llvm/Frontend/OpenMP/OMPKinds.def"
1355 Builder.SetInsertPoint(UI->getParent());
1356 UI->eraseFromParent();
1369 auto *KernelArgsPtr =
1370 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1375 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1378 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1382 NumThreads, HostPtr, KernelArgsPtr};
1409 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1413 Value *Return =
nullptr;
1433 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1434 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1441 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1443 auto CurFn =
Builder.GetInsertBlock()->getParent();
1450 emitBlock(OffloadContBlock, CurFn,
true);
1455 Value *CancelFlag, omp::Directive CanceledDirective) {
1457 "Unexpected cancellation!");
1477 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1486 Builder.SetInsertPoint(CancellationBlock);
1487 Builder.CreateBr(*FiniBBOrErr);
1490 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1502 size_t NumArgs = OutlinedFn.
arg_size();
1503 assert((NumArgs == 2 || NumArgs == 3) &&
1504 "expected a 2-3 argument parallel outlined function");
1505 bool UseArgStruct = NumArgs == 3;
1510 {Builder.getInt16Ty(), Builder.getInt32Ty()},
1514 OutlinedFn.
getName() +
".wrapper", OMPIRBuilder->
M);
1516 WrapperFn->addParamAttr(0, Attribute::NoUndef);
1517 WrapperFn->addParamAttr(0, Attribute::ZExt);
1518 WrapperFn->addParamAttr(1, Attribute::NoUndef);
1522 Builder.SetInsertPoint(EntryBB);
1525 Value *AddrAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1527 AddrAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1528 AddrAlloca, Builder.getPtrTy(0),
1529 AddrAlloca->
getName() +
".ascast");
1531 Value *ZeroAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1533 ZeroAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1534 ZeroAlloca, Builder.getPtrTy(0),
1535 ZeroAlloca->
getName() +
".ascast");
1537 Value *ArgsAlloca =
nullptr;
1539 ArgsAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
1540 nullptr,
"global_args");
1541 ArgsAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1542 ArgsAlloca, Builder.getPtrTy(0),
1543 ArgsAlloca->
getName() +
".ascast");
1547 Builder.CreateStore(WrapperFn->getArg(1), AddrAlloca);
1548 Builder.CreateStore(Builder.getInt32(0), ZeroAlloca);
1552 llvm::omp::RuntimeFunction::OMPRTL___kmpc_get_shared_variables),
1560 Value *StructArg = Builder.CreateLoad(Builder.getPtrTy(), ArgsAlloca);
1561 StructArg = Builder.CreateInBoundsGEP(Builder.getPtrTy(), StructArg,
1562 {Builder.getInt64(0)});
1563 StructArg = Builder.CreateLoad(Builder.getPtrTy(), StructArg,
"structArg");
1564 Args.push_back(StructArg);
1568 Builder.CreateCall(&OutlinedFn, Args);
1569 Builder.CreateRetVoid();
1584 "Expected at least tid and bounded tid as arguments");
1585 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1593 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1596 assert(CI &&
"Expected call instruction to outlined function");
1597 CI->
getParent()->setName(
"omp_parallel");
1599 Builder.SetInsertPoint(CI);
1600 Type *PtrTy = OMPIRBuilder->VoidPtr;
1603 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1607 Value *Args = ArgsAlloca;
1611 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1612 Builder.restoreIP(CurrentIP);
1615 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1617 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1619 Builder.CreateStore(V, StoreAddress);
1623 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1624 : Builder.getInt32(1);
1625 Value *NumThreadsArg =
1626 NumThreads ? Builder.CreateZExtOrTrunc(NumThreads, OMPIRBuilder->Int32)
1627 : Builder.getInt32(-1);
1637 Value *Parallel60CallArgs[] = {
1642 Builder.getInt32(-1),
1646 Builder.getInt64(NumCapturedVars),
1647 Builder.getInt32(0)};
1655 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1658 Builder.SetInsertPoint(PrivTID);
1660 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1667 I->eraseFromParent();
1690 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1698 F->addMetadata(LLVMContext::MD_callback,
1707 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1710 "Expected at least tid and bounded tid as arguments");
1711 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1714 CI->
getParent()->setName(
"omp_parallel");
1715 Builder.SetInsertPoint(CI);
1718 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1722 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1724 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1731 auto PtrTy = OMPIRBuilder->VoidPtr;
1732 if (IfCondition && NumCapturedVars == 0) {
1740 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1743 Builder.SetInsertPoint(PrivTID);
1745 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1752 I->eraseFromParent();
1760 Value *NumThreads, omp::ProcBindKind ProcBind,
bool IsCancellable) {
1769 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1770 (ProcBind != OMP_PROC_BIND_default);
1777 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1781 if (NumThreads && !
Config.isTargetDevice()) {
1784 Builder.CreateIntCast(NumThreads, Int32,
false)};
1789 if (ProcBind != OMP_PROC_BIND_default) {
1793 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1815 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1818 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1821 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1825 PointerType ::get(
M.getContext(), 0),
1826 "zero.addr.ascast");
1850 if (IP.getBlock()->end() == IP.getPoint()) {
1856 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1857 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1858 "Unexpected insertion point for finalization call!");
1870 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1876 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1894 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1897 assert(BodyGenCB &&
"Expected body generation callback!");
1899 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, PRegExitBB))
1902 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1906 bool UsesDeviceSharedMemory =
1908 std::unique_ptr<OutlineInfo> OI =
1909 UsesDeviceSharedMemory
1910 ? std::make_unique<DeviceSharedMemOutlineInfo>(*
this)
1911 : std::make_unique<OutlineInfo>();
1913 if (
Config.isTargetDevice()) {
1915 OI->PostOutlineCB = [=, ToBeDeletedVec =
1916 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1918 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1919 ThreadID, ToBeDeletedVec);
1923 OI->PostOutlineCB = [=, ToBeDeletedVec =
1924 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1926 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1930 OI->FixUpNonEntryAllocas =
true;
1931 OI->OuterAllocBB = OuterAllocaBlock;
1932 OI->EntryBB = PRegEntryBB;
1933 OI->ExitBB = PRegExitBB;
1934 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
1935 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
1939 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
1951 ".omp_par", ArgsInZeroAddressSpace);
1956 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1958 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1963 return GV->getValueType() == OpenMPIRBuilder::Ident;
1968 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1974 if (&V == TIDAddr || &V == ZeroAddr) {
1975 OI->ExcludeArgsFromAggregate.push_back(&V);
1980 for (
Use &U : V.uses())
1982 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1992 if (!V.getType()->isPointerTy()) {
1996 Builder.restoreIP(OuterAllocIP);
1998 if (UsesDeviceSharedMemory) {
2001 V.getName() +
".reloaded");
2002 for (
BasicBlock *DeallocBlock : OuterDeallocBlocks)
2004 InsertPointTy(DeallocBlock, DeallocBlock->getFirstInsertionPt()),
2007 Ptr =
Builder.CreateAlloca(V.getType(),
nullptr,
2008 V.getName() +
".reloaded");
2013 Builder.SetInsertPoint(InsertBB,
2018 Builder.restoreIP(InnerAllocaIP);
2019 Inner =
Builder.CreateLoad(V.getType(), Ptr);
2022 Value *ReplacementValue =
nullptr;
2025 ReplacementValue = PrivTID;
2028 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
2036 assert(ReplacementValue &&
2037 "Expected copy/create callback to set replacement value!");
2038 if (ReplacementValue == &V)
2043 UPtr->set(ReplacementValue);
2068 for (
Value *Output : Outputs)
2072 "OpenMP outlining should not produce live-out values!");
2074 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
2076 for (
auto *BB : Blocks)
2077 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
2085 assert(FiniInfo.DK == OMPD_parallel &&
2086 "Unexpected finalization stack state!");
2097 Builder.CreateBr(*FiniBBOrErr);
2101 Term->eraseFromParent();
2107 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
2108 UI->eraseFromParent();
2171 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2173 Builder.CreateStore(DepValPtr, Addr);
2176 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2178 ConstantInt::get(SizeTy,
2183 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Flags));
2185 static_cast<unsigned int>(Dep.
DepKind)),
2198 if (Dependencies.
empty())
2218 Type *DependInfo = OMPBuilder.DependInfo;
2220 Value *DepArray =
nullptr;
2222 Builder.SetInsertPoint(
2226 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2228 Builder.restoreIP(OldIP);
2230 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2232 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2239Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2241 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2256 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2260 "omp_taskloop_dup",
M);
2263 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2264 DestTaskArg->
setName(
"dest_task");
2265 SrcTaskArg->
setName(
"src_task");
2266 LastprivateFlagArg->
setName(
"lastprivate_flag");
2268 IRBuilderBase::InsertPointGuard Guard(
Builder);
2272 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2273 Type *TaskWithPrivatesTy =
2276 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2278 PrivatesTy, TaskPrivates,
2283 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2284 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2286 DestTaskContextPtr->
setName(
"destPtr");
2287 SrcTaskContextPtr->
setName(
"srcPtr");
2292 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2293 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2294 if (!AfterIPOrError)
2296 Builder.restoreIP(*AfterIPOrError);
2306 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2308 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2310 Value *TaskContextStructPtrVal) {
2315 uint32_t SrcLocStrSize;
2331 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP, TaskloopExitBB))
2334 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2339 llvm::CanonicalLoopInfo *CLI = result.
get();
2340 auto OI = std::make_unique<OutlineInfo>();
2341 OI->EntryBB = TaskloopAllocaBB;
2342 OI->OuterAllocBB = AllocaIP.getBlock();
2343 OI->ExitBB = TaskloopExitBB;
2344 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2345 copy(DeallocBlocks, OI->OuterDeallocBBs.end());
2351 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2353 TaskloopAllocaIP,
"lb",
false,
true);
2355 TaskloopAllocaIP,
"ub",
false,
true);
2357 TaskloopAllocaIP,
"step",
false,
true);
2360 OI->Inputs.insert(FakeLB);
2361 OI->Inputs.insert(FakeUB);
2362 OI->Inputs.insert(FakeStep);
2363 if (TaskContextStructPtrVal)
2364 OI->Inputs.insert(TaskContextStructPtrVal);
2365 assert(((TaskContextStructPtrVal && DupCB) ||
2366 (!TaskContextStructPtrVal && !DupCB)) &&
2367 "Task context struct ptr and duplication callback must be both set "
2373 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2377 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2378 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2381 if (!TaskDupFnOrErr) {
2384 Value *TaskDupFn = *TaskDupFnOrErr;
2386 OI->PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2387 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2388 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2389 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2390 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2392 assert(OutlinedFn.hasOneUse() &&
2393 "there must be a single user for the outlined function");
2400 Value *CastedLBVal =
2401 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2402 Value *CastedUBVal =
2403 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2404 Value *CastedStepVal =
2405 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2407 Builder.SetInsertPoint(StaleCI);
2420 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2441 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2443 AllocaInst *ArgStructAlloca =
2445 assert(ArgStructAlloca &&
2446 "Unable to find the alloca instruction corresponding to arguments "
2447 "for extracted function");
2448 std::optional<TypeSize> ArgAllocSize =
2451 "Unable to determine size of arguments for extracted function");
2452 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2457 CallInst *TaskData =
Builder.CreateCall(
2458 TaskAllocFn, {Ident, ThreadID,
Flags,
2459 TaskSize, SharedsSize,
2464 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2465 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2470 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2473 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2476 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2482 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2488 Value *GrainSizeVal =
2489 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2491 Value *TaskDup = TaskDupFn;
2493 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2494 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2499 Builder.CreateCall(TaskloopFn, Args);
2506 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2511 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2513 LoadInst *SharedsOutlined =
2514 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2515 OutlinedFn.getArg(1)->replaceUsesWithIf(
2517 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2520 Type *IVTy =
IV->getType();
2526 Value *TaskLB =
nullptr;
2527 Value *TaskUB =
nullptr;
2528 Value *TaskStep =
nullptr;
2529 Value *LoadTaskLB =
nullptr;
2530 Value *LoadTaskUB =
nullptr;
2531 Value *LoadTaskStep =
nullptr;
2532 for (Instruction &
I : *TaskloopAllocaBB) {
2533 if (
I.getOpcode() == Instruction::GetElementPtr) {
2536 switch (CI->getZExtValue()) {
2548 }
else if (
I.getOpcode() == Instruction::Load) {
2550 if (
Load.getPointerOperand() == TaskLB) {
2551 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2553 }
else if (
Load.getPointerOperand() == TaskUB) {
2554 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2556 }
else if (
Load.getPointerOperand() == TaskStep) {
2557 assert(TaskStep !=
nullptr &&
"Expected value for TaskStep");
2563 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2565 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2566 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2567 assert(LoadTaskStep !=
nullptr &&
"Expected value for LoadTaskStep");
2569 Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
2570 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2571 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2572 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2574 CLI->setTripCount(CastedTripCount);
2576 Builder.SetInsertPoint(CLI->getBody(),
2577 CLI->getBody()->getFirstInsertionPt());
2579 if (NumOfCollapseLoops > 1) {
2585 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2588 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2589 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2590 User *IVUser = IVUse->getUser();
2592 if (
Op->getOpcode() == Instruction::URem ||
2593 Op->getOpcode() == Instruction::UDiv) {
2598 for (User *User : UsersToReplace) {
2599 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2616 assert(CLI->getIndVar()->getNumUses() == 3 &&
2617 "Canonical loop should have exactly three uses of the ind var");
2618 for (User *IVUser : CLI->getIndVar()->users()) {
2620 if (
Mul->getOpcode() == Instruction::Mul) {
2621 for (User *MulUser :
Mul->users()) {
2623 if (
Add->getOpcode() == Instruction::Add) {
2624 Add->setOperand(1, CastedTaskLB);
2633 FakeLB->replaceAllUsesWith(CastedLBVal);
2634 FakeUB->replaceAllUsesWith(CastedUBVal);
2635 FakeStep->replaceAllUsesWith(CastedStepVal);
2637 I->eraseFromParent();
2642 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2648 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2658 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2690 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskExitBB))
2693 auto OI = std::make_unique<OutlineInfo>();
2694 OI->EntryBB = TaskAllocaBB;
2695 OI->OuterAllocBB = AllocaIP.
getBlock();
2696 OI->ExitBB = TaskExitBB;
2697 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2698 copy(DeallocBlocks, OI->OuterDeallocBBs.
end());
2703 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2705 OI->PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2706 Affinities, Mergeable, Priority, EventHandle,
2708 ToBeDeleted](
Function &OutlinedFn)
mutable {
2710 assert(OutlinedFn.hasOneUse() &&
2711 "there must be a single user for the outlined function");
2716 bool HasShareds = StaleCI->
arg_size() > 1;
2717 Builder.SetInsertPoint(StaleCI);
2742 Flags =
Builder.CreateOr(FinalFlag, Flags);
2755 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2764 assert(ArgStructAlloca &&
2765 "Unable to find the alloca instruction corresponding to arguments "
2766 "for extracted function");
2767 std::optional<TypeSize> ArgAllocSize =
2770 "Unable to determine size of arguments for extracted function");
2771 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2777 TaskAllocFn, {Ident, ThreadID, Flags,
2778 TaskSize, SharedsSize,
2781 if (Affinities.
Count && Affinities.
Info) {
2783 OMPRTL___kmpc_omp_reg_task_with_affinity);
2794 OMPRTL___kmpc_task_allow_completion_event);
2798 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2800 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2801 Builder.CreateStore(EventVal, EventHandleAddr);
2807 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2808 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2826 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2829 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2831 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2834 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2835 PriorityData, {Zero, Zero});
2836 Builder.CreateStore(Priority, CmplrData);
2839 Value *DepArray =
nullptr;
2840 Value *NumDeps =
nullptr;
2843 NumDeps = Dependencies.
NumDeps;
2844 }
else if (!Dependencies.
Deps.empty()) {
2846 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
2871 Builder.GetInsertPoint()->getParent()->getTerminator();
2872 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2873 Builder.SetInsertPoint(IfTerminator);
2876 Builder.SetInsertPoint(ElseTI);
2883 {Ident, ThreadID, NumDeps, DepArray,
2884 ConstantInt::get(
Builder.getInt32Ty(), 0),
2899 Builder.SetInsertPoint(ThenTI);
2907 {Ident, ThreadID, TaskData, NumDeps, DepArray,
2908 ConstantInt::get(
Builder.getInt32Ty(), 0),
2919 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2921 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2922 OutlinedFn.getArg(1)->replaceUsesWithIf(
2923 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2927 I->eraseFromParent();
2931 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2953 if (
Error Err = BodyGenCB(AllocaIP,
Builder.saveIP(), DeallocBlocks))
2956 Builder.SetInsertPoint(TaskgroupExitBB);
2999 unsigned CaseNumber = 0;
3000 for (
auto SectionCB : SectionCBs) {
3002 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
3004 Builder.SetInsertPoint(CaseBB);
3008 {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {}))
3019 Value *LB = ConstantInt::get(I32Ty, 0);
3020 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
3021 Value *ST = ConstantInt::get(I32Ty, 1);
3023 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
3028 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
3029 WorksharingLoopType::ForStaticLoop, !IsNowait);
3035 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
3039 assert(FiniInfo.DK == OMPD_sections &&
3040 "Unexpected finalization stack state!");
3041 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
3055 if (IP.getBlock()->end() != IP.getPoint())
3066 auto *CaseBB =
Loc.IP.getBlock();
3067 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
3068 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
3074 Directive OMPD = Directive::OMPD_sections;
3077 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
3088Value *OpenMPIRBuilder::getGPUThreadID() {
3091 OMPRTL___kmpc_get_hardware_thread_id_in_block),
3095Value *OpenMPIRBuilder::getGPUWarpSize() {
3100Value *OpenMPIRBuilder::getNVPTXWarpID() {
3101 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3102 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
3105Value *OpenMPIRBuilder::getNVPTXLaneID() {
3106 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3107 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
3108 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
3109 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
3116 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
3117 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
3118 assert(FromSize > 0 &&
"From size must be greater than zero");
3119 assert(ToSize > 0 &&
"To size must be greater than zero");
3120 if (FromType == ToType)
3122 if (FromSize == ToSize)
3123 return Builder.CreateBitCast(From, ToType);
3125 return Builder.CreateIntCast(From, ToType,
true);
3131 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3132 CastItem,
Builder.getPtrTy(0));
3133 Builder.CreateStore(From, ValCastItem);
3134 return Builder.CreateLoad(ToType, CastItem);
3141 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
3142 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
3146 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
3148 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
3150 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
3151 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
3152 Value *WarpSizeCast =
3154 Value *ShuffleCall =
3156 return castValueToType(AllocaIP, ShuffleCall, CastTy);
3163 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
3175 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3176 Value *ElemPtr = DstAddr;
3177 Value *Ptr = SrcAddr;
3178 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
3182 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3185 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
3186 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3190 if ((
Size / IntSize) > 1) {
3191 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3192 SrcAddrGEP,
Builder.getPtrTy());
3209 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
3211 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
3214 Value *Res = createRuntimeShuffleFunction(
3217 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
3219 Builder.CreateAlignedStore(Res, ElemPtr,
3220 M.getDataLayout().getPrefTypeAlign(ElemType));
3222 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3223 Value *LocalElemPtr =
3224 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3230 Value *Res = createRuntimeShuffleFunction(
3231 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3234 Res =
Builder.CreateTrunc(Res, ElemType);
3235 Builder.CreateStore(Res, ElemPtr);
3236 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3238 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3244Error OpenMPIRBuilder::emitReductionListCopy(
3249 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3250 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3254 for (
auto En :
enumerate(ReductionInfos)) {
3256 Value *SrcElementAddr =
nullptr;
3257 AllocaInst *DestAlloca =
nullptr;
3258 Value *DestElementAddr =
nullptr;
3259 Value *DestElementPtrAddr =
nullptr;
3261 bool ShuffleInElement =
false;
3264 bool UpdateDestListPtr =
false;
3268 ReductionArrayTy, SrcBase,
3269 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3270 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3274 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3275 ReductionArrayTy, DestBase,
3276 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3277 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3283 Type *DestAllocaType =
3284 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3285 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3286 ".omp.reduction.element");
3288 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3289 DestElementAddr = DestAlloca;
3292 DestElementAddr->
getName() +
".ascast");
3294 ShuffleInElement =
true;
3295 UpdateDestListPtr =
true;
3307 if (ShuffleInElement) {
3308 Type *ShuffleType = RI.ElementType;
3309 Value *ShuffleSrcAddr = SrcElementAddr;
3310 Value *ShuffleDestAddr = DestElementAddr;
3311 AllocaInst *LocalStorage =
nullptr;
3314 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3315 assert(RI.ByRefAllocatedType &&
3316 "Expected by-ref allocated type to be set");
3321 ShuffleType = RI.ByRefElementType;
3323 if (RI.DataPtrPtrGen) {
3326 Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3329 return GenResult.takeError();
3338 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3340 ShuffleDestAddr = LocalStorage;
3345 ShuffleDestAddr = DestElementAddr;
3349 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3350 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3352 if (IsByRefElem && RI.DataPtrPtrGen) {
3354 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3355 DestAlloca,
Builder.getPtrTy(),
".ascast");
3358 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3359 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3362 return GenResult.takeError();
3365 switch (RI.EvaluationKind) {
3367 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3369 Builder.CreateStore(Elem, DestElementAddr);
3373 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3374 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3376 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3378 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3380 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3382 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3383 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3384 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3385 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3386 Builder.CreateStore(SrcReal, DestRealPtr);
3387 Builder.CreateStore(SrcImg, DestImgPtr);
3392 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3394 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3395 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3407 if (UpdateDestListPtr) {
3408 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3409 DestElementAddr,
Builder.getPtrTy(),
3410 DestElementAddr->
getName() +
".ascast");
3411 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3418Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3422 LLVMContext &Ctx =
M.getContext();
3424 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3428 "_omp_reduction_inter_warp_copy_func", &
M);
3433 Builder.SetInsertPoint(EntryBB);
3450 StringRef TransferMediumName =
3451 "__openmp_nvptx_data_transfer_temporary_storage";
3452 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3453 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3455 if (!TransferMedium) {
3456 TransferMedium =
new GlobalVariable(
3464 Value *GPUThreadID = getGPUThreadID();
3466 Value *LaneID = getNVPTXLaneID();
3468 Value *WarpID = getNVPTXWarpID();
3472 Builder.GetInsertBlock()->getFirstInsertionPt());
3476 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3477 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3478 AllocaInst *NumWarpsAlloca =
3479 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3480 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3481 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3482 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3483 NumWarpsAlloca,
Builder.getPtrTy(0),
3484 NumWarpsAlloca->
getName() +
".ascast");
3485 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3486 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3495 for (
auto En :
enumerate(ReductionInfos)) {
3501 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3502 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3503 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3504 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3507 unsigned NumIters = RealTySize / TySize;
3510 Value *Cnt =
nullptr;
3511 Value *CntAddr =
nullptr;
3518 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3520 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3521 CntAddr->
getName() +
".ascast");
3533 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3534 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3541 omp::Directive::OMPD_unknown,
3545 return BarrierIP1.takeError();
3551 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3552 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3556 auto *RedListArrayTy =
3559 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3561 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3562 {ConstantInt::get(IndexTy, 0),
3563 ConstantInt::get(IndexTy, En.index())});
3567 if (IsByRefElem && RI.DataPtrPtrGen) {
3569 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3572 return GenRes.takeError();
3583 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3588 Builder.CreateStore(Elem, MediumPtr,
3600 omp::Directive::OMPD_unknown,
3604 return BarrierIP2.takeError();
3611 Value *NumWarpsVal =
3614 Value *IsActiveThread =
3615 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3616 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3623 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3625 Value *TargetElemPtrPtr =
3626 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3627 {ConstantInt::get(IndexTy, 0),
3628 ConstantInt::get(IndexTy, En.index())});
3629 Value *TargetElemPtrVal =
3631 Value *TargetElemPtr = TargetElemPtrVal;
3633 if (IsByRefElem && RI.DataPtrPtrGen) {
3635 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3638 return GenRes.takeError();
3640 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3648 Value *SrcMediumValue =
3649 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3650 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3660 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3661 Builder.CreateStore(Cnt, CntAddr,
false);
3663 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3667 RealTySize %= TySize;
3677Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3680 LLVMContext &Ctx =
M.getContext();
3681 FunctionType *FuncTy =
3683 {Builder.getPtrTy(), Builder.getInt16Ty(),
3684 Builder.getInt16Ty(), Builder.getInt16Ty()},
3688 "_omp_reduction_shuffle_and_reduce_func", &
M);
3698 Builder.SetInsertPoint(EntryBB);
3709 Type *ReduceListArgType = ReduceListArg->
getType();
3713 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3714 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3715 LaneIDArg->
getName() +
".addr");
3717 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3718 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3719 AlgoVerArg->
getName() +
".addr");
3726 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3728 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3729 ReduceListAlloca, ReduceListArgType,
3730 ReduceListAlloca->
getName() +
".ascast");
3731 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3732 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3733 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3734 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3735 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3736 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3737 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3738 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3739 RemoteReductionListAlloca,
Builder.getPtrTy(),
3740 RemoteReductionListAlloca->
getName() +
".ascast");
3742 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3743 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3744 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3745 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3747 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3748 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3749 Value *RemoteLaneOffset =
3750 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3751 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3758 Error EmitRedLsCpRes = emitReductionListCopy(
3760 ReduceList, RemoteListAddrCast, IsByRef,
3761 {RemoteLaneOffset,
nullptr,
nullptr});
3764 return EmitRedLsCpRes;
3789 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3794 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3795 Value *RemoteOffsetComp =
3797 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3798 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3799 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3805 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3807 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3808 ReduceList,
Builder.getPtrTy());
3809 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3810 RemoteListAddrCast,
Builder.getPtrTy());
3812 ->addFnAttr(Attribute::NoUnwind);
3823 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3824 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3829 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3833 EmitRedLsCpRes = emitReductionListCopy(
3835 RemoteListAddrCast, ReduceList, IsByRef);
3838 return EmitRedLsCpRes;
3853OpenMPIRBuilder::generateReductionDescriptor(
3855 Type *DescriptorType,
3861 Value *DescriptorSize =
3862 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3864 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3865 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3869 Value *DataPtrField;
3871 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3874 return GenResult.takeError();
3877 DataPtr,
Builder.getPtrTy(),
".ascast"),
3883Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3887 LLVMContext &Ctx =
M.getContext();
3890 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3894 "_omp_reduction_list_to_global_copy_func", &
M);
3901 Builder.SetInsertPoint(EntryBlock);
3911 BufferArg->
getName() +
".addr");
3915 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3916 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3917 BufferArgAlloca,
Builder.getPtrTy(),
3918 BufferArgAlloca->
getName() +
".ascast");
3919 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3920 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3921 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3922 ReduceListArgAlloca,
Builder.getPtrTy(),
3923 ReduceListArgAlloca->
getName() +
".ascast");
3925 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3926 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3927 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3929 Value *LocalReduceList =
3931 Value *BufferArgVal =
3935 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3936 for (
auto En :
enumerate(ReductionInfos)) {
3938 auto *RedListArrayTy =
3942 RedListArrayTy, LocalReduceList,
3943 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3949 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3951 ReductionsBufferTy, BufferVD, 0, En.index());
3953 switch (RI.EvaluationKind) {
3955 Value *TargetElement;
3957 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3958 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3960 if (RI.DataPtrPtrGen) {
3962 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3965 return GenResult.takeError();
3969 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3972 Builder.CreateStore(TargetElement, GlobVal);
3976 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3977 RI.ElementType, ElemPtr, 0, 0,
".realp");
3979 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3981 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3983 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3985 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3986 RI.ElementType, GlobVal, 0, 0,
".realp");
3987 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3988 RI.ElementType, GlobVal, 0, 1,
".imagp");
3989 Builder.CreateStore(SrcReal, DestRealPtr);
3990 Builder.CreateStore(SrcImg, DestImgPtr);
3995 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
3997 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3998 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
4009Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
4013 LLVMContext &Ctx =
M.getContext();
4016 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4020 "_omp_reduction_list_to_global_reduce_func", &
M);
4027 Builder.SetInsertPoint(EntryBlock);
4037 BufferArg->
getName() +
".addr");
4041 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4042 auto *RedListArrayTy =
4047 Value *LocalReduceList =
4048 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4052 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4053 BufferArgAlloca,
Builder.getPtrTy(),
4054 BufferArgAlloca->
getName() +
".ascast");
4055 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4056 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4057 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4058 ReduceListArgAlloca,
Builder.getPtrTy(),
4059 ReduceListArgAlloca->
getName() +
".ascast");
4060 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4061 LocalReduceList,
Builder.getPtrTy(),
4062 LocalReduceList->
getName() +
".ascast");
4064 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4065 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4066 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4071 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4072 for (
auto En :
enumerate(ReductionInfos)) {
4075 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4076 RedListArrayTy, LocalReduceListAddrCast,
4077 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4079 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4081 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4082 ReductionsBufferTy, BufferVD, 0, En.index());
4084 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4088 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4089 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4090 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4097 Value *SrcElementPtrPtr =
4098 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
4099 {ConstantInt::get(IndexTy, 0),
4100 ConstantInt::get(IndexTy, En.index())});
4101 Value *SrcDescriptorAddr =
4106 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4107 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4110 return GenResult.takeError();
4112 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4114 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4122 ->addFnAttr(Attribute::NoUnwind);
4128Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
4132 LLVMContext &Ctx =
M.getContext();
4135 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4139 "_omp_reduction_global_to_list_copy_func", &
M);
4146 Builder.SetInsertPoint(EntryBlock);
4156 BufferArg->
getName() +
".addr");
4160 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4161 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4162 BufferArgAlloca,
Builder.getPtrTy(),
4163 BufferArgAlloca->
getName() +
".ascast");
4164 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4165 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4166 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4167 ReduceListArgAlloca,
Builder.getPtrTy(),
4168 ReduceListArgAlloca->
getName() +
".ascast");
4169 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4170 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4171 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4173 Value *LocalReduceList =
4178 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4179 for (
auto En :
enumerate(ReductionInfos)) {
4180 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
4181 auto *RedListArrayTy =
4185 RedListArrayTy, LocalReduceList,
4186 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4191 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4192 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4193 ReductionsBufferTy, BufferVD, 0, En.index());
4199 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4206 return GenResult.takeError();
4212 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
4213 Builder.CreateStore(TargetElement, ElemPtr);
4217 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4226 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4228 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4230 Builder.CreateStore(SrcReal, DestRealPtr);
4231 Builder.CreateStore(SrcImg, DestImgPtr);
4238 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4239 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4251Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4255 LLVMContext &Ctx =
M.getContext();
4258 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4262 "_omp_reduction_global_to_list_reduce_func", &
M);
4269 Builder.SetInsertPoint(EntryBlock);
4279 BufferArg->
getName() +
".addr");
4283 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4289 Value *LocalReduceList =
4290 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4294 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4295 BufferArgAlloca,
Builder.getPtrTy(),
4296 BufferArgAlloca->
getName() +
".ascast");
4297 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4298 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4299 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4300 ReduceListArgAlloca,
Builder.getPtrTy(),
4301 ReduceListArgAlloca->
getName() +
".ascast");
4302 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4303 LocalReduceList,
Builder.getPtrTy(),
4304 LocalReduceList->
getName() +
".ascast");
4306 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4307 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4308 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4313 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4314 for (
auto En :
enumerate(ReductionInfos)) {
4317 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4318 RedListArrayTy, ReductionList,
4319 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4322 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4323 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4324 ReductionsBufferTy, BufferVD, 0, En.index());
4326 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4330 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4331 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4332 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4337 Value *ReduceListVal =
4339 Value *SrcElementPtrPtr =
4340 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4341 {ConstantInt::get(IndexTy, 0),
4342 ConstantInt::get(IndexTy, En.index())});
4343 Value *SrcDescriptorAddr =
4348 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4349 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4351 return GenResult.takeError();
4353 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4355 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4363 ->addFnAttr(Attribute::NoUnwind);
4369std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4370 std::string Suffix =
4372 return (Name + Suffix).str();
4375Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4378 AttributeList FuncAttrs) {
4380 {Builder.getPtrTy(), Builder.getPtrTy()},
4382 std::string
Name = getReductionFuncName(ReducerName);
4390 Builder.SetInsertPoint(EntryBB);
4394 Value *LHSArrayPtr =
nullptr;
4395 Value *RHSArrayPtr =
nullptr;
4402 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4404 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4405 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4406 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4407 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4408 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4409 Builder.CreateStore(Arg0, LHSAddrCast);
4410 Builder.CreateStore(Arg1, RHSAddrCast);
4411 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4412 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4416 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4418 for (
auto En :
enumerate(ReductionInfos)) {
4421 RedArrayTy, RHSArrayPtr,
4422 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4424 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4425 RHSI8Ptr, RI.PrivateVariable->getType(),
4426 RHSI8Ptr->
getName() +
".ascast");
4429 RedArrayTy, LHSArrayPtr,
4430 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4432 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4433 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4442 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4443 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4444 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4451 return AfterIP.takeError();
4452 if (!
Builder.GetInsertBlock())
4453 return ReductionFunc;
4457 if (!IsByRef.
empty() && !IsByRef[En.index()])
4458 Builder.CreateStore(Reduced, LHSPtr);
4463 for (
auto En :
enumerate(ReductionInfos)) {
4464 unsigned Index = En.index();
4466 Value *LHSFixupPtr, *RHSFixupPtr;
4467 Builder.restoreIP(RI.ReductionGenClang(
4468 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4473 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4478 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4492 return ReductionFunc;
4500 assert(RI.Variable &&
"expected non-null variable");
4501 assert(RI.PrivateVariable &&
"expected non-null private variable");
4502 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4503 "expected non-null reduction generator callback");
4506 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4507 "expected variables and their private equivalents to have the same "
4510 assert(RI.Variable->getType()->isPointerTy() &&
4511 "expected variables to be pointers");
4520 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4534 if (ReductionInfos.
size() == 0)
4544 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4548 AttributeList FuncAttrs;
4549 AttrBuilder AttrBldr(Ctx);
4551 AttrBldr.addAttribute(Attr);
4552 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4553 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4557 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4559 if (!ReductionResult)
4561 Function *ReductionFunc = *ReductionResult;
4565 if (GridValue.has_value())
4566 Config.setGridValue(GridValue.value());
4581 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4585 Value *ReductionListAlloca =
4586 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4587 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4588 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4591 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4592 for (
auto En :
enumerate(ReductionInfos)) {
4595 RedArrayTy, ReductionList,
4596 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4599 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4604 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4605 Builder.CreateStore(CastElem, ElemPtr);
4609 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4615 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4621 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4633 unsigned MaxDataSize = 0;
4635 for (
auto En :
enumerate(ReductionInfos)) {
4639 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4640 ? En.value().ByRefElementType
4641 : En.value().ElementType;
4642 auto Size =
M.getDataLayout().getTypeStoreSize(RedTypeArg);
4643 if (
Size > MaxDataSize)
4647 Value *ReductionDataSize =
4648 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4649 if (!IsTeamsReduction) {
4650 Value *SarFuncCast =
4651 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4653 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4654 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4657 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4662 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4664 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4667 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4672 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4677 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4682 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4689 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4691 Value *Args3[] = {SrcLocInfo,
4692 KernelTeamsReductionPtr,
4693 Builder.getInt32(ReductionBufNum),
4704 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4721 for (
auto En :
enumerate(ReductionInfos)) {
4729 Value *LHSPtr, *RHSPtr;
4731 &LHSPtr, &RHSPtr, CurFunc));
4744 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4746 "red.value." +
Twine(En.index()));
4757 if (!IsByRef.
empty() && !IsByRef[En.index()])
4762 if (ContinuationBlock) {
4763 Builder.CreateBr(ContinuationBlock);
4764 Builder.SetInsertPoint(ContinuationBlock);
4766 Config.setEmitLLVMUsed();
4777 ".omp.reduction.func", &M);
4787 Builder.SetInsertPoint(ReductionFuncBlock);
4788 Value *LHSArrayPtr =
nullptr;
4789 Value *RHSArrayPtr =
nullptr;
4800 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4802 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4803 Value *LHSAddrCast =
4804 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4805 Value *RHSAddrCast =
4806 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4807 Builder.CreateStore(Arg0, LHSAddrCast);
4808 Builder.CreateStore(Arg1, RHSAddrCast);
4809 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4810 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4812 LHSArrayPtr = ReductionFunc->
getArg(0);
4813 RHSArrayPtr = ReductionFunc->
getArg(1);
4816 unsigned NumReductions = ReductionInfos.
size();
4819 for (
auto En :
enumerate(ReductionInfos)) {
4821 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4822 RedArrayTy, LHSArrayPtr, 0, En.index());
4823 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4824 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4827 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4828 RedArrayTy, RHSArrayPtr, 0, En.index());
4829 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4830 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4839 Builder.restoreIP(*AfterIP);
4841 if (!Builder.GetInsertBlock())
4845 if (!IsByRef[En.index()])
4846 Builder.CreateStore(Reduced, LHSPtr);
4848 Builder.CreateRetVoid();
4855 bool IsNoWait,
bool IsTeamsReduction) {
4859 IsByRef, IsNoWait, IsTeamsReduction);
4866 if (ReductionInfos.
size() == 0)
4876 unsigned NumReductions = ReductionInfos.
size();
4879 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4881 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4883 for (
auto En :
enumerate(ReductionInfos)) {
4884 unsigned Index = En.index();
4886 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4887 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4894 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4904 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4909 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4910 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4912 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4914 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4915 : RuntimeFunction::OMPRTL___kmpc_reduce);
4918 {Ident, ThreadId, NumVariables, RedArraySize,
4919 RedArray, ReductionFunc, Lock},
4930 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4931 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4932 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4937 Builder.SetInsertPoint(NonAtomicRedBlock);
4938 for (
auto En :
enumerate(ReductionInfos)) {
4944 if (!IsByRef[En.index()]) {
4946 "red.value." +
Twine(En.index()));
4948 Value *PrivateRedValue =
4950 "red.private.value." +
Twine(En.index()));
4958 if (!
Builder.GetInsertBlock())
4961 if (!IsByRef[En.index()])
4965 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4966 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4968 Builder.CreateBr(ContinuationBlock);
4973 Builder.SetInsertPoint(AtomicRedBlock);
4974 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4981 if (!
Builder.GetInsertBlock())
4984 Builder.CreateBr(ContinuationBlock);
4997 if (!
Builder.GetInsertBlock())
5000 Builder.SetInsertPoint(ContinuationBlock);
5011 Directive OMPD = Directive::OMPD_master;
5016 Value *Args[] = {Ident, ThreadId};
5024 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5035 Directive OMPD = Directive::OMPD_masked;
5041 Value *ArgsEnd[] = {Ident, ThreadId};
5049 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5059 Call->setDoesNotThrow();
5074 bool IsInclusive,
ScanInfo *ScanRedInfo) {
5076 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
5077 ScanVarsType, ScanRedInfo);
5088 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5091 Type *DestTy = ScanVarsType[i];
5092 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5095 Builder.CreateStore(Src, Val);
5100 Builder.GetInsertBlock()->getParent());
5103 IV = ScanRedInfo->
IV;
5106 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5109 Type *DestTy = ScanVarsType[i];
5111 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5113 Builder.CreateStore(Src, ScanVars[i]);
5127 Builder.GetInsertBlock()->getParent());
5132Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
5136 Builder.restoreIP(AllocaIP);
5138 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5140 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
5147 Builder.restoreIP(CodeGenIP);
5149 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
5150 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5154 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
5155 AllocSpan,
nullptr,
"arr");
5156 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
5174 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5183Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
5189 Value *PrivateVar = RedInfo.PrivateVariable;
5190 Value *OrigVar = RedInfo.Variable;
5194 Type *SrcTy = RedInfo.ElementType;
5199 Builder.CreateStore(Src, OrigVar);
5222 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5247 Builder.GetInsertBlock()->getModule(),
5254 Builder.GetInsertBlock()->getModule(),
5260 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5261 Builder.SetInsertPoint(InputBB);
5264 Builder.SetInsertPoint(LoopBB);
5280 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5282 Builder.SetInsertPoint(InnerLoopBB);
5286 Value *ReductionVal = RedInfo.PrivateVariable;
5289 Type *DestTy = RedInfo.ElementType;
5292 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5295 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5300 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5303 Builder.CreateStore(Result, LHSPtr);
5306 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5308 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5309 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5312 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5318 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5339 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5346Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5358 Error Err = InputLoopGen();
5369 Error Err = ScanLoopGen(Builder.saveIP());
5376void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5413 Builder.SetInsertPoint(Preheader);
5416 Builder.SetInsertPoint(Header);
5417 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5418 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5423 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5424 Builder.CreateCondBr(Cmp, Body, Exit);
5429 Builder.SetInsertPoint(Latch);
5431 "omp_" + Name +
".next",
true);
5442 CL->Header = Header;
5461 NextBB, NextBB, Name);
5493 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5502 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5503 ScanRedInfo->
Span = TripCount;
5509 ScanRedInfo->
IV =
IV;
5510 createScanBBs(ScanRedInfo);
5513 assert(Terminator->getNumSuccessors() == 1);
5514 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5517 Builder.GetInsertBlock()->getParent());
5520 Builder.GetInsertBlock()->getParent());
5521 Builder.CreateBr(ContinueBlock);
5527 const auto &&InputLoopGen = [&]() ->
Error {
5529 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5530 ComputeIP, Name,
true, ScanRedInfo);
5534 Builder.restoreIP((*LoopInfo)->getAfterIP());
5540 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5544 Builder.restoreIP((*LoopInfo)->getAfterIP());
5548 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5556 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5566 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5567 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5571 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5587 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5590 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5594 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5599 Value *CountIfLooping;
5600 if (InclusiveStop) {
5601 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5607 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5610 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5611 "omp_" + Name +
".tripcount");
5616 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5623 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5630 ScanRedInfo->
IV = IndVar;
5631 return BodyGenCB(
Builder.saveIP(), IndVar);
5637 Builder.getCurrentDebugLocation());
5648 unsigned Bitwidth = Ty->getIntegerBitWidth();
5651 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5654 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5664 unsigned Bitwidth = Ty->getIntegerBitWidth();
5667 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5670 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5678 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5680 "Require dedicated allocate IP");
5686 uint32_t SrcLocStrSize;
5690 case WorksharingLoopType::ForStaticLoop:
5691 Flag = OMP_IDENT_FLAG_WORK_LOOP;
5693 case WorksharingLoopType::DistributeStaticLoop:
5694 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5696 case WorksharingLoopType::DistributeForStaticLoop:
5697 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
5704 Type *IVTy =
IV->getType();
5705 FunctionCallee StaticInit =
5706 LoopType == WorksharingLoopType::DistributeForStaticLoop
5709 FunctionCallee StaticFini =
5713 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5716 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5717 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5718 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5719 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5728 Constant *One = ConstantInt::get(IVTy, 1);
5729 Builder.CreateStore(Zero, PLowerBound);
5731 Builder.CreateStore(UpperBound, PUpperBound);
5732 Builder.CreateStore(One, PStride);
5738 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5739 ? OMPScheduleType::OrderedDistribute
5742 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5746 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5747 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5750 PLowerBound, PUpperBound});
5751 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5752 Value *PDistUpperBound =
5753 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5754 Args.push_back(PDistUpperBound);
5759 BuildInitCall(SchedulingType,
Builder);
5760 if (HasDistSchedule &&
5761 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5762 Constant *DistScheduleSchedType = ConstantInt::get(
5767 BuildInitCall(DistScheduleSchedType,
Builder);
5769 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5770 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5771 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5772 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5773 CLI->setTripCount(TripCount);
5779 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5783 return Builder.CreateAdd(OldIV, LowerBound);
5795 omp::Directive::OMPD_for,
false,
5798 return BarrierIP.takeError();
5825 Reachable.insert(
Block);
5835 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5839OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5843 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5844 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5849 Type *IVTy =
IV->getType();
5851 "Max supported tripcount bitwidth is 64 bits");
5853 :
Type::getInt64Ty(Ctx);
5856 Constant *One = ConstantInt::get(InternalIVTy, 1);
5862 for (BasicBlock &BB : *
F)
5863 if (!BB.hasTerminator())
5864 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5869 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5870 for (Instruction *
I : UIs)
5871 I->eraseFromParent();
5874 if (ChunkSize || DistScheduleChunkSize)
5879 FunctionCallee StaticInit =
5881 FunctionCallee StaticFini =
5887 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5888 Value *PLowerBound =
5889 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5890 Value *PUpperBound =
5891 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5892 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5901 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5902 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5903 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5904 "distschedulechunksize");
5905 Value *CastedTripCount =
5906 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5909 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5911 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5912 Builder.CreateStore(Zero, PLowerBound);
5913 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5914 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5916 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5917 Builder.CreateStore(UpperBound, PUpperBound);
5918 Builder.CreateStore(One, PStride);
5922 uint32_t SrcLocStrSize;
5925 if (DistScheduleSchedType != OMPScheduleType::None) {
5926 Flag |= OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5931 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5932 PUpperBound, PStride, One,
5933 this](
Value *SchedulingType,
Value *ChunkSize,
5936 StaticInit, {SrcLoc, ThreadNum,
5937 SchedulingType, PLastIter,
5938 PLowerBound, PUpperBound,
5942 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5943 if (DistScheduleSchedType != OMPScheduleType::None &&
5944 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5945 SchedType != OMPScheduleType::OrderedDistribute) {
5949 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5953 Value *FirstChunkStart =
5954 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5955 Value *FirstChunkStop =
5956 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5957 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5959 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5960 Value *NextChunkStride =
5961 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5965 Value *DispatchCounter;
5973 DispatchCounter = Counter;
5976 FirstChunkStart, CastedTripCount, NextChunkStride,
5999 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
6000 Value *IsLastChunk =
6001 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
6002 Value *CountUntilOrigTripCount =
6003 Builder.CreateSub(CastedTripCount, DispatchCounter);
6005 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
6006 Value *BackcastedChunkTC =
6007 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
6008 CLI->setTripCount(BackcastedChunkTC);
6013 Value *BackcastedDispatchCounter =
6014 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
6015 CLI->mapIndVar([&](Instruction *) ->
Value * {
6017 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
6030 return AfterIP.takeError();
6045static FunctionCallee
6048 unsigned Bitwidth = Ty->getIntegerBitWidth();
6051 case WorksharingLoopType::ForStaticLoop:
6054 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
6057 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
6059 case WorksharingLoopType::DistributeStaticLoop:
6062 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
6065 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
6067 case WorksharingLoopType::DistributeForStaticLoop:
6070 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
6073 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
6076 if (Bitwidth != 32 && Bitwidth != 64) {
6088 Function &LoopBodyFn,
bool NoLoop) {
6099 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
6100 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6101 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6102 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6107 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
6108 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6112 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
6113 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6114 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
6115 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6116 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
6118 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6142 Builder.restoreIP({Preheader, Preheader->
end()});
6145 Builder.CreateBr(CLI->
getExit());
6153 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
6161 "Expected unique undroppable user of outlined function");
6163 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
6165 "Expected outlined function call to be located in loop preheader");
6167 if (OutlinedFnCallInstruction->
arg_size() > 1)
6174 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
6176 for (
auto &ToBeDeletedItem : ToBeDeleted)
6177 ToBeDeletedItem->eraseFromParent();
6184 uint32_t SrcLocStrSize;
6188 case WorksharingLoopType::ForStaticLoop:
6189 Flag = OMP_IDENT_FLAG_WORK_LOOP;
6191 case WorksharingLoopType::DistributeStaticLoop:
6192 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
6194 case WorksharingLoopType::DistributeForStaticLoop:
6195 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
6200 auto OI = std::make_unique<OutlineInfo>();
6205 SmallVector<Instruction *, 4> ToBeDeleted;
6207 OI->OuterAllocBB = AllocaIP.getBlock();
6230 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
6232 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
6234 CodeExtractorAnalysisCache CEAC(*OuterFn);
6235 CodeExtractor Extractor(Blocks,
6249 SetVector<Value *> SinkingCands, HoistingCands;
6253 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
6260 for (
auto Use :
Users) {
6262 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
6263 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
6269 OI->ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6276 OI->PostOutlineCB = [=, ToBeDeletedVec =
6277 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6287 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6288 bool HasSimdModifier,
bool HasMonotonicModifier,
6289 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6291 Value *DistScheduleChunkSize) {
6292 if (
Config.isTargetDevice())
6293 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6295 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6296 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6298 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6299 OMPScheduleType::ModifierOrdered;
6301 if (HasDistSchedule) {
6302 DistScheduleSchedType = DistScheduleChunkSize
6303 ? OMPScheduleType::OrderedDistributeChunked
6304 : OMPScheduleType::OrderedDistribute;
6306 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6307 case OMPScheduleType::BaseStatic:
6308 case OMPScheduleType::BaseDistribute:
6309 assert((!ChunkSize || !DistScheduleChunkSize) &&
6310 "No chunk size with static-chunked schedule");
6311 if (IsOrdered && !HasDistSchedule)
6312 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6313 NeedsBarrier, ChunkSize);
6315 if (DistScheduleChunkSize)
6316 return applyStaticChunkedWorkshareLoop(
6317 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6318 DistScheduleChunkSize, DistScheduleSchedType);
6319 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6322 case OMPScheduleType::BaseStaticChunked:
6323 case OMPScheduleType::BaseDistributeChunked:
6324 if (IsOrdered && !HasDistSchedule)
6325 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6326 NeedsBarrier, ChunkSize);
6328 return applyStaticChunkedWorkshareLoop(
6329 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6330 DistScheduleChunkSize, DistScheduleSchedType);
6332 case OMPScheduleType::BaseRuntime:
6333 case OMPScheduleType::BaseAuto:
6334 case OMPScheduleType::BaseGreedy:
6335 case OMPScheduleType::BaseBalanced:
6336 case OMPScheduleType::BaseSteal:
6337 case OMPScheduleType::BaseRuntimeSimd:
6339 "schedule type does not support user-defined chunk sizes");
6341 case OMPScheduleType::BaseGuidedSimd:
6342 case OMPScheduleType::BaseDynamicChunked:
6343 case OMPScheduleType::BaseGuidedChunked:
6344 case OMPScheduleType::BaseGuidedIterativeChunked:
6345 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6346 case OMPScheduleType::BaseStaticBalancedChunked:
6347 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6348 NeedsBarrier, ChunkSize);
6361 unsigned Bitwidth = Ty->getIntegerBitWidth();
6364 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6367 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6375static FunctionCallee
6377 unsigned Bitwidth = Ty->getIntegerBitWidth();
6380 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6383 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6390static FunctionCallee
6392 unsigned Bitwidth = Ty->getIntegerBitWidth();
6395 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6398 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6403OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6406 bool NeedsBarrier,
Value *Chunk) {
6407 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6409 "Require dedicated allocate IP");
6411 "Require valid schedule type");
6413 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6414 OMPScheduleType::ModifierOrdered;
6419 uint32_t SrcLocStrSize;
6426 Type *IVTy =
IV->getType();
6431 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6433 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6434 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6435 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6436 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6445 Constant *One = ConstantInt::get(IVTy, 1);
6446 Builder.CreateStore(One, PLowerBound);
6448 Builder.CreateStore(UpperBound, PUpperBound);
6449 Builder.CreateStore(One, PStride);
6467 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6479 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6482 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6483 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6486 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6487 Builder.CreateCondBr(MoreWork, Header, Exit);
6493 PI->setIncomingBlock(0, OuterCond);
6494 PI->setIncomingValue(0, LowerBound);
6499 Br->setSuccessor(OuterCond);
6505 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6508 CI->setOperand(1, UpperBound);
6512 assert(BI->getSuccessor(1) == Exit);
6513 BI->setSuccessor(1, OuterCond);
6527 omp::Directive::OMPD_for,
false,
6530 return BarrierIP.takeError();
6549 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6554 if (BBsToErase.
count(UseInst->getParent()))
6561 while (BBsToErase.
remove_if(HasRemainingUses)) {
6572 assert(
Loops.size() >= 1 &&
"At least one loop required");
6573 size_t NumLoops =
Loops.size();
6577 return Loops.front();
6589 Loop->collectControlBlocks(OldControlBBs);
6593 if (ComputeIP.
isSet())
6600 Value *CollapsedTripCount =
nullptr;
6603 "All loops to collapse must be valid canonical loops");
6604 Value *OrigTripCount = L->getTripCount();
6605 if (!CollapsedTripCount) {
6606 CollapsedTripCount = OrigTripCount;
6611 CollapsedTripCount =
6612 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6618 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6624 Builder.restoreIP(Result->getBodyIP());
6626 Value *Leftover = Result->getIndVar();
6628 NewIndVars.
resize(NumLoops);
6629 for (
int i = NumLoops - 1; i >= 1; --i) {
6630 Value *OrigTripCount =
Loops[i]->getTripCount();
6632 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6633 NewIndVars[i] = NewIndVar;
6635 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6638 NewIndVars[0] = Leftover;
6647 BasicBlock *ContinueBlock = Result->getBody();
6649 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6656 ContinueBlock =
nullptr;
6657 ContinuePred = NextSrc;
6664 for (
size_t i = 0; i < NumLoops - 1; ++i)
6665 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6671 for (
size_t i = NumLoops - 1; i > 0; --i)
6672 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6675 ContinueWith(Result->getLatch(),
nullptr);
6682 for (
size_t i = 0; i < NumLoops; ++i)
6683 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6697std::vector<CanonicalLoopInfo *>
6701 "Must pass as many tile sizes as there are loops");
6702 int NumLoops =
Loops.size();
6703 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6715 Loop->collectControlBlocks(OldControlBBs);
6723 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6724 OrigTripCounts.
push_back(L->getTripCount());
6735 for (
int i = 0; i < NumLoops - 1; ++i) {
6748 for (
int i = 0; i < NumLoops; ++i) {
6750 Value *OrigTripCount = OrigTripCounts[i];
6763 Value *FloorTripOverflow =
6764 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6766 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6767 Value *FloorTripCount =
6768 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6769 "omp_floor" +
Twine(i) +
".tripcount",
true);
6772 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6778 std::vector<CanonicalLoopInfo *> Result;
6779 Result.reserve(NumLoops * 2);
6792 auto EmbeddNewLoop =
6793 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6796 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6801 Enter = EmbeddedLoop->
getBody();
6803 OutroInsertBefore = EmbeddedLoop->
getLatch();
6804 return EmbeddedLoop;
6808 const Twine &NameBase) {
6811 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6812 Result.push_back(EmbeddedLoop);
6816 EmbeddNewLoops(FloorCount,
"floor");
6822 for (
int i = 0; i < NumLoops; ++i) {
6826 Value *FloorIsEpilogue =
6828 Value *TileTripCount =
6835 EmbeddNewLoops(TileCounts,
"tile");
6840 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6849 BodyEnter =
nullptr;
6850 BodyEntered = ExitBB;
6862 Builder.restoreIP(Result.back()->getBodyIP());
6863 for (
int i = 0; i < NumLoops; ++i) {
6866 Value *OrigIndVar = OrigIndVars[i];
6894 if (Properties.
empty())
6917 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6921 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6929 if (
I.mayReadOrWriteMemory()) {
6933 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6947 Loop->collectControlBlocks(oldControlBBs);
6952 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6953 origTripCounts.
push_back(L->getTripCount());
6962 Builder.SetInsertPoint(TCBlock);
6963 Value *fusedTripCount =
nullptr;
6965 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6966 Value *origTripCount = L->getTripCount();
6967 if (!fusedTripCount) {
6968 fusedTripCount = origTripCount;
6971 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
6972 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
6986 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6987 Loops[i]->getPreheader()->moveBefore(TCBlock);
6988 Loops[i]->getAfter()->moveBefore(TCBlock);
6992 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7004 for (
size_t i = 0; i <
Loops.size(); ++i) {
7006 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
7007 Builder.SetInsertPoint(condBlock);
7015 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7016 Builder.SetInsertPoint(condBBs[i]);
7017 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
7033 "omp.fused.pre_latch");
7066 const Twine &NamePrefix) {
7095 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
7097 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
7100 Builder.SetInsertPoint(SplitBeforeIt);
7102 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
7105 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
7108 Builder.SetInsertPoint(ElseBlock);
7114 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
7116 ExistingBlocks.
append(L->block_begin(), L->block_end());
7122 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
7124 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
7131 if (
Block == ThenBlock)
7132 NewBB->
setName(NamePrefix +
".if.else");
7135 VMap[
Block] = NewBB;
7143 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
7144 NamePrefix +
".pre_latch");
7148 L->addBasicBlockToLoop(ThenBlock, LI);
7154 if (TargetTriple.
isX86()) {
7155 if (Features.
lookup(
"avx512f"))
7157 else if (Features.
lookup(
"avx"))
7161 if (TargetTriple.
isPPC())
7163 if (TargetTriple.
isWasm())
7170 Value *IfCond, OrderKind Order,
7180 if (!BB.hasTerminator())
7196 I->eraseFromParent();
7199 if (AlignedVars.
size()) {
7201 for (
auto &AlignedItem : AlignedVars) {
7202 Value *AlignedPtr = AlignedItem.first;
7203 Value *Alignment = AlignedItem.second;
7206 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
7214 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
7227 Reachable.insert(
Block);
7237 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
7253 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
7255 if (Simdlen || Safelen) {
7259 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7285static std::unique_ptr<TargetMachine>
7289 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7290 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7301 std::nullopt, OptLevel));
7319 if (!BB.hasTerminator())
7332 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7333 FAM.registerPass([&]() {
return TIRA; });
7347 I->eraseFromParent();
7350 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7355 nullptr, ORE,
static_cast<int>(OptLevel),
7376 <<
" Threshold=" << UP.
Threshold <<
"\n"
7379 <<
" PartialOptSizeThreshold="
7399 Ptr = Load->getPointerOperand();
7401 Ptr = Store->getPointerOperand();
7408 if (Alloca->getParent() == &
F->getEntryBlock())
7428 int MaxTripCount = 0;
7429 bool MaxOrZero =
false;
7430 unsigned TripMultiple = 0;
7433 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7434 unsigned Factor = UP.
Count;
7435 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7446 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7462 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7475 *UnrolledCLI =
Loop;
7480 "unrolling only makes sense with a factor of 2 or larger");
7482 Type *IndVarTy =
Loop->getIndVarType();
7489 std::vector<CanonicalLoopInfo *>
LoopNest =
7504 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7507 (*UnrolledCLI)->assertOK();
7525 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7544 if (!CPVars.
empty()) {
7549 Directive OMPD = Directive::OMPD_single;
7554 Value *Args[] = {Ident, ThreadId};
7563 if (
Error Err = FiniCB(IP))
7584 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7591 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7594 ConstantInt::get(Int64, 0), CPVars[
I],
7597 }
else if (!IsNowait) {
7600 omp::Directive::OMPD_unknown,
false,
7618 Directive::OMPD_scope,
nullptr,
nullptr,
7619 BodyGenCB, FiniCB,
false,
true,
7627 omp::Directive::OMPD_unknown,
7643 Directive OMPD = Directive::OMPD_critical;
7648 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7649 Value *Args[] = {Ident, ThreadId, LockVar};
7666 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7674 const Twine &Name,
bool IsDependSource) {
7678 "OpenMP runtime requires depend vec with i64 type");
7691 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7705 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7723 Directive OMPD = Directive::OMPD_ordered;
7732 Value *Args[] = {Ident, ThreadId};
7742 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7749 bool HasFinalize,
bool IsCancellable) {
7756 BasicBlock *EntryBB = Builder.GetInsertBlock();
7765 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7777 "Unexpected control flow graph state!!");
7779 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7781 return AfterIP.takeError();
7786 "Unexpected Insertion point location!");
7789 auto InsertBB = merged ? ExitPredBB : ExitBB;
7792 Builder.SetInsertPoint(InsertBB);
7794 return Builder.saveIP();
7798 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7800 if (!Conditional || !EntryCall)
7806 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7816 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7820 UI->eraseFromParent();
7828 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7836 "Unexpected finalization stack state!");
7839 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7841 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7842 return std::move(Err);
7846 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7856 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7890 "copyin.not.master.end");
7897 Builder.SetInsertPoint(OMP_Entry);
7898 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7899 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7900 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7901 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7903 Builder.SetInsertPoint(CopyBegin);
7921 Value *Args[] = {ThreadId,
Size, Allocator};
7944 return Builder.CreateCall(Fn, Args, Name);
7958 Value *Args[] = {ThreadId, Addr, Allocator};
7965 const Twine &Name) {
7973 M.getContext(),
M.getDataLayout().getPrefTypeAlign(Int64)));
7979 const Twine &Name) {
7981 Loc,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)), Name);
7986 const Twine &Name) {
7992 return Builder.CreateCall(Fn, Args, Name);
7997 const Twine &Name) {
7999 Loc, Addr,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)),
8006 Value *DependenceAddress,
bool HaveNowaitClause) {
8014 if (Device ==
nullptr)
8016 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
8017 if (NumDependences ==
nullptr) {
8018 NumDependences = ConstantInt::get(Int32, 0);
8022 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8024 Ident, ThreadId, InteropVar, InteropTypeVal,
8025 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
8034 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
8042 if (Device ==
nullptr)
8044 if (NumDependences ==
nullptr) {
8045 NumDependences = ConstantInt::get(Int32, 0);
8049 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8051 Ident, ThreadId, InteropVar, Device,
8052 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8061 Value *NumDependences,
8062 Value *DependenceAddress,
8063 bool HaveNowaitClause) {
8070 if (Device ==
nullptr)
8072 if (NumDependences ==
nullptr) {
8073 NumDependences = ConstantInt::get(Int32, 0);
8077 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8079 Ident, ThreadId, InteropVar, Device,
8080 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8110 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
8111 "expected num_threads and num_teams to be specified");
8131 const std::string DebugPrefix =
"_debug__";
8132 if (KernelName.
ends_with(DebugPrefix)) {
8133 KernelName = KernelName.
drop_back(DebugPrefix.length());
8134 Kernel =
M.getFunction(KernelName);
8140 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
8145 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
8146 if (MaxThreadsVal < 0) {
8152 MaxThreadsVal = Attrs.MinThreads;
8156 if (MaxThreadsVal > 0)
8169 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
8172 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
8173 Constant *DynamicEnvironmentInitializer =
8177 DynamicEnvironmentInitializer, DynamicEnvironmentName,
8179 DL.getDefaultGlobalsAddressSpace());
8183 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
8184 ? DynamicEnvironmentGV
8186 DynamicEnvironmentPtr);
8189 ConfigurationEnvironment, {
8190 UseGenericStateMachineVal,
8191 MayUseNestedParallelismVal,
8198 ReductionBufferLength,
8201 KernelEnvironment, {
8202 ConfigurationEnvironmentInitializer,
8206 std::string KernelEnvironmentName =
8207 (KernelName +
"_kernel_environment").str();
8210 KernelEnvironmentInitializer, KernelEnvironmentName,
8212 DL.getDefaultGlobalsAddressSpace());
8216 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
8217 ? KernelEnvironmentGV
8219 KernelEnvironmentPtr);
8220 Value *KernelLaunchEnvironment =
8223 KernelLaunchEnvironment =
8224 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
8225 ? KernelLaunchEnvironment
8226 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
8227 KernelLaunchEnvParamTy);
8229 Fn, {KernelEnvironment, KernelLaunchEnvironment});
8241 auto *UI =
Builder.CreateUnreachable();
8247 Builder.SetInsertPoint(WorkerExitBB);
8251 Builder.SetInsertPoint(CheckBBTI);
8252 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
8254 CheckBBTI->eraseFromParent();
8255 UI->eraseFromParent();
8263 int32_t TeamsReductionDataSize,
8264 int32_t TeamsReductionBufferLength) {
8269 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
8273 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
8279 const std::string DebugPrefix =
"_debug__";
8281 KernelName = KernelName.
drop_back(DebugPrefix.length());
8282 auto *KernelEnvironmentGV =
8283 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
8284 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
8285 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
8287 KernelEnvironmentInitializer,
8288 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
8290 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
8292 KernelEnvironmentGV->setInitializer(NewInitializer);
8297 if (
Kernel.hasFnAttribute(Name)) {
8298 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
8304std::pair<int32_t, int32_t>
8306 int32_t ThreadLimit =
8307 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
8310 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
8311 if (!Attr.isValid() || !Attr.isStringAttribute())
8312 return {0, ThreadLimit};
8313 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
8316 return {0, ThreadLimit};
8317 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
8325 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
8327 return {0, ThreadLimit};
8333 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
8336 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
8344std::pair<int32_t, int32_t>
8347 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
8351 int32_t LB, int32_t UB) {
8358 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
8361void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8370 else if (
T.isNVPTX())
8372 else if (
T.isSPIRV())
8377Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8378 StringRef EntryFnIDName) {
8379 if (
Config.isTargetDevice()) {
8380 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8384 return new GlobalVariable(
8389Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8390 StringRef EntryFnName) {
8394 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8395 "Named kernel already exists?");
8396 return new GlobalVariable(
8409 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8413 OutlinedFn = *CBResult;
8415 OutlinedFn =
nullptr;
8421 if (!IsOffloadEntry)
8424 std::string EntryFnIDName =
8426 ? std::string(EntryFnName)
8430 EntryFnName, EntryFnIDName);
8438 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8439 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8440 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8442 EntryInfo, EntryAddr, OutlinedFnID,
8444 return OutlinedFnID;
8462 bool IsStandAlone = !BodyGenCB;
8469 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8471 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8472 true, DeviceAddrCB))
8479 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8489 SrcLocInfo, DeviceID,
8496 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8500 if (Info.HasNoWait) {
8510 if (Info.HasNoWait) {
8514 emitBlock(OffloadContBlock, CurFn,
true);
8520 bool RequiresOuterTargetTask = Info.HasNoWait;
8521 if (!RequiresOuterTargetTask)
8522 cantFail(TaskBodyCB(
nullptr,
nullptr,
8526 {}, RTArgs, Info.HasNoWait));
8529 omp::OMPRTL___tgt_target_data_begin_mapper);
8533 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8537 Builder.CreateStore(LI, DeviceMap.second.second);
8574 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8583 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8606 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8607 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8622 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8623 return EndThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8626 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8627 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8638 bool IsGPUDistribute) {
8639 assert((IVSize == 32 || IVSize == 64) &&
8640 "IV size is not compatible with the omp runtime");
8642 if (IsGPUDistribute)
8644 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8645 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8646 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8647 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8649 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8650 : omp::OMPRTL___kmpc_for_static_init_4u)
8651 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8652 : omp::OMPRTL___kmpc_for_static_init_8u);
8659 assert((IVSize == 32 || IVSize == 64) &&
8660 "IV size is not compatible with the omp runtime");
8662 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8663 : omp::OMPRTL___kmpc_dispatch_init_4u)
8664 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8665 : omp::OMPRTL___kmpc_dispatch_init_8u);
8672 assert((IVSize == 32 || IVSize == 64) &&
8673 "IV size is not compatible with the omp runtime");
8675 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8676 : omp::OMPRTL___kmpc_dispatch_next_4u)
8677 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8678 : omp::OMPRTL___kmpc_dispatch_next_8u);
8685 assert((IVSize == 32 || IVSize == 64) &&
8686 "IV size is not compatible with the omp runtime");
8688 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8689 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8690 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8691 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8702 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8710 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8714 if (NewVar && (arg == NewVar->
getArg()))
8724 auto UpdateDebugRecord = [&](
auto *DR) {
8727 for (
auto Loc : DR->location_ops()) {
8728 auto Iter = ValueReplacementMap.find(
Loc);
8729 if (Iter != ValueReplacementMap.end()) {
8730 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8731 ArgNo = std::get<1>(Iter->second) + 1;
8735 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8740 if (DVR->getNumVariableLocationOps() != 1u) {
8741 DVR->setKillLocation();
8744 Value *
Loc = DVR->getVariableLocationOp(0u);
8751 RequiredBB = &DVR->getFunction()->getEntryBlock();
8753 if (RequiredBB && RequiredBB != CurBB) {
8765 "Unexpected debug intrinsic");
8767 UpdateDebugRecord(&DVR);
8768 MoveDebugRecordToCorrectBlock(&DVR);
8771 for (
auto *DVR : DVRsToDelete)
8772 DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
8776 Module *M = Func->getParent();
8779 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8780 unsigned ArgNo = Func->arg_size();
8782 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8783 false, DINode::DIFlags::FlagArtificial);
8785 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8786 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8807 for (
auto &Arg : Inputs)
8808 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8812 for (
auto &Arg : Inputs)
8813 ParameterTypes.
push_back(Arg->getType());
8821 auto BB = Builder.GetInsertBlock();
8822 auto M = BB->getModule();
8833 if (TargetCpuAttr.isStringAttribute())
8834 Func->addFnAttr(TargetCpuAttr);
8836 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8837 if (TargetFeaturesAttr.isStringAttribute())
8838 Func->addFnAttr(TargetFeaturesAttr);
8843 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8854 Builder.SetInsertPoint(EntryBB);
8860 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8870 splitBB(Builder,
true,
"outlined.body");
8877 Builder.SetInsertPoint(ExitBB);
8884 Builder.CreateRetVoid();
8888 auto AllocaIP = Builder.saveIP();
8893 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8925 if (Instr->getFunction() == Func)
8926 Instr->replaceUsesOfWith(
Input, InputCopy);
8932 for (
auto InArg :
zip(Inputs, ArgRange)) {
8934 Argument &Arg = std::get<1>(InArg);
8935 Value *InputCopy =
nullptr;
8938 Arg,
Input, InputCopy, AllocaIP, Builder.saveIP(),
8942 Builder.restoreIP(*AfterIP);
8943 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8963 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8970 ReplaceValue(
Input, InputCopy, Func);
8974 for (
auto Deferred : DeferredReplacement)
8975 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8978 ValueReplacementMap);
8986 Value *TaskWithPrivates,
8987 Type *TaskWithPrivatesTy) {
8989 Type *TaskTy = OMPIRBuilder.Task;
8992 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
8993 Value *Shareds = TaskT;
9003 if (TaskWithPrivatesTy != TaskTy)
9004 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
9021 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
9026 assert((!NumOffloadingArrays || PrivatesTy) &&
9027 "PrivatesTy cannot be nullptr when there are offloadingArrays"
9060 Type *TaskPtrTy = OMPBuilder.TaskPtr;
9061 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
9067 ".omp_target_task_proxy_func",
9068 Builder.GetInsertBlock()->getModule());
9069 Value *ThreadId = ProxyFn->getArg(0);
9070 Value *TaskWithPrivates = ProxyFn->getArg(1);
9071 ThreadId->
setName(
"thread.id");
9072 TaskWithPrivates->
setName(
"task");
9074 bool HasShareds = SharedArgsOperandNo > 0;
9075 bool HasOffloadingArrays = NumOffloadingArrays > 0;
9078 Builder.SetInsertPoint(EntryBB);
9084 if (HasOffloadingArrays) {
9085 assert(TaskTy != TaskWithPrivatesTy &&
9086 "If there are offloading arrays to pass to the target"
9087 "TaskTy cannot be the same as TaskWithPrivatesTy");
9090 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
9091 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
9093 Builder.CreateStructGEP(PrivatesTy, Privates, i));
9097 auto *ArgStructAlloca =
9099 assert(ArgStructAlloca &&
9100 "Unable to find the alloca instruction corresponding to arguments "
9101 "for extracted function");
9103 std::optional<TypeSize> ArgAllocSize =
9105 assert(ArgStructType && ArgAllocSize &&
9106 "Unable to determine size of arguments for extracted function");
9107 uint64_t StructSize = ArgAllocSize->getFixedValue();
9110 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
9112 Value *SharedsSize = Builder.getInt64(StructSize);
9115 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
9117 Builder.CreateMemCpy(
9118 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
9120 KernelLaunchArgs.
push_back(NewArgStructAlloca);
9123 Builder.CreateRetVoid();
9129 return GEP->getSourceElementType();
9131 return Alloca->getAllocatedType();
9154 if (OffloadingArraysToPrivatize.
empty())
9155 return OMPIRBuilder.Task;
9158 for (
Value *V : OffloadingArraysToPrivatize) {
9159 assert(V->getType()->isPointerTy() &&
9160 "Expected pointer to array to privatize. Got a non-pointer value "
9163 assert(ArrayTy &&
"ArrayType cannot be nullptr");
9169 "struct.task_with_privates");
9183 EntryFnName, Inputs, CBFunc,
9188 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
9325 TargetTaskAllocaBB->
begin());
9328 auto OI = std::make_unique<OutlineInfo>();
9329 OI->EntryBB = TargetTaskAllocaBB;
9330 OI->OuterAllocBB = AllocaIP.
getBlock();
9335 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
9338 Builder.restoreIP(TargetTaskBodyIP);
9339 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
9357 bool NeedsTargetTask = HasNoWait && DeviceID;
9358 if (NeedsTargetTask) {
9364 OffloadingArraysToPrivatize.
push_back(V);
9365 OI->ExcludeArgsFromAggregate.push_back(V);
9369 OI->PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
9370 DeviceID, OffloadingArraysToPrivatize](
9373 "there must be a single user for the outlined function");
9387 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
9388 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
9390 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
9391 "Wrong number of arguments for StaleCI when shareds are present");
9392 int SharedArgOperandNo =
9393 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
9399 if (!OffloadingArraysToPrivatize.
empty())
9404 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9405 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9407 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9410 Builder.SetInsertPoint(StaleCI);
9427 OMPRTL___kmpc_omp_target_task_alloc);
9439 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9446 auto *ArgStructAlloca =
9448 assert(ArgStructAlloca &&
9449 "Unable to find the alloca instruction corresponding to arguments "
9450 "for extracted function");
9451 std::optional<TypeSize> ArgAllocSize =
9454 "Unable to determine size of arguments for extracted function");
9455 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9474 TaskSize, SharedsSize,
9477 if (NeedsTargetTask) {
9478 assert(DeviceID &&
"Expected non-empty device ID.");
9488 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9489 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9492 if (!OffloadingArraysToPrivatize.
empty()) {
9494 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9495 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9496 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9503 "ElementType should match ArrayType");
9506 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9508 Dst, Alignment, PtrToPrivatize, Alignment,
9509 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9513 Value *DepArray =
nullptr;
9514 Value *NumDeps =
nullptr;
9517 NumDeps = Dependencies.
NumDeps;
9518 }
else if (!Dependencies.
Deps.empty()) {
9520 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
9531 if (!NeedsTargetTask) {
9540 ConstantInt::get(
Builder.getInt32Ty(), 0),
9553 }
else if (DepArray) {
9561 {Ident, ThreadID, TaskData, NumDeps, DepArray,
9562 ConstantInt::get(
Builder.getInt32Ty(), 0),
9572 I->eraseFromParent();
9577 << *(
Builder.GetInsertBlock()) <<
"\n");
9579 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9591 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9614 Builder.restoreIP(IP);
9620 return Builder.saveIP();
9623 bool HasDependencies = !Dependencies.
empty();
9624 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9641 if (OutlinedFnID && DeviceID)
9643 EmitTargetCallFallbackCB, KArgs,
9644 DeviceID, RTLoc, TargetTaskAllocaIP);
9652 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9659 auto &&EmitTargetCallElse =
9666 if (RequiresOuterTargetTask) {
9673 Dependencies, EmptyRTArgs, HasNoWait);
9675 return EmitTargetCallFallbackCB(Builder.saveIP());
9678 Builder.restoreIP(AfterIP);
9682 auto &&EmitTargetCallThen =
9686 Info.HasNoWait = HasNoWait;
9691 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9697 for (
auto [DefaultVal, RuntimeVal] :
9699 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9700 : Builder.getInt32(DefaultVal));
9704 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9706 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9710 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9713 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9721 Value *MaxThreadsClause =
9723 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9726 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9728 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9729 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9731 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9732 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9734 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9737 unsigned NumTargetItems = Info.NumberOfPtrs;
9745 Builder.getInt64Ty(),
9747 : Builder.getInt64(0);
9751 DynCGroupMem = Builder.getInt32(0);
9754 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9755 HasNoWait, DynCGroupMemFallback);
9762 if (RequiresOuterTargetTask)
9764 RTLoc, AllocaIP, Dependencies,
9765 KArgs.
RTArgs, Info.HasNoWait);
9768 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9769 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9772 Builder.restoreIP(AfterIP);
9779 if (!OutlinedFnID) {
9780 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP(), DeallocBlocks));
9786 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP(), DeallocBlocks));
9791 EmitTargetCallElse, AllocaIP));
9804 bool HasNowait,
Value *DynCGroupMem,
9818 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9819 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9825 if (!
Config.isTargetDevice())
9827 RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs,
9828 GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait,
9829 DynCGroupMem, DynCGroupMemFallback);
9843 return OS.
str().str();
9848 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9854 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9856 assert(Elem.second->getValueType() == Ty &&
9857 "OMP internal variable has different type than requested");
9870 :
M.getTargetTriple().isAMDGPU()
9872 :
DL.getDefaultGlobalsAddressSpace();
9881 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9882 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9889Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9890 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9891 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9902 return SizePtrToInt;
9907 std::string VarName) {
9915 return MaptypesArrayGlobal;
9920 unsigned NumOperands,
9929 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9933 ArrI64Ty,
nullptr,
".offload_sizes");
9944 int64_t DeviceID,
unsigned NumOperands) {
9950 Value *ArgsBaseGEP =
9952 {Builder.getInt32(0), Builder.getInt32(0)});
9955 {Builder.getInt32(0), Builder.getInt32(0)});
9956 Value *ArgSizesGEP =
9958 {Builder.getInt32(0), Builder.getInt32(0)});
9962 Builder.getInt32(NumOperands),
9963 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9964 MaptypesArg, MapnamesArg, NullPtr});
9971 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9972 "expected region end call to runtime only when end call is separate");
9974 auto VoidPtrTy = UnqualPtrTy;
9975 auto VoidPtrPtrTy = UnqualPtrTy;
9977 auto Int64PtrTy = UnqualPtrTy;
9979 if (!Info.NumberOfPtrs) {
9991 Info.RTArgs.BasePointersArray,
9994 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
9998 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10002 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
10003 : Info.RTArgs.MapTypesArray,
10009 if (!Info.EmitDebug)
10013 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
10018 if (!Info.HasMapper)
10022 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
10043 "struct.descriptor_dim");
10045 enum { OffsetFD = 0, CountFD, StrideFD };
10049 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
10052 if (NonContigInfo.
Dims[
I] == 1)
10057 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
10058 Builder.restoreIP(CodeGenIP);
10059 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
10060 unsigned RevIdx = EE -
II - 1;
10064 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
10066 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
10067 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
10069 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
10071 NonContigInfo.
Counts[L][RevIdx], CountLVal,
10072 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10074 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
10076 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
10077 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10080 Builder.restoreIP(CodeGenIP);
10081 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
10082 DimsAddr,
Builder.getPtrTy());
10085 Info.RTArgs.PointersArray, 0,
I);
10087 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
10092void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
10096 StringRef Prefix = IsInit ?
".init" :
".del";
10102 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
10103 Value *DeleteBit = Builder.CreateAnd(
10106 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10107 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
10112 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
10113 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
10114 DeleteCond = Builder.CreateIsNull(
10119 DeleteCond =
Builder.CreateIsNotNull(
10135 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10136 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10137 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10138 MapTypeArg =
Builder.CreateOr(
10141 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10142 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
10146 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
10147 ArraySize, MapTypeArg, MapName};
10173 MapperFn->
addFnAttr(Attribute::NoInline);
10174 MapperFn->
addFnAttr(Attribute::NoUnwind);
10184 auto SavedIP =
Builder.saveIP();
10185 Builder.SetInsertPoint(EntryBB);
10197 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
10199 Value *PtrBegin = BeginIn;
10205 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10206 MapType, MapName, ElementSize, HeadBB,
10217 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
10218 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10224 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
10225 PtrPHI->addIncoming(PtrBegin, HeadBB);
10230 return Info.takeError();
10234 Value *OffloadingArgs[] = {MapperHandle};
10238 Value *ShiftedPreviousSize =
10242 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
10243 Value *CurBaseArg = Info->BasePointers[
I];
10244 Value *CurBeginArg = Info->Pointers[
I];
10245 Value *CurSizeArg = Info->Sizes[
I];
10246 Value *CurNameArg = Info->Names.size()
10252 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10254 Value *MemberMapType =
10255 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10272 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10273 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10274 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10284 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10290 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10291 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10292 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10298 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10299 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10300 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10306 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10307 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10313 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10314 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10315 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10321 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10322 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10331 CurMapType->
addIncoming(MemberMapType, ToElseBB);
10333 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
10334 CurSizeArg, CurMapType, CurNameArg};
10336 auto ChildMapperFn = CustomMapperCB(
I);
10337 if (!ChildMapperFn)
10338 return ChildMapperFn.takeError();
10339 if (*ChildMapperFn) {
10354 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
10355 "omp.arraymap.next");
10356 PtrPHI->addIncoming(PtrNext, LastBB);
10357 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
10359 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10364 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10365 MapType, MapName, ElementSize, DoneBB,
10379 bool IsNonContiguous,
10383 Info.clearArrayInfo();
10386 if (Info.NumberOfPtrs == 0)
10395 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
10396 PointerArrayType,
nullptr,
".offload_baseptrs");
10398 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
10399 PointerArrayType,
nullptr,
".offload_ptrs");
10401 PointerArrayType,
nullptr,
".offload_mappers");
10402 Info.RTArgs.MappersArray = MappersArray;
10409 ConstantInt::get(Int64Ty, 0));
10411 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10412 bool IsNonContigEntry =
10414 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10416 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10419 if (IsNonContigEntry) {
10421 "Index must be in-bounds for NON_CONTIG Dims array");
10423 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10424 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10429 ConstSizes[
I] = CI;
10433 RuntimeSizes.
set(
I);
10436 if (RuntimeSizes.
all()) {
10438 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10439 SizeArrayType,
nullptr,
".offload_sizes");
10445 auto *SizesArrayGbl =
10450 if (!RuntimeSizes.
any()) {
10451 Info.RTArgs.SizesArray = SizesArrayGbl;
10453 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10454 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10457 SizeArrayType,
nullptr,
".offload_sizes");
10461 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10462 SizesArrayGbl, OffloadSizeAlign,
10467 Info.RTArgs.SizesArray = Buffer;
10475 for (
auto mapFlag : CombinedInfo.
Types)
10477 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10481 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10487 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10488 Info.EmitDebug =
true;
10490 Info.RTArgs.MapNamesArray =
10492 Info.EmitDebug =
false;
10497 if (Info.separateBeginEndCalls()) {
10498 bool EndMapTypesDiffer =
false;
10500 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10501 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10502 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10503 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10504 EndMapTypesDiffer =
true;
10507 if (EndMapTypesDiffer) {
10509 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10514 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10517 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10519 Builder.CreateAlignedStore(BPVal, BP,
10520 M.getDataLayout().getPrefTypeAlign(PtrTy));
10522 if (Info.requiresDevicePointerInfo()) {
10524 CodeGenIP =
Builder.saveIP();
10526 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10527 Builder.restoreIP(CodeGenIP);
10529 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10531 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10533 DeviceAddrCB(
I, BP);
10539 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10542 Builder.CreateAlignedStore(PVal,
P,
10543 M.getDataLayout().getPrefTypeAlign(PtrTy));
10545 if (RuntimeSizes.
test(
I)) {
10547 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10553 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10556 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10559 auto CustomMFunc = CustomMapperCB(
I);
10561 return CustomMFunc.takeError();
10563 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10566 PointerArrayType, MappersArray,
10569 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10573 Info.NumberOfPtrs == 0)
10590 Builder.ClearInsertionPoint();
10621 auto CondConstant = CI->getSExtValue();
10623 return ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10625 return ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10635 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10638 if (
Error Err = ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10644 if (
Error Err = ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10653bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10657 "Unexpected Atomic Ordering.");
10659 bool Flush =
false;
10721 assert(
X.Var->getType()->isPointerTy() &&
10722 "OMP Atomic expects a pointer to target memory");
10723 Type *XElemTy =
X.ElemTy;
10726 "OMP atomic read expected a scalar type");
10728 Value *XRead =
nullptr;
10732 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10741 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10744 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10746 XRead = AtomicLoadRes.first;
10753 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10756 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10758 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10761 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10762 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10773 assert(
X.Var->getType()->isPointerTy() &&
10774 "OMP Atomic expects a pointer to target memory");
10775 Type *XElemTy =
X.ElemTy;
10778 "OMP atomic write expected a scalar type");
10786 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10789 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10797 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10802 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10809 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10810 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10816 Type *XTy =
X.Var->getType();
10818 "OMP Atomic expects a pointer to target memory");
10819 Type *XElemTy =
X.ElemTy;
10822 "OMP atomic update expected a scalar or struct type");
10825 "OpenMP atomic does not support LT or GT operations");
10829 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10830 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10832 return AtomicResult.takeError();
10833 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10838Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10842 return Builder.CreateAdd(Src1, Src2);
10844 return Builder.CreateSub(Src1, Src2);
10846 return Builder.CreateAnd(Src1, Src2);
10848 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10850 return Builder.CreateOr(Src1, Src2);
10852 return Builder.CreateXor(Src1, Src2);
10876Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10879 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10880 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10882 bool emitRMWOp =
false;
10890 emitRMWOp = XElemTy;
10893 emitRMWOp = (IsXBinopExpr && XElemTy);
10900 std::pair<Value *, Value *> Res;
10902 AtomicRMWInst *RMWInst =
10903 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10904 if (
T.isAMDGPU()) {
10905 if (IsIgnoreDenormalMode)
10906 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10908 if (!IsFineGrainedMemory)
10909 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10911 if (!IsRemoteMemory)
10915 Res.first = RMWInst;
10920 Res.second = Res.first;
10922 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10925 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10930 OpenMPIRBuilder::AtomicInfo atomicInfo(
10932 OldVal->
getAlign(),
true , AllocaIP,
X);
10933 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10936 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10943 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10944 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10945 Builder.SetInsertPoint(ContBB);
10947 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10949 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10952 Value *Upd = *CBResult;
10953 Builder.CreateStore(Upd, NewAtomicAddr);
10956 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10957 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10958 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10959 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10962 Res.first = OldExprVal;
10965 if (UnreachableInst *ExitTI =
10968 Builder.SetInsertPoint(ExitBB);
10970 Builder.SetInsertPoint(ExitTI);
10973 IntegerType *IntCastTy =
10976 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
10985 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10992 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10993 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10994 Builder.SetInsertPoint(ContBB);
10996 PHI->addIncoming(OldVal, CurBB);
11001 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
11002 X->getName() +
".atomic.fltCast");
11004 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
11005 X->getName() +
".atomic.ptrCast");
11009 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
11012 Value *Upd = *CBResult;
11013 Builder.CreateStore(Upd, NewAtomicAddr);
11014 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
11018 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
11019 Result->setVolatile(VolatileX);
11020 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
11021 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
11022 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
11023 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
11025 Res.first = OldExprVal;
11029 if (UnreachableInst *ExitTI =
11032 Builder.SetInsertPoint(ExitBB);
11034 Builder.SetInsertPoint(ExitTI);
11045 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
11046 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
11051 Type *XTy =
X.Var->getType();
11053 "OMP Atomic expects a pointer to target memory");
11054 Type *XElemTy =
X.ElemTy;
11057 "OMP atomic capture expected a scalar or struct type");
11059 "OpenMP atomic does not support LT or GT operations");
11066 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
11067 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
11070 Value *CapturedVal =
11071 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
11072 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
11074 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
11086 IsPostfixUpdate, IsFailOnly, Failure);
11098 assert(
X.Var->getType()->isPointerTy() &&
11099 "OMP atomic expects a pointer to target memory");
11102 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
11103 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
11106 bool IsInteger = E->getType()->isIntegerTy();
11108 if (
Op == OMPAtomicCompareOp::EQ) {
11123 Value *OldValue =
Builder.CreateExtractValue(Result, 0);
11125 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
11127 "OldValue and V must be of same type");
11128 if (IsPostfixUpdate) {
11129 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11131 Value *SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
11144 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11146 CurBBTI,
X.Var->getName() +
".atomic.exit");
11152 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11154 Builder.SetInsertPoint(ContBB);
11155 Builder.CreateStore(OldValue, V.Var);
11161 Builder.SetInsertPoint(ExitBB);
11163 Builder.SetInsertPoint(ExitTI);
11166 Value *CapturedValue =
11167 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11168 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11174 assert(R.Var->getType()->isPointerTy() &&
11175 "r.var must be of pointer type");
11176 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11178 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
11179 Value *ResultCast = R.IsSigned
11180 ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
11181 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
11182 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11185 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
11186 "Op should be either max or min at this point");
11187 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
11198 if (IsXBinopExpr) {
11227 Value *CapturedValue =
nullptr;
11228 if (IsPostfixUpdate) {
11229 CapturedValue = OldValue;
11254 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
11255 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
11257 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11261 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
11281 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
11308 bool SubClausesPresent =
11309 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
11311 if (!
Config.isTargetDevice() && SubClausesPresent) {
11312 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
11313 "if lowerbound is non-null, then upperbound must also be non-null "
11314 "for bounds on num_teams");
11316 if (NumTeamsUpper ==
nullptr)
11317 NumTeamsUpper =
Builder.getInt32(0);
11319 if (NumTeamsLower ==
nullptr)
11320 NumTeamsLower = NumTeamsUpper;
11324 "argument to if clause must be an integer value");
11328 IfExpr =
Builder.CreateICmpNE(IfExpr,
11329 ConstantInt::get(IfExpr->
getType(), 0));
11330 NumTeamsUpper =
Builder.CreateSelect(
11331 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
11334 NumTeamsLower =
Builder.CreateSelect(
11335 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
11338 if (ThreadLimit ==
nullptr)
11339 ThreadLimit =
Builder.getInt32(0);
11343 Value *NumTeamsLowerInt32 =
11345 Value *NumTeamsUpperInt32 =
11347 Value *ThreadLimitInt32 =
11354 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
11355 ThreadLimitInt32});
11360 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11363 auto OI = std::make_unique<OutlineInfo>();
11364 OI->EntryBB = AllocaBB;
11365 OI->ExitBB = ExitBB;
11366 OI->OuterAllocBB = &OuterAllocaBB;
11372 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
11374 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
11376 auto HostPostOutlineCB = [
this, Ident,
11377 ToBeDeleted](
Function &OutlinedFn)
mutable {
11382 "there must be a single user for the outlined function");
11387 "Outlined function must have two or three arguments only");
11389 bool HasShared = OutlinedFn.
arg_size() == 3;
11397 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
11398 "outlined function.");
11399 Builder.SetInsertPoint(StaleCI);
11406 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11410 I->eraseFromParent();
11413 if (!
Config.isTargetDevice())
11414 OI->PostOutlineCB = HostPostOutlineCB;
11418 Builder.SetInsertPoint(ExitBB);
11431 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11446 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11451 if (
Config.isTargetDevice()) {
11452 auto OI = std::make_unique<OutlineInfo>();
11453 OI->OuterAllocBB = OuterAllocIP.
getBlock();
11454 OI->EntryBB = AllocaBB;
11455 OI->ExitBB = ExitBB;
11456 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
11457 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
11461 Builder.SetInsertPoint(ExitBB);
11468 std::string VarName) {
11477 return MapNamesArrayGlobal;
11482void OpenMPIRBuilder::initializeTypes(
Module &M) {
11486 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11487#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11488#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11489 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11490 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11491#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11492 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11493 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11494#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11495 T = StructType::getTypeByName(Ctx, StructName); \
11497 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11499 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11500#include "llvm/Frontend/OpenMP/OMPKinds.def"
11511 while (!Worklist.
empty()) {
11515 if (
BlockSet.insert(SuccBB).second)
11520std::unique_ptr<CodeExtractor>
11522 bool ArgsInZeroAddressSpace,
11524 return std::make_unique<CodeExtractor>(
11534 Suffix.
str(), ArgsInZeroAddressSpace);
11537std::unique_ptr<CodeExtractor> DeviceSharedMemOutlineInfo::createCodeExtractor(
11539 return std::make_unique<DeviceSharedMemCodeExtractor>(
11540 OMPBuilder, Blocks,
nullptr,
11548 OuterDeallocBBs.empty()
11551 Suffix.
str(), ArgsInZeroAddressSpace);
11561 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11573 Fn->
addFnAttr(
"uniform-work-group-size");
11574 Fn->
addFnAttr(Attribute::MustProgress);
11592 auto &&GetMDInt = [
this](
unsigned V) {
11599 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11600 auto &&TargetRegionMetadataEmitter =
11601 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11616 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11617 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11618 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11619 GetMDInt(E.getOrder())};
11622 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11631 auto &&DeviceGlobalVarMetadataEmitter =
11632 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11642 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11643 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11647 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11654 DeviceGlobalVarMetadataEmitter);
11656 for (
const auto &E : OrderedEntries) {
11657 assert(E.first &&
"All ordered entries must exist!");
11658 if (
const auto *CE =
11661 if (!CE->getID() || !CE->getAddress()) {
11665 if (!
M.getNamedValue(FnName))
11673 }
else if (
const auto *CE =
dyn_cast<
11682 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11684 if (!CE->getAddress()) {
11689 if (CE->getVarSize() == 0)
11693 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11694 (!
Config.isTargetDevice() && CE->getAddress())) &&
11695 "Declaret target link address is set.");
11696 if (
Config.isTargetDevice())
11698 if (!CE->getAddress()) {
11705 if (!CE->getAddress()) {
11718 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11722 OMPTargetGlobalVarEntryIndirectVTable))
11731 Flags, CE->getLinkage(), CE->getVarName());
11734 Flags, CE->getLinkage());
11745 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11751 Config.getRequiresFlags());
11761 OS <<
"_" <<
Count;
11766 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11769 EntryInfo.
Line, NewCount);
11777 auto FileIDInfo = CallBack();
11781 FileID =
Status->getUniqueID().getFile();
11785 FileID =
hash_value(std::get<0>(FileIDInfo));
11789 std::get<1>(FileIDInfo));
11795 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11797 !(Remain & 1); Remain = Remain >> 1)
11815 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11817 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11824 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11830 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11831 Flags |= MemberOfFlag;
11837 bool IsDeclaration,
bool IsExternallyVisible,
11839 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11840 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11841 std::function<
Constant *()> GlobalInitializer,
11852 Config.hasRequiresUnifiedSharedMemory())) {
11857 if (!IsExternallyVisible)
11859 OS <<
"_decl_tgt_ref_ptr";
11862 Value *Ptr =
M.getNamedValue(PtrName);
11871 if (!
Config.isTargetDevice()) {
11872 if (GlobalInitializer)
11873 GV->setInitializer(GlobalInitializer());
11879 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11880 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11881 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11893 bool IsDeclaration,
bool IsExternallyVisible,
11895 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11896 std::vector<Triple> TargetTriple,
11897 std::function<
Constant *()> GlobalInitializer,
11901 (TargetTriple.empty() && !
Config.isTargetDevice()))
11912 !
Config.hasRequiresUnifiedSharedMemory()) {
11914 VarName = MangledName;
11917 if (!IsDeclaration)
11919 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11922 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
11926 if (
Config.isTargetDevice() &&
11935 if (!
M.getNamedValue(RefName)) {
11939 GvAddrRef->setConstant(
true);
11941 GvAddrRef->setInitializer(Addr);
11942 GeneratedRefs.push_back(GvAddrRef);
11951 if (
Config.isTargetDevice()) {
11952 VarName = (Addr) ? Addr->
getName() :
"";
11956 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11957 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11958 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11959 VarName = (Addr) ? Addr->
getName() :
"";
11961 VarSize =
M.getDataLayout().getPointerSize();
11980 auto &&GetMDInt = [MN](
unsigned Idx) {
11985 auto &&GetMDString = [MN](
unsigned Idx) {
11987 return V->getString();
11990 switch (GetMDInt(0)) {
11994 case OffloadEntriesInfoManager::OffloadEntryInfo::
11995 OffloadingEntryInfoTargetRegion: {
12005 case OffloadEntriesInfoManager::OffloadEntryInfo::
12006 OffloadingEntryInfoDeviceGlobalVar:
12019 if (HostFilePath.
empty())
12023 if (std::error_code Err = Buf.getError()) {
12025 "OpenMPIRBuilder: " +
12033 if (std::error_code Err =
M.getError()) {
12035 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
12049 "expected a valid insertion block for creating an iterator loop");
12059 Builder.getCurrentDebugLocation(),
"omp.it.cont");
12071 T->eraseFromParent();
12080 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
12082 "iterator bodygen must terminate the canonical body with an "
12083 "unconditional branch to the loop latch",
12107 for (
const auto &
ParamAttr : ParamAttrs) {
12150 return std::string(Out.
str());
12158 unsigned VecRegSize;
12160 ISADataTy ISAData[] = {
12179 for (
char Mask :
Masked) {
12180 for (
const ISADataTy &
Data : ISAData) {
12183 Out <<
"_ZGV" <<
Data.ISA << Mask;
12185 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
12199template <
typename T>
12202 StringRef MangledName,
bool OutputBecomesInput,
12206 Out << Prefix << ISA << LMask << VLEN;
12207 if (OutputBecomesInput)
12209 Out << ParSeq <<
'_' << MangledName;
12218 bool OutputBecomesInput,
12223 OutputBecomesInput, Fn);
12225 OutputBecomesInput, Fn);
12229 OutputBecomesInput, Fn);
12231 OutputBecomesInput, Fn);
12235 OutputBecomesInput, Fn);
12237 OutputBecomesInput, Fn);
12242 OutputBecomesInput, Fn);
12253 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
12254 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
12266 OutputBecomesInput, Fn);
12273 OutputBecomesInput, Fn);
12275 OutputBecomesInput, Fn);
12279 OutputBecomesInput, Fn);
12283 OutputBecomesInput, Fn);
12292 OutputBecomesInput, Fn);
12299 MangledName, OutputBecomesInput, Fn);
12301 MangledName, OutputBecomesInput, Fn);
12305 MangledName, OutputBecomesInput, Fn);
12309 MangledName, OutputBecomesInput, Fn);
12319 return OffloadEntriesTargetRegion.empty() &&
12320 OffloadEntriesDeviceGlobalVar.empty();
12323unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
12325 auto It = OffloadEntriesTargetRegionCount.find(
12326 getTargetRegionEntryCountKey(EntryInfo));
12327 if (It == OffloadEntriesTargetRegionCount.end())
12332void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
12334 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
12335 EntryInfo.
Count + 1;
12341 OffloadEntriesTargetRegion[EntryInfo] =
12344 ++OffloadingEntriesNum;
12350 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
12353 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12357 if (OMPBuilder->Config.isTargetDevice()) {
12362 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
12363 Entry.setAddress(Addr);
12365 Entry.setFlags(Flags);
12371 "Target region entry already registered!");
12373 OffloadEntriesTargetRegion[EntryInfo] = Entry;
12374 ++OffloadingEntriesNum;
12376 incrementTargetRegionEntryInfoCount(EntryInfo);
12383 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12385 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
12386 if (It == OffloadEntriesTargetRegion.end()) {
12390 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
12398 for (
const auto &It : OffloadEntriesTargetRegion) {
12399 Action(It.first, It.second);
12405 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
12406 ++OffloadingEntriesNum;
12412 if (OMPBuilder->Config.isTargetDevice()) {
12416 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12418 if (Entry.getVarSize() == 0) {
12419 Entry.setVarSize(VarSize);
12420 Entry.setLinkage(Linkage);
12424 Entry.setVarSize(VarSize);
12425 Entry.setLinkage(Linkage);
12426 Entry.setAddress(Addr);
12429 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12430 assert(Entry.isValid() && Entry.getFlags() == Flags &&
12431 "Entry not initialized!");
12432 if (Entry.getVarSize() == 0) {
12433 Entry.setVarSize(VarSize);
12434 Entry.setLinkage(Linkage);
12441 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
12442 Addr, VarSize, Flags, Linkage,
12445 OffloadEntriesDeviceGlobalVar.try_emplace(
12446 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12447 ++OffloadingEntriesNum;
12454 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12455 Action(E.getKey(), E.getValue());
12462void CanonicalLoopInfo::collectControlBlocks(
12469 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12481void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12493void CanonicalLoopInfo::mapIndVar(
12503 for (
Use &U : OldIV->
uses()) {
12507 if (
User->getParent() == getCond())
12509 if (
User->getParent() == getLatch())
12515 Value *NewIV = Updater(OldIV);
12518 for (Use *U : ReplacableUses)
12539 "Preheader must terminate with unconditional branch");
12541 "Preheader must jump to header");
12545 "Header must terminate with unconditional branch");
12546 assert(Header->getSingleSuccessor() == Cond &&
12547 "Header must jump to exiting block");
12550 assert(Cond->getSinglePredecessor() == Header &&
12551 "Exiting block only reachable from header");
12554 "Exiting block must terminate with conditional branch");
12556 "Exiting block's first successor jump to the body");
12558 "Exiting block's second successor must exit the loop");
12562 "Body only reachable from exiting block");
12567 "Latch must terminate with unconditional branch");
12568 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12571 assert(Latch->getSinglePredecessor() !=
nullptr);
12576 "Exit block must terminate with unconditional branch");
12577 assert(Exit->getSingleSuccessor() == After &&
12578 "Exit block must jump to after block");
12582 "After block only reachable from exit block");
12586 assert(IndVar &&
"Canonical induction variable not found?");
12588 "Induction variable must be an integer");
12590 "Induction variable must be a PHI in the loop header");
12596 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12604 assert(TripCount &&
"Loop trip count not found?");
12606 "Trip count and induction variable must have the same type");
12610 "Exit condition must be a signed less-than comparison");
12612 "Exit condition must compare the induction variable");
12614 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static Function * createTargetParallelWrapper(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn)
Create wrapper function used to gather the outlined function's argument structure from a shared buffe...
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static bool isGenericKernel(Function &Fn)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static std::optional< omp::OMPTgtExecModeFlags > getTargetKernelExecMode(Function &Kernel)
Given a function, if it represents the entry point of a target kernel, this returns the execution mod...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const OpenMPIRBuilder::DependenciesInfo &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a declare target to.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
LLVM_ABI CallInst * createOMPAllocShared(const LocationDescription &Loc, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_alloc_shared.
ReductionGenCBKind
Enum class for the ReductionGen callback type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
LLVM_ABI InsertPointOrErrorTy createScope(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait)
Generator for 'omp scope'.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const DependenciesInfo &Dependencies={}, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> TargetBodyGenCallbackTy
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, const DependenciesInfo &Dependencies={}, const AffinityData &Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={}, ArrayRef< BasicBlock * > DeallocBlocks={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
void addOutlineInfo(std::unique_ptr< OutlineInfo > &&OI)
Add a new region that will be outlined later.
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry, const DependData &Dep)
Store one kmp_depend_info entry at the given Entry pointer.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI CallInst * createOMPAlignedAlloc(const LocationDescription &Loc, Value *Align, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_align_alloc.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPFreeShared(const LocationDescription &Loc, Value *Addr, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_free_shared.
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
SmallVector< std::unique_ptr< OutlineInfo >, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< InsertPointTy > DeallocIPs)> TargetGenArgAccessorsCallbackTy
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const DependenciesInfo &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr)
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_SPMD_NO_LOOP
@ OMP_TGT_EXEC_MODE_GENERIC
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
omp::RTLDependenceKindTy DepKind
A struct to pack static and dynamic dependency information for a task.
SmallVector< DependData > Deps
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
BasicBlock * OuterAllocBB
virtual LLVM_ABI std::unique_ptr< CodeExtractor > createCodeExtractor(ArrayRef< BasicBlock * > Blocks, bool ArgsInZeroAddressSpace, Twine Suffix=Twine(""))
Create a CodeExtractor instance based on the information stored in this structure,...
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...