66#define DEBUG_TYPE "openmp-ir-builder"
73 cl::desc(
"Use optimistic attributes describing "
74 "'as-if' properties of runtime calls."),
78 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
79 cl::desc(
"Factor for the unroll threshold to account for code "
80 "simplifications still taking place"),
91 if (!IP1.isSet() || !IP2.isSet())
93 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
98 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
99 case OMPScheduleType::UnorderedStaticChunked:
100 case OMPScheduleType::UnorderedStatic:
101 case OMPScheduleType::UnorderedDynamicChunked:
102 case OMPScheduleType::UnorderedGuidedChunked:
103 case OMPScheduleType::UnorderedRuntime:
104 case OMPScheduleType::UnorderedAuto:
105 case OMPScheduleType::UnorderedTrapezoidal:
106 case OMPScheduleType::UnorderedGreedy:
107 case OMPScheduleType::UnorderedBalanced:
108 case OMPScheduleType::UnorderedGuidedIterativeChunked:
109 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
110 case OMPScheduleType::UnorderedSteal:
111 case OMPScheduleType::UnorderedStaticBalancedChunked:
112 case OMPScheduleType::UnorderedGuidedSimd:
113 case OMPScheduleType::UnorderedRuntimeSimd:
114 case OMPScheduleType::OrderedStaticChunked:
115 case OMPScheduleType::OrderedStatic:
116 case OMPScheduleType::OrderedDynamicChunked:
117 case OMPScheduleType::OrderedGuidedChunked:
118 case OMPScheduleType::OrderedRuntime:
119 case OMPScheduleType::OrderedAuto:
120 case OMPScheduleType::OrderdTrapezoidal:
121 case OMPScheduleType::NomergeUnorderedStaticChunked:
122 case OMPScheduleType::NomergeUnorderedStatic:
123 case OMPScheduleType::NomergeUnorderedDynamicChunked:
124 case OMPScheduleType::NomergeUnorderedGuidedChunked:
125 case OMPScheduleType::NomergeUnorderedRuntime:
126 case OMPScheduleType::NomergeUnorderedAuto:
127 case OMPScheduleType::NomergeUnorderedTrapezoidal:
128 case OMPScheduleType::NomergeUnorderedGreedy:
129 case OMPScheduleType::NomergeUnorderedBalanced:
130 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
131 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
132 case OMPScheduleType::NomergeUnorderedSteal:
133 case OMPScheduleType::NomergeOrderedStaticChunked:
134 case OMPScheduleType::NomergeOrderedStatic:
135 case OMPScheduleType::NomergeOrderedDynamicChunked:
136 case OMPScheduleType::NomergeOrderedGuidedChunked:
137 case OMPScheduleType::NomergeOrderedRuntime:
138 case OMPScheduleType::NomergeOrderedAuto:
139 case OMPScheduleType::NomergeOrderedTrapezoidal:
140 case OMPScheduleType::OrderedDistributeChunked:
141 case OMPScheduleType::OrderedDistribute:
149 SchedType & OMPScheduleType::MonotonicityMask;
150 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
162 Builder.restoreIP(IP);
172 Kernel->getFnAttribute(
"target-features").getValueAsString();
173 if (Features.
count(
"+wavefrontsize64"))
188 bool HasSimdModifier,
bool HasDistScheduleChunks) {
190 switch (ClauseKind) {
191 case OMP_SCHEDULE_Default:
192 case OMP_SCHEDULE_Static:
193 return HasChunks ? OMPScheduleType::BaseStaticChunked
194 : OMPScheduleType::BaseStatic;
195 case OMP_SCHEDULE_Dynamic:
196 return OMPScheduleType::BaseDynamicChunked;
197 case OMP_SCHEDULE_Guided:
198 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
199 : OMPScheduleType::BaseGuidedChunked;
200 case OMP_SCHEDULE_Auto:
202 case OMP_SCHEDULE_Runtime:
203 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
204 : OMPScheduleType::BaseRuntime;
205 case OMP_SCHEDULE_Distribute:
206 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
207 : OMPScheduleType::BaseDistribute;
215 bool HasOrderedClause) {
216 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
217 OMPScheduleType::None &&
218 "Must not have ordering nor monotonicity flags already set");
221 ? OMPScheduleType::ModifierOrdered
222 : OMPScheduleType::ModifierUnordered;
223 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
226 if (OrderingScheduleType ==
227 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
228 return OMPScheduleType::OrderedGuidedChunked;
229 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
230 OMPScheduleType::ModifierOrdered))
231 return OMPScheduleType::OrderedRuntime;
233 return OrderingScheduleType;
239 bool HasSimdModifier,
bool HasMonotonic,
240 bool HasNonmonotonic,
bool HasOrderedClause) {
241 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
242 OMPScheduleType::None &&
243 "Must not have monotonicity flags already set");
244 assert((!HasMonotonic || !HasNonmonotonic) &&
245 "Monotonic and Nonmonotonic are contradicting each other");
248 return ScheduleType | OMPScheduleType::ModifierMonotonic;
249 }
else if (HasNonmonotonic) {
250 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
260 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
261 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
267 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
275 bool HasSimdModifier,
bool HasMonotonicModifier,
276 bool HasNonmonotonicModifier,
bool HasOrderedClause,
277 bool HasDistScheduleChunks) {
279 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
283 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
284 HasNonmonotonicModifier, HasOrderedClause);
299 assert(!Br->isConditional() &&
300 "BB's terminator must be an unconditional branch (or degenerate)");
303 Br->setSuccessor(0,
Target);
308 NewBr->setDebugLoc(
DL);
314 "Target BB must not have PHI nodes");
334 NewBr->setDebugLoc(
DL);
342 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
346 Builder.SetInsertPoint(Old);
350 Builder.SetCurrentDebugLocation(
DebugLoc);
359 spliceBB(IP, New, CreateBranch,
DL);
360 New->replaceSuccessorsPhiUsesWith(Old, New);
369 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
371 Builder.SetInsertPoint(Builder.GetInsertBlock());
374 Builder.SetCurrentDebugLocation(
DebugLoc);
383 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
385 Builder.SetInsertPoint(Builder.GetInsertBlock());
388 Builder.SetCurrentDebugLocation(
DebugLoc);
395 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
402 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
404 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
405 const Twine &Name =
"",
bool AsPtr =
true) {
406 Builder.restoreIP(OuterAllocaIP);
409 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
413 FakeVal = FakeValAddr;
416 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
421 Builder.restoreIP(InnerAllocaIP);
425 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
441enum OpenMPOffloadingRequiresDirFlags {
443 OMP_REQ_UNDEFINED = 0x000,
445 OMP_REQ_NONE = 0x001,
447 OMP_REQ_REVERSE_OFFLOAD = 0x002,
449 OMP_REQ_UNIFIED_ADDRESS = 0x004,
451 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
453 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
459OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
460 : RequiresFlags(OMP_REQ_UNDEFINED) {}
462OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
463 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
464 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
465 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
466 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
467 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
468 RequiresFlags(OMP_REQ_UNDEFINED) {
469 if (HasRequiresReverseOffload)
470 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
471 if (HasRequiresUnifiedAddress)
472 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
473 if (HasRequiresUnifiedSharedMemory)
474 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
475 if (HasRequiresDynamicAllocators)
476 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
479bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
480 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
483bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
484 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
487bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
488 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
491bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
492 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
495int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
496 return hasRequiresFlags() ? RequiresFlags
497 :
static_cast<int64_t
>(OMP_REQ_NONE);
500void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
502 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
504 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
514void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
516 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
518 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
521void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
523 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
525 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
532void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
536 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
538 constexpr size_t MaxDim = 3;
541 Value *HasNoWaitFlag = Builder.getInt64(KernelArgs.HasNoWait);
543 Value *DynCGroupMemFallbackFlag =
544 Builder.getInt64(
static_cast<uint64_t>(KernelArgs.DynCGroupMemFallback));
545 DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
546 Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
548 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
551 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
552 Value *NumThreads3D =
553 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
555 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
557 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
559 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
561 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
563 ArgsVector = {Version,
565 KernelArgs.RTArgs.BasePointersArray,
566 KernelArgs.RTArgs.PointersArray,
567 KernelArgs.RTArgs.SizesArray,
568 KernelArgs.RTArgs.MapTypesArray,
569 KernelArgs.RTArgs.MapNamesArray,
570 KernelArgs.RTArgs.MappersArray,
571 KernelArgs.NumIterations,
575 KernelArgs.DynCGroupMem};
583 auto FnAttrs =
Attrs.getFnAttrs();
584 auto RetAttrs =
Attrs.getRetAttrs();
586 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
591 bool Param =
true) ->
void {
592 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
593 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
594 if (HasSignExt || HasZeroExt) {
595 assert(AS.getNumAttributes() == 1 &&
596 "Currently not handling extension attr combined with others.");
598 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
601 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
608#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
609#include "llvm/Frontend/OpenMP/OMPKinds.def"
613#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
615 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
616 addAttrSet(RetAttrs, RetAttrSet, false); \
617 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
618 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
619 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
621#include "llvm/Frontend/OpenMP/OMPKinds.def"
635#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
637 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
639 Fn = M.getFunction(Str); \
641#include "llvm/Frontend/OpenMP/OMPKinds.def"
647#define OMP_RTL(Enum, Str, ...) \
649 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
651#include "llvm/Frontend/OpenMP/OMPKinds.def"
655 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
665 LLVMContext::MD_callback,
667 2, {-1, -1},
true)}));
673 addAttributes(FnID, *Fn);
680 assert(Fn &&
"Failed to create OpenMP runtime function");
686OpenMPIRBuilder::FinalizationInfo::getFiniBB(
IRBuilderBase &Builder) {
691 Builder.SetInsertPoint(FiniBB);
693 if (
Error Err = FiniCB(Builder.saveIP()))
703 FiniBB = OtherFiniBB;
705 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
706 if (
Error Err = FiniCB(Builder.saveIP()))
713 auto EndIt = FiniBB->end();
714 if (FiniBB->size() >= 1)
715 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
720 FiniBB->replaceAllUsesWith(OtherFiniBB);
721 FiniBB->eraseFromParent();
722 FiniBB = OtherFiniBB;
729 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
736 CallInst *
Call = Builder.CreateCall(Callee, Args, Name);
737 Call->setCallingConv(Config.getRuntimeCC());
741void OpenMPIRBuilder::initialize() { initializeTypes(M); }
752 for (
auto Inst =
Block->getReverseIterator()->begin();
753 Inst !=
Block->getReverseIterator()->end();) {
766void OpenMPIRBuilder::finalize(
Function *Fn) {
770 for (OutlineInfo &OI : OutlineInfos) {
773 if (Fn && OI.getFunction() != Fn) {
778 ParallelRegionBlockSet.
clear();
780 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
790 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
799 ".omp_par", ArgsInZeroAddressSpace);
803 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
804 assert(Extractor.isEligible() &&
805 "Expected OpenMP outlining to be possible!");
807 for (
auto *V : OI.ExcludeArgsFromAggregate)
808 Extractor.excludeArgFromAggregate(V);
810 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
814 if (TargetCpuAttr.isStringAttribute())
817 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
818 if (TargetFeaturesAttr.isStringAttribute())
819 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
822 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
824 "OpenMP outlined functions should not return a value!");
829 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
836 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
843 "Expected instructions to add in the outlined region entry");
845 End = ArtificialEntry.
rend();
850 if (
I.isTerminator()) {
852 if (OI.EntryBB->getTerminator())
853 OI.EntryBB->getTerminator()->adoptDbgRecords(
854 &ArtificialEntry,
I.getIterator(),
false);
858 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
861 OI.EntryBB->moveBefore(&ArtificialEntry);
868 if (OI.PostOutlineCB)
869 OI.PostOutlineCB(*OutlinedFn);
873 OutlineInfos = std::move(DeferredOutlines);
894 for (
Function *
F : ConstantAllocaRaiseCandidates)
897 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
898 [](EmitMetadataErrorKind Kind,
899 const TargetRegionEntryInfo &EntryInfo) ->
void {
900 errs() <<
"Error of kind: " << Kind
901 <<
" when emitting offload entries and metadata during "
902 "OMPIRBuilder finalization \n";
905 if (!OffloadInfoManager.empty())
906 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
908 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
909 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
910 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
911 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
917bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
919OpenMPIRBuilder::~OpenMPIRBuilder() {
920 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
928 ConstantInt::get(I32Ty,
Value), Name);
940 UsedArray.
resize(List.size());
941 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
945 if (UsedArray.
empty())
952 GV->setSection(
"llvm.metadata");
956OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
958 auto *Int8Ty = Builder.getInt8Ty();
961 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
969 unsigned Reserve2Flags) {
971 LocFlags |= OMP_IDENT_FLAG_KMPC;
974 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
978 ConstantInt::get(Int32,
uint32_t(LocFlags)),
979 ConstantInt::get(Int32, Reserve2Flags),
980 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
982 size_t SrcLocStrArgIdx = 4;
983 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
987 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
994 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
995 if (
GV.getInitializer() == Initializer)
1000 M, OpenMPIRBuilder::Ident,
1003 M.getDataLayout().getDefaultGlobalsAddressSpace());
1015 SrcLocStrSize = LocStr.
size();
1016 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
1024 if (
GV.isConstant() &&
GV.hasInitializer() &&
1025 GV.getInitializer() == Initializer)
1028 SrcLocStr = Builder.CreateGlobalString(
1029 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
1037 unsigned Line,
unsigned Column,
1043 Buffer.
append(FunctionName);
1045 Buffer.
append(std::to_string(Line));
1047 Buffer.
append(std::to_string(Column));
1050 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
1054OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
1055 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1056 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1064 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1066 if (
DIFile *DIF = DIL->getFile())
1067 if (std::optional<StringRef> Source = DIF->getSource())
1072 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1073 DIL->getColumn(), SrcLocStrSize);
1076Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1078 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1079 Loc.IP.getBlock()->getParent());
1082Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1083 return createRuntimeFunctionCall(
1084 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1085 "omp_global_thread_num");
1088OpenMPIRBuilder::InsertPointOrErrorTy
1089OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1090 bool ForceSimpleCall,
bool CheckCancelFlag) {
1091 if (!updateToLocation(
Loc))
1100 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1103 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1106 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1109 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1112 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1117 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1119 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1120 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1125 bool UseCancelBarrier =
1126 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1129 getOrCreateRuntimeFunctionPtr(UseCancelBarrier
1130 ? OMPRTL___kmpc_cancel_barrier
1131 : OMPRTL___kmpc_barrier),
1134 if (UseCancelBarrier && CheckCancelFlag)
1135 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1138 return Builder.saveIP();
1141OpenMPIRBuilder::InsertPointOrErrorTy
1142OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1144 omp::Directive CanceledDirective) {
1145 if (!updateToLocation(
Loc))
1149 auto *UI = Builder.CreateUnreachable();
1154 Builder.SetInsertPoint(ThenTI);
1156 Value *CancelKind =
nullptr;
1157 switch (CanceledDirective) {
1158#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1159 case DirectiveEnum: \
1160 CancelKind = Builder.getInt32(Value); \
1162#include "llvm/Frontend/OpenMP/OMPKinds.def"
1168 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1169 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1170 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1172 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1175 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
1179 Builder.SetInsertPoint(UI->getParent());
1180 UI->eraseFromParent();
1182 return Builder.saveIP();
1185OpenMPIRBuilder::InsertPointOrErrorTy
1186OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1187 omp::Directive CanceledDirective) {
1188 if (!updateToLocation(
Loc))
1192 auto *UI = Builder.CreateUnreachable();
1193 Builder.SetInsertPoint(UI);
1195 Value *CancelKind =
nullptr;
1196 switch (CanceledDirective) {
1197#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1198 case DirectiveEnum: \
1199 CancelKind = Builder.getInt32(Value); \
1201#include "llvm/Frontend/OpenMP/OMPKinds.def"
1207 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1208 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1209 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1211 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1214 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
1218 Builder.SetInsertPoint(UI->getParent());
1219 UI->eraseFromParent();
1221 return Builder.saveIP();
1224OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1225 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1228 if (!updateToLocation(
Loc))
1231 Builder.restoreIP(AllocaIP);
1232 auto *KernelArgsPtr =
1233 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1234 updateToLocation(
Loc);
1238 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1239 Builder.CreateAlignedStore(
1241 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1245 NumThreads, HostPtr, KernelArgsPtr};
1247 Return = createRuntimeFunctionCall(
1248 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1251 return Builder.saveIP();
1254OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1255 const LocationDescription &
Loc,
Value *OutlinedFnID,
1256 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1257 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1259 if (!updateToLocation(
Loc))
1272 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1276 Value *Return =
nullptr;
1280 getKernelArgsVector(Args, Builder, ArgsVector);
1295 Builder.restoreIP(emitTargetKernel(
1296 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1297 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1304 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1306 auto CurFn = Builder.GetInsertBlock()->getParent();
1307 emitBlock(OffloadFailedBlock, CurFn);
1308 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1310 return AfterIP.takeError();
1311 Builder.restoreIP(*AfterIP);
1312 emitBranch(OffloadContBlock);
1313 emitBlock(OffloadContBlock, CurFn,
true);
1314 return Builder.saveIP();
1317Error OpenMPIRBuilder::emitCancelationCheckImpl(
1318 Value *CancelFlag, omp::Directive CanceledDirective) {
1319 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1320 "Unexpected cancellation!");
1325 if (Builder.GetInsertPoint() == BB->
end()) {
1331 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1333 Builder.SetInsertPoint(BB);
1339 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1340 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1345 auto &FI = FinalizationStack.back();
1349 Builder.SetInsertPoint(CancellationBlock);
1350 Builder.CreateBr(*FiniBBOrErr);
1353 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1372 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1375 "Expected at least tid and bounded tid as arguments");
1376 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1379 assert(CI &&
"Expected call instruction to outlined function");
1380 CI->
getParent()->setName(
"omp_parallel");
1382 Builder.SetInsertPoint(CI);
1383 Type *PtrTy = OMPIRBuilder->VoidPtr;
1387 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1391 Value *Args = ArgsAlloca;
1395 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1396 Builder.restoreIP(CurrentIP);
1399 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1401 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1403 Builder.CreateStore(V, StoreAddress);
1407 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1408 : Builder.getInt32(1);
1411 Value *Parallel51CallArgs[] = {
1415 NumThreads ? NumThreads : Builder.getInt32(-1),
1416 Builder.getInt32(-1),
1420 Builder.getInt64(NumCapturedVars)};
1423 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1425 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, Parallel51CallArgs);
1428 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1431 Builder.SetInsertPoint(PrivTID);
1433 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1440 I->eraseFromParent();
1457 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1460 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1463 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1471 F->addMetadata(LLVMContext::MD_callback,
1480 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1483 "Expected at least tid and bounded tid as arguments");
1484 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1487 CI->
getParent()->setName(
"omp_parallel");
1488 Builder.SetInsertPoint(CI);
1491 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1495 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1497 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1504 auto PtrTy = OMPIRBuilder->VoidPtr;
1505 if (IfCondition && NumCapturedVars == 0) {
1510 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
1513 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1516 Builder.SetInsertPoint(PrivTID);
1518 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1525 I->eraseFromParent();
1529OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1530 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1531 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1532 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1533 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1536 if (!updateToLocation(
Loc))
1540 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1541 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1542 Value *ThreadID = getOrCreateThreadID(Ident);
1548 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1552 if (NumThreads && !Config.isTargetDevice()) {
1555 Builder.CreateIntCast(NumThreads, Int32,
false)};
1556 createRuntimeFunctionCall(
1557 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1560 if (ProcBind != OMP_PROC_BIND_default) {
1564 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1565 createRuntimeFunctionCall(
1566 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1569 BasicBlock *InsertBB = Builder.GetInsertBlock();
1574 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1582 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1583 Builder.restoreIP(NewOuter);
1584 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32,
nullptr,
"tid.addr");
1586 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1589 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1592 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1596 PointerType ::get(M.getContext(), 0),
1597 "zero.addr.ascast");
1618 auto FiniCBWrapper = [&](InsertPointTy IP) {
1623 Builder.restoreIP(IP);
1625 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1629 "Unexpected insertion point for finalization call!");
1633 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1638 InsertPointTy InnerAllocaIP = Builder.saveIP();
1641 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1642 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr,
"tid");
1645 ToBeDeleted.
push_back(Builder.CreateLoad(Int32, TIDAddr,
"tid.addr.use"));
1647 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1665 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1668 assert(BodyGenCB &&
"Expected body generation callback!");
1669 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1670 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1673 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1676 if (Config.isTargetDevice()) {
1678 OI.PostOutlineCB = [=, ToBeDeletedVec =
1679 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1681 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1682 ThreadID, ToBeDeletedVec);
1686 OI.PostOutlineCB = [=, ToBeDeletedVec =
1687 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1689 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1693 OI.OuterAllocaBB = OuterAllocaBlock;
1694 OI.EntryBB = PRegEntryBB;
1695 OI.ExitBB = PRegExitBB;
1699 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1710 ".omp_par", ArgsInZeroAddressSpace);
1715 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1717 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1722 return GV->getValueType() == OpenMPIRBuilder::Ident;
1727 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1730 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1733 if (&V == TIDAddr || &V == ZeroAddr) {
1734 OI.ExcludeArgsFromAggregate.push_back(&V);
1739 for (
Use &U : V.uses())
1741 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1751 if (!V.getType()->isPointerTy()) {
1755 Builder.restoreIP(OuterAllocaIP);
1757 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1761 Builder.SetInsertPoint(InsertBB,
1763 Builder.CreateStore(&V, Ptr);
1766 Builder.restoreIP(InnerAllocaIP);
1767 Inner = Builder.CreateLoad(V.getType(), Ptr);
1770 Value *ReplacementValue =
nullptr;
1773 ReplacementValue = PrivTID;
1775 InsertPointOrErrorTy AfterIP =
1776 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1778 return AfterIP.takeError();
1779 Builder.restoreIP(*AfterIP);
1781 InnerAllocaIP.getBlock(),
1782 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1784 assert(ReplacementValue &&
1785 "Expected copy/create callback to set replacement value!");
1786 if (ReplacementValue == &V)
1791 UPtr->set(ReplacementValue);
1816 for (
Value *Output : Outputs)
1819 assert(Outputs.empty() &&
1820 "OpenMP outlining should not produce live-out values!");
1822 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1824 for (
auto *BB : Blocks)
1831 auto FiniInfo = FinalizationStack.pop_back_val();
1833 assert(FiniInfo.DK == OMPD_parallel &&
1834 "Unexpected finalization stack state!");
1838 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1844 Builder.restoreIP(PreFiniIP);
1845 Builder.CreateBr(*FiniBBOrErr);
1848 if (
Instruction *Term = Builder.GetInsertBlock()->getTerminator())
1849 Term->eraseFromParent();
1853 addOutlineInfo(std::move(OI));
1855 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1856 UI->eraseFromParent();
1861void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1864 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1865 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1867 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush),
1871void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1872 if (!updateToLocation(
Loc))
1877void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1881 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1882 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1883 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1886 createRuntimeFunctionCall(
1887 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), Args);
1890void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1891 if (!updateToLocation(
Loc))
1893 emitTaskwaitImpl(
Loc);
1896void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1899 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1900 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1902 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1904 createRuntimeFunctionCall(
1905 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), Args);
1908void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1909 if (!updateToLocation(
Loc))
1911 emitTaskyieldImpl(
Loc);
1920 OpenMPIRBuilder &OMPBuilder,
1923 if (Dependencies.
empty())
1943 Type *DependInfo = OMPBuilder.DependInfo;
1944 Module &M = OMPBuilder.M;
1946 Value *DepArray =
nullptr;
1947 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1948 Builder.SetInsertPoint(
1949 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1952 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1954 Builder.restoreIP(OldIP);
1956 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1958 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1960 Value *Addr = Builder.CreateStructGEP(
1962 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1963 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1964 Builder.CreateStore(DepValPtr, Addr);
1967 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1968 Builder.CreateStore(
1969 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1972 Value *Flags = Builder.CreateStructGEP(
1974 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1975 Builder.CreateStore(
1976 ConstantInt::get(Builder.getInt8Ty(),
1977 static_cast<unsigned int>(Dep.DepKind)),
1983OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1984 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1985 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1989 if (!updateToLocation(
Loc))
1990 return InsertPointTy();
1993 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1994 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2011 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
2012 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
2014 splitBB(Builder,
true,
"task.alloca");
2016 InsertPointTy TaskAllocaIP =
2017 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
2018 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
2019 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
2023 OI.EntryBB = TaskAllocaBB;
2024 OI.OuterAllocaBB = AllocaIP.getBlock();
2025 OI.ExitBB = TaskExitBB;
2030 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2032 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2033 Mergeable, Priority, EventHandle, TaskAllocaBB,
2034 ToBeDeleted](
Function &OutlinedFn)
mutable {
2037 "there must be a single user for the outlined function");
2042 bool HasShareds = StaleCI->
arg_size() > 1;
2043 Builder.SetInsertPoint(StaleCI);
2048 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2052 Value *ThreadID = getOrCreateThreadID(Ident);
2064 Value *Flags = Builder.getInt32(Tied);
2067 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2068 Flags = Builder.CreateOr(FinalFlag, Flags);
2072 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2074 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2080 Value *TaskSize = Builder.getInt64(
2081 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2086 Value *SharedsSize = Builder.getInt64(0);
2090 assert(ArgStructAlloca &&
2091 "Unable to find the alloca instruction corresponding to arguments "
2092 "for extracted function");
2095 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2096 "arguments for extracted function");
2098 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2103 CallInst *TaskData = createRuntimeFunctionCall(
2104 TaskAllocFn, {Ident, ThreadID, Flags,
2105 TaskSize, SharedsSize,
2112 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2113 OMPRTL___kmpc_task_allow_completion_event);
2115 createRuntimeFunctionCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2117 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2118 Builder.getPtrTy(0));
2119 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2120 Builder.CreateStore(EventVal, EventHandleAddr);
2126 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2127 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2145 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2148 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2149 Value *PriorityData = Builder.CreateInBoundsGEP(
2150 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2153 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2155 Builder.CreateStore(Priority, CmplrData);
2180 splitBB(Builder,
true,
"if.end");
2182 Builder.GetInsertPoint()->
getParent()->getTerminator();
2183 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2184 Builder.SetInsertPoint(IfTerminator);
2187 Builder.SetInsertPoint(ElseTI);
2189 if (Dependencies.size()) {
2191 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2192 createRuntimeFunctionCall(
2194 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2195 ConstantInt::get(Builder.getInt32Ty(), 0),
2199 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2201 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2202 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2205 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID, TaskData});
2207 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID});
2209 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2210 Builder.SetInsertPoint(ThenTI);
2213 if (Dependencies.size()) {
2215 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2216 createRuntimeFunctionCall(
2218 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2219 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2224 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2225 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
2230 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2232 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2234 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2238 I->eraseFromParent();
2241 addOutlineInfo(std::move(OI));
2242 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2244 return Builder.saveIP();
2247OpenMPIRBuilder::InsertPointOrErrorTy
2248OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2249 InsertPointTy AllocaIP,
2250 BodyGenCallbackTy BodyGenCB) {
2251 if (!updateToLocation(
Loc))
2252 return InsertPointTy();
2255 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2256 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2257 Value *ThreadID = getOrCreateThreadID(Ident);
2261 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2262 createRuntimeFunctionCall(TaskgroupFn, {Ident, ThreadID});
2264 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2265 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2268 Builder.SetInsertPoint(TaskgroupExitBB);
2271 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2272 createRuntimeFunctionCall(EndTaskgroupFn, {Ident, ThreadID});
2274 return Builder.saveIP();
2277OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2278 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2280 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2283 if (!updateToLocation(
Loc))
2286 FinalizationStack.push_back({FiniCB, OMPD_sections, IsCancellable});
2304 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2305 Builder.restoreIP(CodeGenIP);
2307 splitBBWithSuffix(Builder,
false,
".sections.after");
2311 unsigned CaseNumber = 0;
2312 for (
auto SectionCB : SectionCBs) {
2314 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2315 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2316 Builder.SetInsertPoint(CaseBB);
2318 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
2330 Value *LB = ConstantInt::get(I32Ty, 0);
2331 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2332 Value *
ST = ConstantInt::get(I32Ty, 1);
2334 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2338 InsertPointOrErrorTy WsloopIP =
2339 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2340 WorksharingLoopType::ForStaticLoop, !IsNowait);
2342 return WsloopIP.takeError();
2343 InsertPointTy AfterIP = *WsloopIP;
2346 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2349 auto FiniInfo = FinalizationStack.pop_back_val();
2350 assert(FiniInfo.DK == OMPD_sections &&
2351 "Unexpected finalization stack state!");
2352 if (
Error Err = FiniInfo.mergeFiniBB(Builder, LoopFini))
2358OpenMPIRBuilder::InsertPointOrErrorTy
2359OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2360 BodyGenCallbackTy BodyGenCB,
2361 FinalizeCallbackTy FiniCB) {
2362 if (!updateToLocation(
Loc))
2365 auto FiniCBWrapper = [&](InsertPointTy IP) {
2376 Builder.restoreIP(IP);
2377 auto *CaseBB =
Loc.IP.getBlock();
2381 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2385 Directive OMPD = Directive::OMPD_sections;
2388 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2396 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
2399Value *OpenMPIRBuilder::getGPUThreadID() {
2400 return createRuntimeFunctionCall(
2401 getOrCreateRuntimeFunction(M,
2402 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2406Value *OpenMPIRBuilder::getGPUWarpSize() {
2407 return createRuntimeFunctionCall(
2408 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
2411Value *OpenMPIRBuilder::getNVPTXWarpID() {
2412 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2413 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2416Value *OpenMPIRBuilder::getNVPTXLaneID() {
2417 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2418 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2419 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2420 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
2424Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2427 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2428 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2429 assert(FromSize > 0 &&
"From size must be greater than zero");
2430 assert(ToSize > 0 &&
"To size must be greater than zero");
2431 if (FromType == ToType)
2433 if (FromSize == ToSize)
2434 return Builder.CreateBitCast(From, ToType);
2436 return Builder.CreateIntCast(From, ToType,
true);
2437 InsertPointTy SaveIP = Builder.saveIP();
2438 Builder.restoreIP(AllocaIP);
2439 Value *CastItem = Builder.CreateAlloca(ToType);
2440 Builder.restoreIP(SaveIP);
2442 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2443 CastItem, Builder.getPtrTy(0));
2444 Builder.CreateStore(From, ValCastItem);
2445 return Builder.CreateLoad(ToType, CastItem);
2448Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2452 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2453 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2457 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2459 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2460 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2461 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2462 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2463 Value *WarpSizeCast =
2464 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2465 Value *ShuffleCall =
2466 createRuntimeFunctionCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2467 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2470void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2474 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2485 Type *IndexTy = Builder.getIndexTy(
2486 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2487 Value *ElemPtr = DstAddr;
2488 Value *Ptr = SrcAddr;
2489 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2493 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2494 Ptr, Builder.getPtrTy(0), Ptr->
getName() +
".ascast");
2496 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2497 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2498 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
2501 if ((
Size / IntSize) > 1) {
2502 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2503 SrcAddrGEP, Builder.getPtrTy());
2508 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2509 emitBlock(PreCondBB, CurFunc);
2511 Builder.CreatePHI(Ptr->
getType(), 2);
2514 Builder.CreatePHI(ElemPtr->
getType(), 2);
2518 Value *PtrDiff = Builder.CreatePtrDiff(
2519 Builder.getInt8Ty(), PtrEnd,
2520 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr, Builder.getPtrTy()));
2521 Builder.CreateCondBr(
2522 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2524 emitBlock(ThenBB, CurFunc);
2525 Value *Res = createRuntimeShuffleFunction(
2527 Builder.CreateAlignedLoad(
2528 IntType, Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2530 Builder.CreateAlignedStore(Res, ElemPtr,
2531 M.getDataLayout().getPrefTypeAlign(ElemType));
2533 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2534 Value *LocalElemPtr =
2535 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2538 emitBranch(PreCondBB);
2539 emitBlock(ExitBB, CurFunc);
2541 Value *Res = createRuntimeShuffleFunction(
2542 AllocaIP, Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
2545 Res = Builder.CreateTrunc(Res, ElemType);
2546 Builder.CreateStore(Res, ElemPtr);
2547 Ptr = Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2549 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2555Error OpenMPIRBuilder::emitReductionListCopy(
2556 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2559 Type *IndexTy = Builder.getIndexTy(
2560 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2561 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2565 for (
auto En :
enumerate(ReductionInfos)) {
2566 const ReductionInfo &RI = En.value();
2567 Value *SrcElementAddr =
nullptr;
2569 Value *DestElementAddr =
nullptr;
2570 Value *DestElementPtrAddr =
nullptr;
2572 bool ShuffleInElement =
false;
2575 bool UpdateDestListPtr =
false;
2578 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2579 ReductionArrayTy, SrcBase,
2580 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2581 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2585 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2586 ReductionArrayTy, DestBase,
2587 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2588 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
2590 case CopyAction::RemoteLaneToThread: {
2591 InsertPointTy CurIP = Builder.saveIP();
2592 Builder.restoreIP(AllocaIP);
2594 Type *DestAllocaType =
2595 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
2596 DestAlloca = Builder.CreateAlloca(DestAllocaType,
nullptr,
2597 ".omp.reduction.element");
2599 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
2600 DestElementAddr = DestAlloca;
2602 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2603 DestElementAddr->
getName() +
".ascast");
2604 Builder.restoreIP(CurIP);
2605 ShuffleInElement =
true;
2606 UpdateDestListPtr =
true;
2609 case CopyAction::ThreadCopy: {
2611 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2618 if (ShuffleInElement) {
2619 Type *ShuffleType = RI.ElementType;
2620 Value *ShuffleSrcAddr = SrcElementAddr;
2621 Value *ShuffleDestAddr = DestElementAddr;
2625 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
2626 assert(RI.ByRefAllocatedType &&
2627 "Expected by-ref allocated type to be set");
2632 ShuffleType = RI.ByRefElementType;
2634 InsertPointOrErrorTy GenResult =
2635 RI.DataPtrPtrGen(Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
2638 return GenResult.takeError();
2640 ShuffleSrcAddr = Builder.CreateLoad(Builder.getPtrTy(), ShuffleSrcAddr);
2643 InsertPointTy OldIP = Builder.saveIP();
2644 Builder.restoreIP(AllocaIP);
2646 LocalStorage = Builder.CreateAlloca(ShuffleType);
2647 Builder.restoreIP(OldIP);
2648 ShuffleDestAddr = LocalStorage;
2652 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
2653 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
2657 InsertPointOrErrorTy GenResult =
2658 RI.DataPtrPtrGen(Builder.saveIP(),
2659 Builder.CreatePointerBitCastOrAddrSpaceCast(
2660 DestAlloca, Builder.getPtrTy(),
".ascast"),
2664 return GenResult.takeError();
2666 Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast(
2667 LocalStorage, Builder.getPtrTy(),
".ascast"),
2671 switch (RI.EvaluationKind) {
2672 case EvalKind::Scalar: {
2673 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2675 Builder.CreateStore(Elem, DestElementAddr);
2678 case EvalKind::Complex: {
2679 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2680 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2681 Value *SrcReal = Builder.CreateLoad(
2682 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2683 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2684 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2685 Value *SrcImg = Builder.CreateLoad(
2686 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2688 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2689 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2690 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2691 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2692 Builder.CreateStore(SrcReal, DestRealPtr);
2693 Builder.CreateStore(SrcImg, DestImgPtr);
2696 case EvalKind::Aggregate: {
2697 Value *SizeVal = Builder.getInt64(
2698 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2699 Builder.CreateMemCpy(
2700 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2701 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2713 if (UpdateDestListPtr) {
2714 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2715 DestElementAddr, Builder.getPtrTy(),
2716 DestElementAddr->
getName() +
".ascast");
2717 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
2727 InsertPointTy SavedIP = Builder.saveIP();
2730 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2734 "_omp_reduction_inter_warp_copy_func", &M);
2739 Builder.SetInsertPoint(EntryBB);
2757 "__openmp_nvptx_data_transfer_temporary_storage";
2758 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2759 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2761 if (!TransferMedium) {
2770 Value *GPUThreadID = getGPUThreadID();
2772 Value *LaneID = getNVPTXLaneID();
2774 Value *WarpID = getNVPTXWarpID();
2776 InsertPointTy AllocaIP =
2777 InsertPointTy(Builder.GetInsertBlock(),
2778 Builder.GetInsertBlock()->getFirstInsertionPt());
2781 Builder.restoreIP(AllocaIP);
2782 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2783 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2785 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2786 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2787 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2788 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2789 NumWarpsAlloca, Builder.getPtrTy(0),
2790 NumWarpsAlloca->
getName() +
".ascast");
2791 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2792 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2794 InsertPointTy CodeGenIP =
2796 Builder.restoreIP(CodeGenIP);
2799 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2801 for (
auto En :
enumerate(ReductionInfos)) {
2806 const ReductionInfo &RI = En.value();
2807 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
2808 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(
2809 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
2810 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2813 unsigned NumIters = RealTySize / TySize;
2816 Value *Cnt =
nullptr;
2817 Value *CntAddr =
nullptr;
2821 CodeGenIP = Builder.saveIP();
2822 Builder.restoreIP(AllocaIP);
2824 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2826 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2827 CntAddr->
getName() +
".ascast");
2828 Builder.restoreIP(CodeGenIP);
2835 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2836 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2838 Value *
Cmp = Builder.CreateICmpULT(
2839 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2840 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2841 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2845 InsertPointOrErrorTy BarrierIP1 =
2846 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2847 omp::Directive::OMPD_unknown,
2851 return BarrierIP1.takeError();
2857 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2858 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2859 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2862 auto *RedListArrayTy =
2864 Type *IndexTy = Builder.getIndexTy(
2865 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2867 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2868 {ConstantInt::get(IndexTy, 0),
2869 ConstantInt::get(IndexTy, En.index())});
2871 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2874 InsertPointOrErrorTy GenRes =
2875 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
2878 return GenRes.takeError();
2880 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
2884 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2888 Value *MediumPtr = Builder.CreateInBoundsGEP(
2889 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2892 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2894 Builder.CreateStore(Elem, MediumPtr,
2896 Builder.CreateBr(MergeBB);
2899 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2900 Builder.CreateBr(MergeBB);
2903 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2904 InsertPointOrErrorTy BarrierIP2 =
2905 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2906 omp::Directive::OMPD_unknown,
2910 return BarrierIP2.takeError();
2917 Value *NumWarpsVal =
2918 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2920 Value *IsActiveThread =
2921 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2922 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2924 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2928 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2929 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2931 Value *TargetElemPtrPtr =
2932 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2933 {ConstantInt::get(IndexTy, 0),
2934 ConstantInt::get(IndexTy, En.index())});
2935 Value *TargetElemPtrVal =
2936 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2937 Value *TargetElemPtr = TargetElemPtrVal;
2940 InsertPointOrErrorTy GenRes =
2941 RI.DataPtrPtrGen(Builder.saveIP(), TargetElemPtr, TargetElemPtr);
2944 return GenRes.takeError();
2946 TargetElemPtr = Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtr);
2951 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2954 Value *SrcMediumValue =
2955 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2956 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2957 Builder.CreateBr(W0MergeBB);
2959 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2960 Builder.CreateBr(W0MergeBB);
2962 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2965 Cnt = Builder.CreateNSWAdd(
2966 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2967 Builder.CreateStore(Cnt, CntAddr,
false);
2969 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2970 emitBranch(PrecondBB);
2971 emitBlock(ExitBB, CurFn);
2973 RealTySize %= TySize;
2977 Builder.CreateRetVoid();
2978 Builder.restoreIP(SavedIP);
2989 {Builder.getPtrTy(), Builder.getInt16Ty(),
2990 Builder.getInt16Ty(), Builder.getInt16Ty()},
2994 "_omp_reduction_shuffle_and_reduce_func", &M);
3004 Builder.SetInsertPoint(EntryBB);
3015 Type *ReduceListArgType = ReduceListArg->
getType();
3017 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
3018 Value *ReduceListAlloca = Builder.CreateAlloca(
3019 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3020 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3021 LaneIDArg->
getName() +
".addr");
3022 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
3023 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3024 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3025 AlgoVerArg->
getName() +
".addr");
3031 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
3032 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3034 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3035 ReduceListAlloca, ReduceListArgType,
3036 ReduceListAlloca->
getName() +
".ascast");
3037 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3038 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3039 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3040 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3041 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3042 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3043 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3044 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3045 RemoteReductionListAlloca, Builder.getPtrTy(),
3046 RemoteReductionListAlloca->
getName() +
".ascast");
3048 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3049 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3050 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3051 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3053 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3054 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3055 Value *RemoteLaneOffset =
3056 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3057 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3064 Error EmitRedLsCpRes = emitReductionListCopy(
3065 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
3066 ReduceList, RemoteListAddrCast, IsByRef,
3067 {RemoteLaneOffset,
nullptr,
nullptr});
3070 return EmitRedLsCpRes;
3093 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
3094 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3095 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3096 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
3097 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
3098 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
3099 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
3100 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
3101 Value *RemoteOffsetComp =
3102 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
3103 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3104 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3105 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3111 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3112 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3113 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3114 ReduceList, Builder.getPtrTy());
3115 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3116 RemoteListAddrCast, Builder.getPtrTy());
3117 createRuntimeFunctionCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3118 ->addFnAttr(Attribute::NoUnwind);
3119 Builder.CreateBr(MergeBB);
3121 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3122 Builder.CreateBr(MergeBB);
3124 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3128 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3129 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3130 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3135 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3137 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3139 EmitRedLsCpRes = emitReductionListCopy(
3140 AllocaIP, CopyAction::ThreadCopy, RedListArrayTy, ReductionInfos,
3141 RemoteListAddrCast, ReduceList, IsByRef);
3144 return EmitRedLsCpRes;
3146 Builder.CreateBr(CpyMergeBB);
3148 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3149 Builder.CreateBr(CpyMergeBB);
3151 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3153 Builder.CreateRetVoid();
// Emits "_omp_reduction_list_to_global_copy_func": copies each element of a
// thread-private reduction list into its slot of the team-reduction buffer
// (ReductionsBufferTy) at the given buffer index.
// NOTE(review): this chunk looks truncated by extraction — the fused original
// line numbers jump (e.g. 3161 -> 3164) and several referenced names
// (EntryBlock, BufferArg, IdxArg, ReduceListArg) have no visible declarations
// here; confirm against the full source before editing.
3158Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3160 AttributeList FuncAttrs) {
// Remember the caller's insertion point so it can be restored on exit.
3161 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
// Signature of the emitted helper: void(ptr buffer, i32 idx, ptr reduce_list).
3164 Builder.getVoidTy(),
3165 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3169 "_omp_reduction_list_to_global_copy_func", &M);
3176 Builder.SetInsertPoint(EntryBlock);
// Stack slots for the three incoming arguments, plus generic-address-space
// casts of each slot (GPU allocas may live in a non-default address space).
3185 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3186 BufferArg->
getName() +
".addr");
3187 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3189 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3190 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3191 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3192 BufferArgAlloca, Builder.getPtrTy(),
3193 BufferArgAlloca->
getName() +
".ascast");
3194 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3195 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3196 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3197 ReduceListArgAlloca, Builder.getPtrTy(),
3198 ReduceListArgAlloca->
getName() +
".ascast");
// Spill the incoming arguments into their slots.
3200 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3201 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3202 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
// Reload the argument values for use in address computations below.
3204 Value *LocalReduceList =
3205 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3206 Value *BufferArgVal =
3207 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3208 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3209 Type *IndexTy = Builder.getIndexTy(
3210 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Copy every reduction element from the private list into the corresponding
// field of the global buffer element selected by Idxs.
3211 for (
auto En :
enumerate(ReductionInfos)) {
3212 const ReductionInfo &RI = En.value();
3213 auto *RedListArrayTy =
// Fetch the i-th element pointer out of the reduce-list array.
3216 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3217 RedListArrayTy, LocalReduceList,
3218 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3220 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
// Address of the matching field inside the global reductions buffer.
3224 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3225 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3226 ReductionsBufferTy, BufferVD, 0, En.index());
// Copy strategy depends on how the element is evaluated.
3228 switch (RI.EvaluationKind) {
// Scalar: plain load/store.
3229 case EvalKind::Scalar: {
3230 Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3231 Builder.CreateStore(TargetElement, GlobVal);
// Complex: copy real and imaginary struct fields separately.
3234 case EvalKind::Complex: {
3235 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3236 RI.ElementType, ElemPtr, 0, 0,
".realp");
3237 Value *SrcReal = Builder.CreateLoad(
3238 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3239 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3240 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3241 Value *SrcImg = Builder.CreateLoad(
3242 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3244 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3245 RI.ElementType, GlobVal, 0, 0,
".realp");
3246 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3247 RI.ElementType, GlobVal, 0, 1,
".imagp");
3248 Builder.CreateStore(SrcReal, DestRealPtr);
3249 Builder.CreateStore(SrcImg, DestImgPtr);
// Aggregate: byte-wise memcpy of the element's store size.
3252 case EvalKind::Aggregate: {
3254 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3255 Builder.CreateMemCpy(
3256 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3257 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
// Finish the helper and restore the caller's insertion point.
3263 Builder.CreateRetVoid();
3264 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_list_to_global_reduce_func": builds a temporary
// reduce-list whose entries point into the global team-reduction buffer at
// the given index, then invokes ReduceFn(globalList, privateList) so the
// private values are reduced into the buffer.
// NOTE(review): extraction dropped several lines here (e.g. the Function
// creation call and the ReduceList declaration around 3343); names such as
// EntryBlock, BufferArg, IdxArg, ReduceListArg, ReduceFn are declared on the
// missing lines.
3268Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3270 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
// Save the caller's insertion point; restored before returning.
3271 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
// Helper signature: void(ptr buffer, i32 idx, ptr reduce_list).
3274 Builder.getVoidTy(),
3275 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3279 "_omp_reduction_list_to_global_reduce_func", &M);
3286 Builder.SetInsertPoint(EntryBlock);
// Stack slots for arguments plus a local reduce-list array.
3295 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3296 BufferArg->
getName() +
".addr");
3297 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3299 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3300 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3301 auto *RedListArrayTy =
// Local array that will hold pointers into the global buffer.
3306 Value *LocalReduceList =
3307 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
// Generic-address-space casts of every alloca.
3309 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3310 BufferArgAlloca, Builder.getPtrTy(),
3311 BufferArgAlloca->
getName() +
".ascast");
3312 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3313 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3314 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3315 ReduceListArgAlloca, Builder.getPtrTy(),
3316 ReduceListArgAlloca->
getName() +
".ascast");
3317 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3318 LocalReduceList, Builder.getPtrTy(),
3319 LocalReduceList->
getName() +
".ascast");
// Spill the incoming arguments.
3321 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3322 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3323 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3325 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3326 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3327 Type *IndexTy = Builder.getIndexTy(
3328 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Populate the local list: entry i points at field i of the selected
// global buffer element.
3329 for (
auto En :
enumerate(ReductionInfos)) {
3330 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3331 RedListArrayTy, LocalReduceListAddrCast,
3332 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3334 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3336 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3337 ReductionsBufferTy, BufferVD, 0, En.index());
3338 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Call the reduction function: reduce(globalList, privateList); the call is
// marked nounwind.
3343 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3344 createRuntimeFunctionCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3345 ->addFnAttr(Attribute::NoUnwind);
3346 Builder.CreateRetVoid();
3347 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_copy_func": the inverse of the
// list-to-global copy — copies each field of the team-reduction buffer
// element at the given index back into the thread-private reduction list.
// NOTE(review): text appears truncated by extraction; names like EntryBlock,
// BufferArg, IdxArg, ReduceListArg are declared on missing lines.
3351Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3353 AttributeList FuncAttrs) {
// Save the caller's insertion point; restored on exit.
3354 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
// Helper signature: void(ptr buffer, i32 idx, ptr reduce_list).
3357 Builder.getVoidTy(),
3358 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3362 "_omp_reduction_global_to_list_copy_func", &M);
3369 Builder.SetInsertPoint(EntryBlock);
// Argument spill slots with generic-address-space casts.
3378 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3379 BufferArg->
getName() +
".addr");
3380 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3382 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3383 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3384 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3385 BufferArgAlloca, Builder.getPtrTy(),
3386 BufferArgAlloca->
getName() +
".ascast");
3387 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3388 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3389 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3390 ReduceListArgAlloca, Builder.getPtrTy(),
3391 ReduceListArgAlloca->
getName() +
".ascast");
3392 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3393 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3394 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
// Reload the arguments for the copy loop.
3396 Value *LocalReduceList =
3397 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3398 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3399 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3400 Type *IndexTy = Builder.getIndexTy(
3401 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// For each reduction element, copy global buffer -> private list entry.
3402 for (
auto En :
enumerate(ReductionInfos)) {
3403 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3404 auto *RedListArrayTy =
3407 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3408 RedListArrayTy, LocalReduceList,
3409 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3411 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
// Source: field En.index() of the selected global buffer element.
3414 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3415 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3416 ReductionsBufferTy, BufferVD, 0, En.index());
// Copy strategy depends on the element's evaluation kind; note the
// direction is global -> private here (mirror of the copy-out helper).
3418 switch (RI.EvaluationKind) {
3419 case EvalKind::Scalar: {
3420 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
3421 Builder.CreateStore(TargetElement, ElemPtr);
// Complex: copy real/imaginary fields individually.
3424 case EvalKind::Complex: {
3425 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3426 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3427 Value *SrcReal = Builder.CreateLoad(
3428 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3429 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3430 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3431 Value *SrcImg = Builder.CreateLoad(
3432 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3434 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3435 RI.ElementType, ElemPtr, 0, 0,
".realp");
3436 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3437 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3438 Builder.CreateStore(SrcReal, DestRealPtr);
3439 Builder.CreateStore(SrcImg, DestImgPtr);
// Aggregate: byte-wise memcpy of the element's store size.
3442 case EvalKind::Aggregate: {
3444 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3445 Builder.CreateMemCpy(
3446 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3447 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// Finish and restore the caller's insertion point.
3454 Builder.CreateRetVoid();
3455 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_reduce_func": builds a temporary
// reduce-list pointing into the global team-reduction buffer at the given
// index, then calls ReduceFn(privateList, globalList) so the buffer's values
// are reduced into the private list.
// NOTE(review): extraction dropped lines here (e.g. RedListArrayTy and
// ReduceList declarations, the Function creation); names such as EntryBlock,
// BufferArg, IdxArg, ReduceListArg, ReduceFn come from the missing lines.
3459Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3461 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
// Save the caller's insertion point; restored before returning.
3462 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
// Helper signature: void(ptr buffer, i32 idx, ptr reduce_list).
3465 Builder.getVoidTy(),
3466 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3470 "_omp_reduction_global_to_list_reduce_func", &M);
3477 Builder.SetInsertPoint(EntryBlock);
// Argument spill slots plus a local array for the buffer-pointer list.
3486 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3487 BufferArg->
getName() +
".addr");
3488 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3490 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3491 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3497 Value *LocalReduceList =
3498 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
// Generic-address-space casts of the allocas.
3500 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3501 BufferArgAlloca, Builder.getPtrTy(),
3502 BufferArgAlloca->
getName() +
".ascast");
3503 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3504 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3505 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3506 ReduceListArgAlloca, Builder.getPtrTy(),
3507 ReduceListArgAlloca->
getName() +
".ascast");
3508 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3509 LocalReduceList, Builder.getPtrTy(),
3510 LocalReduceList->
getName() +
".ascast");
// Spill the incoming arguments.
3512 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3513 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3514 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3516 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3517 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3518 Type *IndexTy = Builder.getIndexTy(
3519 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Populate the local list with pointers into the global buffer element.
3520 for (
auto En :
enumerate(ReductionInfos)) {
3521 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3522 RedListArrayTy, ReductionList,
3523 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3526 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3527 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3528 ReductionsBufferTy, BufferVD, 0, En.index());
3529 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Call reduce(privateList, globalList) — note the argument order is the
// mirror of emitListToGlobalReduceFunction; the call is nounwind.
3534 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3535 createRuntimeFunctionCall(ReduceFn, {ReduceList, ReductionList})
3536 ->addFnAttr(Attribute::NoUnwind);
3537 Builder.CreateRetVoid();
3538 Builder.restoreIP(OldIP);
3542std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3543 std::string Suffix =
3544 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
3545 return (Name + Suffix).
str();
// Body of the reduction-function builder (the function header lines are
// missing from this extraction — presumably OpenMPIRBuilder's
// createReductionFunction, given the getReductionFuncName call below; confirm
// against the full source). It emits a function taking two pointers (LHS and
// RHS reduce-list arrays), walks every ReductionInfo, and applies each
// element's ReductionGen callback, storing the reduced value back through the
// LHS pointer for by-value elements.
3551 AttributeList FuncAttrs) {
// Emitted function signature uses two generic pointers (the two lists).
3553 {Builder.getPtrTy(), Builder.getPtrTy()},
3555 std::string
Name = getReductionFuncName(ReducerName);
3563 Builder.SetInsertPoint(EntryBB);
3567 Value *LHSArrayPtr =
nullptr;
3568 Value *RHSArrayPtr =
nullptr;
// Spill both arguments and address-space-cast the slots before reloading.
3575 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3577 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3578 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3579 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3580 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3581 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3582 Builder.CreateStore(Arg0, LHSAddrCast);
3583 Builder.CreateStore(Arg1, RHSAddrCast);
3584 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3585 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3588 Type *IndexTy = Builder.getIndexTy(
3589 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Per-element reduction: load the i-th pointer from each array, cast it to
// the variable's pointer type, then invoke the reduction callback.
3591 for (
auto En :
enumerate(ReductionInfos)) {
3592 const ReductionInfo &RI = En.value();
3593 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3594 RedArrayTy, RHSArrayPtr,
3595 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3596 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3597 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3598 RHSI8Ptr, RI.PrivateVariable->getType(),
3599 RHSI8Ptr->
getName() +
".ascast");
3601 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3602 RedArrayTy, LHSArrayPtr,
3603 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3604 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3605 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3606 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
// Clang-style callbacks are handled separately (fixup loop below).
3608 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
// By-value elements are loaded before the reduction and stored back after.
3615 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
3616 LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3617 RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3621 InsertPointOrErrorTy AfterIP =
3622 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
// Propagate callback failure to the caller.
3624 return AfterIP.takeError();
// A cleared insert block means codegen was abandoned; bail out early.
3625 if (!Builder.GetInsertBlock())
3626 return ReductionFunc;
3628 Builder.restoreIP(*AfterIP);
3630 if (!IsByRef.
empty() && !IsByRef[En.index()])
3631 Builder.CreateStore(Reduced, LHSPtr);
// For Clang callbacks, run a second pass that lets the frontend fix up the
// placeholder LHS/RHS pointers it recorded, rewriting their uses inside
// ReductionFunc.
3635 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3636 for (
auto En :
enumerate(ReductionInfos)) {
3637 unsigned Index = En.index();
3638 const ReductionInfo &RI = En.value();
3639 Value *LHSFixupPtr, *RHSFixupPtr;
3640 Builder.restoreIP(RI.ReductionGenClang(
3641 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
// Only uses inside ReductionFunc are replaced (the lambda filters on it).
3646 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3651 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3657 Builder.CreateRetVoid();
3658 return ReductionFunc;
// Sanity-check loop over ReductionInfos (the enclosing function's header is
// missing from this extraction). Each entry must carry a variable, its
// private copy, and at least one reduction-generator callback; variable and
// private copy must share a pointer type.
3664 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3666 assert(RI.Variable &&
"expected non-null variable");
3667 assert(RI.PrivateVariable &&
"expected non-null private variable");
// Either the generic or the Clang-style callback must be provided.
3668 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3669 "expected non-null reduction generator callback");
3672 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3673 "expected variables and their private equivalents to have the same "
3676 assert(RI.Variable->getType()->isPointerTy() &&
3677 "expected variables to be pointers");
// GPU (device) lowering of OpenMP reductions. Emits the shared reduction
// function plus the warp-copy / list-to-global / global-to-list helper
// functions, stores the private values into a reduce-list, and calls the
// appropriate device runtime entry point (__kmpc_nvptx_parallel_reduce_
// nowait_v2 for parallel reductions, __kmpc_nvptx_teams_reduce_nowait_v2 for
// teams reductions). Threads for which the runtime returns 1 then merge the
// reduced values back into the original variables.
// NOTE(review): many original lines are missing in this extraction (e.g. the
// declarations of RedArrayTy, FuncPtrTy, Res, ThenBB/ExitBB, ValueType).
3681OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3682 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3685 ReductionGenCBKind ReductionGenCBKind, std::optional<omp::GV> GridValue,
3686 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3687 if (!updateToLocation(
Loc))
3688 return InsertPointTy();
3689 Builder.restoreIP(CodeGenIP);
// Build a default source-location ident when none was supplied.
3696 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3697 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Nothing to do for an empty reduction clause.
3700 if (ReductionInfos.
size() == 0)
3701 return Builder.saveIP();
3704 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3710 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
// Attribute set shared by all emitted helper functions; OptimizeNone is
// stripped so they can be optimized.
3714 AttributeList FuncAttrs;
3715 AttrBuilder AttrBldr(Ctx);
3717 AttrBldr.addAttribute(Attr);
3718 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3719 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
// Emit the element-wise reduction function used by all helpers.
3721 CodeGenIP = Builder.saveIP();
3723 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
3724 ReductionGenCBKind, FuncAttrs);
3725 if (!ReductionResult)
3727 Function *ReductionFunc = *ReductionResult;
3728 Builder.restoreIP(CodeGenIP);
// Allow the caller to override the target grid configuration.
3731 if (GridValue.has_value())
3732 Config.setGridValue(GridValue.value())
3747 Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
// Allocate the reduce-list array in the alloca block, then return to the
// codegen position.
3749 CodeGenIP = Builder.saveIP();
3750 Builder.restoreIP(AllocaIP);
3751 Value *ReductionListAlloca =
3752 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3753 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3754 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3755 Builder.restoreIP(CodeGenIP);
3756 Type *IndexTy = Builder.getIndexTy(
3757 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Fill the reduce-list with (possibly loaded) private values.
3758 for (
auto En :
enumerate(ReductionInfos)) {
3759 const ReductionInfo &RI = En.value();
3760 Value *ElemPtr = Builder.CreateInBoundsGEP(
3761 RedArrayTy, ReductionList,
3762 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3764 Value *PrivateVar = RI.PrivateVariable;
3765 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3767 PrivateVar = Builder.CreateLoad(RI.ElementType, PrivateVar);
3770 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
3771 Builder.CreateStore(CastElem, ElemPtr);
// Emit the shuffle-and-reduce and inter-warp copy helpers.
3773 CodeGenIP = Builder.saveIP();
3775 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
3781 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
3785 Builder.restoreIP(CodeGenIP);
3787 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
// Compute the max element size (used to size the runtime data transfer)
// and collect the element types for the globalized-locals struct.
3789 unsigned MaxDataSize = 0;
3791 for (
auto En :
enumerate(ReductionInfos)) {
3792 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3793 if (
Size > MaxDataSize)
3795 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3797 Value *ReductionDataSize =
3798 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
// Parallel (non-teams) reduction: one runtime call with the shuffle and
// inter-warp copy helpers.
3799 if (!IsTeamsReduction) {
3800 Value *SarFuncCast =
3801 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
3803 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
3804 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3806 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3807 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3808 Res = createRuntimeFunctionCall(Pv2Ptr, Args);
// Teams reduction: additionally needs the fixed team-reduction buffer and
// the four list<->global helper functions emitted below.
3810 CodeGenIP = Builder.saveIP();
3812 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3813 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3814 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3815 Function *LtGCFunc = emitListToGlobalCopyFunction(
3816 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3817 Function *LtGRFunc = emitListToGlobalReduceFunction(
3818 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3819 Function *GtLCFunc = emitGlobalToListCopyFunction(
3820 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3821 Function *GtLRFunc = emitGlobalToListReduceFunction(
3822 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3823 Builder.restoreIP(CodeGenIP);
3825 Value *KernelTeamsReductionPtr = createRuntimeFunctionCall(
3826 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3828 Value *Args3[] = {SrcLocInfo,
3829 KernelTeamsReductionPtr,
3830 Builder.getInt32(ReductionBufNum),
3840 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3841 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3842 Res = createRuntimeFunctionCall(TeamsReduceFn, Args3);
// The runtime returns 1 for the thread that must perform the final merge;
// branch on that result.
3848 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3849 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3855 emitBlock(ThenBB, CurFunc);
// Final merge: combine each reduced private value into the original
// variable via the element's reduction callback.
3858 for (
auto En :
enumerate(ReductionInfos)) {
3859 const ReductionInfo &RI = En.value();
3861 Value *RedValue = RI.Variable;
3863 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
// Clang callbacks produce their own fixup pointers here.
3865 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3866 Value *LHSPtr, *RHSPtr;
3867 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3868 &LHSPtr, &RHSPtr, CurFunc));
// By-value elements are loaded before reduction and stored back after.
3881 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3882 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3883 "red.value." +
Twine(En.index()));
3885 Value *PrivateRedValue = Builder.CreateLoad(
3888 InsertPointOrErrorTy AfterIP =
3889 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3891 return AfterIP.takeError();
3892 Builder.restoreIP(*AfterIP);
3894 if (!IsByRef.
empty() && !IsByRef[En.index()])
3895 Builder.CreateStore(Reduced, RI.Variable);
3898 emitBlock(ExitBB, CurFunc);
3899 if (ContinuationBlock) {
3900 Builder.CreateBr(ContinuationBlock);
3901 Builder.SetInsertPoint(ContinuationBlock);
// Keep the emitted helpers alive via llvm.used.
3903 Config.setEmitLLVMUsed();
3905 return Builder.saveIP();
3914 ".omp.reduction.func", &M);
3924 Builder.SetInsertPoint(ReductionFuncBlock);
3925 Value *LHSArrayPtr =
nullptr;
3926 Value *RHSArrayPtr =
nullptr;
3937 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3939 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3940 Value *LHSAddrCast =
3941 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3942 Value *RHSAddrCast =
3943 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3944 Builder.CreateStore(Arg0, LHSAddrCast);
3945 Builder.CreateStore(Arg1, RHSAddrCast);
3946 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3947 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3949 LHSArrayPtr = ReductionFunc->
getArg(0);
3950 RHSArrayPtr = ReductionFunc->
getArg(1);
3953 unsigned NumReductions = ReductionInfos.
size();
3956 for (
auto En :
enumerate(ReductionInfos)) {
3957 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3958 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3959 RedArrayTy, LHSArrayPtr, 0, En.index());
3960 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3961 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3962 LHSI8Ptr, RI.Variable->
getType());
3963 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3964 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3965 RedArrayTy, RHSArrayPtr, 0, En.index());
3966 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3967 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3968 RHSI8Ptr, RI.PrivateVariable->
getType());
3969 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3971 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3972 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3974 return AfterIP.takeError();
3976 Builder.restoreIP(*AfterIP);
3978 if (!Builder.GetInsertBlock())
3982 if (!IsByRef[En.index()])
3983 Builder.CreateStore(Reduced, LHSPtr);
3985 Builder.CreateRetVoid();
// Host lowering of OpenMP reductions (delegates to createReductionsGPU when
// targeting the device). Packs the private variables into a "red.array",
// calls __kmpc_reduce[_nowait], and switches on its result: case 1 performs
// the non-atomic element-wise reduction (followed by __kmpc_end_reduce
// [_nowait]); case 2 performs atomic reductions when every element provides
// an AtomicReductionGen and none are by-ref.
// NOTE(review): several original lines are missing from this extraction
// (e.g. RedArrayTy / ReductionFunc / block declarations, the DL alias).
3989OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
3990 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3992 bool IsNoWait,
bool IsTeamsReduction) {
// Device path: hand off to the GPU-specific implementation.
3995 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
3996 IsByRef, IsNoWait, IsTeamsReduction);
4000 if (!updateToLocation(
Loc))
4001 return InsertPointTy();
// Nothing to do for an empty reduction clause.
4003 if (ReductionInfos.
size() == 0)
4004 return Builder.saveIP();
4013 unsigned NumReductions = ReductionInfos.
size();
// The red.array lives with the other allocas, before the terminator.
4015 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
4016 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4018 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
// Store each private variable's pointer into its red.array slot.
4020 for (
auto En :
enumerate(ReductionInfos)) {
4021 unsigned Index = En.index();
4022 const ReductionInfo &RI = En.value();
4023 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
4024 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4025 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
4030 Type *IndexTy = Builder.getIndexTy(
4031 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
4032 Function *
Func = Builder.GetInsertBlock()->getParent();
4035 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
// Atomic lowering is only possible if every element supplies an atomic
// reduction generator.
4036 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
4037 return RI.AtomicReductionGen;
// The ident advertises atomic-reduce support to the runtime when possible.
4039 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
4041 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4043 Value *ThreadId = getOrCreateThreadID(Ident);
4044 Constant *NumVariables = Builder.getInt32(NumReductions);
4046 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4047 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4049 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4050 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
4051 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4052 : RuntimeFunction::OMPRTL___kmpc_reduce);
4054 createRuntimeFunctionCall(ReduceFunc,
4055 {Ident, ThreadId, NumVariables, RedArraySize,
4056 RedArray, ReductionFunc, Lock},
// Dispatch on the runtime's answer: 1 -> non-atomic path, 2 -> atomic path,
// anything else -> continuation.
4067 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4068 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
4069 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
// Non-atomic path: reduce every element via its ReductionGen callback.
4074 Builder.SetInsertPoint(NonAtomicRedBlock);
4075 for (
auto En :
enumerate(ReductionInfos)) {
4076 const ReductionInfo &RI = En.value();
4080 Value *RedValue = RI.Variable;
// By-value elements are loaded before the reduction...
4081 if (!IsByRef[En.index()]) {
4082 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
4083 "red.value." +
Twine(En.index()));
4085 Value *PrivateRedValue =
4086 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
4087 "red.private.value." +
Twine(En.index()));
4089 InsertPointOrErrorTy AfterIP =
4090 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
4092 return AfterIP.takeError();
4093 Builder.restoreIP(*AfterIP);
// A cleared insert block signals abandoned codegen.
4095 if (!Builder.GetInsertBlock())
4096 return InsertPointTy();
// ...and stored back afterwards.
4098 if (!IsByRef[En.index()])
4099 Builder.CreateStore(Reduced, RI.Variable);
// Release the runtime lock taken by __kmpc_reduce[_nowait].
4101 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
4102 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4103 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4104 createRuntimeFunctionCall(EndReduceFunc, {Ident, ThreadId, Lock});
4105 Builder.CreateBr(ContinuationBlock);
// Atomic path: only taken when all elements have atomic generators and no
// element is by-ref; otherwise the block is unreachable.
4110 Builder.SetInsertPoint(AtomicRedBlock);
4111 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4112 for (
const ReductionInfo &RI : ReductionInfos) {
4113 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
4114 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
4116 return AfterIP.takeError();
4117 Builder.restoreIP(*AfterIP);
4118 if (!Builder.GetInsertBlock())
4119 return InsertPointTy();
4121 Builder.CreateBr(ContinuationBlock);
4123 Builder.CreateUnreachable();
4134 if (!Builder.GetInsertBlock())
4135 return InsertPointTy();
// Resume codegen at the continuation block.
4137 Builder.SetInsertPoint(ContinuationBlock);
4138 return Builder.saveIP();
// Generates an OpenMP 'master' region: wraps the body between
// __kmpc_master / __kmpc_end_master calls via EmitOMPInlinedRegion.
// NOTE(review): the declaration of Args (used by both runtime calls —
// presumably {Ident, ThreadId}) is on a line missing from this extraction.
4141OpenMPIRBuilder::InsertPointOrErrorTy
4142OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4143 BodyGenCallbackTy BodyGenCB,
4144 FinalizeCallbackTy FiniCB) {
4145 if (!updateToLocation(
Loc))
4148 Directive OMPD = Directive::OMPD_master;
// Source-location ident and current thread id for the runtime calls.
4150 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4151 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4152 Value *ThreadId = getOrCreateThreadID(Ident);
// Entry/exit runtime calls bracketing the inlined region.
4155 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4156 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4158 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4159 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
4161 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Generates an OpenMP 'masked' region: wraps the body between
// __kmpc_masked / __kmpc_end_masked calls via EmitOMPInlinedRegion. Unlike
// 'master', the entry call takes an extra argument (the filter) — its Args
// declaration is on a line missing from this extraction; only the two-element
// ArgsEnd for the exit call is visible.
4165OpenMPIRBuilder::InsertPointOrErrorTy
4166OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4167 BodyGenCallbackTy BodyGenCB,
4169 if (!updateToLocation(
Loc))
4172 Directive OMPD = Directive::OMPD_masked;
// Source-location ident and current thread id for the runtime calls.
4174 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4175 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4176 Value *ThreadId = getOrCreateThreadID(Ident);
// The end call only needs the ident and thread id.
4178 Value *ArgsEnd[] = {Ident, ThreadId};
4180 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4181 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4183 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4184 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, ArgsEnd);
4186 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// NOTE(review): orphan fragment of a function whose surrounding lines were
// lost in extraction; marks a runtime call as nounwind.
4196    Call->setDoesNotThrow();
// Emit the body-side IR for an OpenMP scan directive. On the first
// ("input") scan loop the private scan variables are copied into the
// per-iteration scan buffers; on the second ("scan") loop the buffered
// values are copied back. Finally a conditional branch selects the
// before/after-scan block depending on inclusivity.
// NOTE(review): lossy extraction — parameter list (ScanVars,
// ScanVarsType), `IV`/`CmpI` definitions, closing braces and some
// intermediate lines are missing from this view.
4208OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4209    const LocationDescription &
    Loc, InsertPointTy AllocaIP,
4211    bool IsInclusive, ScanInfo *ScanRedInfo) {
4212  if (ScanRedInfo->OMPFirstScanLoop) {
    // First pass: allocate the buffer pointers used by both passes.
4213    llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4214                                                    ScanVarsType, ScanRedInfo);
4218  if (!updateToLocation(
    Loc))
4223  if (ScanRedInfo->OMPFirstScanLoop) {
    // Store each private scan variable into its buffer slot at index IV.
4225    for (
    size_t i = 0; i < ScanVars.
    size(); i++) {
4226      Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4227      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4228      Type *DestTy = ScanVarsType[i];
4229      Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
    IV,
    "arrayOffset");
4230      Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4232      Builder.CreateStore(Src, Val);
4235  Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4236  emitBlock(ScanRedInfo->OMPScanDispatch,
4237            Builder.GetInsertBlock()->getParent());
4239  if (!ScanRedInfo->OMPFirstScanLoop) {
4240    IV = ScanRedInfo->IV;
    // Second pass: load the buffered values back into the scan variables.
4243    for (
    size_t i = 0; i < ScanVars.
    size(); i++) {
4244      Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4245      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4246      Type *DestTy = ScanVarsType[i];
4248          Builder.CreateInBoundsGEP(DestTy, Buff,
    IV,
    "arrayOffset");
4249      Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4250      Builder.CreateStore(Src, ScanVars[i]);
  // Branch order flips between the two passes depending on IsInclusive.
4256  if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4257    Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4258                         ScanRedInfo->OMPAfterScanBlock);
4260    Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4261                         ScanRedInfo->OMPBeforeScanBlock);
4263  emitBlock(ScanRedInfo->OMPAfterScanBlock,
4264            Builder.GetInsertBlock()->getParent());
4265  Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4266  return Builder.saveIP();
// Allocate, under a "masked" region, the heap buffers used by the scan
// directive: one pointer alloca ("vla") per scan variable, then a
// malloc of Span+1 elements per variable inside the masked body,
// followed by a barrier so all threads see the buffers.
// NOTE(review): extraction-garbled — parameter list, `AllocSpan`/
// `Allocsize`/`IntPtrTy`/`FiniCB`/`FilterVal` definitions and the
// final `return` are missing from this view.
4269Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4273  Builder.restoreIP(AllocaIP);
  // One pointer slot per scan variable, filled in by the masked region.
4275  for (
    size_t i = 0; i < ScanVars.
    size(); i++) {
4277        Builder.CreateAlloca(Builder.getPtrTy(),
    nullptr,
    "vla");
4278    (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
4282  auto BodyGenCB = [&](InsertPointTy AllocaIP,
4283                       InsertPointTy CodeGenIP) ->
    Error {
4284    Builder.restoreIP(CodeGenIP);
    // Buffers hold Span+1 elements (see the CreateAdd of 1 below).
4286        Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4287    for (
    size_t i = 0; i < ScanVars.
    size(); i++) {
4291      Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4292                                         AllocSpan,
    nullptr,
    "arr");
4293      Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
  // Allocation happens once, before the loops, in the scan-init block.
4301  Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4303  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4304      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4307    return AfterIP.takeError();
4308  Builder.restoreIP(*AfterIP);
4309  BasicBlock *InputBB = Builder.GetInsertBlock();
4311    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  // Barrier so every thread observes the freshly malloc'ed buffers.
4312  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4314    return AfterIP.takeError();
4315  Builder.restoreIP(*AfterIP);
// Finalization for the scan directive: under a masked region, copy the
// last buffered element (at index Span) of each reduction back into the
// original variable and free the heap buffer, then barrier.
// NOTE(review): lossy extraction — parameter list, `FiniCB`/`FilterVal`
// definitions and the final `return` are missing from this view.
4320Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4322  auto BodyGenCB = [&](InsertPointTy AllocaIP,
4323                       InsertPointTy CodeGenIP) ->
    Error {
4324    Builder.restoreIP(CodeGenIP);
4325    for (ReductionInfo RedInfo : ReductionInfos) {
4326      Value *PrivateVar = RedInfo.PrivateVariable;
4327      Value *OrigVar = RedInfo.Variable;
4328      Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4329      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4331      Type *SrcTy = RedInfo.ElementType;
      // The element at index Span holds the final reduced value.
4332      Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4334      Value *Src = Builder.CreateLoad(SrcTy, Val);
4336      Builder.CreateStore(Src, OrigVar);
      // Buffers were malloc'ed in emitScanBasedDirectiveDeclsIR.
4337      Builder.CreateFree(Buff);
  // Insert before the terminator if the finish block already has one.
4345  if (ScanRedInfo->OMPScanFinish->getTerminator())
4346    Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4348    Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4351  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4352      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4355    return AfterIP.takeError();
4356  Builder.restoreIP(*AfterIP);
4357  BasicBlock *InputBB = Builder.GetInsertBlock();
4359    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4360  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4362    return AfterIP.takeError();
4363  Builder.restoreIP(*AfterIP);
// Emit the cross-iteration combine phase of an OpenMP scan: inside a
// masked region, a doubly-nested loop performs an up-sweep over the
// scan buffers (outer loop over ~log2(Span) rounds via PHI `Counter`
// and stride `Pow2K`; inner loop combining element IV with element
// IV - Pow2K through RedInfo.ReductionGen). Ends with a barrier and
// the finalization pass.
// NOTE(review): lossy extraction — several declarations (InputBB,
// CurFn, LoopBB, InnerLoopBB, InnerExitBB, ExitBB, NMin1, LogVal,
// Result, Cmp, FiniCB, FilterVal) and closing braces are missing from
// this view; do not treat this fragment as compilable.
4367OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4368    const LocationDescription &
    Loc,
4370    ScanInfo *ScanRedInfo) {
4372  if (!updateToLocation(
    Loc))
4374  auto BodyGenCB = [&](InsertPointTy AllocaIP,
4375                       InsertPointTy CodeGenIP) ->
    Error {
4376    Builder.restoreIP(CodeGenIP);
4382        splitBB(Builder,
    false,
    "omp.outer.log.scan.exit");
4384        Builder.GetInsertBlock()->getModule(),
    // Number of outer rounds is derived from Span via a float log2
    // computation, then cast back to i32.
4388        Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4391        Builder.GetInsertBlock()->getModule(),
4394    LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4397        llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4398    Builder.SetInsertPoint(InputBB);
4399    Builder.CreateBr(LoopBB);
4400    emitBlock(LoopBB, CurFn);
4401    Builder.SetInsertPoint(LoopBB);
    // Outer-loop PHIs: round counter starts at 0, stride Pow2K at 1.
4403    PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4405    PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4406    Counter->
    addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4408    Pow2K->
    addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
    // Enter the inner loop only while indices >= current stride exist.
4416    llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4417    Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4418    emitBlock(InnerLoopBB, CurFn);
4419    Builder.SetInsertPoint(InnerLoopBB);
4420    PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4422    for (ReductionInfo RedInfo : ReductionInfos) {
4423      Value *ReductionVal = RedInfo.PrivateVariable;
4424      Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4425      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4426      Type *DestTy = RedInfo.ElementType;
4427      Value *
    IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4429          Builder.CreateInBoundsGEP(DestTy, Buff,
    IV,
    "arrayOffset");
      // Combine element IV with the element Pow2K slots earlier.
4430      Value *OffsetIval = Builder.CreateNUWSub(
    IV, Pow2K);
4432          Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
    "arrayOffset");
4433      Value *
    LHS = Builder.CreateLoad(DestTy, LHSPtr);
4434      Value *
    RHS = Builder.CreateLoad(DestTy, RHSPtr);
4436      InsertPointOrErrorTy AfterIP =
4437          RedInfo.ReductionGen(Builder.saveIP(),
    LHS,
    RHS, Result);
4439        return AfterIP.takeError();
4440      Builder.CreateStore(Result, LHSPtr);
    // Inner-loop step: IVal decreases toward Pow2K.
4443        IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4444    IVal->
    addIncoming(NextIVal, Builder.GetInsertBlock());
4445    CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4446    Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4447    emitBlock(InnerExitBB, CurFn);
4449        Counter, llvm::ConstantInt::get(Counter->
    getType(), 1));
    // Double the stride each round (nuw shift-left by one).
4452    llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
    "",
    true);
4453    Pow2K->
    addIncoming(NextPow2K, Builder.GetInsertBlock());
4455    Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4465  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4466      createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4469    return AfterIP.takeError();
4470  Builder.restoreIP(*AfterIP);
4471  AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4474    return AfterIP.takeError();
4475  Builder.restoreIP(*AfterIP);
4476  Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drive the two-pass lowering of a scan directive: run the caller's
// input-loop generator with OMPFirstScanLoop=true, then the scan-loop
// generator with OMPFirstScanLoop=false.
// NOTE(review): parameter list, error checks and `return` are missing
// from this extracted view.
4483Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4486    ScanInfo *ScanRedInfo) {
4494  ScanRedInfo->OMPFirstScanLoop =
    true;
4495  Error Err = InputLoopGen();
4505  ScanRedInfo->OMPFirstScanLoop =
    false;
4506  Error Err = ScanLoopGen(Builder.saveIP());
// Create the helper basic blocks used by the scan lowering (dispatch,
// after-scan, before-scan, loop-exit) in the current function.
// NOTE(review): the BasicBlock::Create calls' arguments are missing
// from this extracted view.
4513void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4514  Function *
    Fun = Builder.GetInsertBlock()->getParent();
4515  ScanRedInfo->OMPScanDispatch =
4517  ScanRedInfo->OMPAfterScanBlock =
4519  ScanRedInfo->OMPBeforeScanBlock =
4521  ScanRedInfo->OMPScanLoopExit =
// Build the CFG skeleton of a canonical loop: preheader -> header
// (with an i-var PHI starting at 0) -> cond (iv < TripCount) -> body
// -> latch (iv+1, nuw) -> back to header; exit -> after. Returns a
// CanonicalLoopInfo describing the blocks.
// NOTE(review): extraction-garbled — parameter list, block-creation
// statements and most CL field assignments are missing from this view.
4524CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4548  Builder.SetCurrentDebugLocation(
    DL);
4550  Builder.SetInsertPoint(Preheader);
4551  Builder.CreateBr(Header);
4553  Builder.SetInsertPoint(Header);
  // Induction variable PHI, seeded with 0 from the preheader.
4554  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
    "omp_" + Name +
    ".iv");
4555  IndVarPHI->
    addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4556  Builder.CreateBr(
    Cond);
4558  Builder.SetInsertPoint(
    Cond);
  // Loop condition: unsigned compare against the trip count.
4560      Builder.CreateICmpULT(IndVarPHI, TripCount,
    "omp_" + Name +
    ".cmp");
4561  Builder.CreateCondBr(Cmp, Body, Exit);
4563  Builder.SetInsertPoint(Body);
4564  Builder.CreateBr(Latch);
4566  Builder.SetInsertPoint(Latch);
4567  Value *
    Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4568                           "omp_" + Name +
    ".next",
    true);
4569  Builder.CreateBr(Header);
4572  Builder.SetInsertPoint(Exit);
4573  Builder.CreateBr(After);
  // CanonicalLoopInfo objects are owned by the builder's LoopInfos list.
4576  LoopInfos.emplace_front();
4577  CanonicalLoopInfo *CL = &LoopInfos.front();
4579  CL->Header = Header;
// Trip-count overload of createCanonicalLoop: create the loop skeleton,
// branch into it from the current insert point, and invoke the caller's
// BodyGenCB with the loop's body insert point and induction variable.
// NOTE(review): return type line, `BB`/`NextBB` declarations and the
// return statement are missing from this extracted view.
4591OpenMPIRBuilder::createCanonicalLoop(
    const LocationDescription &
    Loc,
4592                                     LoopBodyGenCallbackTy BodyGenCB,
4597  CanonicalLoopInfo *CL = createLoopSkeleton(
    Loc.DL, TripCount, BB->
    getParent(),
4598                                             NextBB, NextBB, Name);
  // Only wire the loop into the current position if Loc is usable.
4602  if (updateToLocation(
    Loc)) {
4606    spliceBB(Builder, After,
    false);
4607    Builder.CreateBr(CL->getPreheader());
4612  if (
    Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
// NOTE(review): fragment of a ScanInfo factory (enclosing signature lost
// in extraction); ScanInfo objects are owned by the builder's ScanInfos
// list, mirroring the LoopInfos pattern above.
4622  ScanInfos.emplace_front();
4623  ScanInfo *
    Result = &ScanInfos.front();
// Create the pair of canonical loops needed by a scan directive: an
// "input" loop and a "scan" loop over the same bounds. The shared
// BodyGen redirects the loop body through the scan dispatch/before/
// after blocks and finally delegates to the user BodyGenCB.
// NOTE(review): lossy extraction — the return type line, `Terminator`/
// `ContinueBlock`/`LoopInfo` declarations, error checks and the return
// are missing from this view.
4628OpenMPIRBuilder::createCanonicalScanLoops(
4629    const LocationDescription &
    Loc, LoopBodyGenCallbackTy BodyGenCB,
4630    Value *Start,
    Value *Stop,
    Value *Step,
    bool IsSigned,
    bool InclusiveStop,
4631    InsertPointTy ComputeIP,
    const Twine &Name, ScanInfo *ScanRedInfo) {
4632  LocationDescription ComputeLoc =
4633      ComputeIP.isSet() ? LocationDescription(ComputeIP,
    Loc.DL) :
    Loc;
4634  updateToLocation(ComputeLoc);
  // Span (= trip count) sizes the scan buffers allocated later.
4638  Value *TripCount = calculateCanonicalLoopTripCount(
4639      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4640  ScanRedInfo->Span = TripCount;
4641  ScanRedInfo->OMPScanInit = splitBB(Builder,
    true,
    "scan.init");
4642  Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4644  auto BodyGen = [=](InsertPointTy CodeGenIP,
    Value *
    IV) {
4645    Builder.restoreIP(CodeGenIP);
4646    ScanRedInfo->IV =
    IV;
4647    createScanBBs(ScanRedInfo);
4648    BasicBlock *InputBlock = Builder.GetInsertBlock();
    // Reroute the loop body through the scan dispatch machinery.
4652    Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4653    emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4654              Builder.GetInsertBlock()->getParent());
4655    Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4656    emitBlock(ScanRedInfo->OMPScanLoopExit,
4657              Builder.GetInsertBlock()->getParent());
4658    Builder.CreateBr(ContinueBlock);
4659    Builder.SetInsertPoint(
4660        ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4661    return BodyGenCB(Builder.saveIP(),
    IV);
  // First loop: the "input" phase of the scan.
4664  const auto &&InputLoopGen = [&]() ->
    Error {
4666        Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4667        ComputeIP, Name,
    true, ScanRedInfo);
4671    Builder.restoreIP((*LoopInfo)->getAfterIP());
  // Second loop: the "scan" phase, generated at the given location.
4674  const auto &&ScanLoopGen = [&](LocationDescription
    Loc) ->
    Error {
4676        createCanonicalLoop(
    Loc, BodyGen, Start, Stop, Step, IsSigned,
4677                            InclusiveStop, ComputeIP, Name,
    true, ScanRedInfo);
4681    Builder.restoreIP((*LoopInfo)->getAfterIP());
4682    ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4685  Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
// Compute the trip count of a loop described by (Start, Stop, Step,
// IsSigned, InclusiveStop). Signed loops normalize the direction via a
// select on Step's sign; the final select returns 0 when the range is
// empty.
// NOTE(review): extraction-garbled — `Start`/`Stop`/`Step`/`Loc`
// parameters, `Zero`/`One`/`Incr`/`Span`/`ZeroCmp`/`OneCmp`
// declarations and the signed/unsigned branching are partially missing
// from this view.
4691Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4693    bool IsSigned,
    bool InclusiveStop,
    const Twine &Name) {
4703  assert(IndVarTy == Stop->
    getType() &&
    "Stop type mismatch");
4704  assert(IndVarTy == Step->
    getType() &&
    "Step type mismatch");
4706  updateToLocation(
    Loc);
  // Signed case: normalize to a non-negative increment and ordered bounds.
4723    Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4724    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4725    Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4726    Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4727    Span = Builder.CreateSub(UB, LB,
    "",
    false,
    true);
4728    ZeroCmp = Builder.CreateICmp(
  // Unsigned case: plain nuw subtraction.
4731    Span = Builder.CreateSub(Stop, Start,
    "",
    true);
4732    ZeroCmp = Builder.CreateICmp(
4736  Value *CountIfLooping;
4737  if (InclusiveStop) {
4738    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
    // Exclusive stop: ceil-divide the (non-empty) span by the increment.
4741    Value *CountIfTwo = Builder.CreateAdd(
4742        Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4744    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
4747  return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4748                              "omp_" + Name +
    ".tripcount");
// Bounds overload of createCanonicalLoop: derive the trip count from
// (Start, Stop, Step), then build a zero-based canonical loop whose
// body maps the logical IV back to the user index (IV*Step + Start).
// NOTE(review): the return-type/name line of this overload and parts of
// the LoopLoc ternary are missing from this extracted view.
4752    const LocationDescription &
    Loc, LoopBodyGenCallbackTy BodyGenCB,
4753    Value *Start,
    Value *Stop,
    Value *Step,
    bool IsSigned,
    bool InclusiveStop,
4754    InsertPointTy ComputeIP,
    const Twine &Name,
    bool InScan,
4755    ScanInfo *ScanRedInfo) {
4756  LocationDescription ComputeLoc =
4757      ComputeIP.isSet() ? LocationDescription(ComputeIP,
    Loc.DL) :
    Loc;
4759  Value *TripCount = calculateCanonicalLoopTripCount(
4760      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4762  auto BodyGen = [=](InsertPointTy CodeGenIP,
    Value *
    IV) {
4763    Builder.restoreIP(CodeGenIP);
    // Map the zero-based IV onto the user's iteration space.
4764    Value *Span = Builder.CreateMul(
    IV, Step);
4765    Value *IndVar = Builder.CreateAdd(Span, Start);
4767      ScanRedInfo->IV = IndVar;
4768    return BodyGenCB(Builder.saveIP(), IndVar);
4770  LocationDescription LoopLoc =
4773          : LocationDescription(Builder.saveIP(),
4774                                Builder.getCurrentDebugLocation());
4775  return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
// NOTE(review): fragment — selects the 32- vs 64-bit unsigned
// __kmpc_dist_for_static_init runtime entry by the IV's bit width; the
// function header and the bit-width branch lines are missing.
4784                                            OpenMPIRBuilder &OMPBuilder) {
4785  unsigned Bitwidth = Ty->getIntegerBitWidth();
4787    return OMPBuilder.getOrCreateRuntimeFunction(
4788        M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4790    return OMPBuilder.getOrCreateRuntimeFunction(
4791        M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
// NOTE(review): fragment — same selection as above but for the plain
// __kmpc_for_static_init entry points; header lines missing.
4800                                            OpenMPIRBuilder &OMPBuilder) {
4801  unsigned Bitwidth = Ty->getIntegerBitWidth();
4803    return OMPBuilder.getOrCreateRuntimeFunction(
4804        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4806    return OMPBuilder.getOrCreateRuntimeFunction(
4807        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
// Lower a canonical loop to an unchunked static worksharing loop:
// allocate p.lastiter/p.lowerbound/p.upperbound/p.stride, call the
// __kmpc_*_static_init runtime (twice when a dist_schedule is combined
// with a for-loop), rewrite the trip count from the returned bounds,
// shift the IV by the lower bound, call __kmpc_for_static_fini in the
// exit block, and optionally emit a barrier.
// NOTE(review): lossy extraction — `IV`, `Zero`, `SchedType`,
// `SchedulingType`, the Args SmallVector, HasDistSchedule parameter,
// the IV-update callback header and the final return are missing from
// this view; treat statement order as authoritative only where shown.
4811OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4812    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4815  assert(CLI->isValid() &&
    "Requires a valid canonical loop");
4817         "Require dedicated allocate IP");
4820  Builder.restoreIP(CLI->getPreheaderIP());
4821  Builder.SetCurrentDebugLocation(
    DL);
4824  Constant *SrcLocStr = getOrCreateSrcLocStr(
    DL, SrcLocStrSize);
4825  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4829  Type *IVTy =
    IV->getType();
  // Distribute-for loops use the dist variant of the init runtime.
4831      LoopType == WorksharingLoopType::DistributeForStaticLoop
4835      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4838  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
  // Out-params for the static-init runtime call.
4841  Value *PLastIter = Builder.CreateAlloca(I32Type,
    nullptr,
    "p.lastiter");
4842  Value *PLowerBound = Builder.CreateAlloca(IVTy,
    nullptr,
    "p.lowerbound");
4843  Value *PUpperBound = Builder.CreateAlloca(IVTy,
    nullptr,
    "p.upperbound");
4844  Value *PStride = Builder.CreateAlloca(IVTy,
    nullptr,
    "p.stride");
4845  CLI->setLastIter(PLastIter);
4851  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4853  Constant *One = ConstantInt::get(IVTy, 1);
  // Seed bounds: [0, TripCount-1], stride 1 (canonical-loop convention).
4854  Builder.CreateStore(Zero, PLowerBound);
4855  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4856  Builder.CreateStore(UpperBound, PUpperBound);
4857  Builder.CreateStore(One, PStride);
4859  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4862      (LoopType == WorksharingLoopType::DistributeStaticLoop)
4863          ? OMPScheduleType::OrderedDistribute
4866      ConstantInt::get(I32Type,
    static_cast<int>(SchedType));
  // Helper that issues one __kmpc_*_static_init call; dist-for adds an
  // extra p.distupperbound out-param.
4870  auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
4871                        PUpperBound, IVTy, PStride, One,
    Zero, StaticInit,
4872                        this](
    Value *SchedulingType,
    auto &Builder) {
4874                                  PLowerBound, PUpperBound});
4875    if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4876      Value *PDistUpperBound =
4877          Builder.CreateAlloca(IVTy,
    nullptr,
    "p.distupperbound");
4878      Args.push_back(PDistUpperBound);
4881    createRuntimeFunctionCall(StaticInit, Args);
4883  BuildInitCall(SchedulingType, Builder);
  // Combined distribute + for: issue a second init with the
  // dist_schedule type.
4884  if (HasDistSchedule &&
4885      LoopType != WorksharingLoopType::DistributeStaticLoop) {
4886    Constant *DistScheduleSchedType = ConstantInt::get(
4891    BuildInitCall(DistScheduleSchedType, Builder);
  // Recompute the per-thread trip count from the runtime-adjusted bounds.
4893  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4894  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4895  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4896  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4897  CLI->setTripCount(TripCount);
  // Shift the zero-based IV by the thread's lower bound.
4904    Builder.SetInsertPoint(CLI->getBody(),
4905                           CLI->getBody()->getFirstInsertionPt());
4906    Builder.SetCurrentDebugLocation(
    DL);
4907    return Builder.CreateAdd(OldIV, LowerBound);
4911  Builder.SetInsertPoint(CLI->getExit(),
4912                         CLI->getExit()->getTerminator()->getIterator());
4913  createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
4917    InsertPointOrErrorTy BarrierIP =
4918        createBarrier(LocationDescription(Builder.saveIP(),
    DL),
4919                      omp::Directive::OMPD_for,
    false,
4922      return BarrierIP.takeError();
4925  InsertPointTy AfterIP = CLI->getAfterIP();
// NOTE(review): two orphan fragments whose enclosing functions were lost
// in extraction — a reachability collection over loop blocks (skipping
// cond/header) and the creation of llvm.loop.parallel_accesses metadata.
4947    if (
    Block == CLI->getCond() ||
    Block == CLI->getHeader())
4949    Reachable.insert(
    Block);
4959      Ctx, {
    MDString::get(Ctx,
    "llvm.loop.parallel_accesses"), AccessGroup}));
// Lower a canonical loop to a statically chunked worksharing loop:
// widen the IV to a 32/64-bit internal type, call the chunked
// __kmpc_for_static_init (plus a second init for a combined
// dist_schedule), then wrap the original loop in an outer dispatch
// loop that walks chunk start positions and clamps the last chunk's
// trip count. Ends with __kmpc_for_static_fini and an optional barrier.
// NOTE(review): lossy extraction — `IV`, `Zero`, I32Type, SchedType /
// DistScheduleSchedType computation, the dispatch-loop lambda tail,
// several closing braces and the final return are missing from this
// view; do not infer ordering for the absent statements.
4962OpenMPIRBuilder::InsertPointOrErrorTy
4963OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
4964    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4967  assert(CLI->isValid() &&
    "Requires a valid canonical loop");
  // NOTE(review): `||` binds looser than `&&` — as written this asserts
  // ChunkSize || (DistScheduleChunkSize && "...") which only fires when
  // both are null; likely intended ((ChunkSize || DistScheduleChunkSize)
  // && "..."). Verify against upstream before changing.
4968  assert(ChunkSize || DistScheduleChunkSize &&
    "Chunk size is required");
4970  LLVMContext &Ctx = CLI->getFunction()->getContext();
4972  Value *OrigTripCount = CLI->getTripCount();
4973  Type *IVTy =
    IV->getType();
4975         "Max supported tripcount bitwidth is 64 bits");
4977                            :
    Type::getInt64Ty(Ctx);
4980  Constant *One = ConstantInt::get(InternalIVTy, 1);
4990  if (ChunkSize || DistScheduleChunkSize)
4998      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
  // Out-params for the runtime, in the dedicated alloca block.
5001  Builder.restoreIP(AllocaIP);
5002  Builder.SetCurrentDebugLocation(
    DL);
5003  Value *PLastIter = Builder.CreateAlloca(I32Type,
    nullptr,
    "p.lastiter");
5004  Value *PLowerBound =
5005      Builder.CreateAlloca(InternalIVTy,
    nullptr,
    "p.lowerbound");
5006  Value *PUpperBound =
5007      Builder.CreateAlloca(InternalIVTy,
    nullptr,
    "p.upperbound");
5008  Value *PStride = Builder.CreateAlloca(InternalIVTy,
    nullptr,
    "p.stride");
5009  CLI->setLastIter(PLastIter);
5012  Builder.restoreIP(CLI->getPreheaderIP());
5013  Builder.SetCurrentDebugLocation(
    DL);
  // Normalize chunk sizes and trip count to the internal IV width.
5016  Value *CastedChunkSize = Builder.CreateZExtOrTrunc(
5017      ChunkSize ? ChunkSize : Zero, InternalIVTy,
    "chunksize");
5018  Value *CastedDistScheduleChunkSize = Builder.CreateZExtOrTrunc(
5019      DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5020      "distschedulechunksize");
5021  Value *CastedTripCount =
5022      Builder.CreateZExt(OrigTripCount, InternalIVTy,
    "tripcount");
5025      ConstantInt::get(I32Type,
    static_cast<int>(SchedType));
5027      ConstantInt::get(I32Type,
    static_cast<int>(DistScheduleSchedType));
5028  Builder.CreateStore(Zero, PLowerBound);
5029  Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
5030  Builder.CreateStore(OrigUpperBound, PUpperBound);
5031  Builder.CreateStore(One, PStride);
5036  Constant *SrcLocStr = getOrCreateSrcLocStr(
    DL, SrcLocStrSize);
5037  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5038  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
  // Shared helper: one chunked static-init runtime call.
5039  auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5040                        PUpperBound, PStride, One,
5041                        this](
    Value *SchedulingType,
    Value *ChunkSize,
5043    createRuntimeFunctionCall(
5044        StaticInit, {SrcLoc, ThreadNum,
5045                     SchedulingType, PLastIter,
5046                     PLowerBound, PUpperBound,
5050  BuildInitCall(SchedulingType, CastedChunkSize, Builder);
  // Second init for combined dist_schedule (skip pure distribute).
5051  if (DistScheduleSchedType != OMPScheduleType::None &&
5052      SchedType != OMPScheduleType::OrderedDistributeChunked &&
5053      SchedType != OMPScheduleType::OrderedDistribute) {
5057    BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize, Builder);
  // First chunk's bounds determine the chunk range and dispatch stride.
5061  Value *FirstChunkStart =
5062      Builder.CreateLoad(InternalIVTy, PLowerBound,
    "omp_firstchunk.lb");
5063  Value *FirstChunkStop =
5064      Builder.CreateLoad(InternalIVTy, PUpperBound,
    "omp_firstchunk.ub");
5065  Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
5067      Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
    "omp_chunk.range");
5068  Value *NextChunkStride =
5069      Builder.CreateLoad(InternalIVTy, PStride,
    "omp_dispatch.stride");
5072  BasicBlock *DispatchEnter = splitBB(Builder,
    true);
5073  Value *DispatchCounter;
  // Outer dispatch loop iterating over the chunk start positions.
5078  CanonicalLoopInfo *DispatchCLI =
    cantFail(createCanonicalLoop(
5079      {Builder.saveIP(),
    DL},
5080      [&](InsertPointTy BodyIP,
    Value *Counter) {
5081        DispatchCounter = Counter;
5084      FirstChunkStart, CastedTripCount, NextChunkStride,
  // Remember the dispatch loop's blocks, then dissolve its CLI so only
  // the inner CLI remains the canonical handle.
5090  BasicBlock *DispatchBody = DispatchCLI->getBody();
5091  BasicBlock *DispatchLatch = DispatchCLI->getLatch();
5092  BasicBlock *DispatchExit = DispatchCLI->getExit();
5093  BasicBlock *DispatchAfter = DispatchCLI->getAfter();
5094  DispatchCLI->invalidate();
5102  Builder.restoreIP(CLI->getPreheaderIP());
5103  Builder.SetCurrentDebugLocation(
    DL);
  // Clamp the last chunk so it does not run past the original trip count.
5106  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
5107  Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
5108  Value *IsLastChunk =
5109      Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
    "omp_chunk.is_last");
5110  Value *CountUntilOrigTripCount =
5111      Builder.CreateSub(CastedTripCount, DispatchCounter);
5112  Value *ChunkTripCount = Builder.CreateSelect(
5113      IsLastChunk, CountUntilOrigTripCount, ChunkRange,
    "omp_chunk.tripcount");
5114  Value *BackcastedChunkTC =
5115      Builder.CreateTrunc(ChunkTripCount, IVTy,
    "omp_chunk.tripcount.trunc");
5116  CLI->setTripCount(BackcastedChunkTC);
  // Inner IV = chunk-local IV + chunk start (narrowed back to IVTy).
5121  Value *BackcastedDispatchCounter =
5122      Builder.CreateTrunc(DispatchCounter, IVTy,
    "omp_dispatch.iv.trunc");
5124    Builder.restoreIP(CLI->getBodyIP());
5125    return Builder.CreateAdd(
    IV, BackcastedDispatchCounter);
5130  createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
5134    InsertPointOrErrorTy AfterIP =
5135        createBarrier(LocationDescription(Builder.saveIP(),
    DL), OMPD_for,
5138      return AfterIP.takeError();
// NOTE(review): fragment — maps (WorksharingLoopType, IV bit width) to
// the matching __kmpc_*_static_loop_{4u,8u} runtime entry; the function
// header, switch keyword and bit-width conditionals are missing from
// this extracted view.
5156  unsigned Bitwidth = Ty->getIntegerBitWidth();
5157  Module &M = OMPBuilder->M;
5159  case WorksharingLoopType::ForStaticLoop:
5161      return OMPBuilder->getOrCreateRuntimeFunction(
5162          M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5164      return OMPBuilder->getOrCreateRuntimeFunction(
5165          M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5167  case WorksharingLoopType::DistributeStaticLoop:
5169      return OMPBuilder->getOrCreateRuntimeFunction(
5170          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5172      return OMPBuilder->getOrCreateRuntimeFunction(
5173          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5175  case WorksharingLoopType::DistributeForStaticLoop:
5177      return OMPBuilder->getOrCreateRuntimeFunction(
5178          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5180      return OMPBuilder->getOrCreateRuntimeFunction(
5181          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5184  if (Bitwidth != 32 && Bitwidth != 64) {
// NOTE(review): fragment — builds the argument list for the device-side
// static-loop runtime call. DistributeStaticLoop gets {0, 0} trailing
// args; the other variants append omp_get_num_threads(), a zero, and a
// NoLoop/zero i8 flag. The function header, `RealArgs` declaration and
// `RTLFn`/`TripCountTy`/`InsertBlock` definitions are missing from this
// extracted view.
5196                                      Function &LoopBodyFn,
    bool NoLoop) {
5198  Module &M = OMPBuilder->M;
5207  if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5208    RealArgs.
    push_back(ConstantInt::get(TripCountTy, 0));
5209    RealArgs.
    push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5210    Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
    end())});
5211    OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
  // Non-distribute variants pass the current number of threads.
5214  FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5215      M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5216  Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
    end())});
5217  Value *NumThreads = OMPBuilder->createRuntimeFunctionCall(RTLNumThreads, {});
5220      Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
    "num.threads.cast"));
5221  RealArgs.
    push_back(ConstantInt::get(TripCountTy, 0));
5222  if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5223    RealArgs.
    push_back(ConstantInt::get(TripCountTy, 0));
    // NoLoop flag only applies to the distribute-for variant.
5224    RealArgs.
    push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5226    RealArgs.
    push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5229  OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
// NOTE(review): fragment — post-outlining callback for target-device
// worksharing loops: splices the loop body into the preheader, replaces
// the loop CFG with a direct branch to the exit, erases the dead loop
// blocks, validates the single outlined-function call, emits the
// runtime workshare call, and deletes leftover instructions. Several
// declarations (RegionBlockSet, BlocksToBeRemoved, OutlinedFn,
// LoopBodyArg, ToBeDeleted) are missing from this extracted view.
5233    OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
    Value *Ident,
5238  Value *TripCount = CLI->getTripCount();
  // Move the body instructions (sans terminator) into the preheader.
5244  Preheader->
    splice(std::prev(Preheader->
    end()), CLI->getBody(),
5245                    CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5250  Builder.restoreIP({Preheader, Preheader->
    end()});
5253  Builder.CreateBr(CLI->getExit());
  // Collect the now-dead loop blocks for removal.
5256  OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5259  CleanUpInfo.EntryBB = CLI->getHeader();
5260  CleanUpInfo.ExitBB = CLI->getExit();
5261  CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5269         "Expected unique undroppable user of outlined function");
5271  assert(OutlinedFnCallInstruction &&
    "Expected outlined function call");
5273         "Expected outlined function call to be located in loop preheader");
5275  if (OutlinedFnCallInstruction->
    arg_size() > 1)
5282                               LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5284  for (
    auto &ToBeDeletedItem : ToBeDeleted)
5285    ToBeDeletedItem->eraseFromParent();
// Device-side worksharing lowering: set up an OutlineInfo covering the
// loop body (entry = body, exit = a new "omp.prelatch" split of the
// latch), introduce a fresh loop-counter alloca/load that replaces the
// IV inside the outlined region, and register a post-outline callback
// that emits the runtime call. Returns the loop's after-IP.
// NOTE(review): lossy extraction — the OutlineInfo declaration, the
// blocks vector, CodeExtractor setup, the IV user-collection loop
// header and the PostOutlineCB body are missing from this view.
5289OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5290    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5293  Constant *SrcLocStr = getOrCreateSrcLocStr(
    DL, SrcLocStrSize);
5294  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5297    OI.OuterAllocaBB = CLI->getPreheader();
5303    OI.OuterAllocaBB = AllocaIP.getBlock();
  // Outline the region from the body up to a pre-latch split point.
5306  OI.EntryBB = CLI->getBody();
5307  OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5308                                               "omp.prelatch",
    true);
5311  Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
  // The outlined body receives the IV via this alloca/load pair rather
  // than the loop PHI.
5315  AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
    "");
5317      Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5328  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5339                       CLI->getPreheader(),
5348  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5354                                CLI->getIndVar()->user_end());
  // Rewrite IV uses inside the outlined region to the new load.
5357    if (ParallelRegionBlockSet.
    count(Inst->getParent())) {
5358      Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5364  OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5371  OI.PostOutlineCB = [=, ToBeDeletedVec =
5372                          std::move(ToBeDeleted)](
    Function &OutlinedFn) {
5376  addOutlineInfo(std::move(OI));
5377  return CLI->getAfterIP();
// NOTE(review): fragmentary view — interior lines elided (fused numbers jump,
// e.g. 5388 -> 5390, 5414 -> 5417); the computation of EffectiveScheduleType,
// HasDistSchedule, and the switch's default case are not visible here.
// Purpose (from visible code): dispatch a canonical workshare loop to the
// static / static-chunked / dynamic lowering based on the effective OpenMP
// schedule type; target devices short-circuit to applyWorkshareLoopTarget.
5380OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
 5381 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
 5382 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
 5383 bool HasSimdModifier,
bool HasMonotonicModifier,
 5384 bool HasNonmonotonicModifier,
bool HasOrderedClause,
 5386 Value *DistScheduleChunkSize) {
// Device compilation uses the outlining-based lowering instead.
 5387 if (Config.isTargetDevice())
 5388 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
 5390 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
 5391 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
// Ordered modifier forces the dynamic dispatch path below.
 5393 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
 5394 OMPScheduleType::ModifierOrdered;
 5396 if (HasDistSchedule) {
 5397 DistScheduleSchedType = DistScheduleChunkSize
 5398 ? OMPScheduleType::OrderedDistributeChunked
 5399 : OMPScheduleType::OrderedDistribute;
// Dispatch on the base schedule (modifier bits masked off).
 5401 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
 5402 case OMPScheduleType::BaseStatic:
 5403 case OMPScheduleType::BaseDistribute:
// NOTE(review): `&&` binds tighter than `||`, so this parses as
// `!ChunkSize || (!DistScheduleChunkSize && "...")`. Because the string
// literal is non-null the runtime check degenerates to
// `!ChunkSize || !DistScheduleChunkSize`; if the intent is to forbid BOTH
// chunk sizes, the condition should be parenthesized as
// `(!ChunkSize && !DistScheduleChunkSize) && "..."` — confirm upstream.
 5404 assert(!ChunkSize || !DistScheduleChunkSize &&
 5405 "No chunk size with static-chunked schedule");
 5406 if (IsOrdered && !HasDistSchedule)
 5407 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
 5408 NeedsBarrier, ChunkSize);
 5410 if (DistScheduleChunkSize)
 5411 return applyStaticChunkedWorkshareLoop(
 5412 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
 5413 DistScheduleChunkSize, DistScheduleSchedType);
 5414 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
 5417 case OMPScheduleType::BaseStaticChunked:
 5418 case OMPScheduleType::BaseDistributeChunked:
 5419 if (IsOrdered && !HasDistSchedule)
 5420 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
 5421 NeedsBarrier, ChunkSize);
 5423 return applyStaticChunkedWorkshareLoop(
 5424 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
 5425 DistScheduleChunkSize, DistScheduleSchedType);
// These schedules take no user chunk size (assert text visible at 5435;
// the assert's condition line is elided in this extraction).
 5427 case OMPScheduleType::BaseRuntime:
 5428 case OMPScheduleType::BaseAuto:
 5429 case OMPScheduleType::BaseGreedy:
 5430 case OMPScheduleType::BaseBalanced:
 5431 case OMPScheduleType::BaseSteal:
 5432 case OMPScheduleType::BaseGuidedSimd:
 5433 case OMPScheduleType::BaseRuntimeSimd:
 5435 "schedule type does not support user-defined chunk sizes");
// All remaining chunked schedules lower through the dynamic dispatch runtime.
 5437 case OMPScheduleType::BaseDynamicChunked:
 5438 case OMPScheduleType::BaseGuidedChunked:
 5439 case OMPScheduleType::BaseGuidedIterativeChunked:
 5440 case OMPScheduleType::BaseGuidedAnalyticalChunked:
 5441 case OMPScheduleType::BaseStaticBalancedChunked:
 5442 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
 5443 NeedsBarrier, ChunkSize);
5456 unsigned Bitwidth = Ty->getIntegerBitWidth();
5458 return OMPBuilder.getOrCreateRuntimeFunction(
5459 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5461 return OMPBuilder.getOrCreateRuntimeFunction(
5462 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5472 unsigned Bitwidth = Ty->getIntegerBitWidth();
5474 return OMPBuilder.getOrCreateRuntimeFunction(
5475 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5477 return OMPBuilder.getOrCreateRuntimeFunction(
5478 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5487 unsigned Bitwidth = Ty->getIntegerBitWidth();
5489 return OMPBuilder.getOrCreateRuntimeFunction(
5490 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5492 return OMPBuilder.getOrCreateRuntimeFunction(
5493 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
// NOTE(review): fragmentary view — the parameter list (SchedType between
// AllocaIP and NeedsBarrier, per the call sites visible above) and many body
// statements are elided in this extraction; annotations cover visible lines.
// Purpose (from visible code): rewrite a canonical loop to use the libomp
// dynamic-dispatch protocol: allocate p.lastiter/p.lowerbound/p.upperbound/
// p.stride, call __kmpc_dispatch_init, loop on __kmpc_dispatch_next, patch
// the CFG so each dispatched chunk re-enters the loop, and optionally emit a
// closing barrier.
5497OpenMPIRBuilder::InsertPointOrErrorTy
 5498OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
 5499 InsertPointTy AllocaIP,
 5501 bool NeedsBarrier,
Value *Chunk) {
 5502 assert(CLI->isValid() &&
"Requires a valid canonical loop");
 5504 "Require dedicated allocate IP");
 5506 "Require valid schedule type");
// Ordered schedules additionally require a per-iteration fini call (below).
 5508 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
 5509 OMPScheduleType::ModifierOrdered;
 5512 Builder.SetCurrentDebugLocation(
DL);
// Runtime source-location ident for all __kmpc_dispatch_* calls.
 5515 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
 5516 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 5520 Type *IVTy =
IV->getType();
// Dispatch bookkeeping allocas go to the dedicated alloca block.
 5525 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
 5527 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
 5528 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
 5529 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
 5530 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
 5531 CLI->setLastIter(PLastIter);
// Initialize bounds: libomp dispatch uses a 1-based inclusive range here.
 5539 Constant *One = ConstantInt::get(IVTy, 1);
 5540 Builder.CreateStore(One, PLowerBound);
 5541 Value *UpperBound = CLI->getTripCount();
 5542 Builder.CreateStore(UpperBound, PUpperBound);
 5543 Builder.CreateStore(One, PStride);
 5549 InsertPointTy AfterIP = CLI->getAfterIP();
 5557 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
// Schedule enum is passed to the runtime as an i32 constant.
 5560 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
 5563 createRuntimeFunctionCall(DynamicInit, {SrcLoc, ThreadNum, SchedulingType,
// Outer condition block: ask the runtime for the next chunk; a nonzero
// result means more work, zero means the dispatch is exhausted.
 5572 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
 5573 Value *Res = createRuntimeFunctionCall(
 5575 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
 5576 Constant *Zero32 = ConstantInt::get(I32Type, 0);
// lb is biased by -1: the canonical IV counts from 0, the runtime from 1.
 5579 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
 5580 Builder.CreateCondBr(MoreWork, Header, Exit);
// Retarget the header PHI so the IV restarts from the chunk's lower bound.
 5586 PI->setIncomingBlock(0, OuterCond);
 5587 PI->setIncomingValue(0, LowerBound);
 5592 Br->setSuccessor(0, OuterCond);
// Rewrite the loop condition to compare against the chunk's upper bound.
 5597 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
 5598 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
 5601 CI->setOperand(1, UpperBound);
// When the chunk is done, branch back to the outer dispatch loop, not Exit.
 5605 assert(BI->getSuccessor(1) == Exit);
 5606 BI->setSuccessor(1, OuterCond);
// Ordered schedules: signal per-chunk completion to the runtime.
 5610 Builder.SetInsertPoint(&Latch->
back());
 5612 createRuntimeFunctionCall(DynamicFini, {SrcLoc, ThreadNum});
// Optional closing barrier at the loop exit (NeedsBarrier path, presumably —
// the guarding condition is elided in this extraction).
 5617 Builder.SetInsertPoint(&
Exit->back());
 5618 InsertPointOrErrorTy BarrierIP =
 5619 createBarrier(LocationDescription(Builder.saveIP(),
DL),
 5620 omp::Directive::OMPD_for,
false,
 5623 return BarrierIP.takeError();
5642 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5647 if (BBsToErase.
count(UseInst->getParent()))
5654 while (BBsToErase.
remove_if(HasRemainingUses)) {
5664 InsertPointTy ComputeIP) {
5665 assert(
Loops.size() >= 1 &&
"At least one loop required");
5666 size_t NumLoops =
Loops.size();
5670 return Loops.front();
5672 CanonicalLoopInfo *Outermost =
Loops.front();
5673 CanonicalLoopInfo *Innermost =
Loops.back();
5674 BasicBlock *OrigPreheader = Outermost->getPreheader();
5675 BasicBlock *OrigAfter = Outermost->getAfter();
5682 Loop->collectControlBlocks(OldControlBBs);
5685 Builder.SetCurrentDebugLocation(
DL);
5686 if (ComputeIP.isSet())
5687 Builder.restoreIP(ComputeIP);
5689 Builder.restoreIP(Outermost->getPreheaderIP());
5693 Value *CollapsedTripCount =
nullptr;
5694 for (CanonicalLoopInfo *L :
Loops) {
5696 "All loops to collapse must be valid canonical loops");
5697 Value *OrigTripCount =
L->getTripCount();
5698 if (!CollapsedTripCount) {
5699 CollapsedTripCount = OrigTripCount;
5704 CollapsedTripCount =
5705 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5709 CanonicalLoopInfo *
Result =
5710 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5711 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5717 Builder.restoreIP(
Result->getBodyIP());
5721 NewIndVars.
resize(NumLoops);
5722 for (
int i = NumLoops - 1; i >= 1; --i) {
5723 Value *OrigTripCount =
Loops[i]->getTripCount();
5725 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5726 NewIndVars[i] = NewIndVar;
5728 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5731 NewIndVars[0] = Leftover;
5742 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5749 ContinueBlock =
nullptr;
5750 ContinuePred = NextSrc;
5757 for (
size_t i = 0; i < NumLoops - 1; ++i)
5758 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5761 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5764 for (
size_t i = NumLoops - 1; i > 0; --i)
5765 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5768 ContinueWith(
Result->getLatch(),
nullptr);
5775 for (
size_t i = 0; i < NumLoops; ++i)
5776 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5781 for (CanonicalLoopInfo *L :
Loops)
5790std::vector<CanonicalLoopInfo *>
5794 "Must pass as many tile sizes as there are loops");
5795 int NumLoops =
Loops.size();
5796 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5798 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5799 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5800 Function *
F = OutermostLoop->getBody()->getParent();
5801 BasicBlock *InnerEnter = InnermostLoop->getBody();
5802 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5808 Loop->collectControlBlocks(OldControlBBs);
5815 for (CanonicalLoopInfo *L :
Loops) {
5816 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5828 for (
int i = 0; i < NumLoops - 1; ++i) {
5829 CanonicalLoopInfo *Surrounding =
Loops[i];
5832 BasicBlock *EnterBB = Surrounding->getBody();
5838 Builder.SetCurrentDebugLocation(
DL);
5839 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5841 for (
int i = 0; i < NumLoops; ++i) {
5843 Value *OrigTripCount = OrigTripCounts[i];
5846 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5847 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5856 Value *FloorTripOverflow =
5857 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5859 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5860 Value *FloorTripCount =
5861 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5862 "omp_floor" +
Twine(i) +
".tripcount",
true);
5865 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5871 std::vector<CanonicalLoopInfo *>
Result;
5872 Result.reserve(NumLoops * 2);
5876 BasicBlock *Enter = OutermostLoop->getPreheader();
5883 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5885 auto EmbeddNewLoop =
5886 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5888 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5889 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
5894 Enter = EmbeddedLoop->getBody();
5895 Continue = EmbeddedLoop->getLatch();
5896 OutroInsertBefore = EmbeddedLoop->getLatch();
5897 return EmbeddedLoop;
5901 const Twine &NameBase) {
5903 CanonicalLoopInfo *EmbeddedLoop =
5904 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5905 Result.push_back(EmbeddedLoop);
5909 EmbeddNewLoops(FloorCount,
"floor");
5913 Builder.SetInsertPoint(Enter->getTerminator());
5915 for (
int i = 0; i < NumLoops; ++i) {
5916 CanonicalLoopInfo *FloorLoop =
Result[i];
5919 Value *FloorIsEpilogue =
5920 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5921 Value *TileTripCount =
5922 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
5928 EmbeddNewLoops(TileCounts,
"tile");
5933 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5942 BodyEnter =
nullptr;
5943 BodyEntered = ExitBB;
5955 Builder.restoreIP(
Result.back()->getBodyIP());
5956 for (
int i = 0; i < NumLoops; ++i) {
5957 CanonicalLoopInfo *FloorLoop =
Result[i];
5958 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
5959 Value *OrigIndVar = OrigIndVars[i];
5963 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
5965 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
5972 for (CanonicalLoopInfo *L :
Loops)
5976 for (CanonicalLoopInfo *GenL : Result)
5987 if (Properties.
empty())
6010 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6014 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6022 if (
I.mayReadOrWriteMemory()) {
6026 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6031void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
6038void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
6046void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
6049 const Twine &NamePrefix) {
6050 Function *
F = CanonicalLoop->getFunction();
6072 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
6078 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6080 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
6083 Builder.SetInsertPoint(SplitBeforeIt);
6085 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6088 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6091 Builder.SetInsertPoint(ElseBlock);
6097 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
6099 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
6105 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6107 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
6114 if (
Block == ThenBlock)
6115 NewBB->
setName(NamePrefix +
".if.else");
6118 VMap[
Block] = NewBB;
6122 Builder.CreateBr(NewBlocks.
front());
6126 L->getLoopLatch()->splitBasicBlock(
6127 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
6131 L->addBasicBlockToLoop(ThenBlock, LI);
6135OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
6137 if (TargetTriple.
isX86()) {
6138 if (Features.
lookup(
"avx512f"))
6140 else if (Features.
lookup(
"avx"))
6144 if (TargetTriple.
isPPC())
6146 if (TargetTriple.
isWasm())
6151void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
6153 Value *IfCond, OrderKind Order,
6157 Function *
F = CanonicalLoop->getFunction();
6172 if (AlignedVars.
size()) {
6173 InsertPointTy IP = Builder.saveIP();
6174 for (
auto &AlignedItem : AlignedVars) {
6175 Value *AlignedPtr = AlignedItem.first;
6176 Value *Alignment = AlignedItem.second;
6179 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
6182 Builder.restoreIP(IP);
6187 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
6197 if (
Block == CanonicalLoop->getCond() ||
6198 Block == CanonicalLoop->getHeader())
6200 Reachable.insert(
Block);
6210 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
6226 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6228 if (Simdlen || Safelen) {
6232 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6258static std::unique_ptr<TargetMachine>
6262 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6263 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6274 std::nullopt, OptLevel));
6298 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6299 FAM.registerPass([&]() {
return TIRA; });
6313 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6318 nullptr, ORE,
static_cast<int>(OptLevel),
6339 <<
" Threshold=" << UP.
Threshold <<
"\n"
6342 <<
" PartialOptSizeThreshold="
6362 Ptr = Load->getPointerOperand();
6364 Ptr = Store->getPointerOperand();
6371 if (Alloca->getParent() == &
F->getEntryBlock())
6391 int MaxTripCount = 0;
6392 bool MaxOrZero =
false;
6393 unsigned TripMultiple = 0;
6395 bool UseUpperBound =
false;
6397 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6399 unsigned Factor = UP.
Count;
6400 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6408void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6410 CanonicalLoopInfo **UnrolledCLI) {
6411 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6427 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6440 *UnrolledCLI =
Loop;
6445 "unrolling only makes sense with a factor of 2 or larger");
6447 Type *IndVarTy =
Loop->getIndVarType();
6454 std::vector<CanonicalLoopInfo *>
LoopNest =
6455 tileLoops(
DL, {
Loop}, {FactorVal});
6458 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6469 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6472 (*UnrolledCLI)->assertOK();
// NOTE(review): fragmentary view — parameters after Loc (BufSize, CpyBuf,
// CpyFn, DidIt, per the Args array below) are elided in this extraction.
// Purpose: emit a call to __kmpc_copyprivate broadcasting privatized data
// from the thread that executed a single region (signalled via DidIt) to
// the other threads of the team.
6476OpenMPIRBuilder::InsertPointTy
 6477OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
// Bail out when the location is invalid (no insert point to emit at).
 6480 if (!updateToLocation(
Loc))
 6484 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6485 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6486 Value *ThreadId = getOrCreateThreadID(Ident);
// The runtime expects the did-it flag by value (i32), hence the load.
 6488 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
 6490 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
 6492 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
 6493 createRuntimeFunctionCall(Fn, Args);
 6495 return Builder.saveIP();
6498OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6499 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6503 if (!updateToLocation(
Loc))
6509 if (!CPVars.
empty()) {
6511 Builder.CreateStore(Builder.getInt32(0), DidIt);
6514 Directive OMPD = Directive::OMPD_single;
6516 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6517 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6518 Value *ThreadId = getOrCreateThreadID(Ident);
6521 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6522 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
6524 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6525 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
6527 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6528 if (
Error Err = FiniCB(IP))
6535 Builder.CreateStore(Builder.getInt32(1), DidIt);
6548 InsertPointOrErrorTy AfterIP =
6549 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6553 return AfterIP.takeError();
6556 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6558 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6559 ConstantInt::get(Int64, 0), CPVars[
I],
6562 }
else if (!IsNowait) {
6563 InsertPointOrErrorTy AfterIP =
6564 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6565 omp::Directive::OMPD_unknown,
false,
6568 return AfterIP.takeError();
6570 return Builder.saveIP();
6573OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6574 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6575 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6577 if (!updateToLocation(
Loc))
6580 Directive OMPD = Directive::OMPD_critical;
6582 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6583 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6584 Value *ThreadId = getOrCreateThreadID(Ident);
6585 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6586 Value *
Args[] = {Ident, ThreadId, LockVar};
6592 EnterArgs.push_back(HintInst);
6593 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6595 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6597 Instruction *EntryCall = createRuntimeFunctionCall(RTFn, EnterArgs);
6600 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6601 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
6603 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6607OpenMPIRBuilder::InsertPointTy
6608OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6609 InsertPointTy AllocaIP,
unsigned NumLoops,
6611 const Twine &Name,
bool IsDependSource) {
6615 "OpenMP runtime requires depend vec with i64 type");
6617 if (!updateToLocation(
Loc))
6622 Builder.restoreIP(AllocaIP);
6623 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6625 updateToLocation(
Loc);
6628 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6629 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6630 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6631 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6635 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6636 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6639 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6640 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6641 Value *ThreadId = getOrCreateThreadID(Ident);
6642 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6646 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6648 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6649 createRuntimeFunctionCall(RTLFn, Args);
6651 return Builder.saveIP();
6654OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6655 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6656 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6657 if (!updateToLocation(
Loc))
6660 Directive OMPD = Directive::OMPD_ordered;
6666 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6667 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6668 Value *ThreadId = getOrCreateThreadID(Ident);
6671 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6672 EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
6675 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6676 ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
6679 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6683OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6685 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6686 bool HasFinalize,
bool IsCancellable) {
6689 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6693 BasicBlock *EntryBB = Builder.GetInsertBlock();
6702 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6705 if (
Error Err = BodyGenCB( InsertPointTy(),
6713 "Unexpected control flow graph state!!");
6714 InsertPointOrErrorTy AfterIP =
6715 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6717 return AfterIP.takeError();
6722 "Unexpected Insertion point location!");
6725 auto InsertBB = merged ? ExitPredBB : ExitBB;
6728 Builder.SetInsertPoint(InsertBB);
6730 return Builder.saveIP();
// NOTE(review): fragmentary view — the parameter list and the creation of
// ThenBB/UI/EntryBBTI are elided in this extraction.
// Purpose (from visible code): for a conditional directive entry, guard the
// region body on the runtime entry call's result: branch to the body only
// when EntryCall returned nonzero, otherwise skip to ExitBB.
6733OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
// Unconditional directives (or no entry call) need no guard at all.
 6736 if (!Conditional || !EntryCall)
 6737 return Builder.saveIP();
 6739 BasicBlock *EntryBB = Builder.GetInsertBlock();
// Nonzero return from the entry runtime call selects the "then" path.
 6740 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
 6752 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
// Splice the original terminator in at the placeholder's position, then
// drop the placeholder unreachable.
 6754 Builder.SetInsertPoint(UI);
 6755 Builder.Insert(EntryBBTI);
 6756 UI->eraseFromParent();
6763OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6764 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6767 Builder.restoreIP(FinIP);
6771 assert(!FinalizationStack.empty() &&
6772 "Unexpected finalization stack state!");
6774 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6775 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6777 if (
Error Err = Fi.mergeFiniBB(Builder, FinIP.getBlock()))
6778 return std::move(Err);
6782 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
6786 return Builder.saveIP();
6790 Builder.Insert(ExitCall);
// NOTE(review): fragmentary view — creation of OMP_Entry/CopyBegin/CopyEnd
// and IntPtrTy is elided in this extraction.
// Purpose (from visible code): build the copyin guard: compare the master
// and private addresses as integers and run the copy blocks only when they
// differ (i.e. only in threads that actually have a distinct private copy).
6796OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
 6797 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
 6826 "copyin.not.master.end");
 6833 Builder.SetInsertPoint(OMP_Entry);
// Pointer identity is tested via integer compare of the two addresses.
 6834 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
 6835 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
 6836 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
 6837 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
// Caller fills CopyBegin; we pre-wire its fallthrough into CopyEnd.
 6839 Builder.SetInsertPoint(CopyBegin);
 6841 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
 6843 return Builder.saveIP();
// NOTE(review): fragmentary view — parameters after Loc and the Args array
// construction are elided in this extraction.
// Purpose: emit a call to __kmpc_alloc (OpenMP allocator API) at Loc and
// return the resulting CallInst.
6846CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
 6850 updateToLocation(
Loc);
// Source-location ident + thread id are the leading runtime arguments.
 6853 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6854 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6855 Value *ThreadId = getOrCreateThreadID(Ident);
 6858 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
 6860 return createRuntimeFunctionCall(Fn, Args, Name);
// NOTE(review): fragmentary view — parameters after Loc and the Args array
// construction are elided in this extraction.
// Purpose: emit a call to __kmpc_free, the deallocation counterpart of
// createOMPAlloc, at Loc and return the resulting CallInst.
6863CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
 6867 updateToLocation(
Loc);
// Same ident/thread-id preamble as every other libomp entry point here.
 6870 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6871 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6872 Value *ThreadId = getOrCreateThreadID(Ident);
 6874 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
 6875 return createRuntimeFunctionCall(Fn, Args, Name);
6878CallInst *OpenMPIRBuilder::createOMPInteropInit(
6879 const LocationDescription &
Loc,
Value *InteropVar,
6881 Value *DependenceAddress,
bool HaveNowaitClause) {
6883 updateToLocation(
Loc);
6886 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6887 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6888 Value *ThreadId = getOrCreateThreadID(Ident);
6889 if (Device ==
nullptr)
6891 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
6892 if (NumDependences ==
nullptr) {
6893 NumDependences = ConstantInt::get(Int32, 0);
6897 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6899 Ident, ThreadId, InteropVar, InteropTypeVal,
6900 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6902 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6904 return createRuntimeFunctionCall(Fn, Args);
6907CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6908 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
6909 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6911 updateToLocation(
Loc);
6914 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6915 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6916 Value *ThreadId = getOrCreateThreadID(Ident);
6917 if (Device ==
nullptr)
6919 if (NumDependences ==
nullptr) {
6920 NumDependences = ConstantInt::get(Int32, 0);
6924 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6926 Ident, ThreadId, InteropVar,
Device,
6927 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6929 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6931 return createRuntimeFunctionCall(Fn, Args);
6934CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
6936 Value *NumDependences,
6937 Value *DependenceAddress,
6938 bool HaveNowaitClause) {
6940 updateToLocation(
Loc);
6942 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6943 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6944 Value *ThreadId = getOrCreateThreadID(Ident);
6945 if (Device ==
nullptr)
6947 if (NumDependences ==
nullptr) {
6948 NumDependences = ConstantInt::get(Int32, 0);
6952 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6954 Ident, ThreadId, InteropVar,
Device,
6955 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6957 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6959 return createRuntimeFunctionCall(Fn, Args);
6962CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6966 updateToLocation(
Loc);
6969 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6970 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6971 Value *ThreadId = getOrCreateThreadID(Ident);
6973 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
6977 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6979 return createRuntimeFunctionCall(Fn, Args);
6982OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6983 const LocationDescription &
Loc,
6984 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6986 "expected num_threads and num_teams to be specified");
6988 if (!updateToLocation(
Loc))
6992 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6993 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7005 const std::string DebugPrefix =
"_debug__";
7006 if (KernelName.
ends_with(DebugPrefix)) {
7007 KernelName = KernelName.
drop_back(DebugPrefix.length());
7008 Kernel = M.getFunction(KernelName);
7014 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
7019 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
7020 if (MaxThreadsVal < 0)
7021 MaxThreadsVal = std::max(
7024 if (MaxThreadsVal > 0)
7025 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
7036 Function *Fn = getOrCreateRuntimeFunctionPtr(
7037 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7040 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7041 Constant *DynamicEnvironmentInitializer =
7045 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7047 DL.getDefaultGlobalsAddressSpace());
7051 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7052 ? DynamicEnvironmentGV
7054 DynamicEnvironmentPtr);
7057 ConfigurationEnvironment, {
7058 UseGenericStateMachineVal,
7059 MayUseNestedParallelismVal,
7066 ReductionBufferLength,
7069 KernelEnvironment, {
7070 ConfigurationEnvironmentInitializer,
7074 std::string KernelEnvironmentName =
7075 (KernelName +
"_kernel_environment").str();
7078 KernelEnvironmentInitializer, KernelEnvironmentName,
7080 DL.getDefaultGlobalsAddressSpace());
7084 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7085 ? KernelEnvironmentGV
7087 KernelEnvironmentPtr);
7088 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
7090 KernelLaunchEnvironment =
7091 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7092 ? KernelLaunchEnvironment
7093 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7094 KernelLaunchEnvParamTy);
7095 CallInst *ThreadKind = createRuntimeFunctionCall(
7096 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7098 Value *ExecUserCode = Builder.CreateICmpEQ(
7108 auto *UI = Builder.CreateUnreachable();
7114 Builder.SetInsertPoint(WorkerExitBB);
7115 Builder.CreateRetVoid();
7118 Builder.SetInsertPoint(CheckBBTI);
7119 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7122 UI->eraseFromParent();
7129void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
7130 int32_t TeamsReductionDataSize,
7131 int32_t TeamsReductionBufferLength) {
7132 if (!updateToLocation(
Loc))
7135 Function *Fn = getOrCreateRuntimeFunctionPtr(
7136 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7138 createRuntimeFunctionCall(Fn, {});
7140 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7146 const std::string DebugPrefix =
"_debug__";
7148 KernelName = KernelName.
drop_back(DebugPrefix.length());
7149 auto *KernelEnvironmentGV =
7150 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7151 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7152 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
7154 KernelEnvironmentInitializer,
7155 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7157 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7164 if (
Kernel.hasFnAttribute(Name)) {
7165 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7171std::pair<int32_t, int32_t>
7173 int32_t ThreadLimit =
7174 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7177 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7178 if (!Attr.isValid() || !Attr.isStringAttribute())
7179 return {0, ThreadLimit};
7180 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7183 return {0, ThreadLimit};
7184 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7190 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
7191 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
7192 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7194 return {0, ThreadLimit};
7197void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
7200 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7203 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7211std::pair<int32_t, int32_t>
7214 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7218 int32_t LB, int32_t UB) {
7225 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7228void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7230 if (Config.isTargetDevice()) {
7237 else if (
T.isNVPTX())
7239 else if (
T.isSPIRV())
7246 if (Config.isTargetDevice()) {
7247 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7256Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7261 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7262 "Named kernel already exists?");
7268Error OpenMPIRBuilder::emitTargetRegionFunction(
7269 TargetRegionEntryInfo &EntryInfo,
7270 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7274 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7276 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7280 OutlinedFn = *CBResult;
7282 OutlinedFn =
nullptr;
7288 if (!IsOffloadEntry)
7291 std::string EntryFnIDName =
7292 Config.isTargetDevice()
7293 ? std::string(EntryFnName)
7294 : createPlatformSpecificName({EntryFnName,
"region_id"});
7296 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7297 EntryFnName, EntryFnIDName);
7301Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7302 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7305 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7306 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7307 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7308 OffloadInfoManager.registerTargetRegionEntryInfo(
7309 EntryInfo, EntryAddr, OutlinedFnID,
7310 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7311 return OutlinedFnID;
7314OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7315 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7316 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7317 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7319 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7320 BodyGenTy BodyGenType)>
7323 if (!updateToLocation(
Loc))
7324 return InsertPointTy();
7326 Builder.restoreIP(CodeGenIP);
7328 if (Config.IsTargetDevice.value_or(
false)) {
7330 InsertPointOrErrorTy AfterIP =
7331 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7333 return AfterIP.takeError();
7334 Builder.restoreIP(*AfterIP);
7336 return Builder.saveIP();
7339 bool IsStandAlone = !BodyGenCB;
7340 MapInfosTy *MapInfo;
7344 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7345 InsertPointTy CodeGenIP) ->
Error {
7346 MapInfo = &GenMapInfoCB(Builder.saveIP());
7347 if (
Error Err = emitOffloadingArrays(
7348 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7349 true, DeviceAddrCB))
7352 TargetDataRTArgs RTArgs;
7353 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7356 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7361 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7362 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7366 SrcLocInfo, DeviceID,
7367 PointerNum, RTArgs.BasePointersArray,
7368 RTArgs.PointersArray, RTArgs.SizesArray,
7369 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7370 RTArgs.MappersArray};
7373 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7377 if (
Info.HasNoWait) {
7384 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7387 if (
Info.HasNoWait) {
7391 emitBlock(OffloadContBlock, CurFn,
true);
7392 Builder.restoreIP(Builder.saveIP());
7397 bool RequiresOuterTargetTask =
Info.HasNoWait;
7398 if (!RequiresOuterTargetTask)
7399 cantFail(TaskBodyCB(
nullptr,
nullptr,
7402 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7403 {}, RTArgs,
Info.HasNoWait));
7405 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7406 omp::OMPRTL___tgt_target_data_begin_mapper);
7408 createRuntimeFunctionCall(BeginMapperFunc, OffloadingArgs);
7410 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7413 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7414 Builder.CreateStore(LI, DeviceMap.second.second);
7421 InsertPointOrErrorTy AfterIP =
7422 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7424 return AfterIP.takeError();
7425 Builder.restoreIP(*AfterIP);
7433 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7434 InsertPointTy CodeGenIP) ->
Error {
7435 InsertPointOrErrorTy AfterIP =
7436 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7438 return AfterIP.takeError();
7439 Builder.restoreIP(*AfterIP);
7444 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7445 TargetDataRTArgs RTArgs;
7446 Info.EmitDebug = !MapInfo->Names.empty();
7447 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7450 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7455 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7456 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7459 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7460 PointerNum, RTArgs.BasePointersArray,
7461 RTArgs.PointersArray, RTArgs.SizesArray,
7462 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7463 RTArgs.MappersArray};
7465 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7467 createRuntimeFunctionCall(EndMapperFunc, OffloadingArgs);
7473 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7481 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7482 return BeginThenGen(AllocaIP, Builder.saveIP());
7490 InsertPointOrErrorTy AfterIP =
7491 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7493 return AfterIP.takeError();
7497 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7498 return EndThenGen(AllocaIP, Builder.saveIP());
7501 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7502 return BeginThenGen(AllocaIP, Builder.saveIP());
7508 return Builder.saveIP();
7512OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7513 bool IsGPUDistribute) {
7514 assert((IVSize == 32 || IVSize == 64) &&
7515 "IV size is not compatible with the omp runtime");
7517 if (IsGPUDistribute)
7519 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7520 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7521 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7522 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7524 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7525 : omp::OMPRTL___kmpc_for_static_init_4u)
7526 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7527 : omp::OMPRTL___kmpc_for_static_init_8u);
7529 return getOrCreateRuntimeFunction(M, Name);
7532FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7534 assert((IVSize == 32 || IVSize == 64) &&
7535 "IV size is not compatible with the omp runtime");
7537 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7538 : omp::OMPRTL___kmpc_dispatch_init_4u)
7539 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7540 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7542 return getOrCreateRuntimeFunction(M, Name);
7545FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7547 assert((IVSize == 32 || IVSize == 64) &&
7548 "IV size is not compatible with the omp runtime");
7550 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7551 : omp::OMPRTL___kmpc_dispatch_next_4u)
7552 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7553 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7555 return getOrCreateRuntimeFunction(M, Name);
7558FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7560 assert((IVSize == 32 || IVSize == 64) &&
7561 "IV size is not compatible with the omp runtime");
7563 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7564 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7565 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7566 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7568 return getOrCreateRuntimeFunction(M, Name);
7572 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7577 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7585 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7589 if (NewVar && (arg == NewVar->
getArg()))
7599 auto UpdateDebugRecord = [&](
auto *DR) {
7602 for (
auto Loc : DR->location_ops()) {
7603 auto Iter = ValueReplacementMap.find(
Loc);
7604 if (Iter != ValueReplacementMap.end()) {
7605 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7606 ArgNo = std::get<1>(Iter->second) + 1;
7610 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7617 "Unexpected debug intrinsic");
7619 UpdateDebugRecord(&DVR);
7622 if (OMPBuilder.Config.isTargetDevice()) {
7624 Module *M = Func->getParent();
7627 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7629 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7630 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7632 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7645 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7647 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7648 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7650 if (OMPBuilder.Config.isTargetDevice()) {
7658 for (
auto &Arg : Inputs)
7663 for (
auto &Arg : Inputs)
7667 auto BB = Builder.GetInsertBlock();
7679 if (TargetCpuAttr.isStringAttribute())
7680 Func->addFnAttr(TargetCpuAttr);
7682 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7683 if (TargetFeaturesAttr.isStringAttribute())
7684 Func->addFnAttr(TargetFeaturesAttr);
7686 if (OMPBuilder.Config.isTargetDevice()) {
7688 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7689 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7700 Builder.SetInsertPoint(EntryBB);
7703 if (OMPBuilder.Config.isTargetDevice())
7704 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7706 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7711 if (OMPBuilder.Config.isTargetDevice())
7712 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7716 splitBB(Builder,
true,
"outlined.body");
7717 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7719 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7721 return AfterIP.takeError();
7722 Builder.restoreIP(*AfterIP);
7723 if (OMPBuilder.Config.isTargetDevice())
7724 OMPBuilder.createTargetDeinit(Builder);
7727 Builder.CreateRetVoid();
7731 auto AllocaIP = Builder.saveIP();
7736 const auto &ArgRange =
7737 OMPBuilder.Config.isTargetDevice()
7738 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7771 if (Instr->getFunction() == Func)
7772 Instr->replaceUsesOfWith(
Input, InputCopy);
7778 for (
auto InArg :
zip(Inputs, ArgRange)) {
7780 Argument &Arg = std::get<1>(InArg);
7781 Value *InputCopy =
nullptr;
7783 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7784 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7786 return AfterIP.takeError();
7787 Builder.restoreIP(*AfterIP);
7788 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7808 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7815 ReplaceValue(
Input, InputCopy, Func);
7819 for (
auto Deferred : DeferredReplacement)
7820 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7823 ValueReplacementMap);
7831 Value *TaskWithPrivates,
7832 Type *TaskWithPrivatesTy) {
7834 Type *TaskTy = OMPIRBuilder.Task;
7837 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7838 Value *Shareds = TaskT;
7848 if (TaskWithPrivatesTy != TaskTy)
7849 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7866 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7871 assert((!NumOffloadingArrays || PrivatesTy) &&
7872 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7875 Module &M = OMPBuilder.M;
7899 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7905 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7906 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7912 ".omp_target_task_proxy_func",
7913 Builder.GetInsertBlock()->getModule());
7914 Value *ThreadId = ProxyFn->getArg(0);
7915 Value *TaskWithPrivates = ProxyFn->getArg(1);
7916 ThreadId->
setName(
"thread.id");
7917 TaskWithPrivates->
setName(
"task");
7919 bool HasShareds = SharedArgsOperandNo > 0;
7920 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7923 Builder.SetInsertPoint(EntryBB);
7929 if (HasOffloadingArrays) {
7930 assert(TaskTy != TaskWithPrivatesTy &&
7931 "If there are offloading arrays to pass to the target"
7932 "TaskTy cannot be the same as TaskWithPrivatesTy");
7935 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7936 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7938 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7942 auto *ArgStructAlloca =
7944 assert(ArgStructAlloca &&
7945 "Unable to find the alloca instruction corresponding to arguments "
7946 "for extracted function");
7950 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7952 Value *SharedsSize =
7953 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7956 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7958 Builder.CreateMemCpy(
7959 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7961 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7963 OMPBuilder.createRuntimeFunctionCall(KernelLaunchFunction, KernelLaunchArgs);
7964 Builder.CreateRetVoid();
7970 return GEP->getSourceElementType();
7972 return Alloca->getAllocatedType();
7995 if (OffloadingArraysToPrivatize.
empty())
7996 return OMPIRBuilder.Task;
7999 for (
Value *V : OffloadingArraysToPrivatize) {
8000 assert(V->getType()->isPointerTy() &&
8001 "Expected pointer to array to privatize. Got a non-pointer value "
8004 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8010 "struct.task_with_privates");
8013 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
8014 TargetRegionEntryInfo &EntryInfo,
8015 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8018 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
8019 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
8021 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
8024 EntryFnName, Inputs, CBFunc,
8028 return OMPBuilder.emitTargetRegionFunction(
8029 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8033OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
8034 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
8035 OpenMPIRBuilder::InsertPointTy AllocaIP,
8037 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
8161 splitBB(Builder,
true,
"target.task.body");
8163 splitBB(Builder,
true,
"target.task.alloca");
8165 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
8166 TargetTaskAllocaBB->
begin());
8167 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
8170 OI.EntryBB = TargetTaskAllocaBB;
8171 OI.OuterAllocaBB = AllocaIP.getBlock();
8176 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8179 Builder.restoreIP(TargetTaskBodyIP);
8180 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8194 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
8198 bool NeedsTargetTask = HasNoWait && DeviceID;
8199 if (NeedsTargetTask) {
8201 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
8202 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
8203 RTArgs.SizesArray}) {
8205 OffloadingArraysToPrivatize.
push_back(V);
8206 OI.ExcludeArgsFromAggregate.push_back(V);
8210 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8211 DeviceID, OffloadingArraysToPrivatize](
8214 "there must be a single user for the outlined function");
8228 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8229 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8231 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8232 "Wrong number of arguments for StaleCI when shareds are present");
8233 int SharedArgOperandNo =
8234 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8240 if (!OffloadingArraysToPrivatize.
empty())
8245 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8246 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8248 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8251 Builder.SetInsertPoint(StaleCI);
8256 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8257 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8266 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8267 : getOrCreateRuntimeFunctionPtr(
8268 OMPRTL___kmpc_omp_target_task_alloc);
8272 Value *ThreadID = getOrCreateThreadID(Ident);
8279 Value *TaskSize = Builder.getInt64(
8280 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8285 Value *SharedsSize = Builder.getInt64(0);
8287 auto *ArgStructAlloca =
8289 assert(ArgStructAlloca &&
8290 "Unable to find the alloca instruction corresponding to arguments "
8291 "for extracted function");
8292 auto *ArgStructType =
8294 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8295 "arguments for extracted function");
8297 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8306 Value *Flags = Builder.getInt32(0);
8316 TaskSize, SharedsSize,
8319 if (NeedsTargetTask) {
8320 assert(DeviceID &&
"Expected non-empty device ID.");
8324 TaskData = createRuntimeFunctionCall(TaskAllocFn, TaskAllocArgs);
8330 *
this, Builder, TaskData, TaskWithPrivatesTy);
8331 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8334 if (!OffloadingArraysToPrivatize.
empty()) {
8336 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8337 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8338 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8345 "ElementType should match ArrayType");
8348 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8349 Builder.CreateMemCpy(
8350 Dst, Alignment, PtrToPrivatize, Alignment,
8351 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8365 if (!NeedsTargetTask) {
8368 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8369 createRuntimeFunctionCall(
8372 Builder.getInt32(Dependencies.size()),
8374 ConstantInt::get(Builder.getInt32Ty(), 0),
8380 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8382 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8383 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8384 CallInst *CI = createRuntimeFunctionCall(ProxyFn, {ThreadID, TaskData});
8386 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8387 }
else if (DepArray) {
8392 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8393 createRuntimeFunctionCall(
8395 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8396 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8400 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8401 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
8406 I->eraseFromParent();
8408 addOutlineInfo(std::move(OI));
8411 << *(Builder.GetInsertBlock()) <<
"\n");
8413 << *(Builder.GetInsertBlock()->getParent()->getParent())
8415 return Builder.saveIP();
8418Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8419 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8420 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8421 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8424 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8425 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8427 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8433 OpenMPIRBuilder::InsertPointTy AllocaIP,
8434 OpenMPIRBuilder::TargetDataInfo &
Info,
8435 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8436 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8439 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8440 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8442 bool HasNoWait,
Value *DynCGroupMem,
8447 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8448 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8449 Builder.restoreIP(IP);
8450 OMPBuilder.createRuntimeFunctionCall(OutlinedFn, Args);
8451 return Builder.saveIP();
8454 bool HasDependencies = Dependencies.
size() > 0;
8455 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8457 OpenMPIRBuilder::TargetKernelArgs KArgs;
8464 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8472 if (OutlinedFnID && DeviceID)
8473 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8474 EmitTargetCallFallbackCB, KArgs,
8475 DeviceID, RTLoc, TargetTaskAllocaIP);
8483 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8486 OMPBuilder.Builder.restoreIP(AfterIP);
8490 auto &&EmitTargetCallElse =
8491 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8492 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8495 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8496 if (RequiresOuterTargetTask) {
8500 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8501 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8503 Dependencies, EmptyRTArgs, HasNoWait);
8505 return EmitTargetCallFallbackCB(Builder.saveIP());
8508 Builder.restoreIP(AfterIP);
8512 auto &&EmitTargetCallThen =
8513 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8514 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8515 Info.HasNoWait = HasNoWait;
8516 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8517 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8518 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8519 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8526 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8527 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8532 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8534 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8538 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8541 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8549 Value *MaxThreadsClause =
8550 RuntimeAttrs.TeamsThreadLimit.size() == 1
8551 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8554 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8555 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8556 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8557 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8559 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8560 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8562 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8565 unsigned NumTargetItems =
Info.NumberOfPtrs;
8569 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8570 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8573 Value *TripCount = RuntimeAttrs.LoopTripCount
8574 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8575 Builder.getInt64Ty(),
8577 : Builder.getInt64(0);
8581 DynCGroupMem = Builder.getInt32(0);
8583 KArgs = OpenMPIRBuilder::TargetKernelArgs(
8584 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
8585 HasNoWait, DynCGroupMemFallback);
8589 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8592 if (RequiresOuterTargetTask)
8593 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8594 Dependencies, KArgs.RTArgs,
8597 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8598 EmitTargetCallFallbackCB, KArgs,
8599 DeviceID, RTLoc, AllocaIP);
8602 Builder.restoreIP(AfterIP);
8609 if (!OutlinedFnID) {
8610 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8616 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8620 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8621 EmitTargetCallElse, AllocaIP));
8624OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8625 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8626 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8627 TargetRegionEntryInfo &EntryInfo,
8628 const TargetKernelDefaultAttrs &DefaultAttrs,
8629 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8631 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8632 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8633 CustomMapperCallbackTy CustomMapperCB,
8637 if (!updateToLocation(
Loc))
8638 return InsertPointTy();
8640 Builder.restoreIP(CodeGenIP);
8648 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8649 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8655 if (!Config.isTargetDevice())
8657 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8658 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
8659 DynCGroupMemFallback);
8660 return Builder.saveIP();
8673 return OS.
str().str();
8678 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8679 Config.separator());
8684 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8686 assert(Elem.second->getValueType() == Ty &&
8687 "OMP internal variable has different type than requested");
8700 : M.getTargetTriple().isAMDGPU()
8702 :
DL.getDefaultGlobalsAddressSpace();
8711 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
8712 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8719Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8720 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8721 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8722 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8725Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8730 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8732 return SizePtrToInt;
8737 std::string VarName) {
8741 M, MaptypesArrayInit->
getType(),
8745 return MaptypesArrayGlobal;
8748void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8749 InsertPointTy AllocaIP,
8750 unsigned NumOperands,
8751 struct MapperAllocas &MapperAllocas) {
8752 if (!updateToLocation(
Loc))
8757 Builder.restoreIP(AllocaIP);
8759 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8763 ArrI64Ty,
nullptr,
".offload_sizes");
8764 updateToLocation(
Loc);
8765 MapperAllocas.ArgsBase = ArgsBase;
8766 MapperAllocas.Args =
Args;
8767 MapperAllocas.ArgSizes = ArgSizes;
8770void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8773 struct MapperAllocas &MapperAllocas,
8774 int64_t DeviceID,
unsigned NumOperands) {
8775 if (!updateToLocation(
Loc))
8780 Value *ArgsBaseGEP =
8781 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8782 {Builder.getInt32(0), Builder.getInt32(0)});
8784 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8785 {Builder.getInt32(0), Builder.getInt32(0)});
8786 Value *ArgSizesGEP =
8787 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8788 {Builder.getInt32(0), Builder.getInt32(0)});
8791 createRuntimeFunctionCall(MapperFunc, {SrcLocInfo, Builder.getInt64(DeviceID),
8792 Builder.getInt32(NumOperands),
8793 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
8794 MaptypesArg, MapnamesArg, NullPtr});
8797void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8798 TargetDataRTArgs &RTArgs,
8799 TargetDataInfo &
Info,
8801 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8802 "expected region end call to runtime only when end call is separate");
8804 auto VoidPtrTy = UnqualPtrTy;
8805 auto VoidPtrPtrTy = UnqualPtrTy;
8807 auto Int64PtrTy = UnqualPtrTy;
8809 if (!
Info.NumberOfPtrs) {
8819 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8821 Info.RTArgs.BasePointersArray,
8823 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8827 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8830 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8832 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8833 :
Info.RTArgs.MapTypesArray,
8839 if (!
Info.EmitDebug)
8842 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8848 if (!
Info.HasMapper)
8851 RTArgs.MappersArray =
8852 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
8855void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8856 InsertPointTy CodeGenIP,
8857 MapInfosTy &CombinedInfo,
8858 TargetDataInfo &
Info) {
8859 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8860 CombinedInfo.NonContigInfo;
8873 "struct.descriptor_dim");
8875 enum { OffsetFD = 0, CountFD, StrideFD };
8879 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
8882 if (NonContigInfo.Dims[
I] == 1)
8884 Builder.restoreIP(AllocaIP);
8887 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
8888 Builder.restoreIP(CodeGenIP);
8889 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
8890 unsigned RevIdx = EE -
II - 1;
8891 Value *DimsLVal = Builder.CreateInBoundsGEP(
8893 {Builder.getInt64(0), Builder.getInt64(II)});
8895 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8896 Builder.CreateAlignedStore(
8897 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8898 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
8900 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8901 Builder.CreateAlignedStore(
8902 NonContigInfo.Counts[L][RevIdx], CountLVal,
8903 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8905 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8906 Builder.CreateAlignedStore(
8907 NonContigInfo.Strides[L][RevIdx], StrideLVal,
8908 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8911 Builder.restoreIP(CodeGenIP);
8912 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8913 DimsAddr, Builder.getPtrTy());
8914 Value *
P = Builder.CreateConstInBoundsGEP2_32(
8916 Info.RTArgs.PointersArray, 0,
I);
8917 Builder.CreateAlignedStore(
8918 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
8923void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8931 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8933 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8934 Value *DeleteBit = Builder.CreateAnd(
8937 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8938 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8943 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8945 Value *PtrAndObjBit = Builder.CreateAnd(
8948 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8949 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8950 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8951 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8952 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8953 DeleteCond = Builder.CreateIsNull(
8955 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8958 DeleteCond = Builder.CreateIsNotNull(
8960 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8962 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8963 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8965 emitBlock(BodyBB, MapperFn);
8968 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8971 Value *MapTypeArg = Builder.CreateAnd(
8974 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8975 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8976 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8977 MapTypeArg = Builder.CreateOr(
8980 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8981 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8985 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8986 ArraySize, MapTypeArg, MapName};
8987 createRuntimeFunctionCall(
8988 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8996 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
9012 MapperFn->
addFnAttr(Attribute::NoInline);
9013 MapperFn->
addFnAttr(Attribute::NoUnwind);
9023 auto SavedIP = Builder.saveIP();
9024 Builder.SetInsertPoint(EntryBB);
9036 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
9037 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
9038 Value *PtrBegin = BeginIn;
9039 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
9044 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9045 MapType, MapName, ElementSize, HeadBB,
9051 emitBlock(HeadBB, MapperFn);
9056 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9057 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9060 emitBlock(BodyBB, MapperFn);
9063 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9067 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
9069 return Info.takeError();
9073 Value *OffloadingArgs[] = {MapperHandle};
9074 Value *PreviousSize = createRuntimeFunctionCall(
9075 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
9077 Value *ShiftedPreviousSize =
9078 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
9081 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
9090 Value *OriMapType = Builder.getInt64(
9091 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9093 Value *MemberMapType =
9094 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9108 Value *LeftToFrom = Builder.CreateAnd(
9111 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9112 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9113 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9122 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
9123 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9125 emitBlock(AllocBB, MapperFn);
9126 Value *AllocMapType = Builder.CreateAnd(
9129 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9130 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9131 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9132 Builder.CreateBr(EndBB);
9133 emitBlock(AllocElseBB, MapperFn);
9134 Value *IsTo = Builder.CreateICmpEQ(
9137 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9138 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9139 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9141 emitBlock(ToBB, MapperFn);
9142 Value *ToMapType = Builder.CreateAnd(
9145 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9146 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9147 Builder.CreateBr(EndBB);
9148 emitBlock(ToElseBB, MapperFn);
9149 Value *IsFrom = Builder.CreateICmpEQ(
9152 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9153 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9154 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9156 emitBlock(FromBB, MapperFn);
9157 Value *FromMapType = Builder.CreateAnd(
9160 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9161 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9163 emitBlock(EndBB, MapperFn);
9166 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
9172 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9173 CurSizeArg, CurMapType, CurNameArg};
9175 auto ChildMapperFn = CustomMapperCB(
I);
9177 return ChildMapperFn.takeError();
9178 if (*ChildMapperFn) {
9180 createRuntimeFunctionCall(*ChildMapperFn, OffloadingArgs)
9181 ->setDoesNotThrow();
9185 createRuntimeFunctionCall(
9186 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
9193 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9194 "omp.arraymap.next");
9196 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9198 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9200 emitBlock(ExitBB, MapperFn);
9203 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9204 MapType, MapName, ElementSize, DoneBB,
9208 emitBlock(DoneBB, MapperFn,
true);
9210 Builder.CreateRetVoid();
9211 Builder.restoreIP(SavedIP);
9215Error OpenMPIRBuilder::emitOffloadingArrays(
9216 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
9217 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
9218 bool IsNonContiguous,
9222 Info.clearArrayInfo();
9223 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9225 if (
Info.NumberOfPtrs == 0)
9228 Builder.restoreIP(AllocaIP);
9234 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9235 PointerArrayType,
nullptr,
".offload_baseptrs");
9237 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9238 PointerArrayType,
nullptr,
".offload_ptrs");
9239 AllocaInst *MappersArray = Builder.CreateAlloca(
9240 PointerArrayType,
nullptr,
".offload_mappers");
9241 Info.RTArgs.MappersArray = MappersArray;
9248 ConstantInt::get(Int64Ty, 0));
9250 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9253 if (IsNonContiguous &&
9254 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9255 CombinedInfo.Types[
I] &
9256 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9258 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9264 RuntimeSizes.set(
I);
9267 if (RuntimeSizes.all()) {
9269 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9270 SizeArrayType,
nullptr,
".offload_sizes");
9275 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9276 auto *SizesArrayGbl =
9281 if (!RuntimeSizes.any()) {
9282 Info.RTArgs.SizesArray = SizesArrayGbl;
9284 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9285 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9288 SizeArrayType,
nullptr,
".offload_sizes");
9291 Builder.CreateMemCpy(
9292 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9293 SizesArrayGbl, OffloadSizeAlign,
9298 Info.RTArgs.SizesArray = Buffer;
9306 for (
auto mapFlag : CombinedInfo.Types)
9308 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9310 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9311 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9312 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9315 if (!CombinedInfo.Names.empty()) {
9316 auto *MapNamesArrayGbl = createOffloadMapnames(
9317 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9318 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9319 Info.EmitDebug =
true;
9321 Info.RTArgs.MapNamesArray =
9323 Info.EmitDebug =
false;
9328 if (
Info.separateBeginEndCalls()) {
9329 bool EndMapTypesDiffer =
false;
9331 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9332 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9333 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9334 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9335 EndMapTypesDiffer =
true;
9338 if (EndMapTypesDiffer) {
9339 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9340 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9345 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9346 Value *BPVal = CombinedInfo.BasePointers[
I];
9347 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9350 Builder.CreateAlignedStore(BPVal, BP,
9351 M.getDataLayout().getPrefTypeAlign(PtrTy));
9353 if (
Info.requiresDevicePointerInfo()) {
9354 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9355 CodeGenIP = Builder.saveIP();
9356 Builder.restoreIP(AllocaIP);
9357 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9358 Builder.restoreIP(CodeGenIP);
9360 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9361 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9362 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9364 DeviceAddrCB(
I, BP);
9368 Value *PVal = CombinedInfo.Pointers[
I];
9369 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9373 Builder.CreateAlignedStore(PVal,
P,
9374 M.getDataLayout().getPrefTypeAlign(PtrTy));
9376 if (RuntimeSizes.test(
I)) {
9377 Value *S = Builder.CreateConstInBoundsGEP2_32(
9381 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9384 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9387 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9390 auto CustomMFunc = CustomMapperCB(
I);
9392 return CustomMFunc.takeError();
9394 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9396 Value *MAddr = Builder.CreateInBoundsGEP(
9398 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9399 Builder.CreateAlignedStore(
9400 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9403 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9404 Info.NumberOfPtrs == 0)
9406 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9411 BasicBlock *CurBB = Builder.GetInsertBlock();
9418 Builder.CreateBr(
Target);
9421 Builder.ClearInsertionPoint();
9426 BasicBlock *CurBB = Builder.GetInsertBlock();
9442 Builder.SetInsertPoint(BB);
9445Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9446 BodyGenCallbackTy ElseGen,
9447 InsertPointTy AllocaIP) {
9451 auto CondConstant = CI->getSExtValue();
9453 return ThenGen(AllocaIP, Builder.saveIP());
9455 return ElseGen(AllocaIP, Builder.saveIP());
9465 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9467 emitBlock(ThenBlock, CurFn);
9468 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9470 emitBranch(ContBlock);
9473 emitBlock(ElseBlock, CurFn);
9474 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9477 emitBranch(ContBlock);
9479 emitBlock(ContBlock, CurFn,
true);
9483bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9487 "Unexpected Atomic Ordering.");
9544OpenMPIRBuilder::InsertPointTy
9545OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9546 AtomicOpValue &
X, AtomicOpValue &V,
9548 if (!updateToLocation(
Loc))
9551 assert(
X.Var->getType()->isPointerTy() &&
9552 "OMP Atomic expects a pointer to target memory");
9553 Type *XElemTy =
X.ElemTy;
9556 "OMP atomic read expected a scalar type");
9558 Value *XRead =
nullptr;
9562 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9568 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9571 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9572 OpenMPIRBuilder::AtomicInfo atomicInfo(
9573 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9574 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9575 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9576 XRead = AtomicLoadRes.first;
9583 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9586 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9588 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9591 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9592 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9593 return Builder.saveIP();
9596OpenMPIRBuilder::InsertPointTy
9597OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9598 AtomicOpValue &
X,
Value *Expr,
9600 if (!updateToLocation(
Loc))
9603 assert(
X.Var->getType()->isPointerTy() &&
9604 "OMP Atomic expects a pointer to target memory");
9605 Type *XElemTy =
X.ElemTy;
9608 "OMP atomic write expected a scalar type");
9611 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9614 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9616 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9617 OpenMPIRBuilder::AtomicInfo atomicInfo(
9618 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9619 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9620 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9627 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9628 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9632 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9633 return Builder.saveIP();
9636OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9637 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9639 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9640 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9642 if (!updateToLocation(
Loc))
9646 Type *XTy =
X.Var->getType();
9648 "OMP Atomic expects a pointer to target memory");
9649 Type *XElemTy =
X.ElemTy;
9652 "OMP atomic update expected a scalar type");
9655 "OpenMP atomic does not support LT or GT operations");
9659 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9660 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9662 return AtomicResult.takeError();
9663 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9664 return Builder.saveIP();
9668Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9672 return Builder.CreateAdd(Src1, Src2);
9674 return Builder.CreateSub(Src1, Src2);
9676 return Builder.CreateAnd(Src1, Src2);
9678 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9680 return Builder.CreateOr(Src1, Src2);
9682 return Builder.CreateXor(Src1, Src2);
9707 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9708 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9711 bool emitRMWOp =
false;
9719 emitRMWOp = XElemTy;
9722 emitRMWOp = (IsXBinopExpr && XElemTy);
9729 std::pair<Value *, Value *> Res;
9734 if (IsIgnoreDenormalMode)
9735 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9737 if (!IsFineGrainedMemory)
9738 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9740 if (!IsRemoteMemory)
9744 Res.first = RMWInst;
9749 Res.second = Res.first;
9751 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9755 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9761 OpenMPIRBuilder::AtomicInfo atomicInfo(
9762 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9763 OldVal->
getAlign(),
true , AllocaIP,
X);
9764 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9765 BasicBlock *CurBB = Builder.GetInsertBlock();
9767 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9771 X->getName() +
".atomic.cont");
9773 Builder.restoreIP(AllocaIP);
9774 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9775 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9776 Builder.SetInsertPoint(ContBB);
9778 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9783 Value *Upd = *CBResult;
9784 Builder.CreateStore(Upd, NewAtomicAddr);
9787 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9788 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9790 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9791 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9793 Res.first = OldExprVal;
9799 Builder.SetInsertPoint(ExitBB);
9801 Builder.SetInsertPoint(ExitTI);
9807 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9814 BasicBlock *CurBB = Builder.GetInsertBlock();
9816 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9820 X->getName() +
".atomic.cont");
9822 Builder.restoreIP(AllocaIP);
9823 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9824 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9825 Builder.SetInsertPoint(ContBB);
9827 PHI->addIncoming(OldVal, CurBB);
9832 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9833 X->getName() +
".atomic.fltCast");
9835 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9836 X->getName() +
".atomic.ptrCast");
9843 Value *Upd = *CBResult;
9844 Builder.CreateStore(Upd, NewAtomicAddr);
9845 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9850 Result->setVolatile(VolatileX);
9851 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9852 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9853 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9854 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9856 Res.first = OldExprVal;
9863 Builder.SetInsertPoint(ExitBB);
9865 Builder.SetInsertPoint(ExitTI);
9872OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9873 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9876 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9877 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9878 if (!updateToLocation(
Loc))
9882 Type *XTy =
X.Var->getType();
9884 "OMP Atomic expects a pointer to target memory");
9885 Type *XElemTy =
X.ElemTy;
9888 "OMP atomic capture expected a scalar type");
9890 "OpenMP atomic does not support LT or GT operations");
9897 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9898 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9901 Value *CapturedVal =
9902 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9903 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9905 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9906 return Builder.saveIP();
9909OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9910 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9916 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9917 IsPostfixUpdate, IsFailOnly, Failure);
9920OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9921 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9926 if (!updateToLocation(
Loc))
9929 assert(
X.Var->getType()->isPointerTy() &&
9930 "OMP atomic expects a pointer to target memory");
9933 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9934 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9937 bool IsInteger =
E->getType()->isIntegerTy();
9939 if (
Op == OMPAtomicCompareOp::EQ) {
9944 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9945 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9950 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9954 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9956 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9958 "OldValue and V must be of same type");
9959 if (IsPostfixUpdate) {
9960 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9962 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9973 BasicBlock *CurBB = Builder.GetInsertBlock();
9975 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9977 CurBBTI,
X.Var->getName() +
".atomic.exit");
9983 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9985 Builder.SetInsertPoint(ContBB);
9986 Builder.CreateStore(OldValue, V.Var);
9987 Builder.CreateBr(ExitBB);
9992 Builder.SetInsertPoint(ExitBB);
9994 Builder.SetInsertPoint(ExitTI);
9997 Value *CapturedValue =
9998 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9999 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10005 assert(
R.Var->getType()->isPointerTy() &&
10006 "r.var must be of pointer type");
10007 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10009 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
10010 Value *ResultCast =
R.IsSigned
10011 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
10012 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
10013 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
10016 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10017 "Op should be either max or min at this point");
10018 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10029 if (IsXBinopExpr) {
10056 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
10058 Value *CapturedValue =
nullptr;
10059 if (IsPostfixUpdate) {
10060 CapturedValue = OldValue;
10085 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
10086 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
10088 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10092 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10094 return Builder.saveIP();
10097OpenMPIRBuilder::InsertPointOrErrorTy
10098OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
10099 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
10102 if (!updateToLocation(
Loc))
10103 return InsertPointTy();
10106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
10107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
10112 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
10113 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
10114 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10134 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
10135 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
10137 splitBB(Builder,
true,
"teams.alloca");
10139 bool SubClausesPresent =
10140 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10142 if (!Config.isTargetDevice() && SubClausesPresent) {
10143 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10144 "if lowerbound is non-null, then upperbound must also be non-null "
10145 "for bounds on num_teams");
10147 if (NumTeamsUpper ==
nullptr)
10148 NumTeamsUpper = Builder.getInt32(0);
10150 if (NumTeamsLower ==
nullptr)
10151 NumTeamsLower = NumTeamsUpper;
10155 "argument to if clause must be an integer value");
10159 IfExpr = Builder.CreateICmpNE(IfExpr,
10160 ConstantInt::get(IfExpr->
getType(), 0));
10161 NumTeamsUpper = Builder.CreateSelect(
10162 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
10165 NumTeamsLower = Builder.CreateSelect(
10166 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
10169 if (ThreadLimit ==
nullptr)
10170 ThreadLimit = Builder.getInt32(0);
10172 Value *ThreadNum = getOrCreateThreadID(Ident);
10173 createRuntimeFunctionCall(
10174 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
10175 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
10178 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10179 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10180 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10184 OI.EntryBB = AllocaBB;
10185 OI.ExitBB = ExitBB;
10186 OI.OuterAllocaBB = &OuterAllocaBB;
10190 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
10192 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
10194 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
10196 auto HostPostOutlineCB = [
this, Ident,
10197 ToBeDeleted](
Function &OutlinedFn)
mutable {
10202 "there must be a single user for the outlined function");
10207 "Outlined function must have two or three arguments only");
10209 bool HasShared = OutlinedFn.
arg_size() == 3;
10217 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10218 "outlined function.");
10219 Builder.SetInsertPoint(StaleCI);
10221 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
10224 createRuntimeFunctionCall(
10225 getOrCreateRuntimeFunctionPtr(
10226 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10230 I->eraseFromParent();
10233 if (!Config.isTargetDevice())
10234 OI.PostOutlineCB = HostPostOutlineCB;
10236 addOutlineInfo(std::move(OI));
10238 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10240 return Builder.saveIP();
10243OpenMPIRBuilder::InsertPointOrErrorTy
10244OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10245 InsertPointTy OuterAllocaIP,
10246 BodyGenCallbackTy BodyGenCB) {
10247 if (!updateToLocation(
Loc))
10248 return InsertPointTy();
10250 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10252 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10254 splitBB(Builder,
true,
"distribute.entry");
10255 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10258 splitBB(Builder,
true,
"distribute.exit");
10260 splitBB(Builder,
true,
"distribute.body");
10262 splitBB(Builder,
true,
"distribute.alloca");
10265 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10266 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10267 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10272 if (Config.isTargetDevice()) {
10274 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10275 OI.EntryBB = AllocaBB;
10276 OI.ExitBB = ExitBB;
10278 addOutlineInfo(std::move(OI));
10280 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10282 return Builder.saveIP();
10287 std::string VarName) {
10293 M, MapNamesArrayInit->
getType(),
10296 return MapNamesArrayGlobal;
10301void OpenMPIRBuilder::initializeTypes(
Module &M) {
10304 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10305 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10306#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10307#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10308 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10309 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10310#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10311 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10312 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
10313#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10314 T = StructType::getTypeByName(Ctx, StructName); \
10316 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10318 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10319#include "llvm/Frontend/OpenMP/OMPKinds.def"
10322void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10330 while (!Worklist.
empty()) {
10334 if (
BlockSet.insert(SuccBB).second)
10343 if (!Config.isGPU()) {
10358 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10359 Fn->
addFnAttr(Attribute::MustProgress);
10363void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10364 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10367 if (OffloadInfoManager.empty())
10371 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10372 TargetRegionEntryInfo>,
10374 OrderedEntries(OffloadInfoManager.size());
10377 auto &&GetMDInt = [
this](
unsigned V) {
10384 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10385 auto &&TargetRegionMetadataEmitter =
10386 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10387 const TargetRegionEntryInfo &EntryInfo,
10388 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10401 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10402 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10403 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10404 GetMDInt(
E.getOrder())};
10407 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10413 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10416 auto &&DeviceGlobalVarMetadataEmitter =
10417 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10419 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10427 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10428 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10431 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10432 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10438 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10439 DeviceGlobalVarMetadataEmitter);
10441 for (
const auto &
E : OrderedEntries) {
10442 assert(
E.first &&
"All ordered entries must exist!");
10443 if (
const auto *CE =
10446 if (!
CE->getID() || !
CE->getAddress()) {
10448 TargetRegionEntryInfo EntryInfo =
E.second;
10449 StringRef FnName = EntryInfo.ParentName;
10450 if (!M.getNamedValue(FnName))
10452 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10455 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10458 }
else if (
const auto *CE =
dyn_cast<
10459 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10461 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10462 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10465 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10466 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10467 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10469 if (!
CE->getAddress()) {
10470 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10474 if (
CE->getVarSize() == 0)
10477 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10478 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10479 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10480 "Declaret target link address is set.");
10481 if (Config.isTargetDevice())
10483 if (!
CE->getAddress()) {
10484 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10496 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10497 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10502 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10503 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10504 Flags,
CE->getLinkage(),
CE->getVarName());
10506 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10507 Flags,
CE->getLinkage());
10518 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10523 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10524 Config.getRequiresFlags());
10527void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10529 unsigned FileID,
unsigned Line,
unsigned Count) {
10531 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10532 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10534 OS <<
"_" <<
Count;
10537void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10539 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10540 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10541 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10542 EntryInfo.Line, NewCount);
10545TargetRegionEntryInfo
10546OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10550 auto FileIDInfo = CallBack();
10554 FileID =
Status->getUniqueID().getFile();
10558 FileID =
hash_value(std::get<0>(FileIDInfo));
10561 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10562 std::get<1>(FileIDInfo));
10565unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10568 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10570 !(Remain & 1); Remain = Remain >> 1)
10576OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10579 << getFlagMemberOffset());
10582void OpenMPIRBuilder::setCorrectMemberOfFlag(
10588 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10590 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10597 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10598 Flags |= MemberOfFlag;
10601Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10602 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10603 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10604 bool IsDeclaration,
bool IsExternallyVisible,
10605 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10606 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10607 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10608 std::function<
Constant *()> GlobalInitializer,
10615 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10616 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10618 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10619 Config.hasRequiresUnifiedSharedMemory())) {
10624 if (!IsExternallyVisible)
10625 OS <<
format(
"_%x", EntryInfo.FileID);
10626 OS <<
"_decl_tgt_ref_ptr";
10629 Value *Ptr = M.getNamedValue(PtrName);
10633 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10638 if (!Config.isTargetDevice()) {
10639 if (GlobalInitializer)
10640 GV->setInitializer(GlobalInitializer());
10645 registerTargetGlobalVariable(
10646 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10647 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10648 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
10657void OpenMPIRBuilder::registerTargetGlobalVariable(
10658 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10659 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10660 bool IsDeclaration,
bool IsExternallyVisible,
10661 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10662 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10663 std::vector<Triple> TargetTriple,
10664 std::function<
Constant *()> GlobalInitializer,
10667 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10668 (TargetTriple.empty() && !Config.isTargetDevice()))
10671 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10676 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10678 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10679 !Config.hasRequiresUnifiedSharedMemory()) {
10680 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10684 if (!IsDeclaration)
10686 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10693 if (Config.isTargetDevice() &&
10697 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10700 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10702 if (!M.getNamedValue(RefName)) {
10704 getOrCreateInternalVariable(Addr->
getType(), RefName);
10706 GvAddrRef->setConstant(
true);
10708 GvAddrRef->setInitializer(Addr);
10709 GeneratedRefs.push_back(GvAddrRef);
10713 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10714 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10716 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10718 if (Config.isTargetDevice()) {
10722 Addr = getAddrOfDeclareTargetVar(
10723 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10724 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10725 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10728 VarSize = M.getDataLayout().getPointerSize();
10732 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10738void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10742 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10747 auto &&GetMDInt = [MN](
unsigned Idx) {
10752 auto &&GetMDString = [MN](
unsigned Idx) {
10754 return V->getString();
10757 switch (GetMDInt(0)) {
10761 case OffloadEntriesInfoManager::OffloadEntryInfo::
10762 OffloadingEntryInfoTargetRegion: {
10763 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10768 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10772 case OffloadEntriesInfoManager::OffloadEntryInfo::
10773 OffloadingEntryInfoDeviceGlobalVar:
10774 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10776 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10786 if (HostFilePath.
empty())
10790 if (std::error_code Err = Buf.getError()) {
10792 "OpenMPIRBuilder: " +
10800 if (std::error_code Err = M.getError()) {
10802 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10806 loadOffloadInfoMetadata(*M.get());
10813bool OffloadEntriesInfoManager::empty()
const {
10814 return OffloadEntriesTargetRegion.empty() &&
10815 OffloadEntriesDeviceGlobalVar.empty();
10818unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10819 const TargetRegionEntryInfo &EntryInfo)
const {
10820 auto It = OffloadEntriesTargetRegionCount.find(
10821 getTargetRegionEntryCountKey(EntryInfo));
10822 if (It == OffloadEntriesTargetRegionCount.end())
10827void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10828 const TargetRegionEntryInfo &EntryInfo) {
10829 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10830 EntryInfo.Count + 1;
10834void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10835 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10836 OffloadEntriesTargetRegion[EntryInfo] =
10837 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10838 OMPTargetRegionEntryTargetRegion);
10839 ++OffloadingEntriesNum;
10842void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10844 OMPTargetRegionEntryKind Flags) {
10845 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10848 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10852 if (OMPBuilder->Config.isTargetDevice()) {
10854 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10857 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10858 Entry.setAddress(Addr);
10860 Entry.setFlags(Flags);
10862 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10863 hasTargetRegionEntryInfo(EntryInfo,
true))
10865 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10866 "Target region entry already registered!");
10867 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10868 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10869 ++OffloadingEntriesNum;
10871 incrementTargetRegionEntryInfoCount(EntryInfo);
10874bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10875 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10878 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10880 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10881 if (It == OffloadEntriesTargetRegion.end()) {
10885 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10890void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10891 const OffloadTargetRegionEntryInfoActTy &Action) {
10893 for (
const auto &It : OffloadEntriesTargetRegion) {
10894 Action(It.first, It.second);
10898void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10899 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10900 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10901 ++OffloadingEntriesNum;
10904void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10907 if (OMPBuilder->Config.isTargetDevice()) {
10909 if (!hasDeviceGlobalVarEntryInfo(VarName))
10911 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10912 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10913 if (Entry.getVarSize() == 0) {
10914 Entry.setVarSize(VarSize);
10919 Entry.setVarSize(VarSize);
10921 Entry.setAddress(Addr);
10923 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10924 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10925 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10926 "Entry not initialized!");
10927 if (Entry.getVarSize() == 0) {
10928 Entry.setVarSize(VarSize);
10933 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10934 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10935 Addr, VarSize, Flags,
Linkage,
10938 OffloadEntriesDeviceGlobalVar.try_emplace(
10939 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10940 ++OffloadingEntriesNum;
10944void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10945 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10947 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10948 Action(
E.getKey(),
E.getValue());
10955void CanonicalLoopInfo::collectControlBlocks(
10962 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10965BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10974void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10986void CanonicalLoopInfo::mapIndVar(
10996 for (
Use &U : OldIV->
uses()) {
11000 if (
User->getParent() == getCond())
11002 if (
User->getParent() == getLatch())
11008 Value *NewIV = Updater(OldIV);
11011 for (
Use *U : ReplacableUses)
11019void CanonicalLoopInfo::assertOK()
const {
11032 "Preheader must terminate with unconditional branch");
11034 "Preheader must jump to header");
11038 "Header must terminate with unconditional branch");
11039 assert(Header->getSingleSuccessor() ==
Cond &&
11040 "Header must jump to exiting block");
11043 assert(
Cond->getSinglePredecessor() == Header &&
11044 "Exiting block only reachable from header");
11047 "Exiting block must terminate with conditional branch");
11049 "Exiting block must have two successors");
11051 "Exiting block's first successor jump to the body");
11053 "Exiting block's second successor must exit the loop");
11057 "Body only reachable from exiting block");
11062 "Latch must terminate with unconditional branch");
11071 "Exit block must terminate with unconditional branch");
11072 assert(
Exit->getSingleSuccessor() == After &&
11073 "Exit block must jump to after block");
11077 "After block only reachable from exit block");
11081 assert(IndVar &&
"Canonical induction variable not found?");
11083 "Induction variable must be an integer");
11085 "Induction variable must be a PHI in the loop header");
11091 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
11098 Value *TripCount = getTripCount();
11099 assert(TripCount &&
"Loop trip count not found?");
11101 "Trip count and induction variable must have the same type");
11105 "Exit condition must be a signed less-than comparison");
11107 "Exit condition must compare the induction variable");
11109 "Exit condition must compare with the trip count");
11113void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return the unique user of this value that cannot be dropped, if there is exactly one such user (that user can have multiple uses of this value).
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
@ Null
Return null pointer.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...