65#define DEBUG_TYPE "openmp-ir-builder"
72 cl::desc(
"Use optimistic attributes describing "
73 "'as-if' properties of runtime calls."),
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
90 if (!IP1.isSet() || !IP2.isSet())
92 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
97 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
98 case OMPScheduleType::UnorderedStaticChunked:
99 case OMPScheduleType::UnorderedStatic:
100 case OMPScheduleType::UnorderedDynamicChunked:
101 case OMPScheduleType::UnorderedGuidedChunked:
102 case OMPScheduleType::UnorderedRuntime:
103 case OMPScheduleType::UnorderedAuto:
104 case OMPScheduleType::UnorderedTrapezoidal:
105 case OMPScheduleType::UnorderedGreedy:
106 case OMPScheduleType::UnorderedBalanced:
107 case OMPScheduleType::UnorderedGuidedIterativeChunked:
108 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
109 case OMPScheduleType::UnorderedSteal:
110 case OMPScheduleType::UnorderedStaticBalancedChunked:
111 case OMPScheduleType::UnorderedGuidedSimd:
112 case OMPScheduleType::UnorderedRuntimeSimd:
113 case OMPScheduleType::OrderedStaticChunked:
114 case OMPScheduleType::OrderedStatic:
115 case OMPScheduleType::OrderedDynamicChunked:
116 case OMPScheduleType::OrderedGuidedChunked:
117 case OMPScheduleType::OrderedRuntime:
118 case OMPScheduleType::OrderedAuto:
119 case OMPScheduleType::OrderdTrapezoidal:
120 case OMPScheduleType::NomergeUnorderedStaticChunked:
121 case OMPScheduleType::NomergeUnorderedStatic:
122 case OMPScheduleType::NomergeUnorderedDynamicChunked:
123 case OMPScheduleType::NomergeUnorderedGuidedChunked:
124 case OMPScheduleType::NomergeUnorderedRuntime:
125 case OMPScheduleType::NomergeUnorderedAuto:
126 case OMPScheduleType::NomergeUnorderedTrapezoidal:
127 case OMPScheduleType::NomergeUnorderedGreedy:
128 case OMPScheduleType::NomergeUnorderedBalanced:
129 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
130 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
131 case OMPScheduleType::NomergeUnorderedSteal:
132 case OMPScheduleType::NomergeOrderedStaticChunked:
133 case OMPScheduleType::NomergeOrderedStatic:
134 case OMPScheduleType::NomergeOrderedDynamicChunked:
135 case OMPScheduleType::NomergeOrderedGuidedChunked:
136 case OMPScheduleType::NomergeOrderedRuntime:
137 case OMPScheduleType::NomergeOrderedAuto:
138 case OMPScheduleType::NomergeOrderedTrapezoidal:
146 SchedType & OMPScheduleType::MonotonicityMask;
147 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
159 Builder.restoreIP(IP);
169 Kernel->getFnAttribute(
"target-features").getValueAsString();
170 if (Features.
count(
"+wavefrontsize64"))
185 bool HasSimdModifier) {
187 switch (ClauseKind) {
188 case OMP_SCHEDULE_Default:
189 case OMP_SCHEDULE_Static:
190 return HasChunks ? OMPScheduleType::BaseStaticChunked
191 : OMPScheduleType::BaseStatic;
192 case OMP_SCHEDULE_Dynamic:
193 return OMPScheduleType::BaseDynamicChunked;
194 case OMP_SCHEDULE_Guided:
195 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
196 : OMPScheduleType::BaseGuidedChunked;
197 case OMP_SCHEDULE_Auto:
199 case OMP_SCHEDULE_Runtime:
200 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
201 : OMPScheduleType::BaseRuntime;
209 bool HasOrderedClause) {
210 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
211 OMPScheduleType::None &&
212 "Must not have ordering nor monotonicity flags already set");
215 ? OMPScheduleType::ModifierOrdered
216 : OMPScheduleType::ModifierUnordered;
217 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
220 if (OrderingScheduleType ==
221 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
222 return OMPScheduleType::OrderedGuidedChunked;
223 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
224 OMPScheduleType::ModifierOrdered))
225 return OMPScheduleType::OrderedRuntime;
227 return OrderingScheduleType;
233 bool HasSimdModifier,
bool HasMonotonic,
234 bool HasNonmonotonic,
bool HasOrderedClause) {
235 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
236 OMPScheduleType::None &&
237 "Must not have monotonicity flags already set");
238 assert((!HasMonotonic || !HasNonmonotonic) &&
239 "Monotonic and Nonmonotonic are contradicting each other");
242 return ScheduleType | OMPScheduleType::ModifierMonotonic;
243 }
else if (HasNonmonotonic) {
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
254 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
255 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
261 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
269 bool HasSimdModifier,
bool HasMonotonicModifier,
270 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
276 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
277 HasNonmonotonicModifier, HasOrderedClause);
292 assert(!Br->isConditional() &&
293 "BB's terminator must be an unconditional branch (or degenerate)");
296 Br->setSuccessor(0,
Target);
301 NewBr->setDebugLoc(
DL);
307 "Target BB must not have PHI nodes");
327 NewBr->setDebugLoc(
DL);
335 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
339 Builder.SetInsertPoint(Old);
343 Builder.SetCurrentDebugLocation(
DebugLoc);
352 spliceBB(IP, New, CreateBranch,
DL);
353 New->replaceSuccessorsPhiUsesWith(Old, New);
362 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
364 Builder.SetInsertPoint(Builder.GetInsertBlock());
367 Builder.SetCurrentDebugLocation(
DebugLoc);
376 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
378 Builder.SetInsertPoint(Builder.GetInsertBlock());
381 Builder.SetCurrentDebugLocation(
DebugLoc);
388 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
395 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
397 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
398 const Twine &Name =
"",
bool AsPtr =
true) {
399 Builder.restoreIP(OuterAllocaIP);
402 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
406 FakeVal = FakeValAddr;
409 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
414 Builder.restoreIP(InnerAllocaIP);
418 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
/// Bit flags recording which OpenMP 'requires' clauses have been seen.
/// Values are powers of two so they can be OR-ed into RequiresFlags.
enum OpenMPOffloadingRequiresDirFlags {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires directive present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
};
452OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
453 : RequiresFlags(OMP_REQ_UNDEFINED) {}
455OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
456 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
457 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
458 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
459 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
460 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
461 RequiresFlags(OMP_REQ_UNDEFINED) {
462 if (HasRequiresReverseOffload)
463 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
464 if (HasRequiresUnifiedAddress)
465 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
466 if (HasRequiresUnifiedSharedMemory)
467 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
468 if (HasRequiresDynamicAllocators)
469 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
472bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
473 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
476bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
477 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
480bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
481 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
484bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
485 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
488int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
489 return hasRequiresFlags() ? RequiresFlags
490 :
static_cast<int64_t
>(OMP_REQ_NONE);
493void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
495 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
497 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
500void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
502 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
504 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
514void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
516 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
518 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
525void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
529 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
531 constexpr size_t MaxDim = 3;
534 Value *HasNoWaitFlag = Builder.getInt64(KernelArgs.HasNoWait);
536 Value *DynCGroupMemFallbackFlag =
537 Builder.getInt64(
static_cast<uint64_t>(KernelArgs.DynCGroupMemFallback));
538 DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
539 Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
541 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
544 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
545 Value *NumThreads3D =
546 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
548 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
550 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
552 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
554 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
556 ArgsVector = {Version,
558 KernelArgs.RTArgs.BasePointersArray,
559 KernelArgs.RTArgs.PointersArray,
560 KernelArgs.RTArgs.SizesArray,
561 KernelArgs.RTArgs.MapTypesArray,
562 KernelArgs.RTArgs.MapNamesArray,
563 KernelArgs.RTArgs.MappersArray,
564 KernelArgs.NumIterations,
568 KernelArgs.DynCGroupMem};
576 auto FnAttrs =
Attrs.getFnAttrs();
577 auto RetAttrs =
Attrs.getRetAttrs();
579 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
584 bool Param =
true) ->
void {
585 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
586 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
587 if (HasSignExt || HasZeroExt) {
588 assert(AS.getNumAttributes() == 1 &&
589 "Currently not handling extension attr combined with others.");
591 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
594 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
601#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
602#include "llvm/Frontend/OpenMP/OMPKinds.def"
606#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
608 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
609 addAttrSet(RetAttrs, RetAttrSet, false); \
610 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
611 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
612 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
614#include "llvm/Frontend/OpenMP/OMPKinds.def"
628#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
630 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
632 Fn = M.getFunction(Str); \
634#include "llvm/Frontend/OpenMP/OMPKinds.def"
640#define OMP_RTL(Enum, Str, ...) \
642 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
644#include "llvm/Frontend/OpenMP/OMPKinds.def"
648 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
658 LLVMContext::MD_callback,
660 2, {-1, -1},
true)}));
666 addAttributes(FnID, *Fn);
673 assert(Fn &&
"Failed to create OpenMP runtime function");
681 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
685void OpenMPIRBuilder::initialize() { initializeTypes(M); }
696 for (
auto Inst =
Block->getReverseIterator()->begin();
697 Inst !=
Block->getReverseIterator()->end();) {
710void OpenMPIRBuilder::finalize(
Function *Fn) {
714 for (OutlineInfo &OI : OutlineInfos) {
717 if (Fn && OI.getFunction() != Fn) {
722 ParallelRegionBlockSet.
clear();
724 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
734 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
743 ".omp_par", ArgsInZeroAddressSpace);
747 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
748 assert(Extractor.isEligible() &&
749 "Expected OpenMP outlining to be possible!");
751 for (
auto *V : OI.ExcludeArgsFromAggregate)
752 Extractor.excludeArgFromAggregate(V);
754 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
758 if (TargetCpuAttr.isStringAttribute())
761 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
762 if (TargetFeaturesAttr.isStringAttribute())
763 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
766 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
768 "OpenMP outlined functions should not return a value!");
773 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
780 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
787 "Expected instructions to add in the outlined region entry");
789 End = ArtificialEntry.
rend();
794 if (
I.isTerminator()) {
796 if (OI.EntryBB->getTerminator())
797 OI.EntryBB->getTerminator()->adoptDbgRecords(
798 &ArtificialEntry,
I.getIterator(),
false);
802 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
805 OI.EntryBB->moveBefore(&ArtificialEntry);
812 if (OI.PostOutlineCB)
813 OI.PostOutlineCB(*OutlinedFn);
817 OutlineInfos = std::move(DeferredOutlines);
838 for (
Function *
F : ConstantAllocaRaiseCandidates)
841 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
842 [](EmitMetadataErrorKind Kind,
843 const TargetRegionEntryInfo &EntryInfo) ->
void {
844 errs() <<
"Error of kind: " << Kind
845 <<
" when emitting offload entries and metadata during "
846 "OMPIRBuilder finalization \n";
849 if (!OffloadInfoManager.empty())
850 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
852 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
853 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
854 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
855 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
861bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
863OpenMPIRBuilder::~OpenMPIRBuilder() {
864 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
872 ConstantInt::get(I32Ty,
Value), Name);
884 UsedArray.
resize(List.size());
885 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
889 if (UsedArray.
empty())
896 GV->setSection(
"llvm.metadata");
900OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
902 auto *Int8Ty = Builder.getInt8Ty();
905 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
913 unsigned Reserve2Flags) {
915 LocFlags |= OMP_IDENT_FLAG_KMPC;
918 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
923 ConstantInt::get(
Int32, Reserve2Flags),
924 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
926 size_t SrcLocStrArgIdx = 4;
927 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
931 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
938 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
939 if (
GV.getInitializer() == Initializer)
944 M, OpenMPIRBuilder::Ident,
947 M.getDataLayout().getDefaultGlobalsAddressSpace());
959 SrcLocStrSize = LocStr.
size();
960 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
968 if (
GV.isConstant() &&
GV.hasInitializer() &&
969 GV.getInitializer() == Initializer)
972 SrcLocStr = Builder.CreateGlobalString(
973 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
981 unsigned Line,
unsigned Column,
987 Buffer.
append(FunctionName);
989 Buffer.
append(std::to_string(Line));
991 Buffer.
append(std::to_string(Column));
994 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
998OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
999 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1000 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1008 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1010 if (
DIFile *DIF = DIL->getFile())
1011 if (std::optional<StringRef> Source = DIF->getSource())
1016 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1017 DIL->getColumn(), SrcLocStrSize);
1020Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1022 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1023 Loc.IP.getBlock()->getParent());
1026Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1027 return Builder.CreateCall(
1028 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1029 "omp_global_thread_num");
1032OpenMPIRBuilder::InsertPointOrErrorTy
1033OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1034 bool ForceSimpleCall,
bool CheckCancelFlag) {
1035 if (!updateToLocation(
Loc))
1044 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1047 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1050 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1053 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1056 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1061 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1063 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1064 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1069 bool UseCancelBarrier =
1070 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1073 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1074 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1075 : OMPRTL___kmpc_barrier),
1078 if (UseCancelBarrier && CheckCancelFlag)
1079 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1082 return Builder.saveIP();
1085OpenMPIRBuilder::InsertPointOrErrorTy
1086OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1088 omp::Directive CanceledDirective) {
1089 if (!updateToLocation(
Loc))
1093 auto *UI = Builder.CreateUnreachable();
1098 Builder.SetInsertPoint(ThenTI);
1100 Value *CancelKind =
nullptr;
1101 switch (CanceledDirective) {
1102#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1103 case DirectiveEnum: \
1104 CancelKind = Builder.getInt32(Value); \
1106#include "llvm/Frontend/OpenMP/OMPKinds.def"
1112 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1113 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1114 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1116 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1117 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1118 if (CanceledDirective == OMPD_parallel) {
1120 Builder.restoreIP(IP);
1121 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1122 omp::Directive::OMPD_unknown,
1131 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1135 Builder.SetInsertPoint(UI->getParent());
1136 UI->eraseFromParent();
1138 return Builder.saveIP();
1141OpenMPIRBuilder::InsertPointOrErrorTy
1142OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1143 omp::Directive CanceledDirective) {
1144 if (!updateToLocation(
Loc))
1148 auto *UI = Builder.CreateUnreachable();
1149 Builder.SetInsertPoint(UI);
1151 Value *CancelKind =
nullptr;
1152 switch (CanceledDirective) {
1153#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1154 case DirectiveEnum: \
1155 CancelKind = Builder.getInt32(Value); \
1157#include "llvm/Frontend/OpenMP/OMPKinds.def"
1163 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1164 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1165 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1167 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1168 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1169 if (CanceledDirective == OMPD_parallel) {
1171 Builder.restoreIP(IP);
1172 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1173 omp::Directive::OMPD_unknown,
1182 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1186 Builder.SetInsertPoint(UI->getParent());
1187 UI->eraseFromParent();
1189 return Builder.saveIP();
1192OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1193 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1196 if (!updateToLocation(
Loc))
1199 Builder.restoreIP(AllocaIP);
1200 auto *KernelArgsPtr =
1201 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1202 updateToLocation(
Loc);
1206 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1207 Builder.CreateAlignedStore(
1209 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1213 NumThreads, HostPtr, KernelArgsPtr};
1215 Return = Builder.CreateCall(
1216 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1219 return Builder.saveIP();
1222OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1223 const LocationDescription &
Loc,
Value *OutlinedFnID,
1224 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1225 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1227 if (!updateToLocation(
Loc))
1240 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1244 Value *Return =
nullptr;
1248 getKernelArgsVector(Args, Builder, ArgsVector);
1263 Builder.restoreIP(emitTargetKernel(
1264 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1265 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1272 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1274 auto CurFn = Builder.GetInsertBlock()->getParent();
1275 emitBlock(OffloadFailedBlock, CurFn);
1276 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1278 return AfterIP.takeError();
1279 Builder.restoreIP(*AfterIP);
1280 emitBranch(OffloadContBlock);
1281 emitBlock(OffloadContBlock, CurFn,
true);
1282 return Builder.saveIP();
1285Error OpenMPIRBuilder::emitCancelationCheckImpl(
1286 Value *CancelFlag, omp::Directive CanceledDirective,
1287 FinalizeCallbackTy ExitCB) {
1288 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1289 "Unexpected cancellation!");
1294 if (Builder.GetInsertPoint() == BB->
end()) {
1300 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1302 Builder.SetInsertPoint(BB);
1308 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1309 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1314 Builder.SetInsertPoint(CancellationBlock);
1316 if (
Error Err = ExitCB(Builder.saveIP()))
1318 auto &FI = FinalizationStack.back();
1319 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1323 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1342 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1345 "Expected at least tid and bounded tid as arguments");
1346 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1349 assert(CI &&
"Expected call instruction to outlined function");
1350 CI->
getParent()->setName(
"omp_parallel");
1352 Builder.SetInsertPoint(CI);
1353 Type *PtrTy = OMPIRBuilder->VoidPtr;
1357 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1361 Value *Args = ArgsAlloca;
1365 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1366 Builder.restoreIP(CurrentIP);
1369 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1371 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1373 Builder.CreateStore(V, StoreAddress);
1377 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1378 : Builder.getInt32(1);
1381 Value *Parallel51CallArgs[] = {
1385 NumThreads ? NumThreads : Builder.getInt32(-1),
1386 Builder.getInt32(-1),
1390 Builder.getInt64(NumCapturedVars)};
1393 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1395 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1398 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1401 Builder.SetInsertPoint(PrivTID);
1403 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1410 I->eraseFromParent();
1427 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1430 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1433 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1441 F->addMetadata(LLVMContext::MD_callback,
1450 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1453 "Expected at least tid and bounded tid as arguments");
1454 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1457 CI->
getParent()->setName(
"omp_parallel");
1458 Builder.SetInsertPoint(CI);
1461 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1465 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1467 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1474 auto PtrTy = OMPIRBuilder->VoidPtr;
1475 if (IfCondition && NumCapturedVars == 0) {
1480 Builder.CreateCall(RTLFn, RealArgs);
1483 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1486 Builder.SetInsertPoint(PrivTID);
1488 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1495 I->eraseFromParent();
1499OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1500 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1501 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1502 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1503 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1506 if (!updateToLocation(
Loc))
1510 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1511 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1512 Value *ThreadID = getOrCreateThreadID(Ident);
1518 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1522 if (NumThreads && !Config.isTargetDevice()) {
1525 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1527 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1530 if (ProcBind != OMP_PROC_BIND_default) {
1534 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1536 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1539 BasicBlock *InsertBB = Builder.GetInsertBlock();
1544 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1552 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1553 Builder.restoreIP(NewOuter);
1554 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1556 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
1559 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1562 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1566 PointerType ::get(M.getContext(), 0),
1567 "zero.addr.ascast");
1588 auto FiniCBWrapper = [&](InsertPointTy IP) {
1593 Builder.restoreIP(IP);
1595 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1599 "Unexpected insertion point for finalization call!");
1603 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1608 InsertPointTy InnerAllocaIP = Builder.saveIP();
1611 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1615 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1617 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1635 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1638 assert(BodyGenCB &&
"Expected body generation callback!");
1639 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1640 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1643 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1646 if (Config.isTargetDevice()) {
1648 OI.PostOutlineCB = [=, ToBeDeletedVec =
1649 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1651 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1652 ThreadID, ToBeDeletedVec);
1656 OI.PostOutlineCB = [=, ToBeDeletedVec =
1657 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1659 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1663 OI.OuterAllocaBB = OuterAllocaBlock;
1664 OI.EntryBB = PRegEntryBB;
1665 OI.ExitBB = PRegExitBB;
1669 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1680 ".omp_par", ArgsInZeroAddressSpace);
1685 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1687 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1692 return GV->getValueType() == OpenMPIRBuilder::Ident;
1697 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1700 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1703 if (&V == TIDAddr || &V == ZeroAddr) {
1704 OI.ExcludeArgsFromAggregate.push_back(&V);
1709 for (
Use &U : V.uses())
1711 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1721 if (!V.getType()->isPointerTy()) {
1725 Builder.restoreIP(OuterAllocaIP);
1727 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1731 Builder.SetInsertPoint(InsertBB,
1733 Builder.CreateStore(&V,
Ptr);
1736 Builder.restoreIP(InnerAllocaIP);
1737 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1740 Value *ReplacementValue =
nullptr;
1743 ReplacementValue = PrivTID;
1745 InsertPointOrErrorTy AfterIP =
1746 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1748 return AfterIP.takeError();
1749 Builder.restoreIP(*AfterIP);
1751 InnerAllocaIP.getBlock(),
1752 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1754 assert(ReplacementValue &&
1755 "Expected copy/create callback to set replacement value!");
1756 if (ReplacementValue == &V)
1761 UPtr->set(ReplacementValue);
1786 for (
Value *Output : Outputs)
1789 assert(Outputs.empty() &&
1790 "OpenMP outlining should not produce live-out values!");
1792 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1794 for (
auto *BB : Blocks)
1801 auto FiniInfo = FinalizationStack.pop_back_val();
1803 assert(FiniInfo.DK == OMPD_parallel &&
1804 "Unexpected finalization stack state!");
1808 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1809 if (
Error Err = FiniCB(PreFiniIP))
1813 addOutlineInfo(std::move(OI));
1815 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1816 UI->eraseFromParent();
1821void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1824 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1825 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1827 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1830void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1831 if (!updateToLocation(
Loc))
1836void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1840 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1841 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1842 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1845 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1849void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1850 if (!updateToLocation(
Loc))
1852 emitTaskwaitImpl(
Loc);
1855void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1858 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1859 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1861 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1863 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1867void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1868 if (!updateToLocation(
Loc))
1870 emitTaskyieldImpl(
Loc);
// Fragment of a helper that materializes a kmp_depend_info array for a task's
// dependence clauses: it allocas ".dep.arr.addr" in the function entry block,
// then for each dependency stores the base address (ptr-to-int of DepVal),
// the store size of DepValueType, and the dependence kind into the
// BaseAddr/Len/Flags fields of the runtime's DependInfo struct.
// NOTE(review): the function's name/signature line and several interior lines
// are missing from this extraction (numbering jumps 1879→1882→1902…);
// the name used here is inferred — confirm against upstream.
1879 OpenMPIRBuilder &OMPBuilder,
1882 if (Dependencies.
empty())
1902 Type *DependInfo = OMPBuilder.DependInfo;
1903 Module &M = OMPBuilder.M;
1905 Value *DepArray =
nullptr;
1906 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1907 Builder.SetInsertPoint(
1908 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1911 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1913 Builder.restoreIP(OldIP);
1915 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1917 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1919 Value *Addr = Builder.CreateStructGEP(
1921 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1922 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1923 Builder.CreateStore(DepValPtr, Addr);
1926 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1927 Builder.CreateStore(
1928 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1931 Value *Flags = Builder.CreateStructGEP(
1933 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1934 Builder.CreateStore(
1935 ConstantInt::get(Builder.getInt8Ty(),
1936 static_cast<unsigned int>(Dep.DepKind)),
// Generates a 'task' region: splits off alloca/body/exit blocks, runs
// BodyGenCB, and registers a PostOutlineCB that replaces the outlined stub
// call with __kmpc_omp_task_alloc + either __kmpc_omp_task(_with_deps) or, on
// the if-clause else path, __kmpc_omp_wait_deps and the serialized
// __kmpc_omp_task_begin_if0 / direct call / __kmpc_omp_task_complete_if0
// sequence. Flags visible here: bit 0 = Tied, 2 = Final (via select),
// 4 = Mergeable-related, 32 = priority-related — exact flag semantics should
// be confirmed against kmp.h since surrounding lines are missing.
// NOTE(review): this chunk is a lossy extraction — many interior lines are
// absent (numbering jumps, e.g. 1944→1948, 2086→2104); do not treat this
// text as compilable. Verify against upstream OpenMPIRBuilder.cpp.
1942OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1943 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1944 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1948 if (!updateToLocation(
Loc))
1949 return InsertPointTy();
1952 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1953 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1970 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1971 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1973 splitBB(Builder,
true,
"task.alloca");
1975 InsertPointTy TaskAllocaIP =
1976 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1977 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1978 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1982 OI.EntryBB = TaskAllocaBB;
1983 OI.OuterAllocaBB = AllocaIP.getBlock();
1984 OI.ExitBB = TaskExitBB;
1989 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1991 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1992 Mergeable, Priority, EventHandle, TaskAllocaBB,
1993 ToBeDeleted](
Function &OutlinedFn)
mutable {
1996 "there must be a single user for the outlined function");
2001 bool HasShareds = StaleCI->
arg_size() > 1;
2002 Builder.SetInsertPoint(StaleCI);
2007 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2011 Value *ThreadID = getOrCreateThreadID(Ident);
2023 Value *Flags = Builder.getInt32(Tied);
2026 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2027 Flags = Builder.CreateOr(FinalFlag, Flags);
2031 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2033 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2039 Value *TaskSize = Builder.getInt64(
2040 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2045 Value *SharedsSize = Builder.getInt64(0);
2049 assert(ArgStructAlloca &&
2050 "Unable to find the alloca instruction corresponding to arguments "
2051 "for extracted function");
2054 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2055 "arguments for extracted function");
2057 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2062 CallInst *TaskData = Builder.CreateCall(
2063 TaskAllocFn, {Ident, ThreadID, Flags,
2064 TaskSize, SharedsSize,
2071 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2072 OMPRTL___kmpc_task_allow_completion_event);
2074 Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2076 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2077 Builder.getPtrTy(0));
2078 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2079 Builder.CreateStore(EventVal, EventHandleAddr);
2085 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2086 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2104 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2107 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2108 Value *PriorityData = Builder.CreateInBoundsGEP(
2109 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2112 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2114 Builder.CreateStore(Priority, CmplrData);
2139 splitBB(Builder,
true,
"if.end");
2141 Builder.GetInsertPoint()->
getParent()->getTerminator();
2142 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2143 Builder.SetInsertPoint(IfTerminator);
2146 Builder.SetInsertPoint(ElseTI);
2148 if (Dependencies.size()) {
2150 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2153 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2154 ConstantInt::get(Builder.getInt32Ty(), 0),
2158 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2160 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2161 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2164 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2166 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2168 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2169 Builder.SetInsertPoint(ThenTI);
2172 if (Dependencies.size()) {
2174 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2177 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2178 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2183 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2184 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
2189 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2191 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2193 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2197 I->eraseFromParent();
2200 addOutlineInfo(std::move(OI));
2201 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2203 return Builder.saveIP();
// Generates a 'taskgroup' region: calls __kmpc_taskgroup, runs BodyGenCB,
// then calls __kmpc_end_taskgroup in the split-off exit block and returns
// the insert point after the region.
// NOTE(review): extraction gaps — error-propagation lines after BodyGenCB
// appear to be missing; verify against upstream.
2206OpenMPIRBuilder::InsertPointOrErrorTy
2207OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2208 InsertPointTy AllocaIP,
2209 BodyGenCallbackTy BodyGenCB) {
2210 if (!updateToLocation(
Loc))
2211 return InsertPointTy();
2214 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2215 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2216 Value *ThreadID = getOrCreateThreadID(Ident);
2220 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2221 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2223 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2224 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2227 Builder.SetInsertPoint(TaskgroupExitBB);
2230 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2231 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2233 return Builder.saveIP();
// Lowers 'sections' as a canonical static workshare loop over the section
// callbacks: the loop body switches on the induction variable, one case per
// SectionCB; applyStaticWorkshareLoop(ForStaticLoop) schedules it and a
// barrier is implied unless IsNowait. A FiniCBWrapper records dummy
// cancellation branches that are fixed up at the end.
// NOTE(review): extraction gaps — switch creation, cancellation wiring and
// several declarations are missing (numbering jumps 2248→2257, 2333→2337);
// verify against upstream before relying on this text.
2236OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2237 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2239 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2242 if (!updateToLocation(
Loc))
2248 auto FiniCBWrapper = [&](InsertPointTy IP) {
2257 CancellationBranches.
push_back(DummyBranch);
2261 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
2279 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2280 Builder.restoreIP(CodeGenIP);
2282 splitBBWithSuffix(Builder,
false,
".sections.after");
2286 unsigned CaseNumber = 0;
2287 for (
auto SectionCB : SectionCBs) {
2289 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2290 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2291 Builder.SetInsertPoint(CaseBB);
2293 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
2305 Value *LB = ConstantInt::get(I32Ty, 0);
2306 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2307 Value *
ST = ConstantInt::get(I32Ty, 1);
2309 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2313 InsertPointOrErrorTy WsloopIP =
2314 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2315 WorksharingLoopType::ForStaticLoop, !IsNowait);
2317 return WsloopIP.takeError();
2318 InsertPointTy AfterIP = *WsloopIP;
2321 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2324 auto FiniInfo = FinalizationStack.pop_back_val();
2325 assert(FiniInfo.DK == OMPD_sections &&
2326 "Unexpected finalization stack state!");
2327 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2328 Builder.restoreIP(AfterIP);
2330 splitBBWithSuffix(Builder,
true,
"sections.fini");
2331 if (
Error Err = CB(Builder.saveIP()))
2333 AfterIP = {FiniBB, FiniBB->
begin()};
2337 for (
BranchInst *DummyBranch : CancellationBranches) {
// Generates a single 'section' region by delegating to EmitOMPInlinedRegion
// with a finalization wrapper that redirects the finalization insert point
// (presumably so cancellation branches to the section's exit — TODO confirm,
// interior lines are missing from this extraction).
2345OpenMPIRBuilder::InsertPointOrErrorTy
2346OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2347 BodyGenCallbackTy BodyGenCB,
2348 FinalizeCallbackTy FiniCB) {
2349 if (!updateToLocation(
Loc))
2352 auto FiniCBWrapper = [&](InsertPointTy IP) {
2363 Builder.restoreIP(IP);
2364 auto *CaseBB =
Loc.IP.getBlock();
2368 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2372 Directive OMPD = Directive::OMPD_sections;
2375 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
// Fragment: returns an InsertPointTy built from an instruction's parent block
// and an iterator IT. The enclosing helper's signature is missing from this
// extraction — name inferred; confirm against upstream.
2383 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
// Returns the hardware thread id within the block via the device runtime
// call __kmpc_get_hardware_thread_id_in_block.
// NOTE(review): the call's argument list / closing line is missing here.
2386Value *OpenMPIRBuilder::getGPUThreadID() {
2387 return Builder.CreateCall(
2388 getOrCreateRuntimeFunction(M,
2389 OMPRTL___kmpc_get_hardware_thread_id_in_block),
// Returns the device warp size via the runtime call __kmpc_get_warp_size.
2393Value *OpenMPIRBuilder::getGPUWarpSize() {
2394 return Builder.CreateCall(
2395 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
// Computes the warp id as thread-id >> log2(warp size), using the configured
// grid value GV_Warp_Size.
2398Value *OpenMPIRBuilder::getNVPTXWarpID() {
2399 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2400 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
// Computes the lane id as thread-id & (warp size - 1), where the mask is
// built from log2(GV_Warp_Size); asserts the shift stays within 32 bits.
// NOTE(review): the call's trailing arguments/closing line are missing here.
2403Value *OpenMPIRBuilder::getNVPTXLaneID() {
2404 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2405 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2406 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2407 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
// Casts From to ToType: identity if types match, bitcast if store sizes
// match, int-cast (signed) for integers, otherwise round-trips the value
// through a temporary alloca (created at AllocaIP) via store + typed load.
// NOTE(review): extraction gaps — the early "return From" and the integer
// type check guarding the IntCast are among the missing lines; verify
// against upstream.
2411Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2414 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2415 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2416 assert(FromSize > 0 &&
"From size must be greater than zero");
2417 assert(ToSize > 0 &&
"To size must be greater than zero");
2418 if (FromType == ToType)
2420 if (FromSize == ToSize)
2421 return Builder.CreateBitCast(From, ToType);
2423 return Builder.CreateIntCast(From, ToType,
true);
2424 InsertPointTy SaveIP = Builder.saveIP();
2425 Builder.restoreIP(AllocaIP);
2426 Value *CastItem = Builder.CreateAlloca(ToType);
2427 Builder.restoreIP(SaveIP);
2429 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2430 CastItem, Builder.getPtrTy(0));
2431 Builder.CreateStore(From, ValCastItem);
2432 return Builder.CreateLoad(ToType, CastItem);
// Emits a cross-lane shuffle of Element: casts the element to a 32- or
// 64-bit integer (CastTy, chosen by store size), calls
// __kmpc_shuffle_int32/int64 with {value, offset, warp size}, and casts the
// result back. Asserts element store size <= 8 bytes.
2435Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2439 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2440 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2444 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2446 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2447 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2448 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2449 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2450 Value *WarpSizeCast =
2451 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2452 Value *ShuffleCall =
2453 Builder.CreateCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2454 return castValueToType(AllocaIP, ShuffleCall, CastTy);
// Copies an element from SrcAddr to DstAddr across lanes by decomposing it
// into integer chunks (8, then 4, 2, 1 bytes): each chunk is shuffled via
// createRuntimeShuffleFunction and stored; when an element spans multiple
// chunks of the current size a precond/then/exit loop with PHIs walks the
// pointers until fewer than IntSize bytes remain.
// NOTE(review): extraction gaps — declarations of IntType/Ptr/ExitBB and the
// PHI incoming wiring are among the missing lines; verify against upstream.
2457void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2460 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2471 Type *IndexTy = Builder.getIndexTy(
2472 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2473 Value *ElemPtr = DstAddr;
2475 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2479 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2480 Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2482 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2483 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2484 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
2487 if ((
Size / IntSize) > 1) {
2488 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2489 SrcAddrGEP, Builder.getPtrTy());
2494 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2495 emitBlock(PreCondBB, CurFunc);
2497 Builder.CreatePHI(
Ptr->getType(), 2);
2500 Builder.CreatePHI(ElemPtr->
getType(), 2);
2504 Value *PtrDiff = Builder.CreatePtrDiff(
2505 Builder.getInt8Ty(), PtrEnd,
2506 Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2507 Builder.CreateCondBr(
2508 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2510 emitBlock(ThenBB, CurFunc);
2511 Value *Res = createRuntimeShuffleFunction(
2513 Builder.CreateAlignedLoad(
2514 IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2516 Builder.CreateAlignedStore(Res, ElemPtr,
2517 M.getDataLayout().getPrefTypeAlign(ElemType));
2519 Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2520 Value *LocalElemPtr =
2521 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2524 emitBranch(PreCondBB);
2525 emitBlock(ExitBB, CurFunc);
2527 Value *Res = createRuntimeShuffleFunction(
2528 AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2531 Res = Builder.CreateTrunc(Res, ElemType);
2532 Builder.CreateStore(Res, ElemPtr);
2533 Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2535 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// Copies each element of a reduction list from SrcBase to DestBase.
// For RemoteLaneToThread the destination is a fresh ".omp.reduction.element"
// alloca, the element is pulled from a remote lane via shuffleAndStore, and
// the dest list pointer is updated; for ThreadCopy the existing dest pointer
// is loaded and the element copied per its EvaluationKind (scalar load/store,
// complex real+imag field-wise, aggregate memcpy).
// NOTE(review): extraction gaps — switch headers, break statements and some
// declarations are missing; verify against upstream.
2541void OpenMPIRBuilder::emitReductionListCopy(
2542 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2544 CopyOptionsTy CopyOptions) {
2545 Type *IndexTy = Builder.getIndexTy(
2546 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2547 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2551 for (
auto En :
enumerate(ReductionInfos)) {
2552 const ReductionInfo &RI = En.value();
2553 Value *SrcElementAddr =
nullptr;
2554 Value *DestElementAddr =
nullptr;
2555 Value *DestElementPtrAddr =
nullptr;
2557 bool ShuffleInElement =
false;
2560 bool UpdateDestListPtr =
false;
2563 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2564 ReductionArrayTy, SrcBase,
2565 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2566 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2570 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2571 ReductionArrayTy, DestBase,
2572 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2574 case CopyAction::RemoteLaneToThread: {
2575 InsertPointTy CurIP = Builder.saveIP();
2576 Builder.restoreIP(AllocaIP);
2577 AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2578 ".omp.reduction.element");
2580 M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2581 DestElementAddr = DestAlloca;
2583 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2584 DestElementAddr->
getName() +
".ascast");
2585 Builder.restoreIP(CurIP);
2586 ShuffleInElement =
true;
2587 UpdateDestListPtr =
true;
2590 case CopyAction::ThreadCopy: {
2592 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2599 if (ShuffleInElement) {
2600 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2601 RemoteLaneOffset, ReductionArrayTy);
2603 switch (RI.EvaluationKind) {
2604 case EvalKind::Scalar: {
2605 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2607 Builder.CreateStore(Elem, DestElementAddr);
2610 case EvalKind::Complex: {
2611 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2612 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2613 Value *SrcReal = Builder.CreateLoad(
2614 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2615 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2616 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2617 Value *SrcImg = Builder.CreateLoad(
2618 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2620 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2621 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2622 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2623 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2624 Builder.CreateStore(SrcReal, DestRealPtr);
2625 Builder.CreateStore(SrcImg, DestImgPtr);
2628 case EvalKind::Aggregate: {
2629 Value *SizeVal = Builder.getInt64(
2630 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2631 Builder.CreateMemCpy(
2632 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2633 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2645 if (UpdateDestListPtr) {
2646 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2647 DestElementAddr, Builder.getPtrTy(),
2648 DestElementAddr->
getName() +
".ascast");
2649 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
// Builds "_omp_reduction_inter_warp_copy_func": copies reduction values
// across warps through the shared-memory global
// "__openmp_nvptx_data_transfer_temporary_storage". For each reduction
// element (in up-to-4-byte chunks, looping via .cnt.addr when larger) the
// warp master stores its chunk into the transfer medium, all threads
// barrier, then the first NumWarps threads read chunks back into their
// reduce list; a second barrier closes each round.
// NOTE(review): the function's signature lines and many interior lines
// (block creation, barrier flag arguments, TransferMedium creation) are
// missing from this extraction — name inferred; verify against upstream.
2656 AttributeList FuncAttrs) {
2657 InsertPointTy SavedIP = Builder.saveIP();
2660 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2664 "_omp_reduction_inter_warp_copy_func", &M);
2669 Builder.SetInsertPoint(EntryBB);
2687 "__openmp_nvptx_data_transfer_temporary_storage";
2688 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2689 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2691 if (!TransferMedium) {
2700 Value *GPUThreadID = getGPUThreadID();
2702 Value *LaneID = getNVPTXLaneID();
2704 Value *WarpID = getNVPTXWarpID();
2706 InsertPointTy AllocaIP =
2707 InsertPointTy(Builder.GetInsertBlock(),
2708 Builder.GetInsertBlock()->getFirstInsertionPt());
2711 Builder.restoreIP(AllocaIP);
2712 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2713 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2715 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2716 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2717 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2718 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2719 NumWarpsAlloca, Builder.getPtrTy(0),
2720 NumWarpsAlloca->
getName() +
".ascast");
2721 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2722 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2724 InsertPointTy CodeGenIP =
2726 Builder.restoreIP(CodeGenIP);
2729 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2731 for (
auto En :
enumerate(ReductionInfos)) {
2736 const ReductionInfo &RI = En.value();
2737 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
2738 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2741 unsigned NumIters = RealTySize / TySize;
2744 Value *Cnt =
nullptr;
2745 Value *CntAddr =
nullptr;
2749 CodeGenIP = Builder.saveIP();
2750 Builder.restoreIP(AllocaIP);
2752 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2754 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2755 CntAddr->
getName() +
".ascast");
2756 Builder.restoreIP(CodeGenIP);
2763 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2764 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2766 Value *
Cmp = Builder.CreateICmpULT(
2767 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2768 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2769 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2773 InsertPointOrErrorTy BarrierIP1 =
2774 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2775 omp::Directive::OMPD_unknown,
2779 return BarrierIP1.takeError();
2785 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2786 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2787 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2790 auto *RedListArrayTy =
2792 Type *IndexTy = Builder.getIndexTy(
2793 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2795 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2796 {ConstantInt::get(IndexTy, 0),
2797 ConstantInt::get(IndexTy, En.index())});
2799 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2801 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2805 Value *MediumPtr = Builder.CreateInBoundsGEP(
2806 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2809 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2811 Builder.CreateStore(Elem, MediumPtr,
2813 Builder.CreateBr(MergeBB);
2816 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2817 Builder.CreateBr(MergeBB);
2820 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2821 InsertPointOrErrorTy BarrierIP2 =
2822 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2823 omp::Directive::OMPD_unknown,
2827 return BarrierIP2.takeError();
2834 Value *NumWarpsVal =
2835 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2837 Value *IsActiveThread =
2838 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2839 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2841 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2845 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2846 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2848 Value *TargetElemPtrPtr =
2849 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2850 {ConstantInt::get(IndexTy, 0),
2851 ConstantInt::get(IndexTy, En.index())});
2852 Value *TargetElemPtrVal =
2853 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2854 Value *TargetElemPtr = TargetElemPtrVal;
2857 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2860 Value *SrcMediumValue =
2861 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2862 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2863 Builder.CreateBr(W0MergeBB);
2865 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2866 Builder.CreateBr(W0MergeBB);
2868 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2871 Cnt = Builder.CreateNSWAdd(
2872 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2873 Builder.CreateStore(Cnt, CntAddr,
false);
2875 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2876 emitBranch(PrecondBB);
2877 emitBlock(ExitBB, CurFn);
2879 RealTySize %= TySize;
2883 Builder.CreateRetVoid();
2884 Builder.restoreIP(SavedIP);
// Builds "_omp_reduction_shuffle_and_reduce_func"(reduce_list, lane_id,
// remote_lane_offset, algo_version): copies the remote lane's reduction list
// into a local array via emitReductionListCopy(RemoteLaneToThread), decides
// per the algorithm version (0/1/2 conditions computed below) whether to
// call ReduceFn on {local, remote}, and for algo 1 with lane >= offset
// copies the remote list back over the local one (ThreadCopy).
// NOTE(review): extraction gaps — argument extraction, RedListArrayTy and
// several block declarations are among the missing lines; verify upstream.
2889Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2891 AttributeList FuncAttrs) {
2895 {Builder.getPtrTy(), Builder.getInt16Ty(),
2896 Builder.getInt16Ty(), Builder.getInt16Ty()},
2900 "_omp_reduction_shuffle_and_reduce_func", &M);
2910 Builder.SetInsertPoint(EntryBB);
2921 Type *ReduceListArgType = ReduceListArg->
getType();
2923 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
2924 Value *ReduceListAlloca = Builder.CreateAlloca(
2925 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2926 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2927 LaneIDArg->
getName() +
".addr");
2928 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
2929 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2930 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
2931 AlgoVerArg->
getName() +
".addr");
2937 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
2938 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2940 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2941 ReduceListAlloca, ReduceListArgType,
2942 ReduceListAlloca->
getName() +
".ascast");
2943 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2944 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2945 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2946 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2947 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2948 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2949 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2950 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2951 RemoteReductionListAlloca, Builder.getPtrTy(),
2952 RemoteReductionListAlloca->
getName() +
".ascast");
2954 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2955 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
2956 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
2957 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
2959 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
2960 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
2961 Value *RemoteLaneOffset =
2962 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
2963 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
2970 emitReductionListCopy(
2971 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
2972 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2995 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2996 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2997 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2998 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2999 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
3000 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
3001 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
3002 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
3003 Value *RemoteOffsetComp =
3004 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
3005 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3006 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3007 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3013 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3014 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3015 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3016 ReduceList, Builder.getPtrTy());
3017 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3018 RemoteListAddrCast, Builder.getPtrTy());
3019 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3020 ->addFnAttr(Attribute::NoUnwind);
3021 Builder.CreateBr(MergeBB);
3023 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3024 Builder.CreateBr(MergeBB);
3026 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3030 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3031 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3032 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3037 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3039 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3040 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3041 ReductionInfos, RemoteListAddrCast, ReduceList);
3042 Builder.CreateBr(CpyMergeBB);
3044 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3045 Builder.CreateBr(CpyMergeBB);
3047 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3049 Builder.CreateRetVoid();
// Builds "_omp_reduction_list_to_global_copy_func"(buffer, idx, reduce_list):
// for each reduction element, loads the element pointer from the local
// reduce list and writes its value into the per-team reductions buffer slot
// (GEP by idx, then by field index), dispatching on EvaluationKind
// (scalar store, complex field-wise, aggregate memcpy).
// NOTE(review): extraction gaps — argument extraction and RedListArrayTy
// construction are among the missing lines; verify against upstream.
3054Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3056 AttributeList FuncAttrs) {
3057 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3060 Builder.getVoidTy(),
3061 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3065 "_omp_reduction_list_to_global_copy_func", &M);
3072 Builder.SetInsertPoint(EntryBlock);
3081 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3082 BufferArg->
getName() +
".addr");
3083 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3085 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3086 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3087 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3088 BufferArgAlloca, Builder.getPtrTy(),
3089 BufferArgAlloca->
getName() +
".ascast");
3090 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3091 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3092 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3093 ReduceListArgAlloca, Builder.getPtrTy(),
3094 ReduceListArgAlloca->
getName() +
".ascast");
3096 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3097 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3098 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3100 Value *LocalReduceList =
3101 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3102 Value *BufferArgVal =
3103 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3104 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3105 Type *IndexTy = Builder.getIndexTy(
3106 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3107 for (
auto En :
enumerate(ReductionInfos)) {
3108 const ReductionInfo &RI = En.value();
3109 auto *RedListArrayTy =
3112 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3113 RedListArrayTy, LocalReduceList,
3114 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3116 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3120 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3121 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3122 ReductionsBufferTy, BufferVD, 0, En.index());
3124 switch (RI.EvaluationKind) {
3125 case EvalKind::Scalar: {
3126 Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3127 Builder.CreateStore(TargetElement, GlobVal);
3130 case EvalKind::Complex: {
3131 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3132 RI.ElementType, ElemPtr, 0, 0,
".realp");
3133 Value *SrcReal = Builder.CreateLoad(
3134 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3135 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3136 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3137 Value *SrcImg = Builder.CreateLoad(
3138 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3140 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3141 RI.ElementType, GlobVal, 0, 0,
".realp");
3142 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3143 RI.ElementType, GlobVal, 0, 1,
".imagp");
3144 Builder.CreateStore(SrcReal, DestRealPtr);
3145 Builder.CreateStore(SrcImg, DestImgPtr);
3148 case EvalKind::Aggregate: {
3150 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3151 Builder.CreateMemCpy(
3152 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3153 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3159 Builder.CreateRetVoid();
3160 Builder.restoreIP(OldIP);
// Builds "_omp_reduction_list_to_global_reduce_func"(buffer, idx,
// reduce_list): constructs a local reduce-list array whose element pointers
// point into the global reductions buffer slot for idx, then calls ReduceFn
// with {global-list, local reduce list} to fold the list into the buffer.
// NOTE(review): extraction gaps — argument extraction and RedListArrayTy
// construction are among the missing lines; verify against upstream.
3164Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3166 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3167 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3170 Builder.getVoidTy(),
3171 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3175 "_omp_reduction_list_to_global_reduce_func", &M);
3182 Builder.SetInsertPoint(EntryBlock);
3191 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3192 BufferArg->
getName() +
".addr");
3193 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3195 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3196 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3197 auto *RedListArrayTy =
3202 Value *LocalReduceList =
3203 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3205 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3206 BufferArgAlloca, Builder.getPtrTy(),
3207 BufferArgAlloca->
getName() +
".ascast");
3208 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3209 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3210 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3211 ReduceListArgAlloca, Builder.getPtrTy(),
3212 ReduceListArgAlloca->
getName() +
".ascast");
3213 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3214 LocalReduceList, Builder.getPtrTy(),
3215 LocalReduceList->
getName() +
".ascast");
3217 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3218 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3219 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3221 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3222 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3223 Type *IndexTy = Builder.getIndexTy(
3224 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3225 for (
auto En :
enumerate(ReductionInfos)) {
3226 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3227 RedListArrayTy, LocalReduceListAddrCast,
3228 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3230 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3232 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3233 ReductionsBufferTy, BufferVD, 0, En.index());
3234 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3239 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3240 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3241 ->addFnAttr(Attribute::NoUnwind);
3242 Builder.CreateRetVoid();
3243 Builder.restoreIP(OldIP);
3247Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3249 AttributeList FuncAttrs) {
3250 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3253 Builder.getVoidTy(),
3254 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3258 "_omp_reduction_global_to_list_copy_func", &M);
3265 Builder.SetInsertPoint(EntryBlock);
3274 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3275 BufferArg->
getName() +
".addr");
3276 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3278 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3279 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3280 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3281 BufferArgAlloca, Builder.getPtrTy(),
3282 BufferArgAlloca->
getName() +
".ascast");
3283 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3284 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3285 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3286 ReduceListArgAlloca, Builder.getPtrTy(),
3287 ReduceListArgAlloca->
getName() +
".ascast");
3288 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3289 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3290 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3292 Value *LocalReduceList =
3293 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3294 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3295 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3296 Type *IndexTy = Builder.getIndexTy(
3297 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3298 for (
auto En :
enumerate(ReductionInfos)) {
3299 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3300 auto *RedListArrayTy =
3303 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3304 RedListArrayTy, LocalReduceList,
3305 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3307 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3310 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3311 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3312 ReductionsBufferTy, BufferVD, 0, En.index());
3314 switch (RI.EvaluationKind) {
3315 case EvalKind::Scalar: {
3316 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
3317 Builder.CreateStore(TargetElement, ElemPtr);
3320 case EvalKind::Complex: {
3321 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3322 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3323 Value *SrcReal = Builder.CreateLoad(
3324 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3325 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3326 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3327 Value *SrcImg = Builder.CreateLoad(
3328 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3330 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3331 RI.ElementType, ElemPtr, 0, 0,
".realp");
3332 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3333 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3334 Builder.CreateStore(SrcReal, DestRealPtr);
3335 Builder.CreateStore(SrcImg, DestImgPtr);
3338 case EvalKind::Aggregate: {
3340 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3341 Builder.CreateMemCpy(
3342 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3343 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3350 Builder.CreateRetVoid();
3351 Builder.restoreIP(OldIP);
3355Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3357 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3358 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3361 Builder.getVoidTy(),
3362 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3366 "_omp_reduction_global_to_list_reduce_func", &M);
3373 Builder.SetInsertPoint(EntryBlock);
3382 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3383 BufferArg->
getName() +
".addr");
3384 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3386 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3387 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3393 Value *LocalReduceList =
3394 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3396 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3397 BufferArgAlloca, Builder.getPtrTy(),
3398 BufferArgAlloca->
getName() +
".ascast");
3399 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3400 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3401 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3402 ReduceListArgAlloca, Builder.getPtrTy(),
3403 ReduceListArgAlloca->
getName() +
".ascast");
3404 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3405 LocalReduceList, Builder.getPtrTy(),
3406 LocalReduceList->
getName() +
".ascast");
3408 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3409 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3410 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3412 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3413 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3414 Type *IndexTy = Builder.getIndexTy(
3415 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3416 for (
auto En :
enumerate(ReductionInfos)) {
3417 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3418 RedListArrayTy, ReductionList,
3419 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3422 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3423 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3424 ReductionsBufferTy, BufferVD, 0, En.index());
3425 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3430 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3431 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
3432 ->addFnAttr(Attribute::NoUnwind);
3433 Builder.CreateRetVoid();
3434 Builder.restoreIP(OldIP);
3438std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3439 std::string Suffix =
3440 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
3441 return (Name + Suffix).
str();
3446 ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
3448 {Builder.getPtrTy(), Builder.getPtrTy()},
3450 std::string
Name = getReductionFuncName(ReducerName);
3458 Builder.SetInsertPoint(EntryBB);
3462 Value *LHSArrayPtr =
nullptr;
3463 Value *RHSArrayPtr =
nullptr;
3470 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3472 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3473 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3474 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3475 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3476 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3477 Builder.CreateStore(Arg0, LHSAddrCast);
3478 Builder.CreateStore(Arg1, RHSAddrCast);
3479 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3480 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3483 Type *IndexTy = Builder.getIndexTy(
3484 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3486 for (
auto En :
enumerate(ReductionInfos)) {
3487 const ReductionInfo &RI = En.value();
3488 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3489 RedArrayTy, RHSArrayPtr,
3490 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3491 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3492 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3493 RHSI8Ptr, RI.PrivateVariable->getType(),
3494 RHSI8Ptr->
getName() +
".ascast");
3496 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3497 RedArrayTy, LHSArrayPtr,
3498 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3499 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3500 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3501 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3503 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3507 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3508 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3510 InsertPointOrErrorTy AfterIP =
3511 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3513 return AfterIP.takeError();
3514 if (!Builder.GetInsertBlock())
3515 return ReductionFunc;
3517 Builder.restoreIP(*AfterIP);
3518 Builder.CreateStore(Reduced, LHSPtr);
3522 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3523 for (
auto En :
enumerate(ReductionInfos)) {
3524 unsigned Index = En.index();
3525 const ReductionInfo &RI = En.value();
3526 Value *LHSFixupPtr, *RHSFixupPtr;
3527 Builder.restoreIP(RI.ReductionGenClang(
3528 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3533 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3538 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3544 Builder.CreateRetVoid();
3545 return ReductionFunc;
3551 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3553 assert(RI.Variable &&
"expected non-null variable");
3554 assert(RI.PrivateVariable &&
"expected non-null private variable");
3555 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3556 "expected non-null reduction generator callback");
3559 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3560 "expected variables and their private equivalents to have the same "
3563 assert(RI.Variable->getType()->isPointerTy() &&
3564 "expected variables to be pointers");
3568OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3569 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3571 bool IsNoWait,
bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
3572 std::optional<omp::GV> GridValue,
unsigned ReductionBufNum,
3573 Value *SrcLocInfo) {
3574 if (!updateToLocation(
Loc))
3575 return InsertPointTy();
3576 Builder.restoreIP(CodeGenIP);
3583 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3584 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3587 if (ReductionInfos.
size() == 0)
3588 return Builder.saveIP();
3591 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3597 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3601 AttributeList FuncAttrs;
3602 AttrBuilder AttrBldr(Ctx);
3604 AttrBldr.addAttribute(Attr);
3605 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3606 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3608 CodeGenIP = Builder.saveIP();
3610 createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
3611 ReductionInfos, ReductionGenCBKind, FuncAttrs);
3612 if (!ReductionResult)
3614 Function *ReductionFunc = *ReductionResult;
3615 Builder.restoreIP(CodeGenIP);
3618 if (GridValue.has_value())
3619 Config.setGridValue(GridValue.value());
3634 Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
3636 CodeGenIP = Builder.saveIP();
3637 Builder.restoreIP(AllocaIP);
3638 Value *ReductionListAlloca =
3639 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3640 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3641 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3642 Builder.restoreIP(CodeGenIP);
3643 Type *IndexTy = Builder.getIndexTy(
3644 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3645 for (
auto En :
enumerate(ReductionInfos)) {
3646 const ReductionInfo &RI = En.value();
3647 Value *ElemPtr = Builder.CreateInBoundsGEP(
3648 RedArrayTy, ReductionList,
3649 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3651 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3652 Builder.CreateStore(CastElem, ElemPtr);
3654 CodeGenIP = Builder.saveIP();
3656 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3658 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs);
3662 Builder.restoreIP(CodeGenIP);
3664 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
3666 unsigned MaxDataSize = 0;
3668 for (
auto En :
enumerate(ReductionInfos)) {
3669 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3670 if (
Size > MaxDataSize)
3672 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3674 Value *ReductionDataSize =
3675 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
3676 if (!IsTeamsReduction) {
3677 Value *SarFuncCast =
3678 Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, FuncPtrTy);
3680 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
3681 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3683 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3684 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3685 Res = Builder.CreateCall(Pv2Ptr, Args);
3687 CodeGenIP = Builder.saveIP();
3689 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3690 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3691 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3692 Function *LtGCFunc = emitListToGlobalCopyFunction(
3693 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3694 Function *LtGRFunc = emitListToGlobalReduceFunction(
3695 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3696 Function *GtLCFunc = emitGlobalToListCopyFunction(
3697 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3698 Function *GtLRFunc = emitGlobalToListReduceFunction(
3699 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3700 Builder.restoreIP(CodeGenIP);
3702 Value *KernelTeamsReductionPtr = Builder.CreateCall(
3703 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3705 Value *Args3[] = {SrcLocInfo,
3706 KernelTeamsReductionPtr,
3707 Builder.getInt32(ReductionBufNum),
3717 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3718 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3719 Res = Builder.CreateCall(TeamsReduceFn, Args3);
3725 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3726 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3732 emitBlock(ThenBB, CurFunc);
3735 for (
auto En :
enumerate(ReductionInfos)) {
3736 const ReductionInfo &RI = En.value();
3739 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3741 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3742 Value *LHSPtr, *RHSPtr;
3743 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3744 &LHSPtr, &RHSPtr, CurFunc));
3757 Value *LHSValue = Builder.CreateLoad(RI.ElementType,
LHS,
"final.lhs");
3758 Value *RHSValue = Builder.CreateLoad(RI.ElementType,
RHS,
"final.rhs");
3760 InsertPointOrErrorTy AfterIP =
3761 RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
3763 return AfterIP.takeError();
3764 Builder.restoreIP(*AfterIP);
3765 Builder.CreateStore(Reduced,
LHS,
false);
3768 emitBlock(ExitBB, CurFunc);
3769 if (ContinuationBlock) {
3770 Builder.CreateBr(ContinuationBlock);
3771 Builder.SetInsertPoint(ContinuationBlock);
3773 Config.setEmitLLVMUsed();
3775 return Builder.saveIP();
3784 ".omp.reduction.func", &M);
3794 Builder.SetInsertPoint(ReductionFuncBlock);
3795 Value *LHSArrayPtr =
nullptr;
3796 Value *RHSArrayPtr =
nullptr;
3807 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3809 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3810 Value *LHSAddrCast =
3811 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3812 Value *RHSAddrCast =
3813 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3814 Builder.CreateStore(Arg0, LHSAddrCast);
3815 Builder.CreateStore(Arg1, RHSAddrCast);
3816 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3817 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3819 LHSArrayPtr = ReductionFunc->
getArg(0);
3820 RHSArrayPtr = ReductionFunc->
getArg(1);
3823 unsigned NumReductions = ReductionInfos.
size();
3826 for (
auto En :
enumerate(ReductionInfos)) {
3827 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3828 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3829 RedArrayTy, LHSArrayPtr, 0, En.index());
3830 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3831 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3832 LHSI8Ptr, RI.Variable->
getType());
3833 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3834 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3835 RedArrayTy, RHSArrayPtr, 0, En.index());
3836 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3837 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3838 RHSI8Ptr, RI.PrivateVariable->
getType());
3839 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3841 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3842 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3844 return AfterIP.takeError();
3846 Builder.restoreIP(*AfterIP);
3848 if (!Builder.GetInsertBlock())
3852 if (!IsByRef[En.index()])
3853 Builder.CreateStore(Reduced, LHSPtr);
3855 Builder.CreateRetVoid();
3859OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
3860 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3862 bool IsNoWait,
bool IsTeamsReduction) {
3865 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
3866 IsNoWait, IsTeamsReduction);
3870 if (!updateToLocation(
Loc))
3871 return InsertPointTy();
3873 if (ReductionInfos.
size() == 0)
3874 return Builder.saveIP();
3883 unsigned NumReductions = ReductionInfos.
size();
3885 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
3886 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
3888 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3890 for (
auto En :
enumerate(ReductionInfos)) {
3891 unsigned Index = En.index();
3892 const ReductionInfo &RI = En.value();
3893 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
3894 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
3895 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
3900 Type *IndexTy = Builder.getIndexTy(
3901 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3902 Function *
Func = Builder.GetInsertBlock()->getParent();
3905 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3906 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
3907 return RI.AtomicReductionGen;
3909 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
3911 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3913 Value *ThreadId = getOrCreateThreadID(Ident);
3914 Constant *NumVariables = Builder.getInt32(NumReductions);
3916 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3917 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
3919 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3920 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
3921 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3922 : RuntimeFunction::OMPRTL___kmpc_reduce);
3924 Builder.CreateCall(ReduceFunc,
3925 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3926 ReductionFunc, Lock},
3937 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
3938 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
3939 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
3944 Builder.SetInsertPoint(NonAtomicRedBlock);
3945 for (
auto En :
enumerate(ReductionInfos)) {
3946 const ReductionInfo &RI = En.value();
3950 Value *RedValue = RI.Variable;
3951 if (!IsByRef[En.index()]) {
3952 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3953 "red.value." +
Twine(En.index()));
3955 Value *PrivateRedValue =
3956 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
3957 "red.private.value." +
Twine(En.index()));
3959 InsertPointOrErrorTy AfterIP =
3960 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3962 return AfterIP.takeError();
3963 Builder.restoreIP(*AfterIP);
3965 if (!Builder.GetInsertBlock())
3966 return InsertPointTy();
3968 if (!IsByRef[En.index()])
3969 Builder.CreateStore(Reduced, RI.Variable);
3971 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
3972 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3973 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3974 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
3975 Builder.CreateBr(ContinuationBlock);
3980 Builder.SetInsertPoint(AtomicRedBlock);
3981 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3982 for (
const ReductionInfo &RI : ReductionInfos) {
3983 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
3984 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
3986 return AfterIP.takeError();
3987 Builder.restoreIP(*AfterIP);
3988 if (!Builder.GetInsertBlock())
3989 return InsertPointTy();
3991 Builder.CreateBr(ContinuationBlock);
3993 Builder.CreateUnreachable();
4004 if (!Builder.GetInsertBlock())
4005 return InsertPointTy();
4007 Builder.SetInsertPoint(ContinuationBlock);
4008 return Builder.saveIP();
// Emit an OpenMP 'master' construct as an inlined region: the body produced
// by BodyGenCB is guarded by __kmpc_master / __kmpc_end_master runtime calls
// so that (presumably, per the kmpc entry-point contract — confirm against
// the runtime docs) only the master thread executes it.
// NOTE(review): this listing dropped several source lines (gaps in the
// embedded numbering) — the early return after updateToLocation, the
// SrcLocStrSize declaration, the Args[] = {Ident, ThreadId} declaration and
// the trailing EmitOMPInlinedRegion arguments are missing here; verify
// against the full file before relying on this excerpt.
4011OpenMPIRBuilder::InsertPointOrErrorTy
4012OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4013 BodyGenCallbackTy BodyGenCB,
4014 FinalizeCallbackTy FiniCB) {
// Bail out when no valid insertion location is available.
4015 if (!updateToLocation(
Loc))
4018 Directive OMPD = Directive::OMPD_master;
// Build the source-location ident string/struct and the current thread id;
// both are passed to every kmpc runtime entry point below.
4020 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4021 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4022 Value *ThreadId = getOrCreateThreadID(Ident);
// Region entry call: __kmpc_master.
4025 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4026 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
// Region exit call: __kmpc_end_master.
4028 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4029 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
// Delegate the actual region construction (entry/exit wiring, body and
// finalization callbacks) to the shared inlined-region emitter.
4031 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Emit an OpenMP 'masked' construct as an inlined region, bracketed by
// __kmpc_masked / __kmpc_end_masked. Unlike 'master', the entry call takes a
// filter argument (the Args array used for EntryCall is declared on a line
// this extraction dropped — presumably {Ident, ThreadId, Filter}; confirm
// against the full file), while the exit call only needs {Ident, ThreadId}.
// NOTE(review): the FiniCB parameter, the early return after
// updateToLocation, the SrcLocStrSize and Args declarations, and the
// trailing EmitOMPInlinedRegion arguments are missing from this listing.
4035OpenMPIRBuilder::InsertPointOrErrorTy
4036OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4037 BodyGenCallbackTy BodyGenCB,
// Bail out when no valid insertion location is available.
4039 if (!updateToLocation(
Loc))
4042 Directive OMPD = Directive::OMPD_masked;
// Source-location ident and thread id shared by both runtime calls.
4044 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4045 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4046 Value *ThreadId = getOrCreateThreadID(Ident);
// Argument list for the region-exit call only.
4048 Value *ArgsEnd[] = {Ident, ThreadId};
4050 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4051 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
4053 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4054 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);
// Hand off to the shared inlined-region emitter.
4056 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4066 Call->setDoesNotThrow();
// Emit the bookkeeping for an OpenMP 'scan' directive inside the two-pass
// scan lowering. On the first ("input") pass, each private scan variable is
// copied into its heap scan buffer at the current iteration offset; on the
// second ("scan") pass, values are read back from the buffer into the
// private variables. Finally control is dispatched to the before-scan or
// after-scan block depending on pass and inclusivity.
// NOTE(review): this extraction dropped lines — among others the IV
// parameter/declaration, error handling for Err, the exclusive-scan offset
// adjustment, the SrcPtr declaration line and the definition of CmpI used by
// the conditional branches below. Verify against the full file.
4078OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4079 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4081 bool IsInclusive, ScanInfo *ScanRedInfo) {
// First pass only: materialize the per-variable scan buffers (allocas +
// malloc'ed arrays) before any copies happen.
4082 if (ScanRedInfo->OMPFirstScanLoop) {
4083 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4084 ScanVarsType, ScanRedInfo);
4088 if (!updateToLocation(
Loc))
4093 if (ScanRedInfo->OMPFirstScanLoop) {
// Input phase: store each private scan variable into buffer[IV].
4095 for (
size_t i = 0; i < ScanVars.
size(); i++) {
// ScanBuffPtrs maps each scan variable to the alloca holding its buffer.
4096 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4097 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4098 Type *DestTy = ScanVarsType[i];
4099 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4100 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4102 Builder.CreateStore(Src, Val);
// Jump to the common scan-loop exit, then continue emission in the
// dispatch block.
4105 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4106 emitBlock(ScanRedInfo->OMPScanDispatch,
4107 Builder.GetInsertBlock()->getParent());
4109 if (!ScanRedInfo->OMPFirstScanLoop) {
// Scan phase: use the induction variable captured during the input phase.
4110 IV = ScanRedInfo->IV;
// Read each value back out of the buffer into the private variable.
4113 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4114 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4115 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4116 Type *DestTy = ScanVarsType[i];
4118 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4119 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4120 Builder.CreateStore(Src, ScanVars[i]);
// Dispatch: when pass parity matches inclusivity, run the before-scan block
// first; otherwise run the after-scan block first. (CmpI is defined on a
// dropped line — presumably an IV bounds check; confirm.)
4126 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4127 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4128 ScanRedInfo->OMPAfterScanBlock);
4130 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4131 ScanRedInfo->OMPBeforeScanBlock);
// Continue code generation in the after-scan block.
4133 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4134 Builder.GetInsertBlock()->getParent());
4135 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4136 return Builder.saveIP();
// Allocate the per-variable scan buffers used by the two-pass scan lowering:
// one pointer-sized alloca ("vla") per scan variable at AllocaIP, then —
// inside a masked region followed by a barrier so the allocation happens
// once and is visible to all threads — a malloc'ed array per variable whose
// address is stored into the corresponding alloca.
// NOTE(review): several declarations were dropped by this extraction
// (BuffPtr capture of the alloca result, AllocSpan/Allocsize/IntPtrTy, and
// the FiniCB/FilterVal used for createMasked), as was the final return.
// Verify against the full file.
4139Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
// Emit the pointer slots at the function's alloca insertion point.
4143 Builder.restoreIP(AllocaIP);
4145 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4147 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
// Remember which slot belongs to which scan variable.
4148 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
// Body of the masked region: malloc one buffer per scan variable, sized
// Span + 1 elements (the extra slot presumably holds the final/identity
// value — confirm against the scan lowering design).
4152 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4153 InsertPointTy CodeGenIP) ->
Error {
4154 Builder.restoreIP(CodeGenIP);
4156 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4157 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4161 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4162 AllocSpan,
nullptr,
"arr");
// Publish the buffer address through the slot created above.
4163 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
// Emit the masked region just before the scan-init block's terminator.
4171 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4173 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4174 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4177 return AfterIP.takeError();
4178 Builder.restoreIP(*AfterIP);
4179 BasicBlock *InputBB = Builder.GetInsertBlock();
// Barrier: all threads must see the published buffers before proceeding.
4181 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4182 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4184 return AfterIP.takeError();
4185 Builder.restoreIP(*AfterIP);
// Finalization for the scan lowering: inside a masked region, copy the last
// element of each scan buffer (indexed by Span) back into the original
// reduction variable, free the malloc'ed buffer, and follow with a barrier.
// NOTE(review): the parameter list, the GEP's name argument, error checks on
// the AfterIP results within dropped lines, and the final return are missing
// from this extraction; verify against the full file.
4190Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4192 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4193 InsertPointTy CodeGenIP) ->
Error {
4194 Builder.restoreIP(CodeGenIP);
// For each reduction: load buffer[Span] and store it into the original
// (shared) variable, then release the buffer.
4195 for (ReductionInfo RedInfo : ReductionInfos) {
4196 Value *PrivateVar = RedInfo.PrivateVariable;
4197 Value *OrigVar = RedInfo.Variable;
4198 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4199 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4201 Type *SrcTy = RedInfo.ElementType;
4202 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4204 Value *Src = Builder.CreateLoad(SrcTy, Val);
4206 Builder.CreateStore(Src, OrigVar);
4207 Builder.CreateFree(Buff);
// Place the masked region before the scan-finish block's terminator when it
// already has one, otherwise at the (currently open) end of that block.
4215 if (ScanRedInfo->OMPScanFinish->getTerminator())
4216 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4218 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4221 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4222 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4225 return AfterIP.takeError();
4226 Builder.restoreIP(*AfterIP);
4227 BasicBlock *InputBB = Builder.GetInsertBlock();
// Barrier so every thread observes the finalized values / freed buffers.
4229 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4230 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4232 return AfterIP.takeError();
4233 Builder.restoreIP(*AfterIP);
4237OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4238 const LocationDescription &
Loc,
4240 ScanInfo *ScanRedInfo) {
4242 if (!updateToLocation(
Loc))
4244 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4245 InsertPointTy CodeGenIP) ->
Error {
4246 Builder.restoreIP(CodeGenIP);
4252 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
4254 Builder.GetInsertBlock()->getModule(),
4258 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4261 Builder.GetInsertBlock()->getModule(),
4264 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4267 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4268 Builder.SetInsertPoint(InputBB);
4269 Builder.CreateBr(LoopBB);
4270 emitBlock(LoopBB, CurFn);
4271 Builder.SetInsertPoint(LoopBB);
4273 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4275 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4276 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4278 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4286 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4287 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4288 emitBlock(InnerLoopBB, CurFn);
4289 Builder.SetInsertPoint(InnerLoopBB);
4290 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4292 for (ReductionInfo RedInfo : ReductionInfos) {
4293 Value *ReductionVal = RedInfo.PrivateVariable;
4294 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4295 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4296 Type *DestTy = RedInfo.ElementType;
4297 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4299 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4300 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4302 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4303 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4304 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4306 InsertPointOrErrorTy AfterIP =
4307 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4309 return AfterIP.takeError();
4310 Builder.CreateStore(Result, LHSPtr);
4313 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4314 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4315 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4316 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4317 emitBlock(InnerExitBB, CurFn);
4319 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4322 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4323 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4325 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4335 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4336 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4339 return AfterIP.takeError();
4340 Builder.restoreIP(*AfterIP);
4341 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4344 return AfterIP.takeError();
4345 Builder.restoreIP(*AfterIP);
4346 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drive the two-pass scan lowering: run the loop generator once as the
// "input" loop (OMPFirstScanLoop = true), then again as the "scan" loop
// (OMPFirstScanLoop = false). The flag is what createScan keys off to decide
// whether it is writing to or reading from the scan buffers.
// NOTE(review): the generator parameters, the error propagation after each
// Err, and the final return were dropped by this extraction — the duplicated
// 'Error Err' declarations visible here are only legal because the dropped
// lines scope them; verify against the full file.
4353Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4356 ScanInfo *ScanRedInfo) {
4364 ScanRedInfo->OMPFirstScanLoop =
true;
// First pass: populate the scan buffers.
4365 Error Err = InputLoopGen();
4375 ScanRedInfo->OMPFirstScanLoop =
false;
// Second pass: consume the scan buffers.
4376 Error Err = ScanLoopGen(Builder.saveIP());
// Create the four basic blocks the scan lowering wires together: dispatch,
// after-scan, before-scan and loop-exit, all attached to the current
// function. They are stored on the ScanInfo so createScan /
// createCanonicalScanLoops can branch between them later.
// NOTE(review): the right-hand sides of these four assignments (the
// BasicBlock::Create calls with their block names) were dropped by this
// extraction; verify against the full file.
4383void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4384 Function *
Fun = Builder.GetInsertBlock()->getParent();
4385 ScanRedInfo->OMPScanDispatch =
4387 ScanRedInfo->OMPAfterScanBlock =
4389 ScanRedInfo->OMPBeforeScanBlock =
4391 ScanRedInfo->OMPScanLoopExit =
4394CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4418 Builder.SetCurrentDebugLocation(
DL);
4420 Builder.SetInsertPoint(Preheader);
4421 Builder.CreateBr(Header);
4423 Builder.SetInsertPoint(Header);
4424 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4425 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4426 Builder.CreateBr(
Cond);
4428 Builder.SetInsertPoint(
Cond);
4430 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4431 Builder.CreateCondBr(Cmp, Body, Exit);
4433 Builder.SetInsertPoint(Body);
4434 Builder.CreateBr(Latch);
4436 Builder.SetInsertPoint(Latch);
4437 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4438 "omp_" + Name +
".next",
true);
4439 Builder.CreateBr(Header);
4442 Builder.SetInsertPoint(Exit);
4443 Builder.CreateBr(After);
4446 LoopInfos.emplace_front();
4447 CanonicalLoopInfo *CL = &LoopInfos.front();
4449 CL->Header = Header;
4461OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4462 LoopBodyGenCallbackTy BodyGenCB,
4467 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4468 NextBB, NextBB, Name);
4472 if (updateToLocation(
Loc)) {
4476 spliceBB(Builder, After,
false);
4477 Builder.CreateBr(CL->getPreheader());
4482 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4492 ScanInfos.emplace_front();
4493 ScanInfo *
Result = &ScanInfos.front();
4498OpenMPIRBuilder::createCanonicalScanLoops(
4499 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4500 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4501 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4502 LocationDescription ComputeLoc =
4503 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4504 updateToLocation(ComputeLoc);
4508 Value *TripCount = calculateCanonicalLoopTripCount(
4509 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4510 ScanRedInfo->Span = TripCount;
4511 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4512 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4514 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4515 Builder.restoreIP(CodeGenIP);
4516 ScanRedInfo->IV =
IV;
4517 createScanBBs(ScanRedInfo);
4518 BasicBlock *InputBlock = Builder.GetInsertBlock();
4522 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4523 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4524 Builder.GetInsertBlock()->getParent());
4525 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4526 emitBlock(ScanRedInfo->OMPScanLoopExit,
4527 Builder.GetInsertBlock()->getParent());
4528 Builder.CreateBr(ContinueBlock);
4529 Builder.SetInsertPoint(
4530 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4531 return BodyGenCB(Builder.saveIP(),
IV);
4534 const auto &&InputLoopGen = [&]() ->
Error {
4536 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4537 ComputeIP, Name,
true, ScanRedInfo);
4541 Builder.restoreIP((*LoopInfo)->getAfterIP());
4544 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4546 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4547 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4551 Builder.restoreIP((*LoopInfo)->getAfterIP());
4552 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4555 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
4561Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4563 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
4573 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4574 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4576 updateToLocation(
Loc);
4593 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4594 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4595 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4596 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4597 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4598 ZeroCmp = Builder.CreateICmp(
4601 Span = Builder.CreateSub(Stop, Start,
"",
true);
4602 ZeroCmp = Builder.CreateICmp(
4606 Value *CountIfLooping;
4607 if (InclusiveStop) {
4608 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
4611 Value *CountIfTwo = Builder.CreateAdd(
4612 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4614 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
4617 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4618 "omp_" + Name +
".tripcount");
4622 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4623 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4624 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4625 ScanInfo *ScanRedInfo) {
4626 LocationDescription ComputeLoc =
4627 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4629 Value *TripCount = calculateCanonicalLoopTripCount(
4630 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4632 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4633 Builder.restoreIP(CodeGenIP);
4634 Value *Span = Builder.CreateMul(
IV, Step);
4635 Value *IndVar = Builder.CreateAdd(Span, Start);
4637 ScanRedInfo->IV = IndVar;
4638 return BodyGenCB(Builder.saveIP(), IndVar);
4640 LocationDescription LoopLoc =
4643 : LocationDescription(Builder.saveIP(),
4644 Builder.getCurrentDebugLocation());
4645 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
4654 OpenMPIRBuilder &OMPBuilder) {
4655 unsigned Bitwidth = Ty->getIntegerBitWidth();
4657 return OMPBuilder.getOrCreateRuntimeFunction(
4658 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4660 return OMPBuilder.getOrCreateRuntimeFunction(
4661 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4670 OpenMPIRBuilder &OMPBuilder) {
4671 unsigned Bitwidth = Ty->getIntegerBitWidth();
4673 return OMPBuilder.getOrCreateRuntimeFunction(
4674 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4676 return OMPBuilder.getOrCreateRuntimeFunction(
4677 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
4681OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4682 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4684 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4686 "Require dedicated allocate IP");
4689 Builder.restoreIP(CLI->getPreheaderIP());
4690 Builder.SetCurrentDebugLocation(
DL);
4693 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4694 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4698 Type *IVTy =
IV->getType();
4700 LoopType == WorksharingLoopType::DistributeForStaticLoop
4704 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4707 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4710 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4711 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4712 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4713 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4714 CLI->setLastIter(PLastIter);
4720 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4722 Constant *One = ConstantInt::get(IVTy, 1);
4723 Builder.CreateStore(Zero, PLowerBound);
4724 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4725 Builder.CreateStore(UpperBound, PUpperBound);
4726 Builder.CreateStore(One, PStride);
4728 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4731 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4732 ? OMPScheduleType::OrderedDistribute
4735 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
4740 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4741 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4742 Value *PDistUpperBound =
4743 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4744 Args.push_back(PDistUpperBound);
4747 Builder.CreateCall(StaticInit, Args);
4748 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4749 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4750 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4751 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4752 CLI->setTripCount(TripCount);
4759 Builder.SetInsertPoint(CLI->getBody(),
4760 CLI->getBody()->getFirstInsertionPt());
4761 Builder.SetCurrentDebugLocation(
DL);
4762 return Builder.CreateAdd(OldIV, LowerBound);
4766 Builder.SetInsertPoint(CLI->getExit(),
4767 CLI->getExit()->getTerminator()->getIterator());
4768 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4772 InsertPointOrErrorTy BarrierIP =
4773 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4774 omp::Directive::OMPD_for,
false,
4777 return BarrierIP.takeError();
4780 InsertPointTy AfterIP = CLI->getAfterIP();
4786OpenMPIRBuilder::InsertPointOrErrorTy
4787OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4788 CanonicalLoopInfo *CLI,
4789 InsertPointTy AllocaIP,
4792 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4793 assert(ChunkSize &&
"Chunk size is required");
4795 LLVMContext &Ctx = CLI->getFunction()->getContext();
4797 Value *OrigTripCount = CLI->getTripCount();
4798 Type *IVTy =
IV->getType();
4800 "Max supported tripcount bitwidth is 64 bits");
4802 :
Type::getInt64Ty(Ctx);
4805 Constant *One = ConstantInt::get(InternalIVTy, 1);
4811 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4814 Builder.restoreIP(AllocaIP);
4815 Builder.SetCurrentDebugLocation(
DL);
4816 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4817 Value *PLowerBound =
4818 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4819 Value *PUpperBound =
4820 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4821 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4822 CLI->setLastIter(PLastIter);
4825 Builder.restoreIP(CLI->getPreheaderIP());
4826 Builder.SetCurrentDebugLocation(
DL);
4829 Value *CastedChunkSize =
4830 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4831 Value *CastedTripCount =
4832 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4834 Constant *SchedulingType = ConstantInt::get(
4835 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4836 Builder.CreateStore(Zero, PLowerBound);
4837 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4838 Builder.CreateStore(OrigUpperBound, PUpperBound);
4839 Builder.CreateStore(One, PStride);
4844 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4845 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4846 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4847 Builder.CreateCall(StaticInit,
4849 SchedulingType, PLastIter,
4850 PLowerBound, PUpperBound,
4855 Value *FirstChunkStart =
4856 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4857 Value *FirstChunkStop =
4858 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4859 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4861 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4862 Value *NextChunkStride =
4863 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4866 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4867 Value *DispatchCounter;
4872 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4873 {Builder.saveIP(),
DL},
4874 [&](InsertPointTy BodyIP,
Value *Counter) {
4875 DispatchCounter = Counter;
4878 FirstChunkStart, CastedTripCount, NextChunkStride,
4884 BasicBlock *DispatchBody = DispatchCLI->getBody();
4885 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4886 BasicBlock *DispatchExit = DispatchCLI->getExit();
4887 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4888 DispatchCLI->invalidate();
4896 Builder.restoreIP(CLI->getPreheaderIP());
4897 Builder.SetCurrentDebugLocation(
DL);
4900 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4901 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4902 Value *IsLastChunk =
4903 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4904 Value *CountUntilOrigTripCount =
4905 Builder.CreateSub(CastedTripCount, DispatchCounter);
4906 Value *ChunkTripCount = Builder.CreateSelect(
4907 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4908 Value *BackcastedChunkTC =
4909 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4910 CLI->setTripCount(BackcastedChunkTC);
4915 Value *BackcastedDispatchCounter =
4916 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4918 Builder.restoreIP(CLI->getBodyIP());
4919 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4924 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4928 InsertPointOrErrorTy AfterIP =
4929 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4932 return AfterIP.takeError();
4950 unsigned Bitwidth = Ty->getIntegerBitWidth();
4951 Module &M = OMPBuilder->M;
4953 case WorksharingLoopType::ForStaticLoop:
4955 return OMPBuilder->getOrCreateRuntimeFunction(
4956 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4958 return OMPBuilder->getOrCreateRuntimeFunction(
4959 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4961 case WorksharingLoopType::DistributeStaticLoop:
4963 return OMPBuilder->getOrCreateRuntimeFunction(
4964 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4966 return OMPBuilder->getOrCreateRuntimeFunction(
4967 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4969 case WorksharingLoopType::DistributeForStaticLoop:
4971 return OMPBuilder->getOrCreateRuntimeFunction(
4972 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4974 return OMPBuilder->getOrCreateRuntimeFunction(
4975 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4978 if (Bitwidth != 32 && Bitwidth != 64) {
4990 Function &LoopBodyFn,
bool NoLoop) {
4992 Module &M = OMPBuilder->M;
5001 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5002 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5003 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5004 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5005 Builder.CreateCall(RTLFn, RealArgs);
5008 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5009 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5010 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5011 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5014 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5015 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5016 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5017 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5018 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5020 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5023 Builder.CreateCall(RTLFn, RealArgs);
5027 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5032 Value *TripCount = CLI->getTripCount();
5038 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5039 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5044 Builder.restoreIP({Preheader, Preheader->
end()});
5047 Builder.CreateBr(CLI->getExit());
5050 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5053 CleanUpInfo.EntryBB = CLI->getHeader();
5054 CleanUpInfo.ExitBB = CLI->getExit();
5055 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5063 "Expected unique undroppable user of outlined function");
5065 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5067 "Expected outlined function call to be located in loop preheader");
5069 if (OutlinedFnCallInstruction->
arg_size() > 1)
5076 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5078 for (
auto &ToBeDeletedItem : ToBeDeleted)
5079 ToBeDeletedItem->eraseFromParent();
5083OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5084 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5087 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5088 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5091 OI.OuterAllocaBB = CLI->getPreheader();
5097 OI.OuterAllocaBB = AllocaIP.getBlock();
5100 OI.EntryBB = CLI->getBody();
5101 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5102 "omp.prelatch",
true);
5105 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5109 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5111 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5122 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5133 CLI->getPreheader(),
5142 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5148 CLI->getIndVar()->user_end());
5151 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5152 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5158 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5165 OI.PostOutlineCB = [=, ToBeDeletedVec =
5166 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5170 addOutlineInfo(std::move(OI));
5171 return CLI->getAfterIP();
5174OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5175 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5176 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5177 bool HasSimdModifier,
bool HasMonotonicModifier,
5178 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5180 if (Config.isTargetDevice())
5181 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
5183 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5184 HasNonmonotonicModifier, HasOrderedClause);
5186 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5187 OMPScheduleType::ModifierOrdered;
5188 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5189 case OMPScheduleType::BaseStatic:
5190 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
5192 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5193 NeedsBarrier, ChunkSize);
5195 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5197 case OMPScheduleType::BaseStaticChunked:
5199 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5200 NeedsBarrier, ChunkSize);
5202 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
5205 case OMPScheduleType::BaseRuntime:
5206 case OMPScheduleType::BaseAuto:
5207 case OMPScheduleType::BaseGreedy:
5208 case OMPScheduleType::BaseBalanced:
5209 case OMPScheduleType::BaseSteal:
5210 case OMPScheduleType::BaseGuidedSimd:
5211 case OMPScheduleType::BaseRuntimeSimd:
5213 "schedule type does not support user-defined chunk sizes");
5215 case OMPScheduleType::BaseDynamicChunked:
5216 case OMPScheduleType::BaseGuidedChunked:
5217 case OMPScheduleType::BaseGuidedIterativeChunked:
5218 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5219 case OMPScheduleType::BaseStaticBalancedChunked:
5220 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5221 NeedsBarrier, ChunkSize);
5234 unsigned Bitwidth = Ty->getIntegerBitWidth();
5236 return OMPBuilder.getOrCreateRuntimeFunction(
5237 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5239 return OMPBuilder.getOrCreateRuntimeFunction(
5240 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5250 unsigned Bitwidth = Ty->getIntegerBitWidth();
5252 return OMPBuilder.getOrCreateRuntimeFunction(
5253 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5255 return OMPBuilder.getOrCreateRuntimeFunction(
5256 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5265 unsigned Bitwidth = Ty->getIntegerBitWidth();
5267 return OMPBuilder.getOrCreateRuntimeFunction(
5268 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5270 return OMPBuilder.getOrCreateRuntimeFunction(
5271 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5275OpenMPIRBuilder::InsertPointOrErrorTy
5276OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5277 InsertPointTy AllocaIP,
5279 bool NeedsBarrier,
Value *Chunk) {
5280 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5282 "Require dedicated allocate IP");
5284 "Require valid schedule type");
5286 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5287 OMPScheduleType::ModifierOrdered;
5290 Builder.SetCurrentDebugLocation(
DL);
5293 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5294 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5298 Type *IVTy =
IV->getType();
5303 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5305 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5306 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5307 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5308 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5309 CLI->setLastIter(PLastIter);
5317 Constant *One = ConstantInt::get(IVTy, 1);
5318 Builder.CreateStore(One, PLowerBound);
5319 Value *UpperBound = CLI->getTripCount();
5320 Builder.CreateStore(UpperBound, PUpperBound);
5321 Builder.CreateStore(One, PStride);
5327 InsertPointTy AfterIP = CLI->getAfterIP();
5335 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5338 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5341 Builder.CreateCall(DynamicInit,
5342 {SrcLoc, ThreadNum, SchedulingType, One,
5343 UpperBound, One, Chunk});
5352 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5353 PLowerBound, PUpperBound, PStride});
5354 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5357 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5358 Builder.CreateCondBr(MoreWork, Header, Exit);
5364 PI->setIncomingBlock(0, OuterCond);
5365 PI->setIncomingValue(0, LowerBound);
5370 Br->setSuccessor(0, OuterCond);
5375 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
5376 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
5383 assert(BI->getSuccessor(1) == Exit);
5384 BI->setSuccessor(1, OuterCond);
5388 Builder.SetInsertPoint(&Latch->
back());
5390 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
5395 Builder.SetInsertPoint(&
Exit->back());
5396 InsertPointOrErrorTy BarrierIP =
5397 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5398 omp::Directive::OMPD_for,
false,
5401 return BarrierIP.takeError();
5420 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5425 if (BBsToErase.
count(UseInst->getParent()))
5432 while (BBsToErase.
remove_if(HasRemainingUses)) {
5442 InsertPointTy ComputeIP) {
5443 assert(
Loops.size() >= 1 &&
"At least one loop required");
5444 size_t NumLoops =
Loops.size();
5448 return Loops.front();
5450 CanonicalLoopInfo *Outermost =
Loops.front();
5451 CanonicalLoopInfo *Innermost =
Loops.back();
5452 BasicBlock *OrigPreheader = Outermost->getPreheader();
5453 BasicBlock *OrigAfter = Outermost->getAfter();
5460 Loop->collectControlBlocks(OldControlBBs);
5463 Builder.SetCurrentDebugLocation(
DL);
5464 if (ComputeIP.isSet())
5465 Builder.restoreIP(ComputeIP);
5467 Builder.restoreIP(Outermost->getPreheaderIP());
5471 Value *CollapsedTripCount =
nullptr;
5472 for (CanonicalLoopInfo *L :
Loops) {
5474 "All loops to collapse must be valid canonical loops");
5475 Value *OrigTripCount =
L->getTripCount();
5476 if (!CollapsedTripCount) {
5477 CollapsedTripCount = OrigTripCount;
5482 CollapsedTripCount =
5483 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5487 CanonicalLoopInfo *
Result =
5488 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5489 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5495 Builder.restoreIP(
Result->getBodyIP());
5499 NewIndVars.
resize(NumLoops);
5500 for (
int i = NumLoops - 1; i >= 1; --i) {
5501 Value *OrigTripCount =
Loops[i]->getTripCount();
5503 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5504 NewIndVars[i] = NewIndVar;
5506 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5509 NewIndVars[0] = Leftover;
5520 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5527 ContinueBlock =
nullptr;
5528 ContinuePred = NextSrc;
5535 for (
size_t i = 0; i < NumLoops - 1; ++i)
5536 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5539 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5542 for (
size_t i = NumLoops - 1; i > 0; --i)
5543 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5546 ContinueWith(
Result->getLatch(),
nullptr);
5553 for (
size_t i = 0; i < NumLoops; ++i)
5554 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5559 for (CanonicalLoopInfo *L :
Loops)
5568std::vector<CanonicalLoopInfo *>
5572 "Must pass as many tile sizes as there are loops");
5573 int NumLoops =
Loops.size();
5574 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5576 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5577 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5578 Function *
F = OutermostLoop->getBody()->getParent();
5579 BasicBlock *InnerEnter = InnermostLoop->getBody();
5580 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5586 Loop->collectControlBlocks(OldControlBBs);
5593 for (CanonicalLoopInfo *L :
Loops) {
5594 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5606 for (
int i = 0; i < NumLoops - 1; ++i) {
5607 CanonicalLoopInfo *Surrounding =
Loops[i];
5610 BasicBlock *EnterBB = Surrounding->getBody();
5616 Builder.SetCurrentDebugLocation(
DL);
5617 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5619 for (
int i = 0; i < NumLoops; ++i) {
5621 Value *OrigTripCount = OrigTripCounts[i];
5624 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5625 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5634 Value *FloorTripOverflow =
5635 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5637 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5638 Value *FloorTripCount =
5639 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5640 "omp_floor" +
Twine(i) +
".tripcount",
true);
5643 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5649 std::vector<CanonicalLoopInfo *>
Result;
5650 Result.reserve(NumLoops * 2);
5654 BasicBlock *Enter = OutermostLoop->getPreheader();
5661 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5663 auto EmbeddNewLoop =
5664 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
5666 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5667 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
5672 Enter = EmbeddedLoop->getBody();
5673 Continue = EmbeddedLoop->getLatch();
5674 OutroInsertBefore = EmbeddedLoop->getLatch();
5675 return EmbeddedLoop;
5679 const Twine &NameBase) {
5681 CanonicalLoopInfo *EmbeddedLoop =
5682 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
5683 Result.push_back(EmbeddedLoop);
5687 EmbeddNewLoops(FloorCount,
"floor");
5691 Builder.SetInsertPoint(Enter->getTerminator());
5693 for (
int i = 0; i < NumLoops; ++i) {
5694 CanonicalLoopInfo *FloorLoop =
Result[i];
5697 Value *FloorIsEpilogue =
5698 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5699 Value *TileTripCount =
5700 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
5706 EmbeddNewLoops(TileCounts,
"tile");
5711 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
5720 BodyEnter =
nullptr;
5721 BodyEntered = ExitBB;
5733 Builder.restoreIP(
Result.back()->getBodyIP());
5734 for (
int i = 0; i < NumLoops; ++i) {
5735 CanonicalLoopInfo *FloorLoop =
Result[i];
5736 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
5737 Value *OrigIndVar = OrigIndVars[i];
5741 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
5743 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
5750 for (CanonicalLoopInfo *L :
Loops)
5754 for (CanonicalLoopInfo *GenL : Result)
5765 if (Properties.
empty())
5788 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
5792 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
5800 if (
I.mayReadOrWriteMemory()) {
5804 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5809void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
5816void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
5824void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5827 const Twine &NamePrefix) {
5828 Function *
F = CanonicalLoop->getFunction();
5850 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5856 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
5858 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
5861 Builder.SetInsertPoint(SplitBeforeIt);
5863 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5866 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
5869 Builder.SetInsertPoint(ElseBlock);
5875 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
5877 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
5883 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
5885 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
5892 if (
Block == ThenBlock)
5893 NewBB->
setName(NamePrefix +
".if.else");
5896 VMap[
Block] = NewBB;
5900 Builder.CreateBr(NewBlocks.
front());
5904 L->getLoopLatch()->splitBasicBlock(
5905 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
5909 L->addBasicBlockToLoop(ThenBlock, LI);
5913OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
5915 if (TargetTriple.
isX86()) {
5916 if (Features.
lookup(
"avx512f"))
5918 else if (Features.
lookup(
"avx"))
5922 if (TargetTriple.
isPPC())
5924 if (TargetTriple.
isWasm())
5929void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5931 Value *IfCond, OrderKind Order,
5935 Function *
F = CanonicalLoop->getFunction();
5950 if (AlignedVars.
size()) {
5951 InsertPointTy IP = Builder.saveIP();
5952 for (
auto &AlignedItem : AlignedVars) {
5953 Value *AlignedPtr = AlignedItem.first;
5954 Value *Alignment = AlignedItem.second;
5957 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
5960 Builder.restoreIP(IP);
5965 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
5975 if (
Block == CanonicalLoop->getCond() ||
5976 Block == CanonicalLoop->getHeader())
5978 Reachable.insert(
Block);
5988 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5996 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
6012 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6014 if (Simdlen || Safelen) {
6018 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6044static std::unique_ptr<TargetMachine>
6048 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6049 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6060 std::nullopt, OptLevel));
6084 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6085 FAM.registerPass([&]() {
return TIRA; });
6099 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6104 nullptr, ORE,
static_cast<int>(OptLevel),
6125 <<
" Threshold=" << UP.
Threshold <<
"\n"
6128 <<
" PartialOptSizeThreshold="
6148 Ptr = Load->getPointerOperand();
6150 Ptr = Store->getPointerOperand();
6154 Ptr =
Ptr->stripPointerCasts();
6157 if (Alloca->getParent() == &
F->getEntryBlock())
6177 int MaxTripCount = 0;
6178 bool MaxOrZero =
false;
6179 unsigned TripMultiple = 0;
6181 bool UseUpperBound =
false;
6183 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6185 unsigned Factor = UP.
Count;
6186 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6194void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6196 CanonicalLoopInfo **UnrolledCLI) {
6197 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6213 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6226 *UnrolledCLI =
Loop;
6231 "unrolling only makes sense with a factor of 2 or larger");
6233 Type *IndVarTy =
Loop->getIndVarType();
6240 std::vector<CanonicalLoopInfo *>
LoopNest =
6241 tileLoops(
DL, {
Loop}, {FactorVal});
6244 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6255 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6258 (*UnrolledCLI)->assertOK();
6262OpenMPIRBuilder::InsertPointTy
6263OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6266 if (!updateToLocation(
Loc))
6270 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6271 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6272 Value *ThreadId = getOrCreateThreadID(Ident);
6274 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6276 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6278 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6279 Builder.CreateCall(Fn, Args);
6281 return Builder.saveIP();
6284OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6285 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6289 if (!updateToLocation(
Loc))
6295 if (!CPVars.
empty()) {
6297 Builder.CreateStore(Builder.getInt32(0), DidIt);
6300 Directive OMPD = Directive::OMPD_single;
6302 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6303 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6304 Value *ThreadId = getOrCreateThreadID(Ident);
6307 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6308 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6310 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6311 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6313 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6314 if (
Error Err = FiniCB(IP))
6321 Builder.CreateStore(Builder.getInt32(1), DidIt);
6334 InsertPointOrErrorTy AfterIP =
6335 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6339 return AfterIP.takeError();
6342 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6344 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6345 ConstantInt::get(
Int64, 0), CPVars[
I],
6348 }
else if (!IsNowait) {
6349 InsertPointOrErrorTy AfterIP =
6350 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6351 omp::Directive::OMPD_unknown,
false,
6354 return AfterIP.takeError();
6356 return Builder.saveIP();
6359OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6360 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6361 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6363 if (!updateToLocation(
Loc))
6366 Directive OMPD = Directive::OMPD_critical;
6368 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6369 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6370 Value *ThreadId = getOrCreateThreadID(Ident);
6371 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6372 Value *
Args[] = {Ident, ThreadId, LockVar};
6378 EnterArgs.push_back(HintInst);
6379 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6381 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6383 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6386 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6387 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6389 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6393OpenMPIRBuilder::InsertPointTy
6394OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6395 InsertPointTy AllocaIP,
unsigned NumLoops,
6397 const Twine &Name,
bool IsDependSource) {
6401 "OpenMP runtime requires depend vec with i64 type");
6403 if (!updateToLocation(
Loc))
6408 Builder.restoreIP(AllocaIP);
6409 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6411 updateToLocation(
Loc);
6414 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6415 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6416 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6417 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6421 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6422 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6425 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6426 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6427 Value *ThreadId = getOrCreateThreadID(Ident);
6428 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6432 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6434 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6435 Builder.CreateCall(RTLFn, Args);
6437 return Builder.saveIP();
6440OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6441 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6442 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6443 if (!updateToLocation(
Loc))
6446 Directive OMPD = Directive::OMPD_ordered;
6452 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6453 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6454 Value *ThreadId = getOrCreateThreadID(Ident);
6457 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6458 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6461 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6462 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6465 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6469OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6471 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6472 bool HasFinalize,
bool IsCancellable) {
6475 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6479 BasicBlock *EntryBB = Builder.GetInsertBlock();
6488 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6491 if (
Error Err = BodyGenCB( InsertPointTy(),
6499 "Unexpected control flow graph state!!");
6500 InsertPointOrErrorTy AfterIP =
6501 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6503 return AfterIP.takeError();
6505 "Unexpected Control Flow State!");
6511 "Unexpected Insertion point location!");
6514 auto InsertBB = merged ? ExitPredBB : ExitBB;
6517 Builder.SetInsertPoint(InsertBB);
6519 return Builder.saveIP();
6522OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6525 if (!Conditional || !EntryCall)
6526 return Builder.saveIP();
6528 BasicBlock *EntryBB = Builder.GetInsertBlock();
6529 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6541 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6543 Builder.SetInsertPoint(UI);
6544 Builder.Insert(EntryBBTI);
6545 UI->eraseFromParent();
6552OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6553 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6556 Builder.restoreIP(FinIP);
6560 assert(!FinalizationStack.empty() &&
6561 "Unexpected finalization stack state!");
6563 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6564 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6566 if (
Error Err = Fi.FiniCB(FinIP))
6573 Builder.SetInsertPoint(FiniBBTI);
6577 return Builder.saveIP();
6581 Builder.Insert(ExitCall);
6587OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6588 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
6617 "copyin.not.master.end");
6624 Builder.SetInsertPoint(OMP_Entry);
6625 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6626 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6627 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6628 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6630 Builder.SetInsertPoint(CopyBegin);
6632 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6634 return Builder.saveIP();
6637CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
6641 updateToLocation(
Loc);
6644 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6645 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6646 Value *ThreadId = getOrCreateThreadID(Ident);
6649 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6651 return Builder.CreateCall(Fn, Args, Name);
6654CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
6658 updateToLocation(
Loc);
6661 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6662 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6663 Value *ThreadId = getOrCreateThreadID(Ident);
6665 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6666 return Builder.CreateCall(Fn, Args, Name);
6669CallInst *OpenMPIRBuilder::createOMPInteropInit(
6670 const LocationDescription &
Loc,
Value *InteropVar,
6672 Value *DependenceAddress,
bool HaveNowaitClause) {
6674 updateToLocation(
Loc);
6677 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6678 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6679 Value *ThreadId = getOrCreateThreadID(Ident);
6680 if (Device ==
nullptr)
6682 Constant *InteropTypeVal = ConstantInt::get(
Int32, (
int)InteropType);
6683 if (NumDependences ==
nullptr) {
6684 NumDependences = ConstantInt::get(
Int32, 0);
6688 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6690 Ident, ThreadId, InteropVar, InteropTypeVal,
6691 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6693 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6695 return Builder.CreateCall(Fn, Args);
6698CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6699 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
6700 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
6702 updateToLocation(
Loc);
6705 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6706 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6707 Value *ThreadId = getOrCreateThreadID(Ident);
6708 if (Device ==
nullptr)
6710 if (NumDependences ==
nullptr) {
6711 NumDependences = ConstantInt::get(
Int32, 0);
6715 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6717 Ident, ThreadId, InteropVar,
Device,
6718 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6720 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6722 return Builder.CreateCall(Fn, Args);
6725CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
6727 Value *NumDependences,
6728 Value *DependenceAddress,
6729 bool HaveNowaitClause) {
6731 updateToLocation(
Loc);
6733 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6734 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6735 Value *ThreadId = getOrCreateThreadID(Ident);
6736 if (Device ==
nullptr)
6738 if (NumDependences ==
nullptr) {
6739 NumDependences = ConstantInt::get(
Int32, 0);
6743 Value *HaveNowaitClauseVal = ConstantInt::get(
Int32, HaveNowaitClause);
6745 Ident, ThreadId, InteropVar,
Device,
6746 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6748 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6750 return Builder.CreateCall(Fn, Args);
6753CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6757 updateToLocation(
Loc);
6760 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6761 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6762 Value *ThreadId = getOrCreateThreadID(Ident);
6764 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
6768 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6770 return Builder.CreateCall(Fn, Args);
6773OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6774 const LocationDescription &
Loc,
6775 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6777 "expected num_threads and num_teams to be specified");
6779 if (!updateToLocation(
Loc))
6783 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6784 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6796 const std::string DebugPrefix =
"_debug__";
6797 if (KernelName.
ends_with(DebugPrefix)) {
6798 KernelName = KernelName.
drop_back(DebugPrefix.length());
6799 Kernel = M.getFunction(KernelName);
6805 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
6810 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
6811 if (MaxThreadsVal < 0)
6812 MaxThreadsVal = std::max(
6815 if (MaxThreadsVal > 0)
6816 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
6827 Function *Fn = getOrCreateRuntimeFunctionPtr(
6828 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6831 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
6832 Constant *DynamicEnvironmentInitializer =
6836 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6838 DL.getDefaultGlobalsAddressSpace());
6842 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
6843 ? DynamicEnvironmentGV
6845 DynamicEnvironmentPtr);
6848 ConfigurationEnvironment, {
6849 UseGenericStateMachineVal,
6850 MayUseNestedParallelismVal,
6857 ReductionBufferLength,
6860 KernelEnvironment, {
6861 ConfigurationEnvironmentInitializer,
6865 std::string KernelEnvironmentName =
6866 (KernelName +
"_kernel_environment").str();
6869 KernelEnvironmentInitializer, KernelEnvironmentName,
6871 DL.getDefaultGlobalsAddressSpace());
6875 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
6876 ? KernelEnvironmentGV
6878 KernelEnvironmentPtr);
6879 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
6881 KernelLaunchEnvironment =
6882 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
6883 ? KernelLaunchEnvironment
6884 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6885 KernelLaunchEnvParamTy);
6887 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6889 Value *ExecUserCode = Builder.CreateICmpEQ(
6899 auto *UI = Builder.CreateUnreachable();
6905 Builder.SetInsertPoint(WorkerExitBB);
6906 Builder.CreateRetVoid();
6909 Builder.SetInsertPoint(CheckBBTI);
6910 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6913 UI->eraseFromParent();
6920void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
6921 int32_t TeamsReductionDataSize,
6922 int32_t TeamsReductionBufferLength) {
6923 if (!updateToLocation(
Loc))
6926 Function *Fn = getOrCreateRuntimeFunctionPtr(
6927 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6929 Builder.CreateCall(Fn, {});
6931 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6937 const std::string DebugPrefix =
"_debug__";
6939 KernelName = KernelName.
drop_back(DebugPrefix.length());
6940 auto *KernelEnvironmentGV =
6941 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
6942 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
6943 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
6945 KernelEnvironmentInitializer,
6946 ConstantInt::get(
Int32, TeamsReductionDataSize), {0, 7});
6948 NewInitializer, ConstantInt::get(
Int32, TeamsReductionBufferLength),
6955 if (
Kernel.hasFnAttribute(Name)) {
6956 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
6962std::pair<int32_t, int32_t>
6964 int32_t ThreadLimit =
6965 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
6968 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
6969 if (!Attr.isValid() || !Attr.isStringAttribute())
6970 return {0, ThreadLimit};
6971 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
6974 return {0, ThreadLimit};
6975 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6981 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
6982 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
6983 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6985 return {0, ThreadLimit};
6988void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
6991 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
6994 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7002std::pair<int32_t, int32_t>
7005 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7009 int32_t LB, int32_t UB) {
7016 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7019void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7021 if (Config.isTargetDevice()) {
7028 else if (
T.isNVPTX())
7030 else if (
T.isSPIRV())
7037 if (Config.isTargetDevice()) {
7038 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7047Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7052 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7053 "Named kernel already exists?");
7059Error OpenMPIRBuilder::emitTargetRegionFunction(
7060 TargetRegionEntryInfo &EntryInfo,
7061 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7065 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7067 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7071 OutlinedFn = *CBResult;
7073 OutlinedFn =
nullptr;
7079 if (!IsOffloadEntry)
7082 std::string EntryFnIDName =
7083 Config.isTargetDevice()
7084 ? std::string(EntryFnName)
7085 : createPlatformSpecificName({EntryFnName,
"region_id"});
7087 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7088 EntryFnName, EntryFnIDName);
7092Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7093 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7096 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7097 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7098 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7099 OffloadInfoManager.registerTargetRegionEntryInfo(
7100 EntryInfo, EntryAddr, OutlinedFnID,
7101 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7102 return OutlinedFnID;
7105OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7106 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7107 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7108 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7110 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7111 BodyGenTy BodyGenType)>
7114 if (!updateToLocation(
Loc))
7115 return InsertPointTy();
7117 Builder.restoreIP(CodeGenIP);
7119 if (Config.IsTargetDevice.value_or(
false)) {
7121 InsertPointOrErrorTy AfterIP =
7122 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7124 return AfterIP.takeError();
7125 Builder.restoreIP(*AfterIP);
7127 return Builder.saveIP();
7130 bool IsStandAlone = !BodyGenCB;
7131 MapInfosTy *MapInfo;
7135 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7136 InsertPointTy CodeGenIP) ->
Error {
7137 MapInfo = &GenMapInfoCB(Builder.saveIP());
7138 if (
Error Err = emitOffloadingArrays(
7139 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7140 true, DeviceAddrCB))
7143 TargetDataRTArgs RTArgs;
7144 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7147 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7152 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7153 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7157 SrcLocInfo, DeviceID,
7158 PointerNum, RTArgs.BasePointersArray,
7159 RTArgs.PointersArray, RTArgs.SizesArray,
7160 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7161 RTArgs.MappersArray};
7164 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7168 if (
Info.HasNoWait) {
7175 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7178 if (
Info.HasNoWait) {
7182 emitBlock(OffloadContBlock, CurFn,
true);
7183 Builder.restoreIP(Builder.saveIP());
7188 bool RequiresOuterTargetTask =
Info.HasNoWait;
7189 if (!RequiresOuterTargetTask)
7190 cantFail(TaskBodyCB(
nullptr,
nullptr,
7193 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7194 {}, RTArgs,
Info.HasNoWait));
7196 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7197 omp::OMPRTL___tgt_target_data_begin_mapper);
7199 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7201 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7204 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7205 Builder.CreateStore(LI, DeviceMap.second.second);
7212 InsertPointOrErrorTy AfterIP =
7213 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7215 return AfterIP.takeError();
7216 Builder.restoreIP(*AfterIP);
7224 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7225 InsertPointTy CodeGenIP) ->
Error {
7226 InsertPointOrErrorTy AfterIP =
7227 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7229 return AfterIP.takeError();
7230 Builder.restoreIP(*AfterIP);
7235 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7236 TargetDataRTArgs RTArgs;
7237 Info.EmitDebug = !MapInfo->Names.empty();
7238 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7241 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7246 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7247 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7250 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7251 PointerNum, RTArgs.BasePointersArray,
7252 RTArgs.PointersArray, RTArgs.SizesArray,
7253 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7254 RTArgs.MappersArray};
7256 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7258 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7264 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7272 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7273 return BeginThenGen(AllocaIP, Builder.saveIP());
7281 InsertPointOrErrorTy AfterIP =
7282 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7284 return AfterIP.takeError();
7288 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7289 return EndThenGen(AllocaIP, Builder.saveIP());
7292 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7293 return BeginThenGen(AllocaIP, Builder.saveIP());
7299 return Builder.saveIP();
7303OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7304 bool IsGPUDistribute) {
7305 assert((IVSize == 32 || IVSize == 64) &&
7306 "IV size is not compatible with the omp runtime");
7308 if (IsGPUDistribute)
7310 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7311 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7312 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7313 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7315 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7316 : omp::OMPRTL___kmpc_for_static_init_4u)
7317 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7318 : omp::OMPRTL___kmpc_for_static_init_8u);
7320 return getOrCreateRuntimeFunction(M, Name);
7323FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7325 assert((IVSize == 32 || IVSize == 64) &&
7326 "IV size is not compatible with the omp runtime");
7328 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7329 : omp::OMPRTL___kmpc_dispatch_init_4u)
7330 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7331 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7333 return getOrCreateRuntimeFunction(M, Name);
7336FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7338 assert((IVSize == 32 || IVSize == 64) &&
7339 "IV size is not compatible with the omp runtime");
7341 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7342 : omp::OMPRTL___kmpc_dispatch_next_4u)
7343 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7344 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7346 return getOrCreateRuntimeFunction(M, Name);
7349FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7351 assert((IVSize == 32 || IVSize == 64) &&
7352 "IV size is not compatible with the omp runtime");
7354 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7355 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7356 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7357 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7359 return getOrCreateRuntimeFunction(M, Name);
7363 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7368 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7376 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7380 if (NewVar && (arg == NewVar->
getArg()))
7390 auto UpdateDebugRecord = [&](
auto *DR) {
7393 for (
auto Loc : DR->location_ops()) {
7394 auto Iter = ValueReplacementMap.find(
Loc);
7395 if (Iter != ValueReplacementMap.end()) {
7396 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7397 ArgNo = std::get<1>(Iter->second) + 1;
7401 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7408 "Unexpected debug intrinsic");
7410 UpdateDebugRecord(&DVR);
7413 if (OMPBuilder.Config.isTargetDevice()) {
7415 Module *M = Func->getParent();
7418 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7420 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7421 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7423 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7436 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7438 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7439 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7441 if (OMPBuilder.Config.isTargetDevice()) {
7449 for (
auto &Arg : Inputs)
7454 for (
auto &Arg : Inputs)
7458 auto BB = Builder.GetInsertBlock();
7470 if (TargetCpuAttr.isStringAttribute())
7471 Func->addFnAttr(TargetCpuAttr);
7473 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7474 if (TargetFeaturesAttr.isStringAttribute())
7475 Func->addFnAttr(TargetFeaturesAttr);
7477 if (OMPBuilder.Config.isTargetDevice()) {
7479 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7480 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7491 Builder.SetInsertPoint(EntryBB);
7494 if (OMPBuilder.Config.isTargetDevice())
7495 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7497 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7502 if (OMPBuilder.Config.isTargetDevice())
7503 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7507 splitBB(Builder,
true,
"outlined.body");
7508 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7510 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7512 return AfterIP.takeError();
7513 Builder.restoreIP(*AfterIP);
7514 if (OMPBuilder.Config.isTargetDevice())
7515 OMPBuilder.createTargetDeinit(Builder);
7518 Builder.CreateRetVoid();
7522 auto AllocaIP = Builder.saveIP();
7527 const auto &ArgRange =
7528 OMPBuilder.Config.isTargetDevice()
7529 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7562 if (Instr->getFunction() == Func)
7563 Instr->replaceUsesOfWith(
Input, InputCopy);
7569 for (
auto InArg :
zip(Inputs, ArgRange)) {
7571 Argument &Arg = std::get<1>(InArg);
7572 Value *InputCopy =
nullptr;
7574 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7575 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7577 return AfterIP.takeError();
7578 Builder.restoreIP(*AfterIP);
7579 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7599 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7606 ReplaceValue(
Input, InputCopy, Func);
7610 for (
auto Deferred : DeferredReplacement)
7611 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7614 ValueReplacementMap);
7622 Value *TaskWithPrivates,
7623 Type *TaskWithPrivatesTy) {
7625 Type *TaskTy = OMPIRBuilder.Task;
7628 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7629 Value *Shareds = TaskT;
7639 if (TaskWithPrivatesTy != TaskTy)
7640 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7657 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7662 assert((!NumOffloadingArrays || PrivatesTy) &&
7663 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7666 Module &M = OMPBuilder.M;
7690 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7696 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7697 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7703 ".omp_target_task_proxy_func",
7704 Builder.GetInsertBlock()->getModule());
7705 Value *ThreadId = ProxyFn->getArg(0);
7706 Value *TaskWithPrivates = ProxyFn->getArg(1);
7707 ThreadId->
setName(
"thread.id");
7708 TaskWithPrivates->
setName(
"task");
7710 bool HasShareds = SharedArgsOperandNo > 0;
7711 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7714 Builder.SetInsertPoint(EntryBB);
7720 if (HasOffloadingArrays) {
7721 assert(TaskTy != TaskWithPrivatesTy &&
7722 "If there are offloading arrays to pass to the target"
7723 "TaskTy cannot be the same as TaskWithPrivatesTy");
7726 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7727 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7729 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7733 auto *ArgStructAlloca =
7735 assert(ArgStructAlloca &&
7736 "Unable to find the alloca instruction corresponding to arguments "
7737 "for extracted function");
7741 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7743 Value *SharedsSize =
7744 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7747 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7749 Builder.CreateMemCpy(
7750 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7752 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7754 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7755 Builder.CreateRetVoid();
7761 return GEP->getSourceElementType();
7763 return Alloca->getAllocatedType();
7786 if (OffloadingArraysToPrivatize.
empty())
7787 return OMPIRBuilder.Task;
7790 for (
Value *V : OffloadingArraysToPrivatize) {
7791 assert(V->getType()->isPointerTy() &&
7792 "Expected pointer to array to privatize. Got a non-pointer value "
7795 assert(ArrayTy &&
"ArrayType cannot be nullptr");
7801 "struct.task_with_privates");
7804 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
7805 TargetRegionEntryInfo &EntryInfo,
7806 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7809 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7810 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7812 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7815 EntryFnName, Inputs, CBFunc,
7819 return OMPBuilder.emitTargetRegionFunction(
7820 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7824OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7825 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
7826 OpenMPIRBuilder::InsertPointTy AllocaIP,
7828 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
7952 splitBB(Builder,
true,
"target.task.body");
7954 splitBB(Builder,
true,
"target.task.alloca");
7956 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7957 TargetTaskAllocaBB->
begin());
7958 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
7961 OI.EntryBB = TargetTaskAllocaBB;
7962 OI.OuterAllocaBB = AllocaIP.getBlock();
7967 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
7970 Builder.restoreIP(TargetTaskBodyIP);
7971 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7985 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7989 bool NeedsTargetTask = HasNoWait && DeviceID;
7990 if (NeedsTargetTask) {
7992 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7993 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7994 RTArgs.SizesArray}) {
7996 OffloadingArraysToPrivatize.
push_back(V);
7997 OI.ExcludeArgsFromAggregate.push_back(V);
8001 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8002 DeviceID, OffloadingArraysToPrivatize](
8005 "there must be a single user for the outlined function");
8019 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8020 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8022 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8023 "Wrong number of arguments for StaleCI when shareds are present");
8024 int SharedArgOperandNo =
8025 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8031 if (!OffloadingArraysToPrivatize.
empty())
8036 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8037 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8039 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8042 Builder.SetInsertPoint(StaleCI);
8047 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8048 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8057 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8058 : getOrCreateRuntimeFunctionPtr(
8059 OMPRTL___kmpc_omp_target_task_alloc);
8063 Value *ThreadID = getOrCreateThreadID(Ident);
8070 Value *TaskSize = Builder.getInt64(
8071 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8076 Value *SharedsSize = Builder.getInt64(0);
8078 auto *ArgStructAlloca =
8080 assert(ArgStructAlloca &&
8081 "Unable to find the alloca instruction corresponding to arguments "
8082 "for extracted function");
8083 auto *ArgStructType =
8085 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8086 "arguments for extracted function");
8088 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8097 Value *Flags = Builder.getInt32(0);
8107 TaskSize, SharedsSize,
8110 if (NeedsTargetTask) {
8111 assert(DeviceID &&
"Expected non-empty device ID.");
8115 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8121 *
this, Builder, TaskData, TaskWithPrivatesTy);
8122 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8125 if (!OffloadingArraysToPrivatize.
empty()) {
8127 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8128 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8129 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8136 "ElementType should match ArrayType");
8139 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8140 Builder.CreateMemCpy(
8141 Dst, Alignment, PtrToPrivatize, Alignment,
8142 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8156 if (!NeedsTargetTask) {
8159 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8163 Builder.getInt32(Dependencies.size()),
8165 ConstantInt::get(Builder.getInt32Ty(), 0),
8171 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8173 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8174 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8175 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8177 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8178 }
else if (DepArray) {
8183 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8186 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8187 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8191 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8192 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8197 I->eraseFromParent();
8199 addOutlineInfo(std::move(OI));
8202 << *(Builder.GetInsertBlock()) <<
"\n");
8204 << *(Builder.GetInsertBlock()->getParent()->getParent())
8206 return Builder.saveIP();
8209Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8210 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8211 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8212 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8215 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8216 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8218 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8224 OpenMPIRBuilder::InsertPointTy AllocaIP,
8225 OpenMPIRBuilder::TargetDataInfo &
Info,
8226 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8227 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8230 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8231 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8233 bool HasNoWait,
Value *DynCGroupMem,
8238 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8239 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8240 Builder.restoreIP(IP);
8241 Builder.CreateCall(OutlinedFn, Args);
8242 return Builder.saveIP();
8245 bool HasDependencies = Dependencies.
size() > 0;
8246 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8248 OpenMPIRBuilder::TargetKernelArgs KArgs;
8255 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8263 if (OutlinedFnID && DeviceID)
8264 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8265 EmitTargetCallFallbackCB, KArgs,
8266 DeviceID, RTLoc, TargetTaskAllocaIP);
8274 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8277 OMPBuilder.Builder.restoreIP(AfterIP);
8281 auto &&EmitTargetCallElse =
8282 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8283 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8286 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8287 if (RequiresOuterTargetTask) {
8291 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8292 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8294 Dependencies, EmptyRTArgs, HasNoWait);
8296 return EmitTargetCallFallbackCB(Builder.saveIP());
8299 Builder.restoreIP(AfterIP);
8303 auto &&EmitTargetCallThen =
8304 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8305 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8306 Info.HasNoWait = HasNoWait;
8307 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8308 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8309 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8310 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8317 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8318 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8323 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8325 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8329 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8332 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8340 Value *MaxThreadsClause =
8341 RuntimeAttrs.TeamsThreadLimit.size() == 1
8342 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8345 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8346 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8347 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8348 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8350 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8351 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8353 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8356 unsigned NumTargetItems =
Info.NumberOfPtrs;
8360 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8361 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8364 Value *TripCount = RuntimeAttrs.LoopTripCount
8365 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8366 Builder.getInt64Ty(),
8368 : Builder.getInt64(0);
8372 DynCGroupMem = Builder.getInt32(0);
8374 KArgs = OpenMPIRBuilder::TargetKernelArgs(
8375 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
8376 HasNoWait, DynCGroupMemFallback);
8380 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8383 if (RequiresOuterTargetTask)
8384 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8385 Dependencies, KArgs.RTArgs,
8388 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8389 EmitTargetCallFallbackCB, KArgs,
8390 DeviceID, RTLoc, AllocaIP);
8393 Builder.restoreIP(AfterIP);
8400 if (!OutlinedFnID) {
8401 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8407 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8411 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8412 EmitTargetCallElse, AllocaIP));
8415OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8416 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8417 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8418 TargetRegionEntryInfo &EntryInfo,
8419 const TargetKernelDefaultAttrs &DefaultAttrs,
8420 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8422 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8423 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8424 CustomMapperCallbackTy CustomMapperCB,
8428 if (!updateToLocation(
Loc))
8429 return InsertPointTy();
8431 Builder.restoreIP(CodeGenIP);
8439 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8440 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8446 if (!Config.isTargetDevice())
8448 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8449 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
8450 DynCGroupMemFallback);
8451 return Builder.saveIP();
8464 return OS.
str().str();
8469 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8470 Config.separator());
8475 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8477 assert(Elem.second->getValueType() == Ty &&
8478 "OMP internal variable has different type than requested");
8491 : M.getTargetTriple().isAMDGPU()
8493 :
DL.getDefaultGlobalsAddressSpace();
8502 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
8503 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8510Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8511 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8512 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8513 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8516Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8521 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8523 return SizePtrToInt;
8528 std::string VarName) {
8532 M, MaptypesArrayInit->
getType(),
8536 return MaptypesArrayGlobal;
8539void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8540 InsertPointTy AllocaIP,
8541 unsigned NumOperands,
8542 struct MapperAllocas &MapperAllocas) {
8543 if (!updateToLocation(
Loc))
8548 Builder.restoreIP(AllocaIP);
8550 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8554 ArrI64Ty,
nullptr,
".offload_sizes");
8555 updateToLocation(
Loc);
8556 MapperAllocas.ArgsBase = ArgsBase;
8557 MapperAllocas.Args =
Args;
8558 MapperAllocas.ArgSizes = ArgSizes;
8561void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8564 struct MapperAllocas &MapperAllocas,
8565 int64_t DeviceID,
unsigned NumOperands) {
8566 if (!updateToLocation(
Loc))
8571 Value *ArgsBaseGEP =
8572 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8573 {Builder.getInt32(0), Builder.getInt32(0)});
8575 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8576 {Builder.getInt32(0), Builder.getInt32(0)});
8577 Value *ArgSizesGEP =
8578 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8579 {Builder.getInt32(0), Builder.getInt32(0)});
8582 Builder.CreateCall(MapperFunc,
8583 {SrcLocInfo, Builder.getInt64(DeviceID),
8584 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
8585 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8588void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8589 TargetDataRTArgs &RTArgs,
8590 TargetDataInfo &
Info,
8592 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8593 "expected region end call to runtime only when end call is separate");
8595 auto VoidPtrTy = UnqualPtrTy;
8596 auto VoidPtrPtrTy = UnqualPtrTy;
8598 auto Int64PtrTy = UnqualPtrTy;
8600 if (!
Info.NumberOfPtrs) {
8610 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8612 Info.RTArgs.BasePointersArray,
8614 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8618 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8621 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8623 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8624 :
Info.RTArgs.MapTypesArray,
8630 if (!
Info.EmitDebug)
8633 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8639 if (!
Info.HasMapper)
8642 RTArgs.MappersArray =
8643 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
8646void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8647 InsertPointTy CodeGenIP,
8648 MapInfosTy &CombinedInfo,
8649 TargetDataInfo &
Info) {
8650 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8651 CombinedInfo.NonContigInfo;
8664 "struct.descriptor_dim");
8666 enum { OffsetFD = 0, CountFD, StrideFD };
8670 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
8673 if (NonContigInfo.Dims[
I] == 1)
8675 Builder.restoreIP(AllocaIP);
8678 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
8679 Builder.restoreIP(CodeGenIP);
8680 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
8681 unsigned RevIdx = EE -
II - 1;
8682 Value *DimsLVal = Builder.CreateInBoundsGEP(
8684 {Builder.getInt64(0), Builder.getInt64(II)});
8686 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8687 Builder.CreateAlignedStore(
8688 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8689 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
8691 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8692 Builder.CreateAlignedStore(
8693 NonContigInfo.Counts[L][RevIdx], CountLVal,
8694 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8696 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8697 Builder.CreateAlignedStore(
8698 NonContigInfo.Strides[L][RevIdx], StrideLVal,
8699 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8702 Builder.restoreIP(CodeGenIP);
8703 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8704 DimsAddr, Builder.getPtrTy());
8705 Value *
P = Builder.CreateConstInBoundsGEP2_32(
8707 Info.RTArgs.PointersArray, 0,
I);
8708 Builder.CreateAlignedStore(
8709 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
8714void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8722 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
8724 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8725 Value *DeleteBit = Builder.CreateAnd(
8728 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8729 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8734 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8736 Value *PtrAndObjBit = Builder.CreateAnd(
8739 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8740 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8741 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8742 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8743 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8744 DeleteCond = Builder.CreateIsNull(
8746 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8749 DeleteCond = Builder.CreateIsNotNull(
8751 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8753 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8754 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8756 emitBlock(BodyBB, MapperFn);
8759 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
8762 Value *MapTypeArg = Builder.CreateAnd(
8765 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8766 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8767 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8768 MapTypeArg = Builder.CreateOr(
8771 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8772 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8776 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8777 ArraySize, MapTypeArg, MapName};
8779 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8787 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8803 MapperFn->
addFnAttr(Attribute::NoInline);
8804 MapperFn->
addFnAttr(Attribute::NoUnwind);
8814 auto SavedIP = Builder.saveIP();
8815 Builder.SetInsertPoint(EntryBB);
8827 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8828 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
8829 Value *PtrBegin = BeginIn;
8830 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
8835 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8836 MapType, MapName, ElementSize, HeadBB,
8842 emitBlock(HeadBB, MapperFn);
8847 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
8848 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8851 emitBlock(BodyBB, MapperFn);
8854 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
8858 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8860 return Info.takeError();
8864 Value *OffloadingArgs[] = {MapperHandle};
8865 Value *PreviousSize = Builder.CreateCall(
8866 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8868 Value *ShiftedPreviousSize =
8869 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
8872 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
8881 Value *OriMapType = Builder.getInt64(
8882 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8884 Value *MemberMapType =
8885 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8899 Value *LeftToFrom = Builder.CreateAnd(
8902 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8903 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8904 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8913 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8914 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8916 emitBlock(AllocBB, MapperFn);
8917 Value *AllocMapType = Builder.CreateAnd(
8920 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8921 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8922 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8923 Builder.CreateBr(EndBB);
8924 emitBlock(AllocElseBB, MapperFn);
8925 Value *IsTo = Builder.CreateICmpEQ(
8928 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8929 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8930 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8932 emitBlock(ToBB, MapperFn);
8933 Value *ToMapType = Builder.CreateAnd(
8936 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8937 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8938 Builder.CreateBr(EndBB);
8939 emitBlock(ToElseBB, MapperFn);
8940 Value *IsFrom = Builder.CreateICmpEQ(
8943 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8944 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8945 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8947 emitBlock(FromBB, MapperFn);
8948 Value *FromMapType = Builder.CreateAnd(
8951 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8952 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8954 emitBlock(EndBB, MapperFn);
8957 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
8963 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8964 CurSizeArg, CurMapType, CurNameArg};
8966 auto ChildMapperFn = CustomMapperCB(
I);
8968 return ChildMapperFn.takeError();
8969 if (*ChildMapperFn) {
8971 Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8976 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8983 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8984 "omp.arraymap.next");
8986 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
8988 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8990 emitBlock(ExitBB, MapperFn);
8993 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
8994 MapType, MapName, ElementSize, DoneBB,
8998 emitBlock(DoneBB, MapperFn,
true);
9000 Builder.CreateRetVoid();
9001 Builder.restoreIP(SavedIP);
9005Error OpenMPIRBuilder::emitOffloadingArrays(
9006 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
9007 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
9008 bool IsNonContiguous,
9012 Info.clearArrayInfo();
9013 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9015 if (
Info.NumberOfPtrs == 0)
9018 Builder.restoreIP(AllocaIP);
9024 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9025 PointerArrayType,
nullptr,
".offload_baseptrs");
9027 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9028 PointerArrayType,
nullptr,
".offload_ptrs");
9029 AllocaInst *MappersArray = Builder.CreateAlloca(
9030 PointerArrayType,
nullptr,
".offload_mappers");
9031 Info.RTArgs.MappersArray = MappersArray;
9038 ConstantInt::get(Int64Ty, 0));
9040 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9043 if (IsNonContiguous &&
9044 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9045 CombinedInfo.Types[
I] &
9046 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9048 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9054 RuntimeSizes.set(
I);
9057 if (RuntimeSizes.all()) {
9059 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9060 SizeArrayType,
nullptr,
".offload_sizes");
9065 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9066 auto *SizesArrayGbl =
9071 if (!RuntimeSizes.any()) {
9072 Info.RTArgs.SizesArray = SizesArrayGbl;
9074 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9075 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9078 SizeArrayType,
nullptr,
".offload_sizes");
9081 Builder.CreateMemCpy(
9082 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9083 SizesArrayGbl, OffloadSizeAlign,
9088 Info.RTArgs.SizesArray = Buffer;
9096 for (
auto mapFlag : CombinedInfo.Types)
9098 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9100 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9101 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9102 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9105 if (!CombinedInfo.Names.empty()) {
9106 auto *MapNamesArrayGbl = createOffloadMapnames(
9107 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9108 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9109 Info.EmitDebug =
true;
9111 Info.RTArgs.MapNamesArray =
9113 Info.EmitDebug =
false;
9118 if (
Info.separateBeginEndCalls()) {
9119 bool EndMapTypesDiffer =
false;
9121 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9122 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9123 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9124 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9125 EndMapTypesDiffer =
true;
9128 if (EndMapTypesDiffer) {
9129 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9130 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9135 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9136 Value *BPVal = CombinedInfo.BasePointers[
I];
9137 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9140 Builder.CreateAlignedStore(BPVal, BP,
9141 M.getDataLayout().getPrefTypeAlign(PtrTy));
9143 if (
Info.requiresDevicePointerInfo()) {
9144 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9145 CodeGenIP = Builder.saveIP();
9146 Builder.restoreIP(AllocaIP);
9147 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9148 Builder.restoreIP(CodeGenIP);
9150 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9151 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9152 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9154 DeviceAddrCB(
I, BP);
9158 Value *PVal = CombinedInfo.Pointers[
I];
9159 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9163 Builder.CreateAlignedStore(PVal,
P,
9164 M.getDataLayout().getPrefTypeAlign(PtrTy));
9166 if (RuntimeSizes.test(
I)) {
9167 Value *S = Builder.CreateConstInBoundsGEP2_32(
9171 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9174 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9177 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9180 auto CustomMFunc = CustomMapperCB(
I);
9182 return CustomMFunc.takeError();
9184 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9186 Value *MAddr = Builder.CreateInBoundsGEP(
9188 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9189 Builder.CreateAlignedStore(
9190 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9193 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9194 Info.NumberOfPtrs == 0)
9196 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9201 BasicBlock *CurBB = Builder.GetInsertBlock();
9208 Builder.CreateBr(
Target);
9211 Builder.ClearInsertionPoint();
9216 BasicBlock *CurBB = Builder.GetInsertBlock();
9232 Builder.SetInsertPoint(BB);
9235Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9236 BodyGenCallbackTy ElseGen,
9237 InsertPointTy AllocaIP) {
9241 auto CondConstant = CI->getSExtValue();
9243 return ThenGen(AllocaIP, Builder.saveIP());
9245 return ElseGen(AllocaIP, Builder.saveIP());
9255 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9257 emitBlock(ThenBlock, CurFn);
9258 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9260 emitBranch(ContBlock);
9263 emitBlock(ElseBlock, CurFn);
9264 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9267 emitBranch(ContBlock);
9269 emitBlock(ContBlock, CurFn,
true);
9273bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9277 "Unexpected Atomic Ordering.");
9334OpenMPIRBuilder::InsertPointTy
9335OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9336 AtomicOpValue &
X, AtomicOpValue &V,
9338 if (!updateToLocation(
Loc))
9341 assert(
X.Var->getType()->isPointerTy() &&
9342 "OMP Atomic expects a pointer to target memory");
9343 Type *XElemTy =
X.ElemTy;
9346 "OMP atomic read expected a scalar type");
9348 Value *XRead =
nullptr;
9352 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9358 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9361 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9362 OpenMPIRBuilder::AtomicInfo atomicInfo(
9363 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9364 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9365 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9366 XRead = AtomicLoadRes.first;
9373 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9376 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9378 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9381 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9382 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9383 return Builder.saveIP();
9386OpenMPIRBuilder::InsertPointTy
9387OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9388 AtomicOpValue &
X,
Value *Expr,
9390 if (!updateToLocation(
Loc))
9393 assert(
X.Var->getType()->isPointerTy() &&
9394 "OMP Atomic expects a pointer to target memory");
9395 Type *XElemTy =
X.ElemTy;
9398 "OMP atomic write expected a scalar type");
9401 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9404 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9406 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9407 OpenMPIRBuilder::AtomicInfo atomicInfo(
9408 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9409 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9410 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9417 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9418 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9422 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9423 return Builder.saveIP();
9426OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9427 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9429 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9430 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9432 if (!updateToLocation(
Loc))
9436 Type *XTy =
X.Var->getType();
9438 "OMP Atomic expects a pointer to target memory");
9439 Type *XElemTy =
X.ElemTy;
9442 "OMP atomic update expected a scalar type");
9445 "OpenMP atomic does not support LT or GT operations");
9449 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9450 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9452 return AtomicResult.takeError();
9453 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9454 return Builder.saveIP();
9458Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9462 return Builder.CreateAdd(Src1, Src2);
9464 return Builder.CreateSub(Src1, Src2);
9466 return Builder.CreateAnd(Src1, Src2);
9468 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9470 return Builder.CreateOr(Src1, Src2);
9472 return Builder.CreateXor(Src1, Src2);
9497 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9498 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9501 bool emitRMWOp =
false;
9509 emitRMWOp = XElemTy;
9512 emitRMWOp = (IsXBinopExpr && XElemTy);
9519 std::pair<Value *, Value *> Res;
9524 if (IsIgnoreDenormalMode)
9525 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9527 if (!IsFineGrainedMemory)
9528 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9530 if (!IsRemoteMemory)
9534 Res.first = RMWInst;
9539 Res.second = Res.first;
9541 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9545 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9551 OpenMPIRBuilder::AtomicInfo atomicInfo(
9552 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9553 OldVal->
getAlign(),
true , AllocaIP,
X);
9554 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9555 BasicBlock *CurBB = Builder.GetInsertBlock();
9557 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9561 X->getName() +
".atomic.cont");
9563 Builder.restoreIP(AllocaIP);
9564 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9565 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9566 Builder.SetInsertPoint(ContBB);
9568 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9573 Value *Upd = *CBResult;
9574 Builder.CreateStore(Upd, NewAtomicAddr);
9577 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9578 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9580 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9581 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9583 Res.first = OldExprVal;
9589 Builder.SetInsertPoint(ExitBB);
9591 Builder.SetInsertPoint(ExitTI);
9597 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9604 BasicBlock *CurBB = Builder.GetInsertBlock();
9606 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9610 X->getName() +
".atomic.cont");
9612 Builder.restoreIP(AllocaIP);
9613 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9614 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9615 Builder.SetInsertPoint(ContBB);
9617 PHI->addIncoming(OldVal, CurBB);
9622 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9623 X->getName() +
".atomic.fltCast");
9625 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9626 X->getName() +
".atomic.ptrCast");
9633 Value *Upd = *CBResult;
9634 Builder.CreateStore(Upd, NewAtomicAddr);
9635 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9640 Result->setVolatile(VolatileX);
9641 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9642 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9643 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9644 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9646 Res.first = OldExprVal;
9653 Builder.SetInsertPoint(ExitBB);
9655 Builder.SetInsertPoint(ExitTI);
9662OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9663 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9666 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9667 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9668 if (!updateToLocation(
Loc))
9672 Type *XTy =
X.Var->getType();
9674 "OMP Atomic expects a pointer to target memory");
9675 Type *XElemTy =
X.ElemTy;
9678 "OMP atomic capture expected a scalar type");
9680 "OpenMP atomic does not support LT or GT operations");
9687 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9688 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9691 Value *CapturedVal =
9692 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9693 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9695 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9696 return Builder.saveIP();
9699OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9700 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9706 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9707 IsPostfixUpdate, IsFailOnly, Failure);
9710OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9711 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9716 if (!updateToLocation(
Loc))
9719 assert(
X.Var->getType()->isPointerTy() &&
9720 "OMP atomic expects a pointer to target memory");
9723 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9724 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9727 bool IsInteger =
E->getType()->isIntegerTy();
9729 if (
Op == OMPAtomicCompareOp::EQ) {
9734 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9735 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9740 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9744 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9746 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9748 "OldValue and V must be of same type");
9749 if (IsPostfixUpdate) {
9750 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9752 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9763 BasicBlock *CurBB = Builder.GetInsertBlock();
9765 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9767 CurBBTI,
X.Var->getName() +
".atomic.exit");
9773 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9775 Builder.SetInsertPoint(ContBB);
9776 Builder.CreateStore(OldValue, V.Var);
9777 Builder.CreateBr(ExitBB);
9782 Builder.SetInsertPoint(ExitBB);
9784 Builder.SetInsertPoint(ExitTI);
9787 Value *CapturedValue =
9788 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
9789 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9795 assert(
R.Var->getType()->isPointerTy() &&
9796 "r.var must be of pointer type");
9797 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
9799 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9800 Value *ResultCast =
R.IsSigned
9801 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
9802 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
9803 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
9806 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
9807 "Op should be either max or min at this point");
9808 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
9846 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
9848 Value *CapturedValue =
nullptr;
9849 if (IsPostfixUpdate) {
9850 CapturedValue = OldValue;
9875 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
9876 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
9878 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9882 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
9884 return Builder.saveIP();
9887OpenMPIRBuilder::InsertPointOrErrorTy
9888OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
9889 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
9892 if (!updateToLocation(
Loc))
9893 return InsertPointTy();
9896 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
9897 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9902 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9903 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
9904 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
9924 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
9925 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
9927 splitBB(Builder,
true,
"teams.alloca");
9929 bool SubClausesPresent =
9930 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9932 if (!Config.isTargetDevice() && SubClausesPresent) {
9933 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
9934 "if lowerbound is non-null, then upperbound must also be non-null "
9935 "for bounds on num_teams");
9937 if (NumTeamsUpper ==
nullptr)
9938 NumTeamsUpper = Builder.getInt32(0);
9940 if (NumTeamsLower ==
nullptr)
9941 NumTeamsLower = NumTeamsUpper;
9945 "argument to if clause must be an integer value");
9949 IfExpr = Builder.CreateICmpNE(IfExpr,
9950 ConstantInt::get(IfExpr->
getType(), 0));
9951 NumTeamsUpper = Builder.CreateSelect(
9952 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
9955 NumTeamsLower = Builder.CreateSelect(
9956 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
9959 if (ThreadLimit ==
nullptr)
9960 ThreadLimit = Builder.getInt32(0);
9962 Value *ThreadNum = getOrCreateThreadID(Ident);
9964 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9965 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9968 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
9969 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
9970 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9974 OI.EntryBB = AllocaBB;
9976 OI.OuterAllocaBB = &OuterAllocaBB;
9980 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
9982 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
9984 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
9986 auto HostPostOutlineCB = [
this, Ident,
9987 ToBeDeleted](
Function &OutlinedFn)
mutable {
9992 "there must be a single user for the outlined function");
9997 "Outlined function must have two or three arguments only");
9999 bool HasShared = OutlinedFn.
arg_size() == 3;
10007 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10008 "outlined function.");
10009 Builder.SetInsertPoint(StaleCI);
10011 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
10014 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
10015 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10019 I->eraseFromParent();
10022 if (!Config.isTargetDevice())
10023 OI.PostOutlineCB = HostPostOutlineCB;
10025 addOutlineInfo(std::move(OI));
10027 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10029 return Builder.saveIP();
10032OpenMPIRBuilder::InsertPointOrErrorTy
10033OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10034 InsertPointTy OuterAllocaIP,
10035 BodyGenCallbackTy BodyGenCB) {
10036 if (!updateToLocation(
Loc))
10037 return InsertPointTy();
10039 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10041 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10043 splitBB(Builder,
true,
"distribute.entry");
10044 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10047 splitBB(Builder,
true,
"distribute.exit");
10049 splitBB(Builder,
true,
"distribute.body");
10051 splitBB(Builder,
true,
"distribute.alloca");
10054 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10055 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10056 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10061 if (Config.isTargetDevice()) {
10063 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10064 OI.EntryBB = AllocaBB;
10065 OI.ExitBB = ExitBB;
10067 addOutlineInfo(std::move(OI));
10069 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10071 return Builder.saveIP();
10076 std::string VarName) {
10082 M, MapNamesArrayInit->
getType(),
10085 return MapNamesArrayGlobal;
10090void OpenMPIRBuilder::initializeTypes(
Module &M) {
10093 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10094 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10095#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10096#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10097 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10098 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10099#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10100 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10101 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
10102#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10103 T = StructType::getTypeByName(Ctx, StructName); \
10105 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10107 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10108#include "llvm/Frontend/OpenMP/OMPKinds.def"
10111void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10119 while (!Worklist.
empty()) {
10123 if (
BlockSet.insert(SuccBB).second)
10132 if (!Config.isGPU()) {
10147 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10148 Fn->
addFnAttr(Attribute::MustProgress);
10152void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10153 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10156 if (OffloadInfoManager.empty())
10160 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10161 TargetRegionEntryInfo>,
10163 OrderedEntries(OffloadInfoManager.size());
10166 auto &&GetMDInt = [
this](
unsigned V) {
10173 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10174 auto &&TargetRegionMetadataEmitter =
10175 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10176 const TargetRegionEntryInfo &EntryInfo,
10177 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10190 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10191 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10192 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10193 GetMDInt(
E.getOrder())};
10196 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10202 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10205 auto &&DeviceGlobalVarMetadataEmitter =
10206 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10208 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10216 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10217 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10220 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10221 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10227 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10228 DeviceGlobalVarMetadataEmitter);
10230 for (
const auto &
E : OrderedEntries) {
10231 assert(
E.first &&
"All ordered entries must exist!");
10232 if (
const auto *CE =
10235 if (!
CE->getID() || !
CE->getAddress()) {
10237 TargetRegionEntryInfo EntryInfo =
E.second;
10238 StringRef FnName = EntryInfo.ParentName;
10239 if (!M.getNamedValue(FnName))
10241 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10244 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10247 }
else if (
const auto *CE =
dyn_cast<
10248 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10250 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10251 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10254 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10255 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10256 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10258 if (!
CE->getAddress()) {
10259 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10263 if (
CE->getVarSize() == 0)
10266 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10267 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10268 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10269 "Declaret target link address is set.");
10270 if (Config.isTargetDevice())
10272 if (!
CE->getAddress()) {
10273 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10285 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10286 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10291 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10292 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10293 Flags,
CE->getLinkage(),
CE->getVarName());
10295 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10296 Flags,
CE->getLinkage());
10307 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10312 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10313 Config.getRequiresFlags());
10316void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10318 unsigned FileID,
unsigned Line,
unsigned Count) {
10320 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10321 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10323 OS <<
"_" <<
Count;
10326void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10328 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10329 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10330 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10331 EntryInfo.Line, NewCount);
10334TargetRegionEntryInfo
10335OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10339 auto FileIDInfo = CallBack();
10343 FileID =
Status->getUniqueID().getFile();
10347 FileID =
hash_value(std::get<0>(FileIDInfo));
10350 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10351 std::get<1>(FileIDInfo));
10354unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10357 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10359 !(Remain & 1); Remain = Remain >> 1)
10365OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10368 << getFlagMemberOffset());
10371void OpenMPIRBuilder::setCorrectMemberOfFlag(
10377 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10379 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10386 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10387 Flags |= MemberOfFlag;
10390Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10391 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10392 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10393 bool IsDeclaration,
bool IsExternallyVisible,
10394 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10395 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10396 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10397 std::function<
Constant *()> GlobalInitializer,
10404 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10405 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10407 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10408 Config.hasRequiresUnifiedSharedMemory())) {
10413 if (!IsExternallyVisible)
10414 OS <<
format(
"_%x", EntryInfo.FileID);
10415 OS <<
"_decl_tgt_ref_ptr";
10418 Value *
Ptr = M.getNamedValue(PtrName);
10422 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10427 if (!Config.isTargetDevice()) {
10428 if (GlobalInitializer)
10429 GV->setInitializer(GlobalInitializer());
10434 registerTargetGlobalVariable(
10435 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10436 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10446void OpenMPIRBuilder::registerTargetGlobalVariable(
10447 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10448 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10449 bool IsDeclaration,
bool IsExternallyVisible,
10450 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10451 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10452 std::vector<Triple> TargetTriple,
10453 std::function<
Constant *()> GlobalInitializer,
10456 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10457 (TargetTriple.empty() && !Config.isTargetDevice()))
10460 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10465 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10467 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10468 !Config.hasRequiresUnifiedSharedMemory()) {
10469 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10473 if (!IsDeclaration)
10475 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10482 if (Config.isTargetDevice() &&
10486 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10489 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10491 if (!M.getNamedValue(RefName)) {
10493 getOrCreateInternalVariable(Addr->
getType(), RefName);
10495 GvAddrRef->setConstant(
true);
10497 GvAddrRef->setInitializer(Addr);
10498 GeneratedRefs.push_back(GvAddrRef);
10502 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10503 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10505 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10507 if (Config.isTargetDevice()) {
10511 Addr = getAddrOfDeclareTargetVar(
10512 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10513 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10514 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10517 VarSize = M.getDataLayout().getPointerSize();
10521 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10527void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10531 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10536 auto &&GetMDInt = [MN](
unsigned Idx) {
10541 auto &&GetMDString = [MN](
unsigned Idx) {
10543 return V->getString();
10546 switch (GetMDInt(0)) {
10550 case OffloadEntriesInfoManager::OffloadEntryInfo::
10551 OffloadingEntryInfoTargetRegion: {
10552 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10557 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10561 case OffloadEntriesInfoManager::OffloadEntryInfo::
10562 OffloadingEntryInfoDeviceGlobalVar:
10563 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10565 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10575 if (HostFilePath.
empty())
10579 if (std::error_code Err = Buf.getError()) {
10581 "OpenMPIRBuilder: " +
10589 if (std::error_code Err = M.getError()) {
10591 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10595 loadOffloadInfoMetadata(*M.get());
10602bool OffloadEntriesInfoManager::empty()
const {
10603 return OffloadEntriesTargetRegion.empty() &&
10604 OffloadEntriesDeviceGlobalVar.empty();
10607unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10608 const TargetRegionEntryInfo &EntryInfo)
const {
10609 auto It = OffloadEntriesTargetRegionCount.find(
10610 getTargetRegionEntryCountKey(EntryInfo));
10611 if (It == OffloadEntriesTargetRegionCount.end())
10616void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10617 const TargetRegionEntryInfo &EntryInfo) {
10618 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10619 EntryInfo.Count + 1;
10623void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10624 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10625 OffloadEntriesTargetRegion[EntryInfo] =
10626 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10627 OMPTargetRegionEntryTargetRegion);
10628 ++OffloadingEntriesNum;
10631void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10633 OMPTargetRegionEntryKind Flags) {
10634 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10637 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10641 if (OMPBuilder->Config.isTargetDevice()) {
10643 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10646 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10647 Entry.setAddress(Addr);
10649 Entry.setFlags(Flags);
10651 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10652 hasTargetRegionEntryInfo(EntryInfo,
true))
10654 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10655 "Target region entry already registered!");
10656 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10657 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10658 ++OffloadingEntriesNum;
10660 incrementTargetRegionEntryInfoCount(EntryInfo);
10663bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10664 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10667 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10669 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10670 if (It == OffloadEntriesTargetRegion.end()) {
10674 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10679void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10680 const OffloadTargetRegionEntryInfoActTy &Action) {
10682 for (
const auto &It : OffloadEntriesTargetRegion) {
10683 Action(It.first, It.second);
10687void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10688 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10689 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10690 ++OffloadingEntriesNum;
10693void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10696 if (OMPBuilder->Config.isTargetDevice()) {
10698 if (!hasDeviceGlobalVarEntryInfo(VarName))
10700 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10701 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10702 if (Entry.getVarSize() == 0) {
10703 Entry.setVarSize(VarSize);
10708 Entry.setVarSize(VarSize);
10710 Entry.setAddress(Addr);
10712 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10713 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10714 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10715 "Entry not initialized!");
10716 if (Entry.getVarSize() == 0) {
10717 Entry.setVarSize(VarSize);
10722 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10723 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10724 Addr, VarSize, Flags,
Linkage,
10727 OffloadEntriesDeviceGlobalVar.try_emplace(
10728 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10729 ++OffloadingEntriesNum;
10733void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10734 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10736 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10737 Action(
E.getKey(),
E.getValue());
10744void CanonicalLoopInfo::collectControlBlocks(
10751 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10754BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10763void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10775void CanonicalLoopInfo::mapIndVar(
10785 for (
Use &U : OldIV->
uses()) {
10789 if (
User->getParent() == getCond())
10791 if (
User->getParent() == getLatch())
10797 Value *NewIV = Updater(OldIV);
10800 for (
Use *U : ReplacableUses)
10808void CanonicalLoopInfo::assertOK()
const {
10821 "Preheader must terminate with unconditional branch");
10823 "Preheader must jump to header");
10827 "Header must terminate with unconditional branch");
10828 assert(Header->getSingleSuccessor() ==
Cond &&
10829 "Header must jump to exiting block");
10832 assert(
Cond->getSinglePredecessor() == Header &&
10833 "Exiting block only reachable from header");
10836 "Exiting block must terminate with conditional branch");
10838 "Exiting block must have two successors");
10840 "Exiting block's first successor jump to the body");
10842 "Exiting block's second successor must exit the loop");
10846 "Body only reachable from exiting block");
10851 "Latch must terminate with unconditional branch");
10860 "Exit block must terminate with unconditional branch");
10861 assert(
Exit->getSingleSuccessor() == After &&
10862 "Exit block must jump to after block");
10866 "After block only reachable from exit block");
10870 assert(IndVar &&
"Canonical induction variable not found?");
10872 "Induction variable must be an integer");
10874 "Induction variable must be a PHI in the loop header");
10880 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
10887 Value *TripCount = getTripCount();
10888 assert(TripCount &&
"Loop trip count not found?");
10890 "Trip count and induction variable must have the same type");
10894 "Exit condition must be a signed less-than comparison");
10896 "Exit condition must compare the induction variable");
10898 "Exit condition must compare with the trip count");
10902void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
@ Null
Return null pointer.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...