65#define DEBUG_TYPE "openmp-ir-builder"
72 cl::desc(
"Use optimistic attributes describing "
73 "'as-if' properties of runtime calls."),
77 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
78 cl::desc(
"Factor for the unroll threshold to account for code "
79 "simplifications still taking place"),
90 if (!IP1.isSet() || !IP2.isSet())
92 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
97 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
98 case OMPScheduleType::UnorderedStaticChunked:
99 case OMPScheduleType::UnorderedStatic:
100 case OMPScheduleType::UnorderedDynamicChunked:
101 case OMPScheduleType::UnorderedGuidedChunked:
102 case OMPScheduleType::UnorderedRuntime:
103 case OMPScheduleType::UnorderedAuto:
104 case OMPScheduleType::UnorderedTrapezoidal:
105 case OMPScheduleType::UnorderedGreedy:
106 case OMPScheduleType::UnorderedBalanced:
107 case OMPScheduleType::UnorderedGuidedIterativeChunked:
108 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
109 case OMPScheduleType::UnorderedSteal:
110 case OMPScheduleType::UnorderedStaticBalancedChunked:
111 case OMPScheduleType::UnorderedGuidedSimd:
112 case OMPScheduleType::UnorderedRuntimeSimd:
113 case OMPScheduleType::OrderedStaticChunked:
114 case OMPScheduleType::OrderedStatic:
115 case OMPScheduleType::OrderedDynamicChunked:
116 case OMPScheduleType::OrderedGuidedChunked:
117 case OMPScheduleType::OrderedRuntime:
118 case OMPScheduleType::OrderedAuto:
119 case OMPScheduleType::OrderdTrapezoidal:
120 case OMPScheduleType::NomergeUnorderedStaticChunked:
121 case OMPScheduleType::NomergeUnorderedStatic:
122 case OMPScheduleType::NomergeUnorderedDynamicChunked:
123 case OMPScheduleType::NomergeUnorderedGuidedChunked:
124 case OMPScheduleType::NomergeUnorderedRuntime:
125 case OMPScheduleType::NomergeUnorderedAuto:
126 case OMPScheduleType::NomergeUnorderedTrapezoidal:
127 case OMPScheduleType::NomergeUnorderedGreedy:
128 case OMPScheduleType::NomergeUnorderedBalanced:
129 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
130 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
131 case OMPScheduleType::NomergeUnorderedSteal:
132 case OMPScheduleType::NomergeOrderedStaticChunked:
133 case OMPScheduleType::NomergeOrderedStatic:
134 case OMPScheduleType::NomergeOrderedDynamicChunked:
135 case OMPScheduleType::NomergeOrderedGuidedChunked:
136 case OMPScheduleType::NomergeOrderedRuntime:
137 case OMPScheduleType::NomergeOrderedAuto:
138 case OMPScheduleType::NomergeOrderedTrapezoidal:
146 SchedType & OMPScheduleType::MonotonicityMask;
147 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
159 Builder.restoreIP(IP);
169 Kernel->getFnAttribute(
"target-features").getValueAsString();
170 if (Features.
count(
"+wavefrontsize64"))
185 bool HasSimdModifier) {
187 switch (ClauseKind) {
188 case OMP_SCHEDULE_Default:
189 case OMP_SCHEDULE_Static:
190 return HasChunks ? OMPScheduleType::BaseStaticChunked
191 : OMPScheduleType::BaseStatic;
192 case OMP_SCHEDULE_Dynamic:
193 return OMPScheduleType::BaseDynamicChunked;
194 case OMP_SCHEDULE_Guided:
195 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
196 : OMPScheduleType::BaseGuidedChunked;
197 case OMP_SCHEDULE_Auto:
199 case OMP_SCHEDULE_Runtime:
200 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
201 : OMPScheduleType::BaseRuntime;
209 bool HasOrderedClause) {
210 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
211 OMPScheduleType::None &&
212 "Must not have ordering nor monotonicity flags already set");
215 ? OMPScheduleType::ModifierOrdered
216 : OMPScheduleType::ModifierUnordered;
217 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
220 if (OrderingScheduleType ==
221 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
222 return OMPScheduleType::OrderedGuidedChunked;
223 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
224 OMPScheduleType::ModifierOrdered))
225 return OMPScheduleType::OrderedRuntime;
227 return OrderingScheduleType;
233 bool HasSimdModifier,
bool HasMonotonic,
234 bool HasNonmonotonic,
bool HasOrderedClause) {
235 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
236 OMPScheduleType::None &&
237 "Must not have monotonicity flags already set");
238 assert((!HasMonotonic || !HasNonmonotonic) &&
239 "Monotonic and Nonmonotonic are contradicting each other");
242 return ScheduleType | OMPScheduleType::ModifierMonotonic;
243 }
else if (HasNonmonotonic) {
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
254 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
255 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
261 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
269 bool HasSimdModifier,
bool HasMonotonicModifier,
270 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
276 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
277 HasNonmonotonicModifier, HasOrderedClause);
292 assert(!Br->isConditional() &&
293 "BB's terminator must be an unconditional branch (or degenerate)");
296 Br->setSuccessor(0,
Target);
301 NewBr->setDebugLoc(
DL);
307 "Target BB must not have PHI nodes");
327 NewBr->setDebugLoc(
DL);
335 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
339 Builder.SetInsertPoint(Old);
343 Builder.SetCurrentDebugLocation(
DebugLoc);
352 spliceBB(IP, New, CreateBranch,
DL);
353 New->replaceSuccessorsPhiUsesWith(Old, New);
362 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
364 Builder.SetInsertPoint(Builder.GetInsertBlock());
367 Builder.SetCurrentDebugLocation(
DebugLoc);
376 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
378 Builder.SetInsertPoint(Builder.GetInsertBlock());
381 Builder.SetCurrentDebugLocation(
DebugLoc);
388 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
395 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
397 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
398 const Twine &Name =
"",
bool AsPtr =
true) {
399 Builder.restoreIP(OuterAllocaIP);
402 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
406 FakeVal = FakeValAddr;
409 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
414 Builder.restoreIP(InnerAllocaIP);
418 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
434enum OpenMPOffloadingRequiresDirFlags {
436 OMP_REQ_UNDEFINED = 0x000,
438 OMP_REQ_NONE = 0x001,
440 OMP_REQ_REVERSE_OFFLOAD = 0x002,
442 OMP_REQ_UNIFIED_ADDRESS = 0x004,
444 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
446 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
452OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
453 : RequiresFlags(OMP_REQ_UNDEFINED) {}
455OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
456 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
457 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
458 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
459 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
460 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
461 RequiresFlags(OMP_REQ_UNDEFINED) {
462 if (HasRequiresReverseOffload)
463 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
464 if (HasRequiresUnifiedAddress)
465 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
466 if (HasRequiresUnifiedSharedMemory)
467 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
468 if (HasRequiresDynamicAllocators)
469 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
472bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
473 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
476bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
477 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
480bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
481 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
484bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
485 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
488int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
489 return hasRequiresFlags() ? RequiresFlags
490 :
static_cast<int64_t
>(OMP_REQ_NONE);
493void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
495 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
497 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
500void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
502 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
504 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
514void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
516 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
518 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
525void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
529 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
531 constexpr const size_t MaxDim = 3;
533 Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
535 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
538 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
539 Value *NumThreads3D =
540 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
542 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
544 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
546 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
548 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
550 ArgsVector = {Version,
552 KernelArgs.RTArgs.BasePointersArray,
553 KernelArgs.RTArgs.PointersArray,
554 KernelArgs.RTArgs.SizesArray,
555 KernelArgs.RTArgs.MapTypesArray,
556 KernelArgs.RTArgs.MapNamesArray,
557 KernelArgs.RTArgs.MappersArray,
558 KernelArgs.NumIterations,
562 KernelArgs.DynCGGroupMem};
570 auto FnAttrs =
Attrs.getFnAttrs();
571 auto RetAttrs =
Attrs.getRetAttrs();
573 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
578 bool Param =
true) ->
void {
579 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
580 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
581 if (HasSignExt || HasZeroExt) {
582 assert(AS.getNumAttributes() == 1 &&
583 "Currently not handling extension attr combined with others.");
585 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
588 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
595#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
596#include "llvm/Frontend/OpenMP/OMPKinds.def"
600#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
602 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
603 addAttrSet(RetAttrs, RetAttrSet, false); \
604 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
605 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
606 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
608#include "llvm/Frontend/OpenMP/OMPKinds.def"
622#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
624 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
626 Fn = M.getFunction(Str); \
628#include "llvm/Frontend/OpenMP/OMPKinds.def"
634#define OMP_RTL(Enum, Str, ...) \
636 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
638#include "llvm/Frontend/OpenMP/OMPKinds.def"
642 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
652 LLVMContext::MD_callback,
654 2, {-1, -1},
true)}));
660 addAttributes(FnID, *Fn);
667 assert(Fn &&
"Failed to create OpenMP runtime function");
675 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
679void OpenMPIRBuilder::initialize() { initializeTypes(M); }
690 for (
auto Inst =
Block->getReverseIterator()->begin();
691 Inst !=
Block->getReverseIterator()->end();) {
704void OpenMPIRBuilder::finalize(
Function *Fn) {
708 for (OutlineInfo &OI : OutlineInfos) {
711 if (Fn && OI.getFunction() != Fn) {
716 ParallelRegionBlockSet.
clear();
718 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
728 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
737 ".omp_par", ArgsInZeroAddressSpace);
741 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
742 assert(Extractor.isEligible() &&
743 "Expected OpenMP outlining to be possible!");
745 for (
auto *V : OI.ExcludeArgsFromAggregate)
746 Extractor.excludeArgFromAggregate(V);
748 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
752 if (TargetCpuAttr.isStringAttribute())
755 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
756 if (TargetFeaturesAttr.isStringAttribute())
757 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
760 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
762 "OpenMP outlined functions should not return a value!");
767 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
774 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
781 "Expected instructions to add in the outlined region entry");
783 End = ArtificialEntry.
rend();
788 if (
I.isTerminator()) {
790 if (OI.EntryBB->getTerminator())
791 OI.EntryBB->getTerminator()->adoptDbgRecords(
792 &ArtificialEntry,
I.getIterator(),
false);
796 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
799 OI.EntryBB->moveBefore(&ArtificialEntry);
806 if (OI.PostOutlineCB)
807 OI.PostOutlineCB(*OutlinedFn);
811 OutlineInfos = std::move(DeferredOutlines);
832 for (
Function *
F : ConstantAllocaRaiseCandidates)
835 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
836 [](EmitMetadataErrorKind Kind,
837 const TargetRegionEntryInfo &EntryInfo) ->
void {
838 errs() <<
"Error of kind: " << Kind
839 <<
" when emitting offload entries and metadata during "
840 "OMPIRBuilder finalization \n";
843 if (!OffloadInfoManager.empty())
844 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
846 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
847 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
848 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
849 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
855bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
857OpenMPIRBuilder::~OpenMPIRBuilder() {
858 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
866 ConstantInt::get(I32Ty,
Value), Name);
878 UsedArray.
resize(List.size());
879 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
883 if (UsedArray.
empty())
890 GV->setSection(
"llvm.metadata");
894OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
896 auto *Int8Ty = Builder.getInt8Ty();
899 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
907 unsigned Reserve2Flags) {
909 LocFlags |= OMP_IDENT_FLAG_KMPC;
912 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
917 ConstantInt::get(
Int32, Reserve2Flags),
918 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
920 size_t SrcLocStrArgIdx = 4;
921 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
925 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
932 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
933 if (
GV.getInitializer() == Initializer)
938 M, OpenMPIRBuilder::Ident,
941 M.getDataLayout().getDefaultGlobalsAddressSpace());
953 SrcLocStrSize = LocStr.
size();
954 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
962 if (
GV.isConstant() &&
GV.hasInitializer() &&
963 GV.getInitializer() == Initializer)
966 SrcLocStr = Builder.CreateGlobalString(
967 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
975 unsigned Line,
unsigned Column,
981 Buffer.
append(FunctionName);
983 Buffer.
append(std::to_string(Line));
985 Buffer.
append(std::to_string(Column));
988 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
992OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
993 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
994 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1002 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1004 if (
DIFile *DIF = DIL->getFile())
1005 if (std::optional<StringRef> Source = DIF->getSource())
1010 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1011 DIL->getColumn(), SrcLocStrSize);
1014Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1016 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1017 Loc.IP.getBlock()->getParent());
1020Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1021 return Builder.CreateCall(
1022 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1023 "omp_global_thread_num");
1026OpenMPIRBuilder::InsertPointOrErrorTy
1027OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1028 bool ForceSimpleCall,
bool CheckCancelFlag) {
1029 if (!updateToLocation(
Loc))
1038 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1041 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1044 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1047 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1050 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1055 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1057 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1058 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1063 bool UseCancelBarrier =
1064 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1067 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
1068 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
1069 : OMPRTL___kmpc_barrier),
1072 if (UseCancelBarrier && CheckCancelFlag)
1073 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1076 return Builder.saveIP();
1079OpenMPIRBuilder::InsertPointOrErrorTy
1080OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1082 omp::Directive CanceledDirective) {
1083 if (!updateToLocation(
Loc))
1087 auto *UI = Builder.CreateUnreachable();
1092 Builder.SetInsertPoint(ThenTI);
1094 Value *CancelKind =
nullptr;
1095 switch (CanceledDirective) {
1096#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1097 case DirectiveEnum: \
1098 CancelKind = Builder.getInt32(Value); \
1100#include "llvm/Frontend/OpenMP/OMPKinds.def"
1106 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1107 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1108 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1110 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1111 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1112 if (CanceledDirective == OMPD_parallel) {
1114 Builder.restoreIP(IP);
1115 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1116 omp::Directive::OMPD_unknown,
1125 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1129 Builder.SetInsertPoint(UI->getParent());
1130 UI->eraseFromParent();
1132 return Builder.saveIP();
1135OpenMPIRBuilder::InsertPointOrErrorTy
1136OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1137 omp::Directive CanceledDirective) {
1138 if (!updateToLocation(
Loc))
1142 auto *UI = Builder.CreateUnreachable();
1143 Builder.SetInsertPoint(UI);
1145 Value *CancelKind =
nullptr;
1146 switch (CanceledDirective) {
1147#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1148 case DirectiveEnum: \
1149 CancelKind = Builder.getInt32(Value); \
1151#include "llvm/Frontend/OpenMP/OMPKinds.def"
1157 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1158 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1159 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1161 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1162 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1163 if (CanceledDirective == OMPD_parallel) {
1165 Builder.restoreIP(IP);
1166 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1167 omp::Directive::OMPD_unknown,
1176 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1180 Builder.SetInsertPoint(UI->getParent());
1181 UI->eraseFromParent();
1183 return Builder.saveIP();
1186OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1187 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1190 if (!updateToLocation(
Loc))
1193 Builder.restoreIP(AllocaIP);
1194 auto *KernelArgsPtr =
1195 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1196 updateToLocation(
Loc);
1200 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1201 Builder.CreateAlignedStore(
1203 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1207 NumThreads, HostPtr, KernelArgsPtr};
1209 Return = Builder.CreateCall(
1210 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1213 return Builder.saveIP();
1216OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1217 const LocationDescription &
Loc,
Value *OutlinedFnID,
1218 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1219 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1221 if (!updateToLocation(
Loc))
1234 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1238 Value *Return =
nullptr;
1242 getKernelArgsVector(Args, Builder, ArgsVector);
1257 Builder.restoreIP(emitTargetKernel(
1258 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1259 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1266 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1268 auto CurFn = Builder.GetInsertBlock()->getParent();
1269 emitBlock(OffloadFailedBlock, CurFn);
1270 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1272 return AfterIP.takeError();
1273 Builder.restoreIP(*AfterIP);
1274 emitBranch(OffloadContBlock);
1275 emitBlock(OffloadContBlock, CurFn,
true);
1276 return Builder.saveIP();
1279Error OpenMPIRBuilder::emitCancelationCheckImpl(
1280 Value *CancelFlag, omp::Directive CanceledDirective,
1281 FinalizeCallbackTy ExitCB) {
1282 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1283 "Unexpected cancellation!");
1288 if (Builder.GetInsertPoint() == BB->
end()) {
1294 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1296 Builder.SetInsertPoint(BB);
1302 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1303 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1308 Builder.SetInsertPoint(CancellationBlock);
1310 if (
Error Err = ExitCB(Builder.saveIP()))
1312 auto &FI = FinalizationStack.back();
1313 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1317 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1336 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1339 "Expected at least tid and bounded tid as arguments");
1340 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1343 assert(CI &&
"Expected call instruction to outlined function");
1344 CI->
getParent()->setName(
"omp_parallel");
1346 Builder.SetInsertPoint(CI);
1347 Type *PtrTy = OMPIRBuilder->VoidPtr;
1351 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1355 Value *Args = ArgsAlloca;
1359 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1360 Builder.restoreIP(CurrentIP);
1363 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1365 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1367 Builder.CreateStore(V, StoreAddress);
1371 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1372 : Builder.getInt32(1);
1375 Value *Parallel51CallArgs[] = {
1379 NumThreads ? NumThreads : Builder.getInt32(-1),
1380 Builder.getInt32(-1),
1384 Builder.getInt64(NumCapturedVars)};
1387 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1389 Builder.CreateCall(RTLFn, Parallel51CallArgs);
1392 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1395 Builder.SetInsertPoint(PrivTID);
1397 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1404 I->eraseFromParent();
1421 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1424 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1427 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1435 F->addMetadata(LLVMContext::MD_callback,
1444 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1447 "Expected at least tid and bounded tid as arguments");
1448 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1451 CI->
getParent()->setName(
"omp_parallel");
1452 Builder.SetInsertPoint(CI);
1455 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1459 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1461 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1468 auto PtrTy = OMPIRBuilder->VoidPtr;
1469 if (IfCondition && NumCapturedVars == 0) {
1474 Builder.CreateCall(RTLFn, RealArgs);
1477 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1480 Builder.SetInsertPoint(PrivTID);
1482 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1489 I->eraseFromParent();
1493OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1494 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1495 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1496 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1497 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1500 if (!updateToLocation(
Loc))
1504 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1505 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1506 Value *ThreadID = getOrCreateThreadID(Ident);
1512 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1516 if (NumThreads && !Config.isTargetDevice()) {
1519 Builder.CreateIntCast(NumThreads,
Int32,
false)};
1521 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1524 if (ProcBind != OMP_PROC_BIND_default) {
1528 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1530 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1533 BasicBlock *InsertBB = Builder.GetInsertBlock();
1538 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1546 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1547 Builder.restoreIP(NewOuter);
1548 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr");
1550 Builder.CreateAlloca(
Int32,
nullptr,
"zero.addr");
1553 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1556 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1560 PointerType ::get(M.getContext(), 0),
1561 "zero.addr.ascast");
1582 auto FiniCBWrapper = [&](InsertPointTy IP) {
1587 Builder.restoreIP(IP);
1589 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1593 "Unexpected insertion point for finalization call!");
1597 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1602 InsertPointTy InnerAllocaIP = Builder.saveIP();
1605 Builder.CreateAlloca(
Int32,
nullptr,
"tid.addr.local");
1609 ToBeDeleted.
push_back(Builder.CreateLoad(
Int32, TIDAddr,
"tid.addr.use"));
1611 Builder.CreateLoad(
Int32, ZeroAddr,
"zero.addr.use");
1629 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1632 assert(BodyGenCB &&
"Expected body generation callback!");
1633 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1634 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1637 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1640 if (Config.isTargetDevice()) {
1642 OI.PostOutlineCB = [=, ToBeDeletedVec =
1643 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1645 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1646 ThreadID, ToBeDeletedVec);
1650 OI.PostOutlineCB = [=, ToBeDeletedVec =
1651 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1653 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1657 OI.OuterAllocaBB = OuterAllocaBlock;
1658 OI.EntryBB = PRegEntryBB;
1659 OI.ExitBB = PRegExitBB;
1663 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1674 ".omp_par", ArgsInZeroAddressSpace);
1679 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1681 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1686 return GV->getValueType() == OpenMPIRBuilder::Ident;
1691 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1694 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1697 if (&V == TIDAddr || &V == ZeroAddr) {
1698 OI.ExcludeArgsFromAggregate.push_back(&V);
1703 for (
Use &U : V.uses())
1705 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1715 if (!V.getType()->isPointerTy()) {
1719 Builder.restoreIP(OuterAllocaIP);
1721 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1725 Builder.SetInsertPoint(InsertBB,
1727 Builder.CreateStore(&V,
Ptr);
1730 Builder.restoreIP(InnerAllocaIP);
1731 Inner = Builder.CreateLoad(V.getType(),
Ptr);
1734 Value *ReplacementValue =
nullptr;
1737 ReplacementValue = PrivTID;
1739 InsertPointOrErrorTy AfterIP =
1740 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1742 return AfterIP.takeError();
1743 Builder.restoreIP(*AfterIP);
1745 InnerAllocaIP.getBlock(),
1746 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1748 assert(ReplacementValue &&
1749 "Expected copy/create callback to set replacement value!");
1750 if (ReplacementValue == &V)
1755 UPtr->set(ReplacementValue);
1780 for (
Value *Output : Outputs)
1783 assert(Outputs.empty() &&
1784 "OpenMP outlining should not produce live-out values!");
1786 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1788 for (
auto *BB : Blocks)
1795 auto FiniInfo = FinalizationStack.pop_back_val();
1797 assert(FiniInfo.DK == OMPD_parallel &&
1798 "Unexpected finalization stack state!");
1802 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1803 if (
Error Err = FiniCB(PreFiniIP))
1807 addOutlineInfo(std::move(OI));
1809 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1810 UI->eraseFromParent();
1815void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1818 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1819 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1821 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush), Args);
1824void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1825 if (!updateToLocation(
Loc))
1830void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1834 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1835 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1836 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1839 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1843void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1844 if (!updateToLocation(
Loc))
1846 emitTaskwaitImpl(
Loc);
1849void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1852 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1853 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1855 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1857 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1861void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1862 if (!updateToLocation(
Loc))
1864 emitTaskyieldImpl(
Loc);
1873 OpenMPIRBuilder &OMPBuilder,
1876 if (Dependencies.
empty())
1896 Type *DependInfo = OMPBuilder.DependInfo;
1897 Module &M = OMPBuilder.M;
1899 Value *DepArray =
nullptr;
1900 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1901 Builder.SetInsertPoint(
1902 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1905 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1907 Builder.restoreIP(OldIP);
1909 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1911 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1913 Value *Addr = Builder.CreateStructGEP(
1915 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1916 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1917 Builder.CreateStore(DepValPtr, Addr);
1920 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1921 Builder.CreateStore(
1922 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1925 Value *Flags = Builder.CreateStructGEP(
1927 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1928 Builder.CreateStore(
1929 ConstantInt::get(Builder.getInt8Ty(),
1930 static_cast<unsigned int>(Dep.DepKind)),
1936OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1937 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1938 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1942 if (!updateToLocation(
Loc))
1943 return InsertPointTy();
1946 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1947 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1964 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1965 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1967 splitBB(Builder,
true,
"task.alloca");
1969 InsertPointTy TaskAllocaIP =
1970 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1971 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1972 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1976 OI.EntryBB = TaskAllocaBB;
1977 OI.OuterAllocaBB = AllocaIP.getBlock();
1978 OI.ExitBB = TaskExitBB;
1983 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1985 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1986 Mergeable, Priority, EventHandle, TaskAllocaBB,
1987 ToBeDeleted](
Function &OutlinedFn)
mutable {
1990 "there must be a single user for the outlined function");
1995 bool HasShareds = StaleCI->
arg_size() > 1;
1996 Builder.SetInsertPoint(StaleCI);
2001 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2005 Value *ThreadID = getOrCreateThreadID(Ident);
2017 Value *Flags = Builder.getInt32(Tied);
2020 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2021 Flags = Builder.CreateOr(FinalFlag, Flags);
2025 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2027 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2033 Value *TaskSize = Builder.getInt64(
2034 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2039 Value *SharedsSize = Builder.getInt64(0);
2043 assert(ArgStructAlloca &&
2044 "Unable to find the alloca instruction corresponding to arguments "
2045 "for extracted function");
2048 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2049 "arguments for extracted function");
2051 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2056 CallInst *TaskData = Builder.CreateCall(
2057 TaskAllocFn, {Ident, ThreadID, Flags,
2058 TaskSize, SharedsSize,
2065 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2066 OMPRTL___kmpc_task_allow_completion_event);
2068 Builder.CreateCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2070 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2071 Builder.getPtrTy(0));
2072 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2073 Builder.CreateStore(EventVal, EventHandleAddr);
2079 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2080 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2098 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2101 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2102 Value *PriorityData = Builder.CreateInBoundsGEP(
2103 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2106 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2108 Builder.CreateStore(Priority, CmplrData);
2133 splitBB(Builder,
true,
"if.end");
2135 Builder.GetInsertPoint()->
getParent()->getTerminator();
2136 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2137 Builder.SetInsertPoint(IfTerminator);
2140 Builder.SetInsertPoint(ElseTI);
2142 if (Dependencies.size()) {
2144 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2147 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2148 ConstantInt::get(Builder.getInt32Ty(), 0),
2152 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2154 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2155 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2158 CI = Builder.CreateCall(&OutlinedFn, {ThreadID, TaskData});
2160 CI = Builder.CreateCall(&OutlinedFn, {ThreadID});
2162 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2163 Builder.SetInsertPoint(ThenTI);
2166 if (Dependencies.size()) {
2168 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2171 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2172 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2177 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2178 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
2183 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2185 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2187 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2191 I->eraseFromParent();
2194 addOutlineInfo(std::move(OI));
2195 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2197 return Builder.saveIP();
2200OpenMPIRBuilder::InsertPointOrErrorTy
2201OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2202 InsertPointTy AllocaIP,
2203 BodyGenCallbackTy BodyGenCB) {
2204 if (!updateToLocation(
Loc))
2205 return InsertPointTy();
2208 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2209 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2210 Value *ThreadID = getOrCreateThreadID(Ident);
2214 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2215 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2217 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2218 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2221 Builder.SetInsertPoint(TaskgroupExitBB);
2224 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2225 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2227 return Builder.saveIP();
2230OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2231 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2233 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2236 if (!updateToLocation(
Loc))
2242 auto FiniCBWrapper = [&](InsertPointTy IP) {
2251 CancellationBranches.
push_back(DummyBranch);
2255 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
2273 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2274 Builder.restoreIP(CodeGenIP);
2276 splitBBWithSuffix(Builder,
false,
".sections.after");
2280 unsigned CaseNumber = 0;
2281 for (
auto SectionCB : SectionCBs) {
2283 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2284 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2285 Builder.SetInsertPoint(CaseBB);
2287 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
2299 Value *LB = ConstantInt::get(I32Ty, 0);
2300 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2301 Value *
ST = ConstantInt::get(I32Ty, 1);
2303 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2307 InsertPointOrErrorTy WsloopIP =
2308 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2309 WorksharingLoopType::ForStaticLoop, !IsNowait);
2311 return WsloopIP.takeError();
2312 InsertPointTy AfterIP = *WsloopIP;
2315 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2318 auto FiniInfo = FinalizationStack.pop_back_val();
2319 assert(FiniInfo.DK == OMPD_sections &&
2320 "Unexpected finalization stack state!");
2321 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2322 Builder.restoreIP(AfterIP);
2324 splitBBWithSuffix(Builder,
true,
"sections.fini");
2325 if (
Error Err = CB(Builder.saveIP()))
2327 AfterIP = {FiniBB, FiniBB->
begin()};
2331 for (
BranchInst *DummyBranch : CancellationBranches) {
2339OpenMPIRBuilder::InsertPointOrErrorTy
2340OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2341 BodyGenCallbackTy BodyGenCB,
2342 FinalizeCallbackTy FiniCB) {
2343 if (!updateToLocation(
Loc))
2346 auto FiniCBWrapper = [&](InsertPointTy IP) {
2357 Builder.restoreIP(IP);
2358 auto *CaseBB =
Loc.IP.getBlock();
2362 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2366 Directive OMPD = Directive::OMPD_sections;
2369 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2377 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
2380Value *OpenMPIRBuilder::getGPUThreadID() {
2381 return Builder.CreateCall(
2382 getOrCreateRuntimeFunction(M,
2383 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2387Value *OpenMPIRBuilder::getGPUWarpSize() {
2388 return Builder.CreateCall(
2389 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
2392Value *OpenMPIRBuilder::getNVPTXWarpID() {
2393 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2394 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2397Value *OpenMPIRBuilder::getNVPTXLaneID() {
2398 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2399 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2400 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2401 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
2405Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2408 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2409 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2410 assert(FromSize > 0 &&
"From size must be greater than zero");
2411 assert(ToSize > 0 &&
"To size must be greater than zero");
2412 if (FromType == ToType)
2414 if (FromSize == ToSize)
2415 return Builder.CreateBitCast(From, ToType);
2417 return Builder.CreateIntCast(From, ToType,
true);
2418 InsertPointTy SaveIP = Builder.saveIP();
2419 Builder.restoreIP(AllocaIP);
2420 Value *CastItem = Builder.CreateAlloca(ToType);
2421 Builder.restoreIP(SaveIP);
2423 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2424 CastItem, Builder.getPtrTy(0));
2425 Builder.CreateStore(From, ValCastItem);
2426 return Builder.CreateLoad(ToType, CastItem);
2429Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2433 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2434 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2438 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2440 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2441 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2442 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2443 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2444 Value *WarpSizeCast =
2445 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2446 Value *ShuffleCall =
2447 Builder.CreateCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2448 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2451void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2454 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2465 Type *IndexTy = Builder.getIndexTy(
2466 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2467 Value *ElemPtr = DstAddr;
2469 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2473 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2474 Ptr, Builder.getPtrTy(0),
Ptr->getName() +
".ascast");
2476 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2477 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2478 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
2481 if ((
Size / IntSize) > 1) {
2482 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2483 SrcAddrGEP, Builder.getPtrTy());
2488 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2489 emitBlock(PreCondBB, CurFunc);
2491 Builder.CreatePHI(
Ptr->getType(), 2);
2494 Builder.CreatePHI(ElemPtr->
getType(), 2);
2498 Value *PtrDiff = Builder.CreatePtrDiff(
2499 Builder.getInt8Ty(), PtrEnd,
2500 Builder.CreatePointerBitCastOrAddrSpaceCast(
Ptr, Builder.getPtrTy()));
2501 Builder.CreateCondBr(
2502 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2504 emitBlock(ThenBB, CurFunc);
2505 Value *Res = createRuntimeShuffleFunction(
2507 Builder.CreateAlignedLoad(
2508 IntType,
Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2510 Builder.CreateAlignedStore(Res, ElemPtr,
2511 M.getDataLayout().getPrefTypeAlign(ElemType));
2513 Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2514 Value *LocalElemPtr =
2515 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2518 emitBranch(PreCondBB);
2519 emitBlock(ExitBB, CurFunc);
2521 Value *Res = createRuntimeShuffleFunction(
2522 AllocaIP, Builder.CreateLoad(IntType,
Ptr), IntType,
Offset);
2525 Res = Builder.CreateTrunc(Res, ElemType);
2526 Builder.CreateStore(Res, ElemPtr);
2527 Ptr = Builder.CreateGEP(IntType,
Ptr, {ConstantInt::get(IndexTy, 1)});
2529 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2535void OpenMPIRBuilder::emitReductionListCopy(
2536 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2538 CopyOptionsTy CopyOptions) {
2539 Type *IndexTy = Builder.getIndexTy(
2540 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2541 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2545 for (
auto En :
enumerate(ReductionInfos)) {
2546 const ReductionInfo &RI = En.value();
2547 Value *SrcElementAddr =
nullptr;
2548 Value *DestElementAddr =
nullptr;
2549 Value *DestElementPtrAddr =
nullptr;
2551 bool ShuffleInElement =
false;
2554 bool UpdateDestListPtr =
false;
2557 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2558 ReductionArrayTy, SrcBase,
2559 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2560 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2564 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2565 ReductionArrayTy, DestBase,
2566 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2568 case CopyAction::RemoteLaneToThread: {
2569 InsertPointTy CurIP = Builder.saveIP();
2570 Builder.restoreIP(AllocaIP);
2571 AllocaInst *DestAlloca = Builder.CreateAlloca(RI.ElementType,
nullptr,
2572 ".omp.reduction.element");
2574 M.getDataLayout().getPrefTypeAlign(RI.ElementType));
2575 DestElementAddr = DestAlloca;
2577 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2578 DestElementAddr->
getName() +
".ascast");
2579 Builder.restoreIP(CurIP);
2580 ShuffleInElement =
true;
2581 UpdateDestListPtr =
true;
2584 case CopyAction::ThreadCopy: {
2586 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2593 if (ShuffleInElement) {
2594 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2595 RemoteLaneOffset, ReductionArrayTy);
2597 switch (RI.EvaluationKind) {
2598 case EvalKind::Scalar: {
2599 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2601 Builder.CreateStore(Elem, DestElementAddr);
2604 case EvalKind::Complex: {
2605 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2606 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2607 Value *SrcReal = Builder.CreateLoad(
2608 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2609 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2610 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2611 Value *SrcImg = Builder.CreateLoad(
2612 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2614 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2615 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2616 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2617 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2618 Builder.CreateStore(SrcReal, DestRealPtr);
2619 Builder.CreateStore(SrcImg, DestImgPtr);
2622 case EvalKind::Aggregate: {
2623 Value *SizeVal = Builder.getInt64(
2624 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2625 Builder.CreateMemCpy(
2626 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2627 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2639 if (UpdateDestListPtr) {
2640 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2641 DestElementAddr, Builder.getPtrTy(),
2642 DestElementAddr->
getName() +
".ascast");
2643 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
2650 AttributeList FuncAttrs) {
2651 InsertPointTy SavedIP = Builder.saveIP();
2654 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2658 "_omp_reduction_inter_warp_copy_func", &M);
2663 Builder.SetInsertPoint(EntryBB);
2681 "__openmp_nvptx_data_transfer_temporary_storage";
2682 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2683 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2685 if (!TransferMedium) {
2694 Value *GPUThreadID = getGPUThreadID();
2696 Value *LaneID = getNVPTXLaneID();
2698 Value *WarpID = getNVPTXWarpID();
2700 InsertPointTy AllocaIP =
2701 InsertPointTy(Builder.GetInsertBlock(),
2702 Builder.GetInsertBlock()->getFirstInsertionPt());
2705 Builder.restoreIP(AllocaIP);
2706 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2707 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2709 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2710 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2711 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2712 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2713 NumWarpsAlloca, Builder.getPtrTy(0),
2714 NumWarpsAlloca->
getName() +
".ascast");
2715 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2716 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2718 InsertPointTy CodeGenIP =
2720 Builder.restoreIP(CodeGenIP);
2723 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2725 for (
auto En :
enumerate(ReductionInfos)) {
2730 const ReductionInfo &RI = En.value();
2731 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(RI.ElementType);
2732 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2735 unsigned NumIters = RealTySize / TySize;
2738 Value *Cnt =
nullptr;
2739 Value *CntAddr =
nullptr;
2743 CodeGenIP = Builder.saveIP();
2744 Builder.restoreIP(AllocaIP);
2746 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2748 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2749 CntAddr->
getName() +
".ascast");
2750 Builder.restoreIP(CodeGenIP);
2757 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2758 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2760 Value *
Cmp = Builder.CreateICmpULT(
2761 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2762 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2763 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2767 InsertPointOrErrorTy BarrierIP1 =
2768 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2769 omp::Directive::OMPD_unknown,
2773 return BarrierIP1.takeError();
2779 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2780 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2781 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2784 auto *RedListArrayTy =
2786 Type *IndexTy = Builder.getIndexTy(
2787 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2789 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2790 {ConstantInt::get(IndexTy, 0),
2791 ConstantInt::get(IndexTy, En.index())});
2793 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2795 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2799 Value *MediumPtr = Builder.CreateInBoundsGEP(
2800 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2803 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2805 Builder.CreateStore(Elem, MediumPtr,
2807 Builder.CreateBr(MergeBB);
2810 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2811 Builder.CreateBr(MergeBB);
2814 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2815 InsertPointOrErrorTy BarrierIP2 =
2816 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2817 omp::Directive::OMPD_unknown,
2821 return BarrierIP2.takeError();
2828 Value *NumWarpsVal =
2829 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2831 Value *IsActiveThread =
2832 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2833 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2835 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2839 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2840 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2842 Value *TargetElemPtrPtr =
2843 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2844 {ConstantInt::get(IndexTy, 0),
2845 ConstantInt::get(IndexTy, En.index())});
2846 Value *TargetElemPtrVal =
2847 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2848 Value *TargetElemPtr = TargetElemPtrVal;
2851 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2854 Value *SrcMediumValue =
2855 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2856 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2857 Builder.CreateBr(W0MergeBB);
2859 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2860 Builder.CreateBr(W0MergeBB);
2862 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2865 Cnt = Builder.CreateNSWAdd(
2866 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2867 Builder.CreateStore(Cnt, CntAddr,
false);
2869 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2870 emitBranch(PrecondBB);
2871 emitBlock(ExitBB, CurFn);
2873 RealTySize %= TySize;
2877 Builder.CreateRetVoid();
2878 Builder.restoreIP(SavedIP);
Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
    ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
    AttributeList FuncAttrs) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *FuncTy =
      FunctionType::get(Builder.getVoidTy(),
                        {Builder.getPtrTy(), Builder.getInt16Ty(),
                         Builder.getInt16Ty(), Builder.getInt16Ty()},
                        /* IsVarArg */ false);
  Function *SarFunc =
      Function::Create(FuncTy, GlobalVariable::InternalLinkage,
                       "_omp_reduction_shuffle_and_reduce_func", &M);
  SarFunc->setAttributes(FuncAttrs);
  BasicBlock *EntryBB = BasicBlock::Create(Ctx, "entry", SarFunc);
  Builder.SetInsertPoint(EntryBB);

  // Thread local Reduce list used to host the values of data to be reduced.
  Argument *ReduceListArg = SarFunc->getArg(0);
  // Current lane id; could be logical.
  Argument *LaneIDArg = SarFunc->getArg(1);
  // Offset of the remote source lane relative to the current lane.
  Argument *RemoteLaneOffsetArg = SarFunc->getArg(2);
  // Algorithm version.
  Argument *AlgoVerArg = SarFunc->getArg(3);

  Type *ReduceListArgType = ReduceListArg->getType();
  Type *LaneIDArgType = LaneIDArg->getType();
  Type *LaneIDArgPtrType = Builder.getPtrTy(0);
  Value *ReduceListAlloca = Builder.CreateAlloca(
      ReduceListArgType, nullptr, ReduceListArg->getName() + ".addr");
  Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                             LaneIDArg->getName() + ".addr");
  Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
      LaneIDArgType, nullptr, RemoteLaneOffsetArg->getName() + ".addr");
  Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                              AlgoVerArg->getName() + ".addr");
  ArrayType *RedListArrayTy =
      ArrayType::get(Builder.getPtrTy(), ReductionInfos.size());

  // Create a local thread-private variable to host the Reduce list from a
  // remote lane.
  Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
      RedListArrayTy, nullptr, ".omp.reduction.remote_reduce_list");

  Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListAlloca, ReduceListArgType,
      ReduceListAlloca->getName() + ".ascast");
  Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->getName() + ".ascast");
  Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteLaneOffsetAlloca, LaneIDArgPtrType,
      RemoteLaneOffsetAlloca->getName() + ".ascast");
  Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->getName() + ".ascast");
  Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteReductionListAlloca, Builder.getPtrTy(),
      RemoteReductionListAlloca->getName() + ".ascast");

  Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
  Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
  Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
  Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);

  Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
  Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
  Value *RemoteLaneOffset =
      Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
  Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);

  InsertPointTy AllocaIP(EntryBB, EntryBB->getFirstInsertionPt());

  // This loop iterates through the list of reduce elements and copies,
  // element by element, from a remote lane in the warp to RemoteReduceList,
  // hosted on the thread's stack.
  emitReductionListCopy(
      AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
      ReduceList, RemoteListAddrCast, {RemoteLaneOffset, nullptr, nullptr});
2989 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
2990 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
2991 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
2992 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
2993 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
2994 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
2995 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
2996 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
2997 Value *RemoteOffsetComp =
2998 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
2999 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3000 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3001 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3007 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3008 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3009 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3010 ReduceList, Builder.getPtrTy());
3011 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3012 RemoteListAddrCast, Builder.getPtrTy());
3013 Builder.CreateCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3014 ->addFnAttr(Attribute::NoUnwind);
3015 Builder.CreateBr(MergeBB);
3017 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3018 Builder.CreateBr(MergeBB);
3020 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3024 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3025 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3026 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3031 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3033 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3034 emitReductionListCopy(AllocaIP, CopyAction::ThreadCopy, RedListArrayTy,
3035 ReductionInfos, RemoteListAddrCast, ReduceList);
3036 Builder.CreateBr(CpyMergeBB);
3038 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3039 Builder.CreateBr(CpyMergeBB);
3041 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3043 Builder.CreateRetVoid();
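// Emits the helper that copies a thread's private reduce list into the
// per-team slot of the global reduction buffer, handling scalar, complex and
// aggregate element kinds.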
Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
    ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
    AttributeList FuncAttrs) {
  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
  FunctionType *FuncTy = FunctionType::get(
      Builder.getVoidTy(),
      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
      /* IsVarArg */ false);
  Function *LtGCFunc =
      Function::Create(FuncTy, GlobalVariable::InternalLinkage,
                       "_omp_reduction_list_to_global_copy_func", &M);
  LtGCFunc->setAttributes(FuncAttrs);
  BasicBlock *EntryBlock = BasicBlock::Create(M.getContext(), "entry", LtGCFunc);
  Builder.SetInsertPoint(EntryBlock);

  // Buffer: global reduction buffer.
  Argument *BufferArg = LtGCFunc->getArg(0);
  // Idx: index of the buffer.
  Argument *IdxArg = LtGCFunc->getArg(1);
  // ReduceList: thread local Reduce list.
  Argument *ReduceListArg = LtGCFunc->getArg(2);

  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
3090 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3091 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3092 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3094 Value *LocalReduceList =
3095 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3096 Value *BufferArgVal =
3097 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3098 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3099 Type *IndexTy = Builder.getIndexTy(
3100 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    auto *RedListArrayTy =
        ArrayType::get(Builder.getPtrTy(), ReductionInfos.size());
    // Reduce element = LocalReduceList[i]
    Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, LocalReduceList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);

    // Global = Buffer.VD[Idx];
    Value *BufferVD =
        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
    Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());

    switch (RI.EvaluationKind) {
    case EvalKind::Scalar: {
      Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
      Builder.CreateStore(TargetElement, GlobVal);
      break;
    }
    case EvalKind::Complex: {
      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 0, ".realp");
      Value *SrcReal = Builder.CreateLoad(
          RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
      Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 1, ".imagp");
      Value *SrcImg = Builder.CreateLoad(
          RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");

      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 0, ".realp");
      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 1, ".imagp");
      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);
      break;
    }
    case EvalKind::Aggregate: {
      Value *SizeVal =
          Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
      Builder.CreateMemCpy(
          GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
          M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
          /*isVolatile=*/false);
      break;
    }
    }
  }

  Builder.CreateRetVoid();
  Builder.restoreIP(OldIP);
  return LtGCFunc;
}
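// Emits the team-level reduce helper: a reduce list is built whose element
// pointers alias the global buffer slot, and ReduceFn is invoked on
// (global buffer list, thread reduce list).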
Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
    ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
    Type *ReductionsBufferTy, AttributeList FuncAttrs) {
  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
  FunctionType *FuncTy = FunctionType::get(
      Builder.getVoidTy(),
      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
      /* IsVarArg */ false);
  Function *LtGRFunc =
      Function::Create(FuncTy, GlobalVariable::InternalLinkage,
                       "_omp_reduction_list_to_global_reduce_func", &M);
  LtGRFunc->setAttributes(FuncAttrs);
  BasicBlock *EntryBlock = BasicBlock::Create(M.getContext(), "entry", LtGRFunc);
  Builder.SetInsertPoint(EntryBlock);

  // Buffer: global reduction buffer.
  Argument *BufferArg = LtGRFunc->getArg(0);
  // Idx: index of the buffer.
  Argument *IdxArg = LtGRFunc->getArg(1);
  // ReduceList: thread local Reduce list.
  Argument *ReduceListArg = LtGRFunc->getArg(2);

  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  auto *RedListArrayTy =
      ArrayType::get(Builder.getPtrTy(), ReductionInfos.size());

  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");

  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LocalReduceList, Builder.getPtrTy(),
      LocalReduceList->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3215 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3216 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3217 Type *IndexTy = Builder.getIndexTy(
3218 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3219 for (
auto En :
enumerate(ReductionInfos)) {
3220 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3221 RedListArrayTy, LocalReduceListAddrCast,
3222 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3224 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3226 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3227 ReductionsBufferTy, BufferVD, 0, En.index());
3228 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3233 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3234 Builder.CreateCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3235 ->addFnAttr(Attribute::NoUnwind);
3236 Builder.CreateRetVoid();
3237 Builder.restoreIP(OldIP);
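// Emits the inverse copy helper: a team's slot of the global reduction buffer
// is copied back into the thread's private reduce list.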
Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
    ArrayRef<ReductionInfo> ReductionInfos, Type *ReductionsBufferTy,
    AttributeList FuncAttrs) {
  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
  FunctionType *FuncTy = FunctionType::get(
      Builder.getVoidTy(),
      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
      /* IsVarArg */ false);
  Function *LtGCFunc =
      Function::Create(FuncTy, GlobalVariable::InternalLinkage,
                       "_omp_reduction_global_to_list_copy_func", &M);
  LtGCFunc->setAttributes(FuncAttrs);
  BasicBlock *EntryBlock = BasicBlock::Create(M.getContext(), "entry", LtGCFunc);
  Builder.SetInsertPoint(EntryBlock);

  // Buffer: global reduction buffer.
  Argument *BufferArg = LtGCFunc->getArg(0);
  // Idx: index of the buffer.
  Argument *IdxArg = LtGCFunc->getArg(1);
  // ReduceList: thread local Reduce list.
  Argument *ReduceListArg = LtGCFunc->getArg(2);

  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3286 Value *LocalReduceList =
3287 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3288 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3289 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3290 Type *IndexTy = Builder.getIndexTy(
3291 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    const OpenMPIRBuilder::ReductionInfo &RI = En.value();
    auto *RedListArrayTy =
        ArrayType::get(Builder.getPtrTy(), ReductionInfos.size());
    // Reduce element = LocalReduceList[i]
    Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, LocalReduceList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
    // Global = Buffer.VD[Idx];
    Value *BufferVD =
        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
    Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());

    switch (RI.EvaluationKind) {
    case EvalKind::Scalar: {
      Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
      Builder.CreateStore(TargetElement, ElemPtr);
      break;
    }
    case EvalKind::Complex: {
      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobValPtr, 0, 0, ".realp");
      Value *SrcReal = Builder.CreateLoad(
          RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");
      Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobValPtr, 0, 1, ".imagp");
      Value *SrcImg = Builder.CreateLoad(
          RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");

      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 0, ".realp");
      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 1, ".imagp");
      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);
      break;
    }
    case EvalKind::Aggregate: {
      Value *SizeVal =
          Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
      Builder.CreateMemCpy(
          ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
          GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
          SizeVal, /*isVolatile=*/false);
      break;
    }
    }
  }

  Builder.CreateRetVoid();
  Builder.restoreIP(OldIP);
  return LtGCFunc;
}
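// Emits the team-level reduce helper for the copy-back direction: the locally
// built reduce list points into the global buffer slot and ReduceFn is
// invoked as ReduceFn(threadReduceList, globalBufferList).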
Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
    ArrayRef<ReductionInfo> ReductionInfos, Function *ReduceFn,
    Type *ReductionsBufferTy, AttributeList FuncAttrs) {
  OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
  auto *FuncTy = FunctionType::get(
      Builder.getVoidTy(),
      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
      /* IsVarArg */ false);
  Function *LtGRFunc =
      Function::Create(FuncTy, GlobalVariable::InternalLinkage,
                       "_omp_reduction_global_to_list_reduce_func", &M);
  LtGRFunc->setAttributes(FuncAttrs);
  BasicBlock *EntryBlock = BasicBlock::Create(M.getContext(), "entry", LtGRFunc);
  Builder.SetInsertPoint(EntryBlock);

  // Buffer: global reduction buffer.
  Argument *BufferArg = LtGRFunc->getArg(0);
  // Idx: index of the buffer.
  Argument *IdxArg = LtGRFunc->getArg(1);
  // ReduceList: thread local Reduce list.
  Argument *ReduceListArg = LtGRFunc->getArg(2);

  Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(), nullptr,
                                                BufferArg->getName() + ".addr");
  Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
                                             IdxArg->getName() + ".addr");
  Value *ReduceListArgAlloca = Builder.CreateAlloca(
      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  ArrayType *RedListArrayTy =
      ArrayType::get(Builder.getPtrTy(), ReductionInfos.size());

  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");

  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LocalReduceList, Builder.getPtrTy(),
      LocalReduceList->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3406 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3407 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3408 Type *IndexTy = Builder.getIndexTy(
3409 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3410 for (
auto En :
enumerate(ReductionInfos)) {
3411 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3412 RedListArrayTy, ReductionList,
3413 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3416 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3417 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3418 ReductionsBufferTy, BufferVD, 0, En.index());
3419 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3424 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3425 Builder.CreateCall(ReduceFn, {ReduceList, ReductionList})
3426 ->addFnAttr(Attribute::NoUnwind);
3427 Builder.CreateRetVoid();
3428 Builder.restoreIP(OldIP);
std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
  std::string Suffix =
      createPlatformSpecificName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
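// Builds the elementwise reduction function that combines two type-erased
// reduce lists (LHS and RHS). For Clang-style callbacks the element pointers
// are fixed up afterwards via ReductionGenClang; otherwise ReductionGen emits
// the combine in place.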
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
    StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
    ReductionGenCBKind ReductionGenCBKind, AttributeList FuncAttrs) {
  auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
                                   {Builder.getPtrTy(), Builder.getPtrTy()},
                                   /* IsVarArg */ false);
  std::string Name = getReductionFuncName(ReducerName);
  Function *ReductionFunc =
      Function::Create(FuncTy, GlobalVariable::InternalLinkage, Name, &M);
  ReductionFunc->setAttributes(FuncAttrs);
  BasicBlock *EntryBB =
      BasicBlock::Create(M.getContext(), "entry", ReductionFunc);
  Builder.SetInsertPoint(EntryBB);

  // Need to alloca memory here and deal with the pointers before getting
  // LHS/RHS pointers out.
  Value *LHSArrayPtr = nullptr;
  Value *RHSArrayPtr = nullptr;
  Argument *Arg0 = ReductionFunc->getArg(0);
  Argument *Arg1 = ReductionFunc->getArg(1);
  Type *Arg0Type = Arg0->getType();
  Type *Arg1Type = Arg1->getType();

  Value *LHSAlloca =
      Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr");
  Value *RHSAlloca =
      Builder.CreateAlloca(Arg1Type, nullptr, Arg1->getName() + ".addr");
  Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LHSAlloca, Arg0Type, LHSAlloca->getName() + ".ascast");
  Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RHSAlloca, Arg1Type, RHSAlloca->getName() + ".ascast");
  Builder.CreateStore(Arg0, LHSAddrCast);
  Builder.CreateStore(Arg1, RHSAddrCast);
  LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
  RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3477 Type *IndexTy = Builder.getIndexTy(
3478 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3480 for (
auto En :
enumerate(ReductionInfos)) {
3481 const ReductionInfo &RI = En.value();
3482 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3483 RedArrayTy, RHSArrayPtr,
3484 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3485 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3486 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3487 RHSI8Ptr, RI.PrivateVariable->getType(),
3488 RHSI8Ptr->
getName() +
".ascast");
3490 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3491 RedArrayTy, LHSArrayPtr,
3492 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3493 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3494 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3495 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3497 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3501 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3502 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3504 InsertPointOrErrorTy AfterIP =
3505 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3507 return AfterIP.takeError();
3508 if (!Builder.GetInsertBlock())
3509 return ReductionFunc;
3510 Builder.CreateStore(Reduced, LHSPtr);
3514 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3515 for (
auto En :
enumerate(ReductionInfos)) {
3516 unsigned Index = En.index();
3517 const ReductionInfo &RI = En.value();
3518 Value *LHSFixupPtr, *RHSFixupPtr;
3519 Builder.restoreIP(RI.ReductionGenClang(
3520 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3525 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3530 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3536 Builder.CreateRetVoid();
3537 return ReductionFunc;
}

static void checkReductionInfos(
    ArrayRef<OpenMPIRBuilder::ReductionInfo> ReductionInfos, bool IsGPU) {
  for (const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
    (void)RI;
    assert(RI.Variable && "expected non-null variable");
    assert(RI.PrivateVariable && "expected non-null private variable");
    assert((RI.ReductionGen || RI.ReductionGenClang) &&
           "expected non-null reduction generator callback");
    if (!IsGPU) {
      assert(
          RI.Variable->getType() == RI.PrivateVariable->getType() &&
          "expected variables and their private equivalents to have the same "
          "type");
    }
    assert(RI.Variable->getType()->isPointerTy() &&
           "expected variables to be pointers");
  }
}
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    InsertPointTy CodeGenIP, ArrayRef<ReductionInfo> ReductionInfos,
    bool IsNoWait, bool IsTeamsReduction, ReductionGenCBKind ReductionGenCBKind,
    std::optional<omp::GV> GridValue, unsigned ReductionBufNum,
    Value *SrcLocInfo) {
  if (!updateToLocation(Loc))
    return InsertPointTy();
  Builder.restoreIP(CodeGenIP);
  checkReductionInfos(ReductionInfos, /*IsGPU*/ true);
  LLVMContext &Ctx = M.getContext();
  if (!SrcLocInfo) {
    uint32_t SrcLocStrSize;
    Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
    SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  }

  if (ReductionInfos.size() == 0)
    return Builder.saveIP();

  BasicBlock *ContinuationBlock = nullptr;
  if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
    // Copied code from createReductions.
    BasicBlock *InsertBlock = Loc.IP.getBlock();
    ContinuationBlock =
        InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
    InsertBlock->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
  }

  Function *CurFunc = Builder.GetInsertBlock()->getParent();
  AttributeList FuncAttrs;
  AttrBuilder AttrBldr(Ctx);
  for (auto Attr : CurFunc->getAttributes().getFnAttrs())
    AttrBldr.addAttribute(Attr);
  AttrBldr.removeAttribute(Attribute::OptimizeNone);
  FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);

  CodeGenIP = Builder.saveIP();
  Expected<Function *> ReductionResult =
      createReductionFunction(Builder.GetInsertBlock()->getParent()->getName(),
                              ReductionInfos, ReductionGenCBKind, FuncAttrs);
  if (!ReductionResult)
    return ReductionResult.takeError();
  Function *ReductionFunc = *ReductionResult;
  Builder.restoreIP(CodeGenIP);

  // Set the grid value in the config needed for lowering later on.
  if (GridValue.has_value())
    Config.setGridValue(GridValue.value());
3626 CodeGenIP = Builder.saveIP();
3627 Builder.restoreIP(AllocaIP);
3628 Value *ReductionListAlloca =
3629 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3630 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3631 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3632 Builder.restoreIP(CodeGenIP);
3633 Type *IndexTy = Builder.getIndexTy(
3634 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3635 for (
auto En :
enumerate(ReductionInfos)) {
3636 const ReductionInfo &RI = En.value();
3637 Value *ElemPtr = Builder.CreateInBoundsGEP(
3638 RedArrayTy, ReductionList,
3639 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3641 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3642 Builder.CreateStore(CastElem, ElemPtr);
3644 CodeGenIP = Builder.saveIP();
3646 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3648 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs);
3652 Builder.restoreIP(CodeGenIP);
3654 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
3656 unsigned MaxDataSize = 0;
3658 for (
auto En :
enumerate(ReductionInfos)) {
3659 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3660 if (
Size > MaxDataSize)
3662 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3664 Value *ReductionDataSize =
3665 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
3666 if (!IsTeamsReduction) {
3667 Value *SarFuncCast =
3668 Builder.CreatePointerBitCastOrAddrSpaceCast(SarFunc, PtrTy);
3670 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, PtrTy);
3671 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3673 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3674 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3675 Res = Builder.CreateCall(Pv2Ptr, Args);
3677 CodeGenIP = Builder.saveIP();
3679 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3680 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3681 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3682 Function *LtGCFunc = emitListToGlobalCopyFunction(
3683 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3684 Function *LtGRFunc = emitListToGlobalReduceFunction(
3685 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3686 Function *GtLCFunc = emitGlobalToListCopyFunction(
3687 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3688 Function *GtLRFunc = emitGlobalToListReduceFunction(
3689 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3690 Builder.restoreIP(CodeGenIP);
3692 Value *KernelTeamsReductionPtr = Builder.CreateCall(
3693 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3695 Value *Args3[] = {SrcLocInfo,
3696 KernelTeamsReductionPtr,
3697 Builder.getInt32(ReductionBufNum),
3707 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3708 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3709 Res = Builder.CreateCall(TeamsReduceFn, Args3);
3715 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3716 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3722 emitBlock(ThenBB, CurFunc);
3725 for (
auto En :
enumerate(ReductionInfos)) {
3726 const ReductionInfo &RI = En.value();
3729 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3731 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3732 Value *LHSPtr, *RHSPtr;
3733 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3734 &LHSPtr, &RHSPtr, CurFunc));
3747 Value *LHSValue = Builder.CreateLoad(RI.ElementType,
LHS,
"final.lhs");
3748 Value *RHSValue = Builder.CreateLoad(RI.ElementType,
RHS,
"final.rhs");
3750 InsertPointOrErrorTy AfterIP =
3751 RI.ReductionGen(Builder.saveIP(), RHSValue, LHSValue, Reduced);
3753 return AfterIP.takeError();
3754 Builder.CreateStore(Reduced,
LHS,
false);
3757 emitBlock(ExitBB, CurFunc);
3758 if (ContinuationBlock) {
3759 Builder.CreateBr(ContinuationBlock);
3760 Builder.SetInsertPoint(ContinuationBlock);
3762 Config.setEmitLLVMUsed();
3764 return Builder.saveIP();
3773 ".omp.reduction.func", &M);
3783 Builder.SetInsertPoint(ReductionFuncBlock);
3784 Value *LHSArrayPtr =
nullptr;
3785 Value *RHSArrayPtr =
nullptr;
3796 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3798 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3799 Value *LHSAddrCast =
3800 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3801 Value *RHSAddrCast =
3802 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3803 Builder.CreateStore(Arg0, LHSAddrCast);
3804 Builder.CreateStore(Arg1, RHSAddrCast);
3805 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3806 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3808 LHSArrayPtr = ReductionFunc->
getArg(0);
3809 RHSArrayPtr = ReductionFunc->
getArg(1);
3812 unsigned NumReductions = ReductionInfos.
size();
3815 for (
auto En :
enumerate(ReductionInfos)) {
3816 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3817 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3818 RedArrayTy, LHSArrayPtr, 0, En.index());
3819 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3820 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3821 LHSI8Ptr, RI.Variable->
getType());
3822 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3823 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3824 RedArrayTy, RHSArrayPtr, 0, En.index());
3825 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3826 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3827 RHSI8Ptr, RI.PrivateVariable->
getType());
3828 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3830 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3831 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3833 return AfterIP.takeError();
3835 Builder.restoreIP(*AfterIP);
3837 if (!Builder.GetInsertBlock())
3841 if (!IsByRef[En.index()])
3842 Builder.CreateStore(Reduced, LHSPtr);
3844 Builder.CreateRetVoid();
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<ReductionInfo> ReductionInfos, ArrayRef<bool> IsByRef,
    bool IsNoWait, bool IsTeamsReduction) {
  assert(ReductionInfos.size() == IsByRef.size());
  if (Config.isGPU())
    return createReductionsGPU(Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
                               IsNoWait, IsTeamsReduction);

  checkReductionInfos(ReductionInfos, /*IsGPU*/ false);

  if (!updateToLocation(Loc))
    return InsertPointTy();

  if (ReductionInfos.size() == 0)
    return Builder.saveIP();

  BasicBlock *InsertBlock = Loc.IP.getBlock();
  BasicBlock *ContinuationBlock =
      InsertBlock->splitBasicBlock(Loc.IP.getPoint(), "reduce.finalize");
  InsertBlock->getTerminator()->eraseFromParent();

  // Create and populate array of type-erased pointers to private reduction
  // values.
  unsigned NumReductions = ReductionInfos.size();
  Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions);
  Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");

  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());

  for (auto En : enumerate(ReductionInfos)) {
    unsigned Index = En.index();
    const ReductionInfo &RI = En.value();
    Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
    Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
  }

  // Emit a call to the runtime function that orchestrates the reduction.
  // Declare the reduction function in the process.
  Function *ReductionFunc = getFreshReductionFunc(M);
  Type *IndexTy = Builder.getIndexTy(
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  Function *Func = Builder.GetInsertBlock()->getParent();
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  bool CanGenerateAtomic = all_of(ReductionInfos, [](const ReductionInfo &RI) {
    return RI.AtomicReductionGen;
  });
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
                                  CanGenerateAtomic
                                      ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
                                      : IdentFlag(0));
  Value *ThreadId = getOrCreateThreadID(Ident);
  Constant *NumVariables = Builder.getInt32(NumReductions);
  const DataLayout &DL = M.getDataLayout();
  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
  Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
  Value *Lock = getOMPCriticalRegionLock(".reduction");
  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_reduce);
  CallInst *ReduceCall =
      Builder.CreateCall(ReduceFunc,
                         {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
                          ReductionFunc, Lock},
                         "reduce");

  // Create final reduction entry blocks for the atomic and non-atomic case.
  // Emit IR that dispatches control flow to one of the blocks based on the
  // reduction supporting the atomic mode.
  BasicBlock *NonAtomicRedBlock =
      BasicBlock::Create(M.getContext(), "reduce.switch.nonatomic", Func);
  BasicBlock *AtomicRedBlock =
      BasicBlock::Create(M.getContext(), "reduce.switch.atomic", Func);
  SwitchInst *Switch =
      Builder.CreateSwitch(ReduceCall, ContinuationBlock, /* NumCases */ 2);
  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
3933 Builder.SetInsertPoint(NonAtomicRedBlock);
3934 for (
auto En :
enumerate(ReductionInfos)) {
3935 const ReductionInfo &RI = En.value();
3939 Value *RedValue = RI.Variable;
3940 if (!IsByRef[En.index()]) {
3941 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3942 "red.value." +
Twine(En.index()));
3944 Value *PrivateRedValue =
3945 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
3946 "red.private.value." +
Twine(En.index()));
3948 InsertPointOrErrorTy AfterIP =
3949 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3951 return AfterIP.takeError();
3952 Builder.restoreIP(*AfterIP);
3954 if (!Builder.GetInsertBlock())
3955 return InsertPointTy();
3957 if (!IsByRef[En.index()])
3958 Builder.CreateStore(Reduced, RI.Variable);
3960 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
3961 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3962 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3963 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId, Lock});
3964 Builder.CreateBr(ContinuationBlock);
3969 Builder.SetInsertPoint(AtomicRedBlock);
3970 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3971 for (
const ReductionInfo &RI : ReductionInfos) {
3972 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
3973 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
3975 return AfterIP.takeError();
3976 Builder.restoreIP(*AfterIP);
3977 if (!Builder.GetInsertBlock())
3978 return InsertPointTy();
3980 Builder.CreateBr(ContinuationBlock);
3982 Builder.CreateUnreachable();
3993 if (!Builder.GetInsertBlock())
3994 return InsertPointTy();
3996 Builder.SetInsertPoint(ContinuationBlock);
3997 return Builder.saveIP();
OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::createMaster(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_master;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}

OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::createMasked(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, Value *Filter) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_masked;
  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId, Filter};
  Value *ArgsEnd[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, ArgsEnd);

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}
4055 Call->setDoesNotThrow();
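// Lowers the scan directive's separating pragma: during the first (input)
// loop the privatized values are stashed into the per-iteration scan buffers;
// during the second (scan) loop they are read back, and control is routed to
// the before/after-scan blocks depending on inclusive vs. exclusive scan.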
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
    const LocationDescription &Loc, InsertPointTy AllocaIP,
    ArrayRef<llvm::Value *> ScanVars, ArrayRef<llvm::Type *> ScanVarsType,
    bool IsInclusive, ScanInfo *ScanRedInfo) {
  if (ScanRedInfo->OMPFirstScanLoop) {
    llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
                                                    ScanVarsType, ScanRedInfo);
    if (Err)
      return Err;
  }
  if (!updateToLocation(Loc))
    return Loc.IP;

  llvm::Value *IV = ScanRedInfo->IV;

  if (ScanRedInfo->OMPFirstScanLoop) {
    // Emit buffer[i] = red; at the end of the input phase.
    for (size_t i = 0; i < ScanVars.size(); i++) {
      Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
      Type *DestTy = ScanVarsType[i];
      Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
      Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
      Builder.CreateStore(Src, Val);
    }
  }
  Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
  emitBlock(ScanRedInfo->OMPScanDispatch,
            Builder.GetInsertBlock()->getParent());

  if (!ScanRedInfo->OMPFirstScanLoop) {
    IV = ScanRedInfo->IV;
    // Emit red = buffer[i]; at the entrance to the scan phase.
    for (size_t i = 0; i < ScanVars.size(); i++) {
      Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
      Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
      Type *DestTy = ScanVarsType[i];
      Value *SrcPtr =
          Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
      Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
      Builder.CreateStore(Src, ScanVars[i]);
    }
  }

  if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
    Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
                         ScanRedInfo->OMPAfterScanBlock);
  } else {
    Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
                         ScanRedInfo->OMPBeforeScanBlock);
  }
  emitBlock(ScanRedInfo->OMPAfterScanBlock,
            Builder.GetInsertBlock()->getParent());
  Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
  return Builder.saveIP();
}
4128Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4132 Builder.restoreIP(AllocaIP);
4134 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4136 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4137 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
4141 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4142 InsertPointTy CodeGenIP) ->
Error {
4143 Builder.restoreIP(CodeGenIP);
4145 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4146 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4150 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4151 AllocSpan,
nullptr,
"arr");
4152 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
4160 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4162 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4163 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4166 return AfterIP.takeError();
4167 Builder.restoreIP(*AfterIP);
4168 BasicBlock *InputBB = Builder.GetInsertBlock();
4170 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4171 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4173 return AfterIP.takeError();
4174 Builder.restoreIP(*AfterIP);
4179Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4181 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4182 InsertPointTy CodeGenIP) ->
Error {
4183 Builder.restoreIP(CodeGenIP);
4184 for (ReductionInfo RedInfo : ReductionInfos) {
4185 Value *PrivateVar = RedInfo.PrivateVariable;
4186 Value *OrigVar = RedInfo.Variable;
4187 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4188 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4190 Type *SrcTy = RedInfo.ElementType;
4191 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4193 Value *Src = Builder.CreateLoad(SrcTy, Val);
4195 Builder.CreateStore(Src, OrigVar);
4196 Builder.CreateFree(Buff);
4204 if (ScanRedInfo->OMPScanFinish->getTerminator())
4205 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4207 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4210 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4211 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4214 return AfterIP.takeError();
4215 Builder.restoreIP(*AfterIP);
4216 BasicBlock *InputBB = Builder.GetInsertBlock();
4218 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4219 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4221 return AfterIP.takeError();
4222 Builder.restoreIP(*AfterIP);
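// Emits the up-sweep that turns the per-iteration scan buffers into prefix
// sums: inside a masked region, a doubling loop combines buffer[i] with
// buffer[i - pow2k] via the reduction generator, followed by a barrier and
// the final copy-out of the results into the original variables.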
4226OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4227 const LocationDescription &
Loc,
4229 ScanInfo *ScanRedInfo) {
4231 if (!updateToLocation(
Loc))
4233 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4234 InsertPointTy CodeGenIP) ->
Error {
4235 Builder.restoreIP(CodeGenIP);
4241 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
4243 Builder.GetInsertBlock()->getModule(),
4247 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4250 Builder.GetInsertBlock()->getModule(),
4253 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4256 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4257 Builder.SetInsertPoint(InputBB);
4258 Builder.CreateBr(LoopBB);
4259 emitBlock(LoopBB, CurFn);
4260 Builder.SetInsertPoint(LoopBB);
4262 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4264 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4265 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4267 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4275 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4276 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4277 emitBlock(InnerLoopBB, CurFn);
4278 Builder.SetInsertPoint(InnerLoopBB);
4279 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4281 for (ReductionInfo RedInfo : ReductionInfos) {
4282 Value *ReductionVal = RedInfo.PrivateVariable;
4283 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4284 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4285 Type *DestTy = RedInfo.ElementType;
4286 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4288 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4289 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4291 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4292 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4293 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
4295 InsertPointOrErrorTy AfterIP =
4296 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4298 return AfterIP.takeError();
4299 Builder.CreateStore(Result, LHSPtr);
4302 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4303 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4304 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4305 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4306 emitBlock(InnerExitBB, CurFn);
4308 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4311 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4312 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4314 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
4324 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4325 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4328 return AfterIP.takeError();
4329 Builder.restoreIP(*AfterIP);
4330 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4333 return AfterIP.takeError();
4334 Builder.restoreIP(*AfterIP);
4335 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
4342Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4345 ScanInfo *ScanRedInfo) {
4353 ScanRedInfo->OMPFirstScanLoop =
true;
4354 Error Err = InputLoopGen();
4364 ScanRedInfo->OMPFirstScanLoop =
false;
4365 Error Err = ScanLoopGen(Builder.saveIP());
4372void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4373 Function *
Fun = Builder.GetInsertBlock()->getParent();
4374 ScanRedInfo->OMPScanDispatch =
4376 ScanRedInfo->OMPAfterScanBlock =
4378 ScanRedInfo->OMPBeforeScanBlock =
4380 ScanRedInfo->OMPScanLoopExit =
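// Materializes the canonical loop CFG (preheader, header, cond, body, latch,
// exit, after) around an unsigned less-than trip-count test and records the
// blocks in a freshly created CanonicalLoopInfo.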
4383CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4407 Builder.SetCurrentDebugLocation(
DL);
4409 Builder.SetInsertPoint(Preheader);
4410 Builder.CreateBr(Header);
4412 Builder.SetInsertPoint(Header);
4413 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4414 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4415 Builder.CreateBr(
Cond);
4417 Builder.SetInsertPoint(
Cond);
4419 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4420 Builder.CreateCondBr(Cmp, Body, Exit);
4422 Builder.SetInsertPoint(Body);
4423 Builder.CreateBr(Latch);
4425 Builder.SetInsertPoint(Latch);
4426 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4427 "omp_" + Name +
".next",
true);
4428 Builder.CreateBr(Header);
4431 Builder.SetInsertPoint(Exit);
4432 Builder.CreateBr(After);
4435 LoopInfos.emplace_front();
4436 CanonicalLoopInfo *CL = &LoopInfos.front();
4438 CL->Header = Header;
4450OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4451 LoopBodyGenCallbackTy BodyGenCB,
4456 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4457 NextBB, NextBB, Name);
4461 if (updateToLocation(
Loc)) {
4465 spliceBB(Builder, After,
false);
4466 Builder.CreateBr(CL->getPreheader());
4471 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4481 ScanInfos.emplace_front();
4482 ScanInfo *
Result = &ScanInfos.front();
4487OpenMPIRBuilder::createCanonicalScanLoops(
4488 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4489 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4490 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4491 LocationDescription ComputeLoc =
4492 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4493 updateToLocation(ComputeLoc);
4497 Value *TripCount = calculateCanonicalLoopTripCount(
4498 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4499 ScanRedInfo->Span = TripCount;
4500 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4501 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
4503 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4504 Builder.restoreIP(CodeGenIP);
4505 ScanRedInfo->IV =
IV;
4506 createScanBBs(ScanRedInfo);
4507 BasicBlock *InputBlock = Builder.GetInsertBlock();
4511 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4512 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4513 Builder.GetInsertBlock()->getParent());
4514 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4515 emitBlock(ScanRedInfo->OMPScanLoopExit,
4516 Builder.GetInsertBlock()->getParent());
4517 Builder.CreateBr(ContinueBlock);
4518 Builder.SetInsertPoint(
4519 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4520 return BodyGenCB(Builder.saveIP(),
IV);
4523 const auto &&InputLoopGen = [&]() ->
Error {
4525 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4526 ComputeIP, Name,
true, ScanRedInfo);
4530 Builder.restoreIP((*LoopInfo)->getAfterIP());
4533 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4535 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4536 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4540 Builder.restoreIP((*LoopInfo)->getAfterIP());
4541 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4544 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
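// Computes the canonical loop trip count for (Start, Stop, Step), handling
// signed and unsigned induction variables, negative steps, inclusive upper
// bounds, and potential overflow of Stop - Start.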
Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
    const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step,
    bool IsSigned, bool InclusiveStop, const Twine &Name) {
  Type *IndVarTy = cast<IntegerType>(Start->getType());
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");

  updateToLocation(Loc);

  Constant *Zero = ConstantInt::get(IndVarTy, 0);
  Constant *One = ConstantInt::get(IndVarTy, 1);

  Value *Span;
  Value *ZeroCmp;
  Value *Incr;
  if (IsSigned) {
    // Ensure that the increment is positive. If not, negate it and invert the
    // lower and upper bound.
    Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
    Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
    Span = Builder.CreateSub(UB, LB, "", false, true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_SLT : CmpInst::ICMP_SLE, UB, LB);
  } else {
    Span = Builder.CreateSub(Stop, Start, "", true);
    ZeroCmp = Builder.CreateICmp(
        InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Stop, Start);
  }

  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // Avoid incrementing past stop since it could overflow.
    Value *CountIfTwo = Builder.CreateAdd(
        Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
    Value *OneCmp = Builder.CreateICmp(CmpInst::ICMP_ULE, Span, Incr);
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }

  return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                              "omp_" + Name + ".tripcount");
}
4611 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4612 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4613 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4614 ScanInfo *ScanRedInfo) {
4615 LocationDescription ComputeLoc =
4616 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4618 Value *TripCount = calculateCanonicalLoopTripCount(
4619 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4621 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4622 Builder.restoreIP(CodeGenIP);
4623 Value *Span = Builder.CreateMul(
IV, Step);
4624 Value *IndVar = Builder.CreateAdd(Span, Start);
4626 ScanRedInfo->IV = IndVar;
4627 return BodyGenCB(Builder.saveIP(), IndVar);
4629 LocationDescription LoopLoc =
4632 : LocationDescription(Builder.saveIP(),
4633 Builder.getCurrentDebugLocation());
4634 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
4643 OpenMPIRBuilder &OMPBuilder) {
4644 unsigned Bitwidth = Ty->getIntegerBitWidth();
4646 return OMPBuilder.getOrCreateRuntimeFunction(
4647 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4649 return OMPBuilder.getOrCreateRuntimeFunction(
4650 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4659 OpenMPIRBuilder &OMPBuilder) {
4660 unsigned Bitwidth = Ty->getIntegerBitWidth();
4662 return OMPBuilder.getOrCreateRuntimeFunction(
4663 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4665 return OMPBuilder.getOrCreateRuntimeFunction(
4666 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
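// Lowers a workshare loop with the static (unchunked) schedule: each thread
// obtains its contiguous iteration range from __kmpc_for_static_init_{4u,8u},
// the canonical loop is retargeted to that range, and the loop is finished
// with __kmpc_for_static_fini plus an optional barrier.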
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
    WorksharingLoopType LoopType, bool NeedsBarrier) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
         "Require dedicated allocate IP");

  // Set up the source location value for OpenMP runtime.
  Builder.restoreIP(CLI->getPreheaderIP());
  Builder.SetCurrentDebugLocation(DL);

  uint32_t SrcLocStrSize;
  Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);

  // Declare useful OpenMP runtime functions.
  Value *IV = CLI->getIndVar();
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit =
      LoopType == WorksharingLoopType::DistributeForStaticLoop
          ? getKmpcDistForStaticInitForType(IVTy, M, *this)
          : getKmpcForStaticInitForType(IVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

  // Allocate space for computed loop bounds as expected by the "init"
  // function.
  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
  Type *I32Type = Type::getInt32Ty(M.getContext());
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  CLI->setLastIter(PLastIter);

  // At the end of the preheader, prepare for calling the "init" function by
  // storing the current loop bounds into the allocated space. A canonical
  // loop always iterates from 0 to trip-count with step 1.
  Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
  Constant *Zero = ConstantInt::get(IVTy, 0);
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);

  Value *ThreadNum = getOrCreateThreadID(SrcLoc);

  OMPScheduleType SchedType =
      (LoopType == WorksharingLoopType::DistributeStaticLoop)
          ? OMPScheduleType::OrderedDistribute
          : OMPScheduleType::UnorderedStatic;
  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(SchedType));

  // Call the "init" function and update the trip count of the loop with the
  // value it produced.
  SmallVector<Value *, 10> Args(
      {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
  if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
    Value *PDistUpperBound =
        Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound");
    Args.push_back(PDistUpperBound);
  }
  Args.append({PStride, One, Zero});
  Builder.CreateCall(StaticInit, Args);
  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
  Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  CLI->setTripCount(TripCount);

  // Update all uses of the induction variable except the one in the condition
  // block that compares it with the actual upper bound, and the increment in
  // the latch block.
  CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
    Builder.SetInsertPoint(CLI->getBody(),
                           CLI->getBody()->getFirstInsertionPt());
    Builder.SetCurrentDebugLocation(DL);
    return Builder.CreateAdd(OldIV, LowerBound);
  });

  // In the "exit" block, call the "fini" function.
  Builder.SetInsertPoint(CLI->getExit(),
                         CLI->getExit()->getTerminator()->getIterator());
  Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});

  // Add the barrier if requested.
  if (NeedsBarrier) {
    InsertPointOrErrorTy BarrierIP =
        createBarrier(LocationDescription(Builder.saveIP(), DL),
                      omp::Directive::OMPD_for, /* ForceSimpleCall */ false,
                      /* CheckCancelFlag */ false);
    if (!BarrierIP)
      return BarrierIP.takeError();
  }

  InsertPointTy AfterIP = CLI->getAfterIP();
  CLI->invalidate();

  return AfterIP;
}
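// Lowers a workshare loop with a static chunked schedule: an outer dispatch
// loop iterates over the chunks assigned to this thread (as computed by
// __kmpc_for_static_init) while the original canonical loop becomes the inner
// per-chunk loop with an updated trip count and induction variable.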
4775OpenMPIRBuilder::InsertPointOrErrorTy
4776OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
DebugLoc DL,
4777 CanonicalLoopInfo *CLI,
4778 InsertPointTy AllocaIP,
4781 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4782 assert(ChunkSize &&
"Chunk size is required");
4784 LLVMContext &Ctx = CLI->getFunction()->getContext();
4786 Value *OrigTripCount = CLI->getTripCount();
4787 Type *IVTy =
IV->getType();
4789 "Max supported tripcount bitwidth is 64 bits");
4791 :
Type::getInt64Ty(Ctx);
4794 Constant *One = ConstantInt::get(InternalIVTy, 1);
4800 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
4803 Builder.restoreIP(AllocaIP);
4804 Builder.SetCurrentDebugLocation(
DL);
4805 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4806 Value *PLowerBound =
4807 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
4808 Value *PUpperBound =
4809 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
4810 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
4811 CLI->setLastIter(PLastIter);
4814 Builder.restoreIP(CLI->getPreheaderIP());
4815 Builder.SetCurrentDebugLocation(
DL);
4818 Value *CastedChunkSize =
4819 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
4820 Value *CastedTripCount =
4821 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
4823 Constant *SchedulingType = ConstantInt::get(
4824 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4825 Builder.CreateStore(Zero, PLowerBound);
4826 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
4827 Builder.CreateStore(OrigUpperBound, PUpperBound);
4828 Builder.CreateStore(One, PStride);
4833 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4834 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4835 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4836 Builder.CreateCall(StaticInit,
4838 SchedulingType, PLastIter,
4839 PLowerBound, PUpperBound,
4844 Value *FirstChunkStart =
4845 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
4846 Value *FirstChunkStop =
4847 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
4848 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
4850 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
4851 Value *NextChunkStride =
4852 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
4855 BasicBlock *DispatchEnter = splitBB(Builder,
true);
4856 Value *DispatchCounter;
4861 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
4862 {Builder.saveIP(),
DL},
4863 [&](InsertPointTy BodyIP,
Value *Counter) {
4864 DispatchCounter = Counter;
4867 FirstChunkStart, CastedTripCount, NextChunkStride,
4873 BasicBlock *DispatchBody = DispatchCLI->getBody();
4874 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
4875 BasicBlock *DispatchExit = DispatchCLI->getExit();
4876 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
4877 DispatchCLI->invalidate();
4885 Builder.restoreIP(CLI->getPreheaderIP());
4886 Builder.SetCurrentDebugLocation(
DL);
4889 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4890 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
4891 Value *IsLastChunk =
4892 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
4893 Value *CountUntilOrigTripCount =
4894 Builder.CreateSub(CastedTripCount, DispatchCounter);
4895 Value *ChunkTripCount = Builder.CreateSelect(
4896 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4897 Value *BackcastedChunkTC =
4898 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
4899 CLI->setTripCount(BackcastedChunkTC);
4904 Value *BackcastedDispatchCounter =
4905 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
4907 Builder.restoreIP(CLI->getBodyIP());
4908 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
4913 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
4917 InsertPointOrErrorTy AfterIP =
4918 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
4921 return AfterIP.takeError();
4939 unsigned Bitwidth = Ty->getIntegerBitWidth();
4940 Module &M = OMPBuilder->M;
4942 case WorksharingLoopType::ForStaticLoop:
4944 return OMPBuilder->getOrCreateRuntimeFunction(
4945 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
4947 return OMPBuilder->getOrCreateRuntimeFunction(
4948 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
4950 case WorksharingLoopType::DistributeStaticLoop:
4952 return OMPBuilder->getOrCreateRuntimeFunction(
4953 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
4955 return OMPBuilder->getOrCreateRuntimeFunction(
4956 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
4958 case WorksharingLoopType::DistributeForStaticLoop:
4960 return OMPBuilder->getOrCreateRuntimeFunction(
4961 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
4963 return OMPBuilder->getOrCreateRuntimeFunction(
4964 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
4967 if (Bitwidth != 32 && Bitwidth != 64) {
4981 Module &M = OMPBuilder->M;
4990 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
4991 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
4992 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
4993 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
4994 Builder.CreateCall(RTLFn, RealArgs);
4997 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
4998 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
4999 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5000 Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});
5003 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5004 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5005 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5006 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5008 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5010 Builder.CreateCall(RTLFn, RealArgs);
5014 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5019 Value *TripCount = CLI->getTripCount();
5025 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5026 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5031 Builder.restoreIP({Preheader, Preheader->
end()});
5034 Builder.CreateBr(CLI->getExit());
5037 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5040 CleanUpInfo.EntryBB = CLI->getHeader();
5041 CleanUpInfo.ExitBB = CLI->getExit();
5042 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5050 "Expected unique undroppable user of outlined function");
5052 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5054 "Expected outlined function call to be located in loop preheader");
5056 if (OutlinedFnCallInstruction->
arg_size() > 1)
5063 LoopBodyArg, TripCount, OutlinedFn);
5065 for (
auto &ToBeDeletedItem : ToBeDeleted)
5066 ToBeDeletedItem->eraseFromParent();
5070OpenMPIRBuilder::InsertPointTy
5071OpenMPIRBuilder::applyWorkshareLoopTarget(
DebugLoc DL, CanonicalLoopInfo *CLI,
5072 InsertPointTy AllocaIP,
5075 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5076 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5079 OI.OuterAllocaBB = CLI->getPreheader();
5085 OI.OuterAllocaBB = AllocaIP.getBlock();
5088 OI.EntryBB = CLI->getBody();
5089 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5090 "omp.prelatch",
true);
5093 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5097 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5099 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5110 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5121 CLI->getPreheader(),
5130 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5136 CLI->getIndVar()->user_end());
5139 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5140 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
5146 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
5153 OI.PostOutlineCB = [=, ToBeDeletedVec =
5154 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5158 addOutlineInfo(std::move(OI));
5159 return CLI->getAfterIP();
5162OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5163 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5164 bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
5165 bool HasSimdModifier, bool HasMonotonicModifier,
5166 bool HasNonmonotonicModifier, bool HasOrderedClause,
5168 if (Config.isTargetDevice())
5169 return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType);
5171 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5172 HasNonmonotonicModifier, HasOrderedClause);
5174 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5175 OMPScheduleType::ModifierOrdered;
5176 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5177 case OMPScheduleType::BaseStatic:
5178 assert(!ChunkSize && "No chunk size with static-chunked schedule");
5180 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
5181 NeedsBarrier, ChunkSize);
5183 return applyStaticWorkshareLoop(DL, CLI, AllocaIP, LoopType, NeedsBarrier);
5185 case OMPScheduleType::BaseStaticChunked:
5187 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
5188 NeedsBarrier, ChunkSize);
5190 return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,
5193 case OMPScheduleType::BaseRuntime:
5194 case OMPScheduleType::BaseAuto:
5195 case OMPScheduleType::BaseGreedy:
5196 case OMPScheduleType::BaseBalanced:
5197 case OMPScheduleType::BaseSteal:
5198 case OMPScheduleType::BaseGuidedSimd:
5199 case OMPScheduleType::BaseRuntimeSimd:
5201 "schedule type does not support user-defined chunk sizes");
5203 case OMPScheduleType::BaseDynamicChunked:
5204 case OMPScheduleType::BaseGuidedChunked:
5205 case OMPScheduleType::BaseGuidedIterativeChunked:
5206 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5207 case OMPScheduleType::BaseStaticBalancedChunked:
5208 return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
5209 NeedsBarrier, ChunkSize);
5222 unsigned Bitwidth = Ty->getIntegerBitWidth();
5224 return OMPBuilder.getOrCreateRuntimeFunction(
5225 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5227 return OMPBuilder.getOrCreateRuntimeFunction(
5228 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5238 unsigned Bitwidth = Ty->getIntegerBitWidth();
5240 return OMPBuilder.getOrCreateRuntimeFunction(
5241 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5243 return OMPBuilder.getOrCreateRuntimeFunction(
5244 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5253 unsigned Bitwidth = Ty->getIntegerBitWidth();
5255 return OMPBuilder.getOrCreateRuntimeFunction(
5256 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5258 return OMPBuilder.getOrCreateRuntimeFunction(
5259 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5263OpenMPIRBuilder::InsertPointOrErrorTy
5264OpenMPIRBuilder::applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
5265 InsertPointTy AllocaIP,
5267 bool NeedsBarrier, Value *Chunk) {
5268 assert(CLI->isValid() && "Requires a valid canonical loop");
5270 "Require dedicated allocate IP");
5272 "Require valid schedule type");
5274 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5275 OMPScheduleType::ModifierOrdered;
5278 Builder.SetCurrentDebugLocation(DL);
5281 Constant *SrcLocStr = getOrCreateSrcLocStr(DL, SrcLocStrSize);
5282 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5286 Type *IVTy = IV->getType();
5291 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5293 Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
5294 Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
5295 Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
5296 Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
5297 CLI->setLastIter(PLastIter);
5305 Constant *One = ConstantInt::get(IVTy, 1);
5306 Builder.CreateStore(One, PLowerBound);
5307 Value *UpperBound = CLI->getTripCount();
5308 Builder.CreateStore(UpperBound, PUpperBound);
5309 Builder.CreateStore(One, PStride);
5315 InsertPointTy AfterIP = CLI->getAfterIP();
5323 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5326 ConstantInt::get(I32Type, static_cast<int>(SchedType));
5329 Builder.CreateCall(DynamicInit,
5330 {SrcLoc, ThreadNum, SchedulingType, One,
5331 UpperBound, One, Chunk});
5340 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
5341 PLowerBound, PUpperBound, PStride});
5342 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5345 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
5346 Builder.CreateCondBr(MoreWork, Header, Exit);
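// The calls above set up libomp's dynamic-dispatch protocol: dispatch_init
// publishes the iteration space (lb=1, ub=TripCount, stride=1, chunk), and
// each thread then calls dispatch_next until it reports no more work,
// executing the returned [lb, ub] slice each time.  A minimal self-contained
// sketch of the control flow the emitted IR mirrors (single-threaded
// emulation in plain C++; the chunking policy and helper name are
// illustrative, not the runtime's):
//
//   #include <algorithm>
//   #include <cstdio>
//   static unsigned Next = 1;                     // shared "dispatch" cursor
//   static bool dispatchNext(unsigned TripCount, unsigned Chunk,
//                            unsigned &LB, unsigned &UB) {
//     if (Next > TripCount) return false;         // no more work -> Exit block
//     LB = Next;
//     UB = std::min(Next + Chunk - 1, TripCount);
//     Next = UB + 1;
//     return true;                                // "MoreWork" -> loop Header
//   }
//   int main() {
//     unsigned LB, UB;
//     while (dispatchNext(/*TripCount=*/10, /*Chunk=*/3, LB, UB))
//       for (unsigned IV = LB; IV <= UB; ++IV)
//         std::printf("iter %u\n", IV - 1);        // IR subtracts One for "lb"
//     return 0;
//   }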
5352 PI->setIncomingBlock(0, OuterCond);
5353 PI->setIncomingValue(0, LowerBound);
5358 Br->setSuccessor(0, OuterCond);
5363 Builder.SetInsertPoint(Cond, Cond->getFirstInsertionPt());
5364 UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
5371 assert(BI->getSuccessor(1) == Exit);
5372 BI->setSuccessor(1, OuterCond);
5376 Builder.SetInsertPoint(&Latch->back());
5378 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
5383 Builder.SetInsertPoint(&Exit->back());
5384 InsertPointOrErrorTy BarrierIP =
5385 createBarrier(LocationDescription(Builder.saveIP(), DL),
5386 omp::Directive::OMPD_for, false,
5389 return BarrierIP.takeError();
5408 auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
5413 if (BBsToErase.count(UseInst->getParent()))
5420 while (BBsToErase.remove_if(HasRemainingUses)) {
5430 InsertPointTy ComputeIP) {
5431 assert(Loops.size() >= 1 && "At least one loop required");
5432 size_t NumLoops = Loops.size();
5436 return Loops.front();
5438 CanonicalLoopInfo *Outermost = Loops.front();
5439 CanonicalLoopInfo *Innermost = Loops.back();
5440 BasicBlock *OrigPreheader = Outermost->getPreheader();
5441 BasicBlock *OrigAfter = Outermost->getAfter();
5448 Loop->collectControlBlocks(OldControlBBs);
5451 Builder.SetCurrentDebugLocation(DL);
5452 if (ComputeIP.isSet())
5453 Builder.restoreIP(ComputeIP);
5455 Builder.restoreIP(Outermost->getPreheaderIP());
5459 Value *CollapsedTripCount = nullptr;
5460 for (CanonicalLoopInfo *L : Loops) {
5462 "All loops to collapse must be valid canonical loops");
5463 Value *OrigTripCount = L->getTripCount();
5464 if (!CollapsedTripCount) {
5465 CollapsedTripCount = OrigTripCount;
5470 CollapsedTripCount = Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5474 CanonicalLoopInfo *Result =
5475 createLoopSkeleton(DL, CollapsedTripCount, F,
5476 OrigPreheader->getNextNode(), OrigAfter, "collapsed");
5482 Builder.restoreIP(Result->getBodyIP());
5486 NewIndVars.resize(NumLoops);
5487 for (int i = NumLoops - 1; i >= 1; --i) {
5488 Value *OrigTripCount = Loops[i]->getTripCount();
5490 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5491 NewIndVars[i] = NewIndVar;
5493 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5496 NewIndVars[0] = Leftover;
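// The collapsed trip count is the product of the per-loop trip counts, and the
// original induction variables are recovered from the single collapsed IV by
// repeated urem/udiv, innermost first.  A small self-contained sketch of that
// de-linearization (plain C++, illustrative only):
//
//   #include <array>
//   #include <cstdio>
//   int main() {
//     const std::array<unsigned, 3> TripCounts = {4, 3, 5};   // outer..inner
//     const unsigned Collapsed = 4 * 3 * 5;
//     for (unsigned IV = 0; IV < Collapsed; ++IV) {
//       std::array<unsigned, 3> Idx;
//       unsigned Leftover = IV;
//       for (int I = 2; I >= 1; --I) {            // innermost to second loop
//         Idx[I] = Leftover % TripCounts[I];      // CreateURem
//         Leftover = Leftover / TripCounts[I];    // CreateUDiv
//       }
//       Idx[0] = Leftover;                        // outermost gets the rest
//       std::printf("%u %u %u\n", Idx[0], Idx[1], Idx[2]);
//     }
//     return 0;
//   }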
5507 auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
5514 ContinueBlock = nullptr;
5515 ContinuePred = NextSrc;
5522 for (size_t i = 0; i < NumLoops - 1; ++i)
5523 ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
5526 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5529 for (size_t i = NumLoops - 1; i > 0; --i)
5530 ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
5533 ContinueWith(Result->getLatch(), nullptr);
5540 for (size_t i = 0; i < NumLoops; ++i)
5541 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5546 for (CanonicalLoopInfo *L : Loops)
5555std::vector<CanonicalLoopInfo *>
5559 "Must pass as many tile sizes as there are loops");
5560 int NumLoops =
Loops.size();
5561 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5563 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5564 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5565 Function *
F = OutermostLoop->getBody()->getParent();
5566 BasicBlock *InnerEnter = InnermostLoop->getBody();
5567 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5573 Loop->collectControlBlocks(OldControlBBs);
5580 for (CanonicalLoopInfo *L :
Loops) {
5581 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5593 for (
int i = 0; i < NumLoops - 1; ++i) {
5594 CanonicalLoopInfo *Surrounding =
Loops[i];
5597 BasicBlock *EnterBB = Surrounding->getBody();
5603 Builder.SetCurrentDebugLocation(
DL);
5604 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5606 for (
int i = 0; i < NumLoops; ++i) {
5608 Value *OrigTripCount = OrigTripCounts[i];
5611 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5612 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5621 Value *FloorTripOverflow =
5622 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5624 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5625 Value *FloorTripCount =
5626 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5627 "omp_floor" +
Twine(i) +
".tripcount",
true);
5630 FloorCompleteCount.
push_back(FloorCompleteTripCount);
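// The floor loop's trip count is the number of tiles: TripCount / TileSize,
// plus one extra tile when there is a remainder; that partial tile is the
// epilogue handled later via FloorCompleteCount/FloorRems.  A tiny
// self-contained check of the formula (plain C++, illustrative only):
//
//   #include <cassert>
//   int main() {
//     auto FloorTripCount = [](unsigned TripCount, unsigned TileSize) {
//       unsigned Complete = TripCount / TileSize;   // CreateUDiv
//       unsigned Rem = TripCount % TileSize;        // CreateURem
//       return Complete + (Rem != 0 ? 1u : 0u);     // add of the zext'd compare
//     };
//     assert(FloorTripCount(10, 4) == 3);  // two full tiles + one partial
//     assert(FloorTripCount(8, 4) == 2);   // exact multiple, no epilogue tile
//     return 0;
//   }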
5636 std::vector<CanonicalLoopInfo *> Result;
5637 Result.reserve(NumLoops * 2);
5641 BasicBlock *Enter = OutermostLoop->getPreheader();
5648 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
5650 auto EmbeddNewLoop =
5651 [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
5653 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
5654 DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
5659 Enter = EmbeddedLoop->getBody();
5660 Continue = EmbeddedLoop->getLatch();
5661 OutroInsertBefore = EmbeddedLoop->getLatch();
5662 return EmbeddedLoop;
5666 const Twine &NameBase) {
5668 CanonicalLoopInfo *EmbeddedLoop =
5669 EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
5670 Result.push_back(EmbeddedLoop);
5674 EmbeddNewLoops(FloorCount, "floor");
5678 Builder.SetInsertPoint(Enter->getTerminator());
5680 for (int i = 0; i < NumLoops; ++i) {
5681 CanonicalLoopInfo *FloorLoop = Result[i];
5684 Value *FloorIsEpilogue =
5685 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
5686 Value *TileTripCount =
5687 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);
5693 EmbeddNewLoops(TileCounts, "tile");
5698 for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
5707 BodyEnter = nullptr;
5708 BodyEntered = ExitBB;
5720 Builder.restoreIP(Result.back()->getBodyIP());
5721 for (int i = 0; i < NumLoops; ++i) {
5722 CanonicalLoopInfo *FloorLoop = Result[i];
5723 CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
5724 Value *OrigIndVar = OrigIndVars[i];
5728 Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, true);
5730 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, true);
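// Inside the innermost tile body the original induction variable is rebuilt as
// OrigIndVar = TileSize * FloorIndVar + TileIndVar, with the epilogue tile's
// trip count clipped to the remainder via the select above.  A self-contained
// sketch of the resulting iteration order for one loop (plain C++,
// illustrative only):
//
//   #include <cstdio>
//   int main() {
//     const unsigned TripCount = 10, TileSize = 4;
//     const unsigned FloorComplete = TripCount / TileSize;
//     const unsigned Rem = TripCount % TileSize;
//     const unsigned FloorTrips = FloorComplete + (Rem != 0);
//     for (unsigned Floor = 0; Floor < FloorTrips; ++Floor) {
//       unsigned TileTrips = (Floor == FloorComplete) ? Rem : TileSize; // select
//       for (unsigned Tile = 0; Tile < TileTrips; ++Tile)
//         std::printf("orig iv = %u\n", TileSize * Floor + Tile);       // mul+add
//     }
//     return 0;
//   }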
5737 for (CanonicalLoopInfo *L : Loops)
5741 for (CanonicalLoopInfo *GenL : Result)
5752 if (Properties.empty())
5775 assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
5779 assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
5787 if (I.mayReadOrWriteMemory()) {
5791 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
5796void OpenMPIRBuilder::unrollLoopFull(DebugLoc, CanonicalLoopInfo *Loop) {
5803void OpenMPIRBuilder::unrollLoopHeuristic(DebugLoc, CanonicalLoopInfo *Loop) {
5811void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
5814 const Twine &NamePrefix) {
5815 Function *F = CanonicalLoop->getFunction();
5837 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
5843 C, NamePrefix + ".if.then", Cond->getParent(), Cond->getNextNode());
5845 C, NamePrefix + ".if.else", Cond->getParent(), CanonicalLoop->getExit());
5848 Builder.SetInsertPoint(SplitBeforeIt);
5850 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
5853 spliceBB(IP, ThenBlock, false, Builder.getCurrentDebugLocation());
5856 Builder.SetInsertPoint(ElseBlock);
5862 ExistingBlocks.reserve(L->getNumBlocks() + 1);
5864 ExistingBlocks.append(L->block_begin(), L->block_end());
5870 assert(LoopCond && LoopHeader && "Invalid loop structure");
5872 if (Block == L->getLoopPreheader() || Block == L->getLoopLatch() ||
5879 if (Block == ThenBlock)
5880 NewBB->setName(NamePrefix + ".if.else");
5883 VMap[Block] = NewBB;
5887 Builder.CreateBr(NewBlocks.front());
5891 L->getLoopLatch()->splitBasicBlock(
5892 L->getLoopLatch()->begin(), NamePrefix + ".pre_latch", true);
5896 L->addBasicBlockToLoop(ThenBlock, LI);
5900OpenMPIRBuilder::getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
5902 if (TargetTriple.isX86()) {
5903 if (Features.lookup("avx512f"))
5905 else if (Features.lookup("avx"))
5909 if (TargetTriple.isPPC())
5911 if (TargetTriple.isWasm())
5916void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
5918 Value *IfCond, OrderKind Order,
5922 Function *F = CanonicalLoop->getFunction();
5937 if (AlignedVars.size()) {
5938 InsertPointTy IP = Builder.saveIP();
5939 for (auto &AlignedItem : AlignedVars) {
5940 Value *AlignedPtr = AlignedItem.first;
5941 Value *Alignment = AlignedItem.second;
5944 Builder.CreateAlignmentAssumption(F->getDataLayout(), AlignedPtr,
5947 Builder.restoreIP(IP);
5952 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L, "simd");
5962 if (Block == CanonicalLoop->getCond() ||
5963 Block == CanonicalLoop->getHeader())
5965 Reachable.insert(Block);
5975 if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
5983 Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup}));
5999 Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst}));
6001 if (Simdlen || Safelen) {
6005 ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen;
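// When both simdlen and safelen are present, simdlen wins as the vectorization
// width; the parallel_accesses marking above is only added for the
// no-safelen / order(concurrent) case.  Minimal check of the width-selection
// rule used here (plain C++, illustrative only; 0 stands in for an absent
// clause):
//
//   #include <cassert>
//   int main() {
//     auto Width = [](int Simdlen, int Safelen) {
//       return Simdlen == 0 ? Safelen : Simdlen;
//     };
//     assert(Width(8, 16) == 8);   // simdlen takes precedence
//     assert(Width(0, 16) == 16);  // fall back to safelen
//     return 0;
//   }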
6031static std::unique_ptr<TargetMachine>
6035 StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
6036 StringRef Features = F->getFnAttribute("target-features").getValueAsString();
6047 std::nullopt, OptLevel));
6071 [&](const Function &F) { return TM->getTargetTransformInfo(F); });
6072 FAM.registerPass([&]() { return TIRA; });
6086 assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
6091 nullptr, ORE, static_cast<int>(OptLevel),
6112 << " Threshold=" << UP.Threshold << "\n"
6115 << " PartialOptSizeThreshold="
6135 Ptr = Load->getPointerOperand();
6137 Ptr = Store->getPointerOperand();
6141 Ptr = Ptr->stripPointerCasts();
6144 if (Alloca->getParent() == &F->getEntryBlock())
6164 int MaxTripCount = 0;
6165 bool MaxOrZero = false;
6166 unsigned TripMultiple = 0;
6168 bool UseUpperBound = false;
6170 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6172 unsigned Factor = UP.Count;
6173 LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
6181void OpenMPIRBuilder::unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop,
6183 CanonicalLoopInfo **UnrolledCLI) {
6184 assert(Factor >= 0 && "Unroll factor must not be negative");
6200 Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
6213 *UnrolledCLI = Loop;
6218 "unrolling only makes sense with a factor of 2 or larger");
6220 Type *IndVarTy = Loop->getIndVarType();
6227 std::vector<CanonicalLoopInfo *> LoopNest =
6228 tileLoops(DL, {Loop}, {FactorVal});
6231 CanonicalLoopInfo *InnerLoop = LoopNest[1];
6242 Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
6245 (*UnrolledCLI)->assertOK();
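// When the caller asks for the unrolled loop back (*UnrolledCLI), partial
// unrolling is implemented as a 1-D tiling with the unroll factor, followed by
// unrolling of the inner tile loop via llvm.loop.unroll.count; the outer
// floor loop is what is returned.  A self-contained sketch of the equivalent
// source-level shape for Factor == 4 (plain C++, illustrative only):
//
//   #include <cassert>
//   #include <vector>
//   int main() {
//     const int N = 10, Factor = 4;
//     std::vector<int> A, B;
//     for (int i = 0; i < N; ++i) A.push_back(i);          // original loop
//     const int Rem = N % Factor, Floors = N / Factor + (Rem != 0);
//     for (int f = 0; f < Floors; ++f) {                   // "floor" loop
//       int TileTrips = (f == N / Factor) ? Rem : Factor;  // epilogue tile clipped
//       for (int t = 0; t < TileTrips; ++t)                // tile loop, unrolled
//         B.push_back(Factor * f + t);
//     }
//     assert(A == B);
//     return 0;
//   }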
6249OpenMPIRBuilder::InsertPointTy
6250OpenMPIRBuilder::createCopyPrivate(const LocationDescription &Loc,
6253 if (!updateToLocation(Loc))
6257 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6258 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6259 Value *ThreadId = getOrCreateThreadID(Ident);
6261 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6263 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6265 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6266 Builder.CreateCall(Fn, Args);
6268 return Builder.saveIP();
6271OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6272 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
6276 if (!updateToLocation(Loc))
6282 if (!CPVars.empty()) {
6284 Builder.CreateStore(Builder.getInt32(0), DidIt);
6287 Directive OMPD = Directive::OMPD_single;
6289 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6290 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6291 Value *ThreadId = getOrCreateThreadID(Ident);
6294 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6295 Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6297 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6298 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6300 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6301 if (
Error Err = FiniCB(IP))
6308 Builder.CreateStore(Builder.getInt32(1), DidIt);
6321 InsertPointOrErrorTy AfterIP =
6322 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6326 return AfterIP.takeError();
6329 for (size_t I = 0, E = CPVars.size(); I < E; ++I)
6331 createCopyPrivate(LocationDescription(Builder.saveIP(), Loc.DL),
6332 ConstantInt::get(Int64, 0), CPVars[I],
6335 } else if (!IsNowait) {
6336 InsertPointOrErrorTy AfterIP =
6337 createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
6338 omp::Directive::OMPD_unknown, false,
6341 return AfterIP.takeError();
6343 return Builder.saveIP();
6346OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6347 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
6348 FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {
6350 if (!updateToLocation(Loc))
6353 Directive OMPD = Directive::OMPD_critical;
6355 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6356 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6357 Value *ThreadId = getOrCreateThreadID(Ident);
6358 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6359 Value *Args[] = {Ident, ThreadId, LockVar};
6365 EnterArgs.push_back(HintInst);
6366 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6368 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6370 Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);
6373 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6374 Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6376 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6380OpenMPIRBuilder::InsertPointTy
6381OpenMPIRBuilder::createOrderedDepend(const LocationDescription &Loc,
6382 InsertPointTy AllocaIP, unsigned NumLoops,
6384 const Twine &Name, bool IsDependSource) {
6388 "OpenMP runtime requires depend vec with i64 type");
6390 if (!updateToLocation(Loc))
6395 Builder.restoreIP(AllocaIP);
6396 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty, nullptr, Name);
6398 updateToLocation(Loc);
6401 for (unsigned I = 0; I < NumLoops; ++I) {
6402 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6403 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(I)});
6404 StoreInst *STInst = Builder.CreateStore(StoreValues[I], DependAddrGEPIter);
6408 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6409 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6412 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6413 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6414 Value *ThreadId = getOrCreateThreadID(Ident);
6415 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6419 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6421 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6422 Builder.CreateCall(RTLFn, Args);
6424 return Builder.saveIP();
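// The doacross depend vector built above is a stack array of NumLoops i64
// iteration numbers; only its base address is handed to __kmpc_doacross_post
// (depend(source)) or __kmpc_doacross_wait (depend(sink)).  A sketch of how
// such a vector is populated for a 2-deep ordered loop nest (plain C++,
// illustrative only; the runtime call itself is elided):
//
//   #include <cstdint>
//   #include <cstdio>
//   int main() {
//     int64_t DependVec[2];
//     for (int64_t i = 0; i < 3; ++i)
//       for (int64_t j = 0; j < 3; ++j) {
//         DependVec[0] = i;   // StoreValues[0]
//         DependVec[1] = j;   // StoreValues[1]
//         std::printf("post at (%lld, %lld), base %p\n", (long long)DependVec[0],
//                     (long long)DependVec[1], (void *)&DependVec[0]);
//       }
//     return 0;
//   }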
6427OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6428 const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
6429 FinalizeCallbackTy FiniCB, bool IsThreads) {
6430 if (!updateToLocation(Loc))
6433 Directive OMPD = Directive::OMPD_ordered;
6439 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6440 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6441 Value *ThreadId = getOrCreateThreadID(Ident);
6444 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6445 EntryCall = Builder.CreateCall(EntryRTLFn, Args);
6448 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6449 ExitCall = Builder.CreateCall(ExitRTLFn, Args);
6452 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6456OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6458 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
6459 bool HasFinalize, bool IsCancellable) {
6462 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6466 BasicBlock *EntryBB = Builder.GetInsertBlock();
6475 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6478 if (Error Err = BodyGenCB(InsertPointTy(),
6486 "Unexpected control flow graph state!!");
6487 InsertPointOrErrorTy AfterIP =
6488 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6490 return AfterIP.takeError();
6492 "Unexpected Control Flow State!");
6498 "Unexpected Insertion point location!");
6501 auto InsertBB = merged ? ExitPredBB : ExitBB;
6504 Builder.SetInsertPoint(InsertBB);
6506 return Builder.saveIP();
6509OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6512 if (!Conditional || !EntryCall)
6513 return Builder.saveIP();
6515 BasicBlock *EntryBB = Builder.GetInsertBlock();
6516 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6528 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6530 Builder.SetInsertPoint(UI);
6531 Builder.Insert(EntryBBTI);
6532 UI->eraseFromParent();
6539OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6540 omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
6543 Builder.restoreIP(FinIP);
6547 assert(!FinalizationStack.empty() &&
6548 "Unexpected finalization stack state!");
6550 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6551 assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
6553 if (Error Err = Fi.FiniCB(FinIP))
6560 Builder.SetInsertPoint(FiniBBTI);
6564 return Builder.saveIP();
6568 Builder.Insert(ExitCall);
6574OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6575 InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
6604 "copyin.not.master.end");
6611 Builder.SetInsertPoint(OMP_Entry);
6612 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6613 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6614 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6615 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6617 Builder.SetInsertPoint(CopyBegin);
6619 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6621 return Builder.saveIP();
6624CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
6628 updateToLocation(Loc);
6631 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6632 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6633 Value *ThreadId = getOrCreateThreadID(Ident);
6636 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6638 return Builder.CreateCall(Fn, Args, Name);
6641CallInst *OpenMPIRBuilder::createOMPFree(const LocationDescription &Loc,
6645 updateToLocation(Loc);
6648 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6649 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6650 Value *ThreadId = getOrCreateThreadID(Ident);
6652 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6653 return Builder.CreateCall(Fn, Args, Name);
6649 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6650 Value *ThreadId = getOrCreateThreadID(Ident);
6652 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6653 return Builder.CreateCall(Fn, Args, Name);
6656CallInst *OpenMPIRBuilder::createOMPInteropInit(
6657 const LocationDescription &Loc, Value *InteropVar,
6659 Value *DependenceAddress, bool HaveNowaitClause) {
6661 updateToLocation(Loc);
6664 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6665 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6666 Value *ThreadId = getOrCreateThreadID(Ident);
6667 if (Device == nullptr)
6669 Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
6670 if (NumDependences == nullptr) {
6671 NumDependences = ConstantInt::get(Int32, 0);
6675 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6677 Ident, ThreadId, InteropVar, InteropTypeVal,
6678 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
6680 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
6682 return Builder.CreateCall(Fn, Args);
6685CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
6686 const LocationDescription &Loc, Value *InteropVar, Value *Device,
6687 Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
6689 updateToLocation(Loc);
6692 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6693 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6694 Value *ThreadId = getOrCreateThreadID(Ident);
6695 if (Device == nullptr)
6697 if (NumDependences == nullptr) {
6698 NumDependences = ConstantInt::get(Int32, 0);
6702 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6704 Ident, ThreadId, InteropVar, Device,
6705 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6707 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
6709 return Builder.CreateCall(Fn, Args);
6712CallInst *OpenMPIRBuilder::createOMPInteropUse(const LocationDescription &Loc,
6714 Value *NumDependences,
6715 Value *DependenceAddress,
6716 bool HaveNowaitClause) {
6718 updateToLocation(Loc);
6720 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6721 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6722 Value *ThreadId = getOrCreateThreadID(Ident);
6723 if (Device == nullptr)
6725 if (NumDependences == nullptr) {
6726 NumDependences = ConstantInt::get(Int32, 0);
6730 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
6732 Ident, ThreadId, InteropVar, Device,
6733 NumDependences, DependenceAddress, HaveNowaitClauseVal};
6735 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
6737 return Builder.CreateCall(Fn, Args);
6740CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
6744 updateToLocation(Loc);
6747 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6748 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6749 Value *ThreadId = getOrCreateThreadID(Ident);
6751 getOrCreateInternalVariable(Int8PtrPtr, Name.str());
6755 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
6757 return Builder.CreateCall(Fn, Args);
6760OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
6761 const LocationDescription &Loc,
6762 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
6764 "expected num_threads and num_teams to be specified");
6766 if (!updateToLocation(Loc))
6770 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
6771 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6783 const std::string DebugPrefix = "_debug__";
6784 if (KernelName.ends_with(DebugPrefix)) {
6785 KernelName = KernelName.drop_back(DebugPrefix.length());
6786 Kernel = M.getFunction(KernelName);
6792 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
6797 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
6798 if (MaxThreadsVal < 0)
6799 MaxThreadsVal = std::max(
6802 if (MaxThreadsVal > 0)
6803 writeThreadBoundsForKernel(T, *Kernel, Attrs.MinThreads, MaxThreadsVal);
6814 Function *Fn = getOrCreateRuntimeFunctionPtr(
6815 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
6818 Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
6819 Constant *DynamicEnvironmentInitializer =
6823 DynamicEnvironmentInitializer, DynamicEnvironmentName,
6825 DL.getDefaultGlobalsAddressSpace());
6829 DynamicEnvironmentGV->getType() == DynamicEnvironmentPtr
6830 ? DynamicEnvironmentGV
6832 DynamicEnvironmentPtr);
6835 ConfigurationEnvironment, {
6836 UseGenericStateMachineVal,
6837 MayUseNestedParallelismVal,
6844 ReductionBufferLength,
6847 KernelEnvironment, {
6848 ConfigurationEnvironmentInitializer,
6852 std::string KernelEnvironmentName =
6853 (KernelName + "_kernel_environment").str();
6856 KernelEnvironmentInitializer, KernelEnvironmentName,
6858 DL.getDefaultGlobalsAddressSpace());
6862 KernelEnvironmentGV->getType() == KernelEnvironmentPtr
6863 ? KernelEnvironmentGV
6865 KernelEnvironmentPtr);
6866 Value *KernelLaunchEnvironment = DebugKernelWrapper->getArg(0);
6868 KernelLaunchEnvironment =
6869 KernelLaunchEnvironment->getType() == KernelLaunchEnvParamTy
6870 ? KernelLaunchEnvironment
6871 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
6872 KernelLaunchEnvParamTy);
6874 Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
6876 Value *ExecUserCode = Builder.CreateICmpEQ(
6886 auto *UI = Builder.CreateUnreachable();
6892 Builder.SetInsertPoint(WorkerExitBB);
6893 Builder.CreateRetVoid();
6896 Builder.SetInsertPoint(CheckBBTI);
6897 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
6900 UI->eraseFromParent();
6907void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
6908 int32_t TeamsReductionDataSize,
6909 int32_t TeamsReductionBufferLength) {
6910 if (!updateToLocation(Loc))
6913 Function *Fn = getOrCreateRuntimeFunctionPtr(
6914 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
6916 Builder.CreateCall(Fn, {});
6918 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
6924 const std::string DebugPrefix = "_debug__";
6926 KernelName = KernelName.drop_back(DebugPrefix.length());
6927 auto *KernelEnvironmentGV =
6928 M.getNamedGlobal((KernelName + "_kernel_environment").str());
6929 assert(KernelEnvironmentGV && "Expected kernel environment global\n");
6930 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
6932 KernelEnvironmentInitializer,
6933 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
6935 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
6942 if (Kernel.hasFnAttribute(Name)) {
6943 int32_t OldLimit = Kernel.getFnAttributeAsParsedInteger(Name);
6949std::pair<int32_t, int32_t>
6951 int32_t ThreadLimit =
6952 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
6955 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
6956 if (!Attr.isValid() || !Attr.isStringAttribute())
6957 return {0, ThreadLimit};
6958 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
6961 return {0, ThreadLimit};
6962 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
6968 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
6969 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
6970 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
6972 return {0, ThreadLimit};
6975void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
6978 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
6981 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
6989std::pair<int32_t, int32_t>
6992 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
6996 int32_t LB, int32_t UB) {
7003 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7006void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7008 if (Config.isTargetDevice()) {
7015 else if (T.isNVPTX())
7017 else if (T.isSPIRV())
7024 if (Config.isTargetDevice()) {
7025 assert(OutlinedFn && "The outlined function must exist if embedded");
7034Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
7039 assert(!M.getGlobalVariable(EntryFnName, true) &&
7040 "Named kernel already exists?");
7046Error OpenMPIRBuilder::emitTargetRegionFunction(
7047 TargetRegionEntryInfo &EntryInfo,
7048 FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry,
7052 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7054 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7058 OutlinedFn = *CBResult;
7060 OutlinedFn = nullptr;
7066 if (!IsOffloadEntry)
7069 std::string EntryFnIDName =
7070 Config.isTargetDevice()
7071 ? std::string(EntryFnName)
7072 : createPlatformSpecificName({EntryFnName, "region_id"});
7074 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7075 EntryFnName, EntryFnIDName);
7079Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7080 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7083 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7084 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7085 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7086 OffloadInfoManager.registerTargetRegionEntryInfo(
7087 EntryInfo, EntryAddr, OutlinedFnID,
7088 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7089 return OutlinedFnID;
7092OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7093 const LocationDescription &Loc, InsertPointTy AllocaIP,
7094 InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
7095 TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB,
7097 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7098 BodyGenTy BodyGenType)>
7101 if (!updateToLocation(Loc))
7102 return InsertPointTy();
7104 Builder.restoreIP(CodeGenIP);
7106 if (Config.IsTargetDevice.value_or(false)) {
7108 InsertPointOrErrorTy AfterIP =
7109 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7111 return AfterIP.takeError();
7112 Builder.restoreIP(*AfterIP);
7114 return Builder.saveIP();
7117 bool IsStandAlone = !BodyGenCB;
7118 MapInfosTy *MapInfo;
7122 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7123 InsertPointTy CodeGenIP) ->
Error {
7124 MapInfo = &GenMapInfoCB(Builder.saveIP());
7125 if (
Error Err = emitOffloadingArrays(
7126 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7127 true, DeviceAddrCB))
7130 TargetDataRTArgs RTArgs;
7131 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7134 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7139 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7140 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7144 SrcLocInfo, DeviceID,
7145 PointerNum, RTArgs.BasePointersArray,
7146 RTArgs.PointersArray, RTArgs.SizesArray,
7147 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7148 RTArgs.MappersArray};
7151 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7155 if (
Info.HasNoWait) {
7162 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7165 if (
Info.HasNoWait) {
7169 emitBlock(OffloadContBlock, CurFn,
true);
7170 Builder.restoreIP(Builder.saveIP());
7175 bool RequiresOuterTargetTask =
Info.HasNoWait;
7176 if (!RequiresOuterTargetTask)
7177 cantFail(TaskBodyCB(
nullptr,
nullptr,
7180 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7181 {}, RTArgs,
Info.HasNoWait));
7183 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7184 omp::OMPRTL___tgt_target_data_begin_mapper);
7186 Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
7188 for (auto DeviceMap : Info.DevicePtrInfoMap) {
7191 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7192 Builder.CreateStore(LI, DeviceMap.second.second);
7199 InsertPointOrErrorTy AfterIP =
7200 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7202 return AfterIP.takeError();
7203 Builder.restoreIP(*AfterIP);
7211 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7212 InsertPointTy CodeGenIP) ->
Error {
7213 InsertPointOrErrorTy AfterIP =
7214 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7216 return AfterIP.takeError();
7217 Builder.restoreIP(*AfterIP);
7222 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7223 TargetDataRTArgs RTArgs;
7224 Info.EmitDebug = !MapInfo->Names.empty();
7225 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7228 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7233 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7234 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7237 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7238 PointerNum, RTArgs.BasePointersArray,
7239 RTArgs.PointersArray, RTArgs.SizesArray,
7240 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7241 RTArgs.MappersArray};
7243 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7245 Builder.CreateCall(EndMapperFunc, OffloadingArgs);
7251 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7259 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7260 return BeginThenGen(AllocaIP, Builder.saveIP());
7268 InsertPointOrErrorTy AfterIP =
7269 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7271 return AfterIP.takeError();
7275 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7276 return EndThenGen(AllocaIP, Builder.saveIP());
7279 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7280 return BeginThenGen(AllocaIP, Builder.saveIP());
7286 return Builder.saveIP();
7290OpenMPIRBuilder::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
7291 bool IsGPUDistribute) {
7292 assert((IVSize == 32 || IVSize == 64) &&
7293 "IV size is not compatible with the omp runtime");
7295 if (IsGPUDistribute)
7297 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7298 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7299 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
7300 : omp::OMPRTL___kmpc_distribute_static_init_8u);
7302 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7303 : omp::OMPRTL___kmpc_for_static_init_4u)
7304 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7305 : omp::OMPRTL___kmpc_for_static_init_8u);
7307 return getOrCreateRuntimeFunction(M, Name);
7310FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(unsigned IVSize,
7312 assert((IVSize == 32 || IVSize == 64) &&
7313 "IV size is not compatible with the omp runtime");
7315 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7316 : omp::OMPRTL___kmpc_dispatch_init_4u)
7317 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
7318 : omp::OMPRTL___kmpc_dispatch_init_8u);
7320 return getOrCreateRuntimeFunction(M, Name);
7323FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(unsigned IVSize,
7325 assert((IVSize == 32 || IVSize == 64) &&
7326 "IV size is not compatible with the omp runtime");
7328 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7329 : omp::OMPRTL___kmpc_dispatch_next_4u)
7330 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
7331 : omp::OMPRTL___kmpc_dispatch_next_8u);
7333 return getOrCreateRuntimeFunction(M, Name);
7336FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(unsigned IVSize,
7338 assert((IVSize == 32 || IVSize == 64) &&
7339 "IV size is not compatible with the omp runtime");
7341 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7342 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7343 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
7344 : omp::OMPRTL___kmpc_dispatch_fini_8u);
7346 return getOrCreateRuntimeFunction(M, Name);
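// All of these helpers pick the runtime entry point purely from the IV's
// width and signedness: _4/_4u for 32-bit signed/unsigned, _8/_8u for 64-bit.
// A tiny sketch of that suffix-selection rule (plain C++, illustrative only):
//
//   #include <cassert>
//   #include <string>
//   int main() {
//     auto Suffix = [](unsigned IVSize, bool IVSigned) {
//       return std::string(IVSize == 32 ? "_4" : "_8") + (IVSigned ? "" : "u");
//     };
//     assert(Suffix(32, false) == "_4u");
//     assert(Suffix(64, true) == "_8");
//     return 0;
//   }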
7350 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7355 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7363 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7367 if (NewVar && (arg == NewVar->
getArg()))
7377 auto UpdateDebugRecord = [&](
auto *DR) {
7380 for (
auto Loc : DR->location_ops()) {
7381 auto Iter = ValueReplacementMap.find(
Loc);
7382 if (Iter != ValueReplacementMap.end()) {
7383 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7384 ArgNo = std::get<1>(Iter->second) + 1;
7388 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7395 "Unexpected debug intrinsic");
7397 UpdateDebugRecord(&DVR);
7400 if (OMPBuilder.Config.isTargetDevice()) {
7402 Module *M = Func->getParent();
7405 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7407 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7408 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7410 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7417 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7419 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7420 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7422 if (OMPBuilder.Config.isTargetDevice()) {
7430 for (auto &Arg : Inputs)
7435 for (auto &Arg : Inputs)
7439 auto BB = Builder.GetInsertBlock();
7451 if (TargetCpuAttr.isStringAttribute())
7452 Func->addFnAttr(TargetCpuAttr);
7454 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7455 if (TargetFeaturesAttr.isStringAttribute())
7456 Func->addFnAttr(TargetFeaturesAttr);
7458 if (OMPBuilder.Config.isTargetDevice()) {
7460 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7461 OMPBuilder.emitUsed("llvm.compiler.used", {ExecMode});
7472 Builder.SetInsertPoint(EntryBB);
7475 if (OMPBuilder.Config.isTargetDevice())
7476 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7478 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7483 if (OMPBuilder.Config.isTargetDevice())
7484 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7488 splitBB(Builder, true, "outlined.body");
7489 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7491 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->begin()));
7493 return AfterIP.takeError();
7494 Builder.restoreIP(*AfterIP);
7495 if (OMPBuilder.Config.isTargetDevice())
7496 OMPBuilder.createTargetDeinit(Builder);
7499 Builder.CreateRetVoid();
7503 auto AllocaIP = Builder.saveIP();
7508 const auto &ArgRange =
7509 OMPBuilder.Config.isTargetDevice()
7510 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7543 if (Instr->getFunction() == Func)
7544 Instr->replaceUsesOfWith(
Input, InputCopy);
7550 for (
auto InArg :
zip(Inputs, ArgRange)) {
7552 Argument &Arg = std::get<1>(InArg);
7553 Value *InputCopy =
nullptr;
7555 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7556 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7558 return AfterIP.takeError();
7559 Builder.restoreIP(*AfterIP);
7560 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7579 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7586 ReplaceValue(
Input, InputCopy, Func);
7590 for (
auto Deferred : DeferredReplacement)
7591 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7594 ValueReplacementMap);
7602 Value *TaskWithPrivates,
7603 Type *TaskWithPrivatesTy) {
7605 Type *TaskTy = OMPIRBuilder.Task;
7608 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7609 Value *Shareds = TaskT;
7619 if (TaskWithPrivatesTy != TaskTy)
7620 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7637 const size_t NumOffloadingArrays, const int SharedArgsOperandNo) {
7642 assert((!NumOffloadingArrays || PrivatesTy) &&
7643 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7646 Module &M = OMPBuilder.M;
7670 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7676 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7677 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7683 ".omp_target_task_proxy_func",
7684 Builder.GetInsertBlock()->getModule());
7685 Value *ThreadId = ProxyFn->getArg(0);
7686 Value *TaskWithPrivates = ProxyFn->getArg(1);
7687 ThreadId->
setName(
"thread.id");
7688 TaskWithPrivates->
setName(
"task");
7690 bool HasShareds = SharedArgsOperandNo > 0;
7691 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7694 Builder.SetInsertPoint(EntryBB);
7700 if (HasOffloadingArrays) {
7701 assert(TaskTy != TaskWithPrivatesTy &&
7702 "If there are offloading arrays to pass to the target"
7703 "TaskTy cannot be the same as TaskWithPrivatesTy");
7706 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7707 for (unsigned int i = 0; i < NumOffloadingArrays; ++i)
7709 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7713 auto *ArgStructAlloca =
7715 assert(ArgStructAlloca &&
7716 "Unable to find the alloca instruction corresponding to arguments "
7717 "for extracted function");
7721 Builder.CreateAlloca(ArgStructType, nullptr, "structArg");
7723 Value *SharedsSize =
7724 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7727 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7729 Builder.CreateMemCpy(
7730 NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
7732 KernelLaunchArgs.push_back(NewArgStructAlloca);
7734 Builder.CreateCall(KernelLaunchFunction, KernelLaunchArgs);
7735 Builder.CreateRetVoid();
7741 return GEP->getSourceElementType();
7743 return Alloca->getAllocatedType();
7766 if (OffloadingArraysToPrivatize.empty())
7767 return OMPIRBuilder.Task;
7770 for (Value *V : OffloadingArraysToPrivatize) {
7771 assert(V->getType()->isPointerTy() &&
7772 "Expected pointer to array to privatize. Got a non-pointer value "
7775 assert(ArrayTy && "ArrayType cannot be nullptr");
7781 "struct.task_with_privates");
7784 OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry,
7785 TargetRegionEntryInfo &EntryInfo,
7786 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7789 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7790 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7792 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
7795 EntryFnName, Inputs, CBFunc,
7799 return OMPBuilder.emitTargetRegionFunction(
7800 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
7804OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
7805 TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc,
7806 OpenMPIRBuilder::InsertPointTy AllocaIP,
7808 const TargetDataRTArgs &RTArgs, bool HasNoWait) {
7932 splitBB(Builder, true, "target.task.body");
7934 splitBB(Builder, true, "target.task.alloca");
7936 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
7937 TargetTaskAllocaBB->begin());
7938 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->begin());
7941 OI.EntryBB = TargetTaskAllocaBB;
7942 OI.OuterAllocaBB = AllocaIP.getBlock();
7947 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP, "global.tid", false));
7950 Builder.restoreIP(TargetTaskBodyIP);
7951 if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
7965 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
7969 bool NeedsTargetTask = HasNoWait && DeviceID;
7970 if (NeedsTargetTask) {
7972 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
7973 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
7974 RTArgs.SizesArray}) {
7976 OffloadingArraysToPrivatize.push_back(V);
7977 OI.ExcludeArgsFromAggregate.push_back(V);
7981 OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, NeedsTargetTask,
7982 DeviceID, OffloadingArraysToPrivatize](
7985 "there must be a single user for the outlined function");
7999 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8000 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8002 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8003 "Wrong number of arguments for StaleCI when shareds are present");
8004 int SharedArgOperandNo =
8005 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8011 if (!OffloadingArraysToPrivatize.
empty())
8016 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8017 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8019 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8022 Builder.SetInsertPoint(StaleCI);
8027 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8028 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8037 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8038 : getOrCreateRuntimeFunctionPtr(
8039 OMPRTL___kmpc_omp_target_task_alloc);
8043 Value *ThreadID = getOrCreateThreadID(Ident);
8050 Value *TaskSize = Builder.getInt64(
8051 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8056 Value *SharedsSize = Builder.getInt64(0);
8058 auto *ArgStructAlloca =
8060 assert(ArgStructAlloca &&
8061 "Unable to find the alloca instruction corresponding to arguments "
8062 "for extracted function");
8063 auto *ArgStructType =
8065 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8066 "arguments for extracted function");
8068 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8077 Value *Flags = Builder.getInt32(0);
8087 TaskSize, SharedsSize,
8090 if (NeedsTargetTask) {
8091 assert(DeviceID && "Expected non-empty device ID.");
8095 TaskData = Builder.CreateCall(TaskAllocFn, TaskAllocArgs);
8101 *this, Builder, TaskData, TaskWithPrivatesTy);
8102 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8105 if (!OffloadingArraysToPrivatize.empty()) {
8107 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8108 for (unsigned int i = 0; i < OffloadingArraysToPrivatize.size(); ++i) {
8109 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8109 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8116 "ElementType should match ArrayType");
8119 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8120 Builder.CreateMemCpy(
8121 Dst, Alignment, PtrToPrivatize, Alignment,
8122 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8136 if (!NeedsTargetTask) {
8139 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8143 Builder.getInt32(Dependencies.size()),
8145 ConstantInt::get(Builder.getInt32Ty(), 0),
8151 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8153 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8154 Builder.CreateCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8155 CallInst *CI = Builder.CreateCall(ProxyFn, {ThreadID, TaskData});
8157 Builder.CreateCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8158 } else if (DepArray) {
8163 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8166 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8167 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8171 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8172 Builder.CreateCall(TaskFn, {Ident, ThreadID, TaskData});
8177 I->eraseFromParent();
8179 addOutlineInfo(std::move(OI));
8182 << *(Builder.GetInsertBlock()) <<
"\n");
8184 << *(Builder.GetInsertBlock()->getParent()->getParent())
8186 return Builder.saveIP();
8189Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8190 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info,
8191 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8192 CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous,
8195 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
8196 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8198 emitOffloadingArraysArgument(Builder, RTArgs, Info, ForEndCall);
8204 OpenMPIRBuilder::InsertPointTy AllocaIP,
8205 OpenMPIRBuilder::TargetDataInfo &
Info,
8206 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8207 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8210 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8211 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8217 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8218 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8219 Builder.restoreIP(IP);
8220 Builder.CreateCall(OutlinedFn, Args);
8221 return Builder.saveIP();
8224 bool HasDependencies = Dependencies.
size() > 0;
8225 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8227 OpenMPIRBuilder::TargetKernelArgs KArgs;
8234 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8242 if (OutlinedFnID && DeviceID)
8243 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8244 EmitTargetCallFallbackCB, KArgs,
8245 DeviceID, RTLoc, TargetTaskAllocaIP);
8253 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8256 OMPBuilder.Builder.restoreIP(AfterIP);
8260 auto &&EmitTargetCallElse =
8261 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8262 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8265 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8266 if (RequiresOuterTargetTask) {
8270 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8271 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8273 Dependencies, EmptyRTArgs, HasNoWait);
8275 return EmitTargetCallFallbackCB(Builder.saveIP());
8278 Builder.restoreIP(AfterIP);
8282 auto &&EmitTargetCallThen =
8283 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8284 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8285 Info.HasNoWait = HasNoWait;
8286 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8287 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8288 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8289 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8296 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8297 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8302 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8304 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8308 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8311 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8319 Value *MaxThreadsClause =
8320 RuntimeAttrs.TeamsThreadLimit.size() == 1
8321 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8324 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8325 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8326 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8327 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8329 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8330 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8332 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
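// The effective num_threads sent to the runtime is the smallest of the
// clauses that are actually present (target thread_limit, teams thread_limit,
// the default max threads), with an absent clause contributing nothing and a
// fully absent set emitted as 0.  A self-contained sketch of that combination
// rule (plain C++, illustrative only; 0 stands in for a missing clause):
//
//   #include <cassert>
//   int main() {
//     auto Combine = [](unsigned Clause, unsigned Result) {
//       if (!Clause) return Result;                  // clause absent
//       if (!Result) return Clause;                  // first clause seen
//       return Result < Clause ? Result : Clause;    // select(icmp ult, ...)
//     };
//     unsigned NumThreads = 0;
//     NumThreads = Combine(/*target thread_limit*/ 64, NumThreads);
//     NumThreads = Combine(/*teams thread_limit*/ 32, NumThreads);
//     assert(NumThreads == 32);
//     return 0;
//   }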
8335 unsigned NumTargetItems = Info.NumberOfPtrs;
8339 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8340 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8343 Value *TripCount = RuntimeAttrs.LoopTripCount
8344 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8345 Builder.getInt64Ty(),
8347 : Builder.getInt64(0);
8350 Value *DynCGGroupMem = Builder.getInt32(0);
8352 KArgs = OpenMPIRBuilder::TargetKernelArgs(NumTargetItems, RTArgs, TripCount,
8353 NumTeamsC, NumThreadsC,
8354 DynCGGroupMem, HasNoWait);
8358 OpenMPIRBuilder::InsertPointTy AfterIP = cantFail([&]() {
8361 if (RequiresOuterTargetTask)
8362 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8363 Dependencies, KArgs.RTArgs,
8366 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8367 EmitTargetCallFallbackCB, KArgs,
8368 DeviceID, RTLoc, AllocaIP);
8371 Builder.restoreIP(AfterIP);
8378 if (!OutlinedFnID) {
8379 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8385 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8389 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8390 EmitTargetCallElse, AllocaIP));
8393OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8394 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8395 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8396 TargetRegionEntryInfo &EntryInfo,
8397 const TargetKernelDefaultAttrs &DefaultAttrs,
8398 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8400 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8401 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8402 CustomMapperCallbackTy CustomMapperCB,
8405 if (!updateToLocation(
Loc))
8406 return InsertPointTy();
8408 Builder.restoreIP(CodeGenIP);
8416 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8417 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8423 if (!Config.isTargetDevice())
8425 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8426 CustomMapperCB, Dependencies, HasNowait);
8427 return Builder.saveIP();
8440 return OS.str().str();
8445 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8446 Config.separator());
8450OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
8452 auto &Elem = *InternalVars.try_emplace(Name, nullptr).first;
8454 assert(Elem.second->getValueType() == Ty &&
8455 "OMP internal variable has different type than requested");
8471 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8478Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
8479 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
8480 std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
8481 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8484Value *OpenMPIRBuilder::getSizeInBytes(Value *BasePtr) {
8489 Builder.CreateGEP(BasePtr->getType(), Null, Builder.getInt32(1));
8491 return SizePtrToInt;
8496 std::string VarName) {
8500 M, MaptypesArrayInit->getType(),
8504 return MaptypesArrayGlobal;
8507void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
8508                                          InsertPointTy AllocaIP,
8509                                          unsigned NumOperands,
8510                                          struct MapperAllocas &MapperAllocas) {
8511   if (!updateToLocation(Loc))
8516   Builder.restoreIP(AllocaIP);
8518       ArrI8PtrTy, nullptr, ".offload_baseptrs");
8522       ArrI64Ty, nullptr, ".offload_sizes");
8523   updateToLocation(Loc);
8524   MapperAllocas.ArgsBase = ArgsBase;
8525   MapperAllocas.Args = Args;
8526   MapperAllocas.ArgSizes = ArgSizes;
8529void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
8532                                     struct MapperAllocas &MapperAllocas,
8533                                     int64_t DeviceID, unsigned NumOperands) {
8534   if (!updateToLocation(Loc))
8539   Value *ArgsBaseGEP =
8540       Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8541                                 {Builder.getInt32(0), Builder.getInt32(0)});
8543       Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8544                                 {Builder.getInt32(0), Builder.getInt32(0)});
8545   Value *ArgSizesGEP =
8546       Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8547                                 {Builder.getInt32(0), Builder.getInt32(0)});
8550   Builder.CreateCall(MapperFunc,
8551                      {SrcLocInfo, Builder.getInt64(DeviceID),
8552                       Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
8553                       ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
8556void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
8557                                                   TargetDataRTArgs &RTArgs,
8558                                                   TargetDataInfo &Info,
8560   assert((!ForEndCall || Info.separateBeginEndCalls()) &&
8561          "expected region end call to runtime only when end call is separate");
8563   auto VoidPtrTy = UnqualPtrTy;
8564   auto VoidPtrPtrTy = UnqualPtrTy;
8566   auto Int64PtrTy = UnqualPtrTy;
8568   if (!Info.NumberOfPtrs) {
8578   RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8580       Info.RTArgs.BasePointersArray,
8582   RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8586   RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8589   RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8591       ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
8592                                                  : Info.RTArgs.MapTypesArray,
8598   if (!Info.EmitDebug)
8601     RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8607   if (!Info.HasMapper)
8610   RTArgs.MappersArray =
8611       Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
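// For non-contiguous target updates the runtime expects, per mapped entry, an
// array of per-dimension {offset, count, stride} records (struct.descriptor_dim).
// emitNonContiguousDescriptor materializes those arrays on the stack and stores
// their addresses into the pointers array.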
8614void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8615                                                  InsertPointTy CodeGenIP,
8616                                                  MapInfosTy &CombinedInfo,
8617                                                  TargetDataInfo &Info) {
8618   MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8619       CombinedInfo.NonContigInfo;
8632       "struct.descriptor_dim");
8634   enum { OffsetFD = 0, CountFD, StrideFD };
8638   for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
8641     if (NonContigInfo.Dims[I] == 1)
8643     Builder.restoreIP(AllocaIP);
8646         Builder.CreateAlloca(ArrayTy, nullptr, "dims");
8647     Builder.restoreIP(CodeGenIP);
8648     for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
8649       unsigned RevIdx = EE - II - 1;
8650       Value *DimsLVal = Builder.CreateInBoundsGEP(
8652           {Builder.getInt64(0), Builder.getInt64(II)});
8654       Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8655       Builder.CreateAlignedStore(
8656           NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8657           M.getDataLayout().getPrefTypeAlign(OffsetLVal->getType()));
8659       Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8660       Builder.CreateAlignedStore(
8661           NonContigInfo.Counts[L][RevIdx], CountLVal,
8662           M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
8664       Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8665       Builder.CreateAlignedStore(
8666           NonContigInfo.Strides[L][RevIdx], StrideLVal,
8667           M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
8670     Builder.restoreIP(CodeGenIP);
8671     Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8672         DimsAddr, Builder.getPtrTy());
8673     Value *P = Builder.CreateConstInBoundsGEP2_32(
8675         Info.RTArgs.PointersArray, 0, I);
8676     Builder.CreateAlignedStore(
8677         DAddr, P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
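// Helper for user-defined mappers: emits the conditional runtime call that
// initializes or deletes the whole array section up front. The call is only
// taken when the section spans more than one element (or base != begin for
// PTR_AND_OBJ maps) and the OMP_MAP_DELETE bit of the incoming map type
// matches the requested init/delete mode.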
8682void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8690       M.getContext(), createPlatformSpecificName({"omp.array", Prefix}));
8692       Builder.CreateICmpSGT(Size, Builder.getInt64(1), "omp.arrayinit.isarray");
8693   Value *DeleteBit = Builder.CreateAnd(
8696           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8697               OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
8702     Value *BaseIsBegin = Builder.CreateICmpNE(Base, Begin);
8704     Value *PtrAndObjBit = Builder.CreateAnd(
8707             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8708                 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8709     PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8710     BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8711     Cond = Builder.CreateOr(IsArray, BaseIsBegin);
8712     DeleteCond = Builder.CreateIsNull(
8714         createPlatformSpecificName({"omp.array", Prefix, ".delete"}));
8717     DeleteCond = Builder.CreateIsNotNull(
8719         createPlatformSpecificName({"omp.array", Prefix, ".delete"}));
8721   Cond = Builder.CreateAnd(Cond, DeleteCond);
8722   Builder.CreateCondBr(Cond, BodyBB, ExitBB);
8724   emitBlock(BodyBB, MapperFn);
8727   Value *ArraySize = Builder.CreateNUWMul(Size, Builder.getInt64(ElementSize));
8730   Value *MapTypeArg = Builder.CreateAnd(
8733       ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8734           OpenMPOffloadMappingFlags::OMP_MAP_TO |
8735           OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8736   MapTypeArg = Builder.CreateOr(
8739       static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8740           OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8744   Value *OffloadingArgs[] = {MapperHandle, Base, Begin,
8745                              ArraySize, MapTypeArg, MapName};
8747       getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
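// The user-defined mapper function emitted below walks the array section
// element by element: an up-front init/delete call (emitUDMapperArrayInitOrDel),
// a loop driven by a pointer PHI that remaps each element's components through
// __tgt_push_mapper_component (or a nested custom mapper), and a final delete
// pass once the loop is done.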
8755     Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
8771   MapperFn->addFnAttr(Attribute::NoInline);
8772   MapperFn->addFnAttr(Attribute::NoUnwind);
8782   auto SavedIP = Builder.saveIP();
8783   Builder.SetInsertPoint(EntryBB);
8795   TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
8796   Size = Builder.CreateExactUDiv(Size, Builder.getInt64(ElementSize));
8797   Value *PtrBegin = BeginIn;
8798   Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin, Size);
8803   emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn, Size,
8804                              MapType, MapName, ElementSize, HeadBB,
8810   emitBlock(HeadBB, MapperFn);
8815       Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
8816   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
8819   emitBlock(BodyBB, MapperFn);
8822       Builder.CreatePHI(PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
8826   MapInfosOrErrorTy Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
8828     return Info.takeError();
8832   Value *OffloadingArgs[] = {MapperHandle};
8833   Value *PreviousSize = Builder.CreateCall(
8834       getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
8836   Value *ShiftedPreviousSize =
8837       Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
8840   for (unsigned I = 0; I < Info->BasePointers.size(); ++I) {
8849     Value *OriMapType = Builder.getInt64(
8850         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8852     Value *MemberMapType =
8853         Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
8867     Value *LeftToFrom = Builder.CreateAnd(
8870         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8871             OpenMPOffloadMappingFlags::OMP_MAP_TO |
8872             OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8881     Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
8882     Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
8884     emitBlock(AllocBB, MapperFn);
8885     Value *AllocMapType = Builder.CreateAnd(
8888         ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8889             OpenMPOffloadMappingFlags::OMP_MAP_TO |
8890             OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8891     Builder.CreateBr(EndBB);
8892     emitBlock(AllocElseBB, MapperFn);
8893     Value *IsTo = Builder.CreateICmpEQ(
8896         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8897             OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8898     Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
8900     emitBlock(ToBB, MapperFn);
8901     Value *ToMapType = Builder.CreateAnd(
8904         ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8905             OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8906     Builder.CreateBr(EndBB);
8907     emitBlock(ToElseBB, MapperFn);
8908     Value *IsFrom = Builder.CreateICmpEQ(
8911         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8912             OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8913     Builder.CreateCondBr(IsFrom, FromBB, EndBB);
8915     emitBlock(FromBB, MapperFn);
8916     Value *FromMapType = Builder.CreateAnd(
8919         ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
8920             OpenMPOffloadMappingFlags::OMP_MAP_TO)));
8922     emitBlock(EndBB, MapperFn);
8925         Builder.CreatePHI(Builder.getInt64Ty(), 4, "omp.maptype");
8931     Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
8932                                CurSizeArg, CurMapType, CurNameArg};
8934     auto ChildMapperFn = CustomMapperCB(I);
8936       return ChildMapperFn.takeError();
8937     if (*ChildMapperFn) {
8939       Builder.CreateCall(*ChildMapperFn, OffloadingArgs)->setDoesNotThrow();
8944           getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
8951   Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
8952                                               "omp.arraymap.next");
8954   Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
8956   Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
8958   emitBlock(ExitBB, MapperFn);
8961   emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn, Size,
8962                              MapType, MapName, ElementSize, DoneBB,
8966   emitBlock(DoneBB, MapperFn, true);
8968   Builder.CreateRetVoid();
8969   Builder.restoreIP(SavedIP);
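// emitOffloadingArrays lowers the combined map information into the
// .offload_baseptrs/.offload_ptrs/.offload_sizes/.offload_mappers stack arrays
// plus the constant offload_maptypes/offload_mapnames globals that the
// __tgt_target family of runtime calls consumes.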
8973Error OpenMPIRBuilder::emitOffloadingArrays(
8974     InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
8975     TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB,
8976     bool IsNonContiguous,
8980   Info.clearArrayInfo();
8981   Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
8983   if (Info.NumberOfPtrs == 0)
8986   Builder.restoreIP(AllocaIP);
8992   Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
8993       PointerArrayType, nullptr, ".offload_baseptrs");
8995   Info.RTArgs.PointersArray = Builder.CreateAlloca(
8996       PointerArrayType, nullptr, ".offload_ptrs");
8997   AllocaInst *MappersArray = Builder.CreateAlloca(
8998       PointerArrayType, nullptr, ".offload_mappers");
8999   Info.RTArgs.MappersArray = MappersArray;
9006       ConstantInt::get(Int64Ty, 0));
9008   for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9011     if (IsNonContiguous &&
9012         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9013             CombinedInfo.Types[I] &
9014             OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9016           ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
9022       RuntimeSizes.set(I);
9025   if (RuntimeSizes.all()) {
9027     Info.RTArgs.SizesArray = Builder.CreateAlloca(
9028         SizeArrayType, nullptr, ".offload_sizes");
9033     std::string Name = createPlatformSpecificName({"offload_sizes"});
9034     auto *SizesArrayGbl =
9039     if (!RuntimeSizes.any()) {
9040       Info.RTArgs.SizesArray = SizesArrayGbl;
9042       unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9043       Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9046           SizeArrayType, nullptr, ".offload_sizes");
9049       Builder.CreateMemCpy(
9050           Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()),
9051           SizesArrayGbl, OffloadSizeAlign,
9056       Info.RTArgs.SizesArray = Buffer;
9064   for (auto mapFlag : CombinedInfo.Types)
9066         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9068   std::string MaptypesName = createPlatformSpecificName({"offload_maptypes"});
9069   auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9070   Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9073   if (!CombinedInfo.Names.empty()) {
9074     auto *MapNamesArrayGbl = createOffloadMapnames(
9075         CombinedInfo.Names, createPlatformSpecificName({"offload_mapnames"}));
9076     Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9077     Info.EmitDebug = true;
9079     Info.RTArgs.MapNamesArray =
9081     Info.EmitDebug = false;
9086   if (Info.separateBeginEndCalls()) {
9087     bool EndMapTypesDiffer = false;
9089       if (Type & static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9090               OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9091         Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9092             OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9093         EndMapTypesDiffer = true;
9096     if (EndMapTypesDiffer) {
9097       MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9098       Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9103   for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9104     Value *BPVal = CombinedInfo.BasePointers[I];
9105     Value *BP = Builder.CreateConstInBoundsGEP2_32(
9108     Builder.CreateAlignedStore(BPVal, BP,
9109                                M.getDataLayout().getPrefTypeAlign(PtrTy));
9111     if (Info.requiresDevicePointerInfo()) {
9112       if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Pointer) {
9113         CodeGenIP = Builder.saveIP();
9114         Builder.restoreIP(AllocaIP);
9115         Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9116         Builder.restoreIP(CodeGenIP);
9118           DeviceAddrCB(I, Info.DevicePtrInfoMap[BPVal].second);
9119       } else if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Address) {
9120         Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9122           DeviceAddrCB(I, BP);
9126     Value *PVal = CombinedInfo.Pointers[I];
9127     Value *P = Builder.CreateConstInBoundsGEP2_32(
9131     Builder.CreateAlignedStore(PVal, P,
9132                                M.getDataLayout().getPrefTypeAlign(PtrTy));
9134     if (RuntimeSizes.test(I)) {
9135       Value *S = Builder.CreateConstInBoundsGEP2_32(
9139       Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[I],
9142                                  S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9145     unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9148     auto CustomMFunc = CustomMapperCB(I);
9150       return CustomMFunc.takeError();
9152       MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9154     Value *MAddr = Builder.CreateInBoundsGEP(
9156         {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9157     Builder.CreateAlignedStore(
9158         MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->getType()));
9161   if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9162       Info.NumberOfPtrs == 0)
9164   emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo, Info);
9169   BasicBlock *CurBB = Builder.GetInsertBlock();
9176     Builder.CreateBr(Target);
9179   Builder.ClearInsertionPoint();
9184   BasicBlock *CurBB = Builder.GetInsertBlock();
9200   Builder.SetInsertPoint(BB);
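// emitIfClause folds a constant 'if' condition directly into the then/else
// generator; otherwise it emits the usual cond-br diamond and lets both
// callbacks generate into their respective blocks before rejoining at ContBlock.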
9203Error OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
9204                                    BodyGenCallbackTy ElseGen,
9205                                    InsertPointTy AllocaIP) {
9209     auto CondConstant = CI->getSExtValue();
9211       return ThenGen(AllocaIP, Builder.saveIP());
9213     return ElseGen(AllocaIP, Builder.saveIP());
9223   Builder.CreateCondBr(Cond, ThenBlock, ElseBlock);
9225   emitBlock(ThenBlock, CurFn);
9226   if (Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9228   emitBranch(ContBlock);
9231   emitBlock(ElseBlock, CurFn);
9232   if (Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9235   emitBranch(ContBlock);
9237   emitBlock(ContBlock, CurFn, true);
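// The atomic entry points below (read/write/update/capture/compare) share a
// pattern: verify that X is a pointer to a scalar, pick a lowering (a plain
// atomic instruction, an integer-cast load/store, or an __atomic_* libcall for
// unsupported sizes), and finish with checkAndEmitFlushAfterAtomic.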
9241bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9245          "Unexpected Atomic Ordering.");
9302OpenMPIRBuilder::InsertPointTy
9303OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
9304                                  AtomicOpValue &X, AtomicOpValue &V,
9306   if (!updateToLocation(Loc))
9309   assert(X.Var->getType()->isPointerTy() &&
9310          "OMP Atomic expects a pointer to target memory");
9311   Type *XElemTy = X.ElemTy;
9314          "OMP atomic read expected a scalar type");
9316   Value *XRead = nullptr;
9320         Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
9326     LoadInst *OldVal = Builder.CreateLoad(XElemTy, X.Var, "omp.atomic.read");
9331     OpenMPIRBuilder::AtomicInfo atomicInfo(
9332         &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->getAlign(),
9333         OldVal->getAlign(), true, AllocaIP, X.Var);
9334     auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9335     XRead = AtomicLoadRes.first;
9342         Builder.CreateLoad(IntCastTy, X.Var, X.IsVolatile, "omp.atomic.load");
9345       XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
9347       XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
9350   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
9351   Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9352   return Builder.saveIP();
9355OpenMPIRBuilder::InsertPointTy
9356OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
9357                                   AtomicOpValue &X, Value *Expr,
9359   if (!updateToLocation(Loc))
9362   assert(X.Var->getType()->isPointerTy() &&
9363          "OMP Atomic expects a pointer to target memory");
9364   Type *XElemTy = X.ElemTy;
9367          "OMP atomic write expected a scalar type");
9370     StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
9373     LoadInst *OldVal = Builder.CreateLoad(XElemTy, X.Var, "omp.atomic.read");
9377     OpenMPIRBuilder::AtomicInfo atomicInfo(
9378         &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->getAlign(),
9379         OldVal->getAlign(), true, AllocaIP, X.Var);
9380     atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9387         Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
9388     StoreInst *XSt = Builder.CreateStore(ExprCast, X.Var, X.IsVolatile);
9392   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
9393   return Builder.saveIP();
9396OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9397     const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
9399     AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
9400     bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
9402   if (!updateToLocation(Loc))
9406   Type *XTy = X.Var->getType();
9408          "OMP Atomic expects a pointer to target memory");
9409   Type *XElemTy = X.ElemTy;
9412          "OMP atomic update expected a scalar type");
9415          "OpenMP atomic does not support LT or GT operations");
9419       AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
9420       IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9422     return AtomicResult.takeError();
9423   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
9424   return Builder.saveIP();
9428Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
9432     return Builder.CreateAdd(Src1, Src2);
9434     return Builder.CreateSub(Src1, Src2);
9436     return Builder.CreateAnd(Src1, Src2);
9438     return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9440     return Builder.CreateOr(Src1, Src2);
9442     return Builder.CreateXor(Src1, Src2);
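// emitAtomicUpdate either emits a single atomicrmw (when the update is a simple
// binop on X that the target supports directly) or falls back to a cmpxchg
// retry loop: load the old value, run the user callback to compute the new one,
// and attempt the exchange until it succeeds.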
9467     AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr,
9468     bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
9471   bool emitRMWOp = false;
9479     emitRMWOp = XElemTy;
9482     emitRMWOp = (IsXBinopExpr && XElemTy);
9489   std::pair<Value *, Value *> Res;
9494     if (IsIgnoreDenormalMode)
9495       RMWInst->setMetadata("amdgpu.ignore.denormal.mode",
9497     if (!IsFineGrainedMemory)
9498       RMWInst->setMetadata("amdgpu.no.fine.grained.memory",
9500     if (!IsRemoteMemory)
9504     Res.first = RMWInst;
9509       Res.second = Res.first;
9511       Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9515         Builder.CreateLoad(XElemTy, X, X->getName() + ".atomic.load");
9521     OpenMPIRBuilder::AtomicInfo atomicInfo(
9522         &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->getAlign(),
9523         OldVal->getAlign(), true, AllocaIP, X);
9524     auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9525     BasicBlock *CurBB = Builder.GetInsertBlock();
9527     CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9531         X->getName() + ".atomic.cont");
9533     Builder.restoreIP(AllocaIP);
9534     AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9535     NewAtomicAddr->setName(X->getName() + "x.new.val");
9536     Builder.SetInsertPoint(ContBB);
9538     PHI->addIncoming(AtomicLoadRes.first, CurBB);
9543     Value *Upd = *CBResult;
9544     Builder.CreateStore(Upd, NewAtomicAddr);
9547     auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9548         AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9550     PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9551     Builder.CreateCondBr(Result.second, ExitBB, ContBB);
9553     Res.first = OldExprVal;
9559       Builder.SetInsertPoint(ExitBB);
9561       Builder.SetInsertPoint(ExitTI);
9567         Builder.CreateLoad(IntCastTy, X, X->getName() + ".atomic.load");
9574     BasicBlock *CurBB = Builder.GetInsertBlock();
9576     CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9580         X->getName() + ".atomic.cont");
9582     Builder.restoreIP(AllocaIP);
9583     AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9584     NewAtomicAddr->setName(X->getName() + "x.new.val");
9585     Builder.SetInsertPoint(ContBB);
9587     PHI->addIncoming(OldVal, CurBB);
9592       OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
9593                                          X->getName() + ".atomic.fltCast");
9595       OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
9596                                           X->getName() + ".atomic.ptrCast");
9603     Value *Upd = *CBResult;
9604     Builder.CreateStore(Upd, NewAtomicAddr);
9605     LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9610     Result->setVolatile(VolatileX);
9611     Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9612     Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9613     PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9614     Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9616     Res.first = OldExprVal;
9623       Builder.SetInsertPoint(ExitBB);
9625       Builder.SetInsertPoint(ExitTI);
9632OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9633     const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
9636     bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
9637     bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
9638   if (!updateToLocation(Loc))
9642   Type *XTy = X.Var->getType();
9644          "OMP Atomic expects a pointer to target memory");
9645   Type *XElemTy = X.ElemTy;
9648          "OMP atomic capture expected a scalar type");
9650          "OpenMP atomic does not support LT or GT operations");
9657       AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile,
9658       IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9661   Value *CapturedVal =
9662       (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9663   Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9665   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
9666   return Builder.saveIP();
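// createAtomicCompare maps '==' comparisons onto cmpxchg and min/max
// comparisons onto the corresponding atomicrmw, then stores the captured old
// (or selected) value into V and, when requested, the success flag into R.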
9669OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9670     const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
9676   return createAtomicCompare(Loc, X, V, R, E, D, AO, Op, IsXBinopExpr,
9677                              IsPostfixUpdate, IsFailOnly, Failure);
9680OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9681     const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V,
9686   if (!updateToLocation(Loc))
9689   assert(X.Var->getType()->isPointerTy() &&
9690          "OMP atomic expects a pointer to target memory");
9693     assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
9694     assert(V.ElemTy == X.ElemTy && "x and v must be of same type");
9697   bool IsInteger = E->getType()->isIntegerTy();
9699   if (Op == OMPAtomicCompareOp::EQ) {
9704       Value *EBCast = Builder.CreateBitCast(E, IntCastTy);
9705       Value *DBCast = Builder.CreateBitCast(D, IntCastTy);
9710         Builder.CreateAtomicCmpXchg(X.Var, E, D, MaybeAlign(), AO, Failure);
9714       Value *OldValue = Builder.CreateExtractValue(Result, 0);
9716         OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
9718              "OldValue and V must be of same type");
9719       if (IsPostfixUpdate) {
9720         Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9722         Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9733           BasicBlock *CurBB = Builder.GetInsertBlock();
9735           CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9737               CurBBTI, X.Var->getName() + ".atomic.exit");
9743           Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9745           Builder.SetInsertPoint(ContBB);
9746           Builder.CreateStore(OldValue, V.Var);
9747           Builder.CreateBr(ExitBB);
9752             Builder.SetInsertPoint(ExitBB);
9754             Builder.SetInsertPoint(ExitTI);
9757           Value *CapturedValue =
9758               Builder.CreateSelect(SuccessOrFail, E, OldValue);
9759           Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9765       assert(R.Var->getType()->isPointerTy() &&
9766              "r.var must be of pointer type");
9767       assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
9769       Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9770       Value *ResultCast = R.IsSigned
9771                               ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
9772                               : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
9773       Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
9776     assert((Op == OMPAtomicCompareOp::MAX || Op == OMPAtomicCompareOp::MIN) &&
9777            "Op should be either max or min at this point");
9778     assert(!IsFailOnly && "IsFailOnly is only valid when the comparison is ==");
9816         Builder.CreateAtomicRMW(NewOp, X.Var, E, MaybeAlign(), AO);
9818     Value *CapturedValue = nullptr;
9819     if (IsPostfixUpdate) {
9820       CapturedValue = OldValue;
9845       Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue, E);
9846       CapturedValue = Builder.CreateSelect(NonAtomicCmp, E, OldValue);
9848     Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
9852   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
9854   return Builder.saveIP();
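// createTeams outlines the teams region and, on the host, wraps the outlined
// body in a __kmpc_fork_teams call; explicit num_teams/thread_limit/if clauses
// are forwarded to the runtime beforehand via __kmpc_push_num_teams_51.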
9857OpenMPIRBuilder::InsertPointOrErrorTy
9858OpenMPIRBuilder::createTeams(const LocationDescription &Loc,
9859                             BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower,
9862   if (!updateToLocation(Loc))
9863     return InsertPointTy();
9866   Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
9867   Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
9872   if (&OuterAllocaBB == Builder.GetInsertBlock()) {
9873     BasicBlock *BodyBB = splitBB(Builder, true, "teams.entry");
9874     Builder.SetInsertPoint(BodyBB, BodyBB->begin());
9894   BasicBlock *ExitBB = splitBB(Builder, true, "teams.exit");
9895   BasicBlock *BodyBB = splitBB(Builder, true, "teams.body");
9897       splitBB(Builder, true, "teams.alloca");
9899   bool SubClausesPresent =
9900       (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
9902   if (!Config.isTargetDevice() && SubClausesPresent) {
9903     assert((NumTeamsLower == nullptr || NumTeamsUpper != nullptr) &&
9904            "if lowerbound is non-null, then upperbound must also be non-null "
9905            "for bounds on num_teams");
9907     if (NumTeamsUpper == nullptr)
9908       NumTeamsUpper = Builder.getInt32(0);
9910     if (NumTeamsLower == nullptr)
9911       NumTeamsLower = NumTeamsUpper;
9915              "argument to if clause must be an integer value");
9919         IfExpr = Builder.CreateICmpNE(IfExpr,
9920                                       ConstantInt::get(IfExpr->getType(), 0));
9921       NumTeamsUpper = Builder.CreateSelect(
9922           IfExpr, NumTeamsUpper, Builder.getInt32(1), "numTeamsUpper");
9925       NumTeamsLower = Builder.CreateSelect(
9926           IfExpr, NumTeamsLower, Builder.getInt32(1), "numTeamsLower");
9929     if (ThreadLimit == nullptr)
9930       ThreadLimit = Builder.getInt32(0);
9932     Value *ThreadNum = getOrCreateThreadID(Ident);
9934         getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
9935         {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
9938   InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
9939   InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
9940   if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
9944   OI.EntryBB = AllocaBB;
9946   OI.OuterAllocaBB = &OuterAllocaBB;
9950   InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.begin());
9952       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true));
9954       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
9956   auto HostPostOutlineCB = [this, Ident,
9957                             ToBeDeleted](Function &OutlinedFn) mutable {
9962            "there must be a single user for the outlined function");
9967            "Outlined function must have two or three arguments only");
9969     bool HasShared = OutlinedFn.arg_size() == 3;
9977     assert(StaleCI && "Error while outlining - no CallInst user found for the "
9978                       "outlined function.");
9979     Builder.SetInsertPoint(StaleCI);
9981         Ident, Builder.getInt32(StaleCI->arg_size() - 2), &OutlinedFn};
9984       Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
9985                              omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
9989       I->eraseFromParent();
9992   if (!Config.isTargetDevice())
9993     OI.PostOutlineCB = HostPostOutlineCB;
9995   addOutlineInfo(std::move(OI));
9997   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
9999   return Builder.saveIP();
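// createDistribute only sets up the entry/body/exit block skeleton and records
// the region as an OutlineInfo; the distribute loop itself is produced by the
// body-generation callback.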
10002OpenMPIRBuilder::InsertPointOrErrorTy
10003OpenMPIRBuilder::createDistribute(const LocationDescription &Loc,
10004                                  InsertPointTy OuterAllocaIP,
10005                                  BodyGenCallbackTy BodyGenCB) {
10006   if (!updateToLocation(Loc))
10007     return InsertPointTy();
10009   BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10011   if (OuterAllocaBB == Builder.GetInsertBlock()) {
10013         splitBB(Builder, true, "distribute.entry");
10014     Builder.SetInsertPoint(BodyBB, BodyBB->begin());
10017       splitBB(Builder, true, "distribute.exit");
10019       splitBB(Builder, true, "distribute.body");
10021       splitBB(Builder, true, "distribute.alloca");
10024   InsertPointTy AllocaIP(AllocaBB, AllocaBB->begin());
10025   InsertPointTy CodeGenIP(BodyBB, BodyBB->begin());
10026   if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10030   OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10031   OI.EntryBB = AllocaBB;
10032   OI.ExitBB = ExitBB;
10034   addOutlineInfo(std::move(OI));
10035   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
10037   return Builder.saveIP();
10042                                                  std::string VarName) {
10048       M, MapNamesArrayInit->getType(),
10051   return MapNamesArrayGlobal;
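// initializeTypes expands the OMP_TYPE/OMP_ARRAY_TYPE/OMP_FUNCTION_TYPE/
// OMP_STRUCT_TYPE records in OMPKinds.def into the cached LLVM types used by
// the builder, creating pointer variants in the configured default target
// address space.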
10056void OpenMPIRBuilder::initializeTypes(Module &M) {
10059 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10060#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10061#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10062 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10063 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10064#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10065 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10066 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10067#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10068 T = StructType::getTypeByName(Ctx, StructName); \
10070 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10072 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10073#include "llvm/Frontend/OpenMP/OMPKinds.def"
10076void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10084   while (!Worklist.empty()) {
10088       if (BlockSet.insert(SuccBB).second)
10097   if (!Config.isGPU()) {
10112   Fn->addFnAttr("uniform-work-group-size", "true");
10113   Fn->addFnAttr(Attribute::MustProgress);
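// createOffloadEntriesAndInfoMetadata serializes every registered offload
// entry into the !omp_offload.info named metadata and emits the corresponding
// offload entries, reporting through ErrorFn when an entry is missing an
// address or ID.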
10117void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10118     EmitMetadataErrorReportFunctionTy &ErrorFn) {
10121   if (OffloadInfoManager.empty())
10125   SmallVector<std::pair<const OffloadEntriesInfoManager::OffloadEntryInfo *,
10126                         TargetRegionEntryInfo>,
10128       OrderedEntries(OffloadInfoManager.size());
10131   auto &&GetMDInt = [this](unsigned V) {
10138   NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
10139   auto &&TargetRegionMetadataEmitter =
10140       [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10141           const TargetRegionEntryInfo &EntryInfo,
10142           const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &E) {
10155             GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
10156             GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10157             GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10158             GetMDInt(E.getOrder())};
10161         OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
10167   OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10170   auto &&DeviceGlobalVarMetadataEmitter =
10171       [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10173           const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &E) {
10181         Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
10182                            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
10185         TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10186         OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
10192   OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10193       DeviceGlobalVarMetadataEmitter);
10195   for (const auto &E : OrderedEntries) {
10196     assert(E.first && "All ordered entries must exist!");
10197     if (const auto *CE =
10200       if (!CE->getID() || !CE->getAddress()) {
10202         TargetRegionEntryInfo EntryInfo = E.second;
10203         StringRef FnName = EntryInfo.ParentName;
10204         if (!M.getNamedValue(FnName))
10206         ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10209       createOffloadEntry(CE->getID(), CE->getAddress(),
10212     } else if (const auto *CE = dyn_cast<
10213                    OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10215       OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10216           static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
10219       case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10220       case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10221         if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10223         if (!CE->getAddress()) {
10224           ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second);
10228         if (CE->getVarSize() == 0)
10231       case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10232         assert(((Config.isTargetDevice() && !CE->getAddress()) ||
10233                 (!Config.isTargetDevice() && CE->getAddress())) &&
10234                "Declaret target link address is set.");
10235         if (Config.isTargetDevice())
10237         if (!CE->getAddress()) {
10238           ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10250         if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
10251             Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10256       if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10257         createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
10258                            Flags, CE->getLinkage(), CE->getVarName());
10260         createOffloadEntry(CE->getAddress(), CE->getAddress(), CE->getVarSize(),
10261                            Flags, CE->getLinkage());
10272   if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10277         OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10278         Config.getRequiresFlags());
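// Target region kernels are named <prefix><device-id>_<file-id>_<parent>_l<line>
// (with a trailing _<count> when the same location is outlined more than once);
// the two getTargetRegionEntryFnName overloads below assemble that string.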
10281void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10283     unsigned FileID, unsigned Line, unsigned Count) {
10285   OS << KernelNamePrefix << llvm::format("%x", DeviceID)
10286      << llvm::format("_%x_", FileID) << ParentName << "_l" << Line;
10288     OS << "_" << Count;
10291void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10293   unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10294   TargetRegionEntryInfo::getTargetRegionEntryFnName(
10295       Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10296       EntryInfo.Line, NewCount);
10299TargetRegionEntryInfo
10300OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10303   auto FileIDInfo = CallBack();
10309     FileID = hash_value(std::get<0>(FileIDInfo));
10311     FileID = ID.getFile();
10313   return TargetRegionEntryInfo(ParentName, ID.getDevice(), FileID,
10314                                std::get<1>(FileIDInfo));
10317unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10320       static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
10322        !(Remain & 1); Remain = Remain >> 1)
10328OpenMPIRBuilder::getMemberOfFlag(unsigned Position) {
10331                                      << getFlagMemberOffset());
10334void OpenMPIRBuilder::setCorrectMemberOfFlag(
10340   if (static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
10342       static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
10349   Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10350   Flags |= MemberOfFlag;
10353Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10354     OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10355     OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10356     bool IsDeclaration, bool IsExternallyVisible,
10357     TargetRegionEntryInfo EntryInfo, StringRef MangledName,
10358     std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
10359     std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
10360     std::function<Constant *()> GlobalInitializer,
10367   if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10368       ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10370             OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10371        Config.hasRequiresUnifiedSharedMemory())) {
10376     if (!IsExternallyVisible)
10377       OS << format("_%x", EntryInfo.FileID);
10378     OS << "_decl_tgt_ref_ptr";
10381     Value *Ptr = M.getNamedValue(PtrName);
10385       Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10390       if (!Config.isTargetDevice()) {
10391         if (GlobalInitializer)
10392           GV->setInitializer(GlobalInitializer());
10397       registerTargetGlobalVariable(
10398           CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10399           EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10409void OpenMPIRBuilder::registerTargetGlobalVariable(
10410     OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10411     OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10412     bool IsDeclaration, bool IsExternallyVisible,
10413     TargetRegionEntryInfo EntryInfo, StringRef MangledName,
10414     std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
10415     std::vector<Triple> TargetTriple,
10416     std::function<Constant *()> GlobalInitializer,
10419   if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10420       (TargetTriple.empty() && !Config.isTargetDevice()))
10423   OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10428   if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10430           OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10431       !Config.hasRequiresUnifiedSharedMemory()) {
10432     Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10436     if (!IsDeclaration)
10438           M.getDataLayout().getTypeSizeInBits(LlvmVal->getValueType()), 8);
10445     if (Config.isTargetDevice() &&
10449       if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10452       std::string RefName = createPlatformSpecificName({VarName, "ref"});
10454       if (!M.getNamedValue(RefName)) {
10456             getOrCreateInternalVariable(Addr->getType(), RefName);
10458         GvAddrRef->setConstant(true);
10460         GvAddrRef->setInitializer(Addr);
10461         GeneratedRefs.push_back(GvAddrRef);
10465     if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10466       Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10468       Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10470     if (Config.isTargetDevice()) {
10474       Addr = getAddrOfDeclareTargetVar(
10475           CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10476           EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10477           LlvmPtrTy, GlobalInitializer, VariableLinkage);
10480       VarSize = M.getDataLayout().getPointerSize();
10484   OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10490void OpenMPIRBuilder::loadOffloadInfoMetadata(Module &M) {
10494   NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10499     auto &&GetMDInt = [MN](unsigned Idx) {
10504     auto &&GetMDString = [MN](unsigned Idx) {
10506       return V->getString();
10509     switch (GetMDInt(0)) {
10513     case OffloadEntriesInfoManager::OffloadEntryInfo::
10514         OffloadingEntryInfoTargetRegion: {
10515       TargetRegionEntryInfo EntryInfo(GetMDString(3),
10520       OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10524     case OffloadEntriesInfoManager::OffloadEntryInfo::
10525         OffloadingEntryInfoDeviceGlobalVar:
10526       OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10528           static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
10538   if (HostFilePath.empty())
10542   if (std::error_code Err = Buf.getError()) {
10544                          "OpenMPIRBuilder: " +
10552   if (std::error_code Err = M.getError()) {
10554         ("error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10558   loadOffloadInfoMetadata(*M.get());
10565bool OffloadEntriesInfoManager::empty() const {
10566   return OffloadEntriesTargetRegion.empty() &&
10567          OffloadEntriesDeviceGlobalVar.empty();
10570unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10571     const TargetRegionEntryInfo &EntryInfo) const {
10572   auto It = OffloadEntriesTargetRegionCount.find(
10573       getTargetRegionEntryCountKey(EntryInfo));
10574   if (It == OffloadEntriesTargetRegionCount.end())
10579void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10580     const TargetRegionEntryInfo &EntryInfo) {
10581   OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10582       EntryInfo.Count + 1;
10586void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10587     const TargetRegionEntryInfo &EntryInfo, unsigned Order) {
10588   OffloadEntriesTargetRegion[EntryInfo] =
10589       OffloadEntryInfoTargetRegion(Order, nullptr, nullptr,
10590                                    OMPTargetRegionEntryTargetRegion);
10591   ++OffloadingEntriesNum;
10594void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10596     OMPTargetRegionEntryKind Flags) {
10597   assert(EntryInfo.Count == 0 && "expected default EntryInfo");
10600   EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10604   if (OMPBuilder->Config.isTargetDevice()) {
10606     if (!hasTargetRegionEntryInfo(EntryInfo)) {
10609     auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10610     Entry.setAddress(Addr);
10612     Entry.setFlags(Flags);
10614     if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10615         hasTargetRegionEntryInfo(EntryInfo, true))
10617     assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10618            "Target region entry already registered!");
10619     OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr, ID, Flags);
10620     OffloadEntriesTargetRegion[EntryInfo] = Entry;
10621     ++OffloadingEntriesNum;
10623   incrementTargetRegionEntryInfoCount(EntryInfo);
10626bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10627     TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId) const {
10630   EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10632   auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10633   if (It == OffloadEntriesTargetRegion.end()) {
10637   if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10642void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10643     const OffloadTargetRegionEntryInfoActTy &Action) {
10645   for (const auto &It : OffloadEntriesTargetRegion) {
10646     Action(It.first, It.second);
10650void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10651     StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order) {
10652   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10653   ++OffloadingEntriesNum;
10656void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10659   if (OMPBuilder->Config.isTargetDevice()) {
10661     if (!hasDeviceGlobalVarEntryInfo(VarName))
10663     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
10664     if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10665       if (Entry.getVarSize() == 0) {
10666         Entry.setVarSize(VarSize);
10671       Entry.setVarSize(VarSize);
10673     Entry.setAddress(Addr);
10675     if (hasDeviceGlobalVarEntryInfo(VarName)) {
10676       auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
10677       assert(Entry.isValid() && Entry.getFlags() == Flags &&
10678              "Entry not initialized!");
10679       if (Entry.getVarSize() == 0) {
10680         Entry.setVarSize(VarSize);
10685     if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10686       OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10687                                                 Addr, VarSize, Flags, Linkage,
10690       OffloadEntriesDeviceGlobalVar.try_emplace(
10691           VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, "");
10692     ++OffloadingEntriesNum;
10696void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10697     const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10699   for (const auto &E : OffloadEntriesDeviceGlobalVar)
10700     Action(E.getKey(), E.getValue());
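// CanonicalLoopInfo bookkeeping: the control blocks always form
// preheader -> header -> cond -> body ... latch, with cond also branching to
// exit -> after; assertOK() below re-checks exactly that shape after every
// loop transformation.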
10707void CanonicalLoopInfo::collectControlBlocks(
10714   BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
10717BasicBlock *CanonicalLoopInfo::getPreheader() const {
10726void CanonicalLoopInfo::setTripCount(Value *TripCount) {
10738void CanonicalLoopInfo::mapIndVar(
10748   for (Use &U : OldIV->uses()) {
10752     if (User->getParent() == getCond())
10754     if (User->getParent() == getLatch())
10760   Value *NewIV = Updater(OldIV);
10763   for (Use *U : ReplacableUses)
10771void CanonicalLoopInfo::assertOK() const {
10784          "Preheader must terminate with unconditional branch");
10786          "Preheader must jump to header");
10790          "Header must terminate with unconditional branch");
10791   assert(Header->getSingleSuccessor() == Cond &&
10792          "Header must jump to exiting block");
10795   assert(Cond->getSinglePredecessor() == Header &&
10796          "Exiting block only reachable from header");
10799          "Exiting block must terminate with conditional branch");
10801          "Exiting block must have two successors");
10803          "Exiting block's first successor jump to the body");
10805          "Exiting block's second successor must exit the loop");
10809          "Body only reachable from exiting block");
10814          "Latch must terminate with unconditional branch");
10823          "Exit block must terminate with unconditional branch");
10824   assert(Exit->getSingleSuccessor() == After &&
10825          "Exit block must jump to after block");
10829          "After block only reachable from exit block");
10833   assert(IndVar && "Canonical induction variable not found?");
10835          "Induction variable must be an integer");
10837          "Induction variable must be a PHI in the loop header");
10843   auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
10850   Value *TripCount = getTripCount();
10851   assert(TripCount && "Loop trip count not found?");
10853          "Trip count and induction variable must have the same type");
10857          "Exit condition must be a signed less-than comparison");
10859          "Exit condition must compare the induction variable");
10861          "Exit condition must compare with the trip count");
10865void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void addSimdMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause)
Determine the schedule type using schedule and ordering clause arguments.
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait)
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn)
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
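A minimal sketch of how the AllocaInst accessors above combine when inspecting a stack slot; the helper name describeAlloca and the errs() output are illustrative, not part of OMPIRBuilder.
#include <optional>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Illustrative helper: dump the key properties of a stack allocation.
static void describeAlloca(const AllocaInst &AI, const DataLayout &DL) {
  errs() << "alloca of " << *AI.getAllocatedType() << " x "
         << *AI.getArraySize() << ", align " << AI.getAlign().value() << "\n";
  // Total size in bytes, available when the element count is a compile-time
  // constant; guard against scalable types before asking for a fixed value.
  if (std::optional<TypeSize> Size = AI.getAllocationSize(DL))
    if (!Size->isScalable())
      errs() << "  size: " << Size->getFixedValue() << " bytes\n";
}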
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
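The BinOp kinds above are what IRBuilderBase::CreateAtomicRMW consumes; a hedged sketch (helper name and operand choice are illustrative) of a monotonic signed-max update, assuming Ptr addresses storage of Val's integer type.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Illustrative sketch: atomically perform "*Ptr = max(*Ptr, Val)" (signed)
// with monotonic ordering; passing MaybeAlign() lets the builder infer the
// access alignment from the data layout.
static Value *emitAtomicSignedMax(IRBuilderBase &Builder, Value *Ptr,
                                  Value *Val) {
  return Builder.CreateAtomicRMW(AtomicRMWInst::Max, Ptr, Val, MaybeAlign(),
                                 AtomicOrdering::Monotonic);
}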
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
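A hedged illustration of the block-splitting entries above; the helper and the ".split" suffix are invented for this sketch.
#include <cassert>
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Illustrative sketch: split BB right before its terminator so new code can
// be appended to the first half; splitBasicBlock leaves an unconditional
// branch from the old block to the new one, keeping the CFG well formed.
static BasicBlock *splitBeforeTerminator(BasicBlock *BB) {
  Instruction *Term = BB->getTerminator();
  assert(Term && "expected a well-formed block");
  return BB->splitBasicBlock(Term->getIterator(), BB->getName() + ".split");
}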
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
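These predicates feed the IRBuilder compare helpers; a hedged sketch (the helper name and the "omp.guard" label are illustrative) of a signed trip-count guard.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Illustrative sketch: compare an induction variable against the trip count
// using the signed-less-than predicate listed above.
static Value *emitSignedGuard(IRBuilderBase &Builder, Value *IV,
                              Value *TripCount) {
  return Builder.CreateICmp(CmpInst::ICMP_SLT, IV, TripCount, "omp.guard");
}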
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
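All of these constant classes share the same factory pattern: values are uniqued and obtained through static get-style methods rather than constructed directly. A small hedged sketch under that assumption.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Illustrative sketch: a few uniqued constants built via the factories above.
static void makeSomeConstants(LLVMContext &Ctx) {
  IntegerType *I32 = Type::getInt32Ty(Ctx);
  Constant *Zero = Constant::getNullValue(I32);         // i32 0
  Constant *MinusOne = ConstantInt::getSigned(I32, -1); // i32 -1
  Constant *True = ConstantInt::getTrue(Ctx);           // i1 true
  (void)Zero; (void)MinusOne; (void)True;
}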
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
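A hedged sketch of the Function creation and attribute entries above; the name "helper", its signature, and the nounwind attribute are chosen purely for illustration.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Illustrative sketch: declare "void @helper(i32)" with internal linkage in M
// and mark it nounwind.
static Function *declareHelper(Module &M) {
  LLVMContext &Ctx = M.getContext();
  FunctionType *FTy = FunctionType::get(
      Type::getVoidTy(Ctx), {Type::getInt32Ty(Ctx)}, /*isVarArg=*/false);
  Function *F =
      Function::Create(FTy, GlobalValue::InternalLinkage, "helper", &M);
  F->addFnAttr(Attribute::NoUnwind);
  return F;
}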
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
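A hedged sketch combining the linkage, dso_local and initializer entries above; the global's name and type are made up for illustration, and the module takes ownership of the new global.
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;

// Illustrative sketch: an internal, dso_local i32 global initialized to zero.
static GlobalVariable *makeCounter(Module &M) {
  Type *I32 = Type::getInt32Ty(M.getContext());
  auto *GV = new GlobalVariable(M, I32, /*isConstant=*/false,
                                GlobalValue::InternalLinkage,
                                Constant::getNullValue(I32), "omp.counter");
  GV->setDSOLocal(true);
  return GV;
}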
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
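A hedged sketch of the save/restore pattern these InsertPoint and IRBuilderBase entries describe; note that the restoreIPandDebugLoc wrapper listed earlier additionally restores the debug location, which plain restoreIP does not.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Illustrative sketch: temporarily emit at the top of Entry, then return to
// wherever the builder was pointing before.
static void emitIntoEntry(IRBuilderBase &Builder, BasicBlock &Entry) {
  IRBuilderBase::InsertPoint SavedIP = Builder.saveIP();
  Builder.SetInsertPoint(&Entry, Entry.getFirstInsertionPt());
  // ... create instructions here ...
  Builder.restoreIP(SavedIP);
}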
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
LoopT * getLoopFor(const BlockT *BB) const
Return the innermost loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
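These metadata entries are the building blocks that addLoopMetadata manipulates; a hedged sketch of the conventional loop-ID shape: a distinct tuple whose first operand refers back to itself, followed by property nodes such as MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.enable")).
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Illustrative sketch: build a loop ID node. The first operand starts as a
// placeholder and is replaced with the node itself so the ID stays distinct.
static MDNode *makeLoopID(LLVMContext &Ctx, ArrayRef<Metadata *> Properties) {
  SmallVector<Metadata *> Ops;
  Ops.push_back(nullptr);
  Ops.append(Properties.begin(), Properties.end());
  MDNode *LoopID = MDNode::getDistinct(Ctx, Ops);
  LoopID->replaceOperandWith(0, LoopID);
  return LoopID;
}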
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple, which describes the platform this module targets.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
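A hedged sketch tying the StringRef queries above together; the helper and the example suffix are illustrative, and StringRef never owns the underlying characters, so nothing is copied.
#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Illustrative sketch: strip a known suffix from a symbol name without
// allocating, e.g. stripSuffix("foo.omp_outlined", ".omp_outlined") == "foo".
static StringRef stripSuffix(StringRef Name, StringRef Suffix) {
  if (!Suffix.empty() && Name.ends_with(Suffix))
    return Name.drop_back(Suffix.size());
  return Name;
}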
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
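A hedged sketch using the Type factories and predicates above; the widening policy is invented purely for illustration.
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;

// Illustrative sketch: map any integer type narrower than 32 bits to i32 and
// leave every other type unchanged.
static Type *widenToAtLeastI32(LLVMContext &Ctx, Type *Ty) {
  if (Ty->isIntegerTy() && Ty->getIntegerBitWidth() < 32)
    return Type::getInt32Ty(Ctx);
  return Ty;
}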
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
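A hedged sketch of the use-rewriting entries above; the helper and the restrict-to-one-function policy are illustrative only.
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Illustrative sketch: replace every use of Old with New, or only the uses
// inside function OnlyIn when one is supplied.
static void redirectUses(Value &Old, Value &New, const Function *OnlyIn) {
  if (!OnlyIn) {
    Old.replaceAllUsesWith(&New);
    return;
  }
  Old.replaceUsesWithIf(&New, [OnlyIn](Use &U) {
    auto *I = dyn_cast<Instruction>(U.getUser());
    return I && I->getFunction() == OnlyIn;
  });
}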
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to a SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI void emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
LLVM_ABI std::error_code getUniqueID(const Twine Path, UniqueID &Result)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
ConstantFoldInsertValueInstruction - Attempt to constant fold an insertvalue instruction with the spe...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...