#define DEBUG_TYPE "openmp-ir-builder"
    cl::desc("Use optimistic attributes describing "
             "'as-if' properties of runtime calls."),
58 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
59 cl::desc(
"Factor for the unroll threshold to account for code "
60 "simplifications still taking place"),
  if (!IP1.isSet() || !IP2.isSet())
    return false;
  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
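
// Whitelist of schedule types the workshare-loop lowering can emit; the
// monotonicity bits are masked off first because they do not affect validity.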
  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
  case OMPScheduleType::UnorderedStaticChunked:
  case OMPScheduleType::UnorderedStatic:
  case OMPScheduleType::UnorderedDynamicChunked:
  case OMPScheduleType::UnorderedGuidedChunked:
  case OMPScheduleType::UnorderedRuntime:
  case OMPScheduleType::UnorderedAuto:
  case OMPScheduleType::UnorderedTrapezoidal:
  case OMPScheduleType::UnorderedGreedy:
  case OMPScheduleType::UnorderedBalanced:
  case OMPScheduleType::UnorderedGuidedIterativeChunked:
  case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::UnorderedSteal:
  case OMPScheduleType::UnorderedStaticBalancedChunked:
  case OMPScheduleType::UnorderedGuidedSimd:
  case OMPScheduleType::UnorderedRuntimeSimd:
  case OMPScheduleType::OrderedStaticChunked:
  case OMPScheduleType::OrderedStatic:
  case OMPScheduleType::OrderedDynamicChunked:
  case OMPScheduleType::OrderedGuidedChunked:
  case OMPScheduleType::OrderedRuntime:
  case OMPScheduleType::OrderedAuto:
  case OMPScheduleType::OrderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedStaticChunked:
  case OMPScheduleType::NomergeUnorderedStatic:
  case OMPScheduleType::NomergeUnorderedDynamicChunked:
  case OMPScheduleType::NomergeUnorderedGuidedChunked:
  case OMPScheduleType::NomergeUnorderedRuntime:
  case OMPScheduleType::NomergeUnorderedAuto:
  case OMPScheduleType::NomergeUnorderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedGreedy:
  case OMPScheduleType::NomergeUnorderedBalanced:
  case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
  case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::NomergeUnorderedSteal:
  case OMPScheduleType::NomergeOrderedStaticChunked:
  case OMPScheduleType::NomergeOrderedStatic:
  case OMPScheduleType::NomergeOrderedDynamicChunked:
  case OMPScheduleType::NomergeOrderedGuidedChunked:
  case OMPScheduleType::NomergeOrderedRuntime:
  case OMPScheduleType::NomergeOrderedAuto:
  case OMPScheduleType::NomergeOrderedTrapezoidal:
  OMPScheduleType MonotonicityFlags =
      SchedType & OMPScheduleType::MonotonicityMask;
  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
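
// Map a 'schedule' clause kind to its base (modifier-free) runtime schedule
// type; chunk and simd information selects the chunked/simd variants.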
                                               bool HasSimdModifier) {
  switch (ClauseKind) {
  case OMP_SCHEDULE_Default:
  case OMP_SCHEDULE_Static:
    return HasChunks ? OMPScheduleType::BaseStaticChunked
                     : OMPScheduleType::BaseStatic;
  case OMP_SCHEDULE_Dynamic:
    return OMPScheduleType::BaseDynamicChunked;
  case OMP_SCHEDULE_Guided:
    return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
                           : OMPScheduleType::BaseGuidedChunked;
  case OMP_SCHEDULE_Auto:
    return OMPScheduleType::BaseAuto;
  case OMP_SCHEDULE_Runtime:
    return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
                           : OMPScheduleType::BaseRuntime;
                                                   bool HasOrderedClause) {
  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
             OMPScheduleType::None &&
         "Must not have ordering nor monotonicity flags already set");

  OMPScheduleType OrderingModifier = HasOrderedClause
                                         ? OMPScheduleType::ModifierOrdered
                                         : OMPScheduleType::ModifierUnordered;
  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;

  if (OrderingScheduleType ==
      (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedGuidedChunked;
  else if (OrderingScheduleType ==
           (OMPScheduleType::BaseRuntimeSimd | OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedRuntime;

  return OrderingScheduleType;
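
// Apply the monotonic/nonmonotonic modifiers. Per OpenMP 5.1 2.11.4, static
// and ordered schedules default to monotonic behavior; everything else
// defaults to nonmonotonic.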
                                                       bool HasSimdModifier,
                                                       bool HasMonotonic,
                                                       bool HasNonmonotonic,
                                                       bool HasOrderedClause) {
  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
             OMPScheduleType::None &&
         "Must not have monotonicity flags already set");
  assert((!HasMonotonic || !HasNonmonotonic) &&
         "Monotonic and Nonmonotonic are contradicting each other");

  if (HasMonotonic) {
    return ScheduleType | OMPScheduleType::ModifierMonotonic;
  } else if (HasNonmonotonic) {
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;

  if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
      (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||

  return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
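
// Combine base schedule kind, ordering, and monotonicity into the final
// runtime schedule type.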
                                          bool HasSimdModifier,
                                          bool HasMonotonicModifier,
                                          bool HasNonmonotonicModifier,
                                          bool HasOrderedClause) {
      OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);
  auto *Br = cast<BranchInst>(Term);
  assert(!Br->isConditional() &&
         "BB's terminator must be an unconditional branch (or degenerate)");
  Br->setSuccessor(0, Target);

  NewBr->setDebugLoc(DL);

  assert(New->getFirstInsertionPt() == New->begin() &&
         "Target BB must not have PHI nodes");
  New->splice(New->begin(), Old, IP.getPoint(), Old->end());
  New->replaceSuccessorsPhiUsesWith(Old, New);

  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());

  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
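
// Merge the attribute sets that OMPKinds.def declares for each runtime
// function into the function's existing attributes; i32 sign/zero-extension
// attributes are translated for the target via TargetLibraryInfo below.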
  auto FnAttrs = Attrs.getFnAttrs();
  auto RetAttrs = Attrs.getRetAttrs();
  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)

                        bool Param = true) -> void {
    bool HasSignExt = AS.hasAttribute(Attribute::SExt);
    bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
    if (HasSignExt || HasZeroExt) {
      assert(AS.getNumAttributes() == 1 &&
             "Currently not handling extension attr combined with others.");
      if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt))
      if (auto AK = TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                           \
    addAttrSet(RetAttrs, RetAttrSet, false);                                   \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)                \
      addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]);                         \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));    \
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
    Fn = M.getFunction(Str);                                                   \
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL(Enum, Str, ...)                                                \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
#include "llvm/Frontend/OpenMP/OMPKinds.def"
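
// __kmpc_fork_call and __kmpc_fork_teams invoke their outlined-function
// argument, so attach !callback metadata that lets interprocedural analyses
// see through the indirect invocation.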
  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {

        LLVMContext::MD_callback,
            2, {-1, -1}, true)}));

  assert(Fn && "Failed to create OpenMP runtime function");

  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  assert(Fn && "Failed to create OpenMP runtime function pointer");
  ParallelRegionBlockSet.clear();
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

                    << " Exit: " << OI.ExitBB->getName() << "\n");
         "Expected OpenMP outlining to be possible!");

  for (auto *V : OI.ExcludeArgsFromAggregate)

  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");
         "OpenMP outlined functions should not return a value!");

  assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
         "Expected instructions to add in the outlined region entry");
                                     End = ArtificialEntry.rend();
      if (I.isTerminator())
      I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());

    OI.EntryBB->moveBefore(&ArtificialEntry);

  if (OI.PostOutlineCB)
    OI.PostOutlineCB(*OutlinedFn);
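
// Ident structs and source-location strings are uniqued: an existing global
// with an identical initializer is reused rather than re-emitted.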
                                    unsigned Reserve2Flags) {
  LocFlags |= OMP_IDENT_FLAG_KMPC;

    if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
      if (GV.getInitializer() == Initializer)

        M, OpenMPIRBuilder::Ident,

  SrcLocStrSize = LocStr.size();

    if (GV.isConstant() && GV.hasInitializer() &&
        GV.getInitializer() == Initializer)
                                                unsigned Line, unsigned Column,
  Buffer.append(FunctionName);
  Buffer.append(std::to_string(Line));
  Buffer.append(std::to_string(Column));

  StringRef UnknownLoc = ";unknown;unknown;0;0;;";

  if (DIFile *DIF = DIL->getFile())
    if (std::optional<StringRef> Source = DIF->getSource())

                             DIL->getColumn(), SrcLocStrSize);
665 "omp_global_thread_num");
670 bool ForceSimpleCall,
bool CheckCancelFlag) {
678 bool ForceSimpleCall,
bool CheckCancelFlag) {
685 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
688 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
691 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
694 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
697 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
710 bool UseCancelBarrier =
715 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
716 : OMPRTL___kmpc_barrier),
719 if (UseCancelBarrier && CheckCancelFlag)
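
// createCancel derives the directive-specific cancel kind from OMPKinds.def;
// for a cancelled parallel region the exit path still runs the implied
// barrier.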
                                               omp::Directive CanceledDirective) {
  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
    if (CanceledDirective == OMPD_parallel) {
                    omp::Directive::OMPD_unknown, false,

  UI->eraseFromParent();
790 ".omp_offloading.entry_name");
805 M, OpenMPIRBuilder::OffloadEntry,
812 Entry->setAlignment(
Align(1));
823 auto *KernelArgsPtr =
836 NumThreads, HostPtr, KernelArgsPtr};
846 omp::Directive CanceledDirective,
849 "Unexpected cancellation!");
                                    omp::ProcBindKind ProcBind,
                                    bool IsCancellable) {

  if (ProcBind != OMP_PROC_BIND_default) {

    if (IP.getBlock()->end() == IP.getPoint()) {
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  assert(BodyGenCB && "Expected body generation callback!");
  BodyGenCB(InnerAllocaIP, CodeGenIP);

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
          llvm::LLVMContext::MD_callback,

    OutlinedFn.addParamAttr(0, Attribute::NoAlias);
    OutlinedFn.addParamAttr(1, Attribute::NoAlias);
    OutlinedFn.addFnAttr(Attribute::NoUnwind);
    OutlinedFn.addFnAttr(Attribute::NoRecurse);

    assert(OutlinedFn.arg_size() >= 2 &&
           "Expected at least tid and bounded tid as arguments");
    unsigned NumCapturedVars = OutlinedFn.arg_size() - 2;

    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());

    Value *ForkCallArgs[] = {
    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));

    if (IfCondition && NumCapturedVars == 0) {
    if (IfCondition && RealArgs.back()->getType() != PtrTy)

    I->eraseFromParent();
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  PRegOutlinedExitBB->setName("omp.par.outlined.exit");

  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");
  auto PrivHelper = [&](Value &V) {
    if (&V == TIDAddr || &V == ZeroAddr) {

    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))

    if (!V.getType()->isPointerTy()) {

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
      ReplacementValue = PrivTID;
          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)

      UPtr->set(ReplacementValue);

  for (Value *Input : Inputs) {
  for (Value *Output : Outputs)
         "OpenMP outlining should not produce live-out values!");

  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");
    for (auto *BB : Blocks)
      dbgs() << " PBR: " << BB->getName() << "\n";

  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();
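
// Post-outlining for tasks: the outlined function gets a task-entry wrapper
// with the (gtid, task-data) signature the runtime expects, and any depend
// clauses are lowered into a kmp_depend_info array.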
                      Dependencies](Function &OutlinedFn) {
    assert(OutlinedFn.getNumUses() == 1 &&
           "there must be a single user for the outlined function");
    CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());

    bool HasTaskData = StaleCI->arg_size() > 0;

    assert(ArgStructAlloca &&
           "Unable to find the alloca instruction corresponding to arguments "
           "for extracted function");
    assert(ArgStructType && "Unable to find struct type corresponding to "
                            "arguments for extracted function");

    WrapperArgTys.push_back(OutlinedFn.getArg(0)->getType());
        (Twine(OutlinedFn.getName()) + ".wrapper").str(),
            {Builder.getInt32Ty(), Builder.getInt8PtrTy()}, false)

    Value *WrapperFuncBitcast =
        {Ident, ThreadID, Flags,
         WrapperFuncBitcast});

    Value *DepArrayPtr = nullptr;
    if (Dependencies.size()) {
            static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
            static_cast<unsigned int>(RTLDependInfoFields::Len));
            static_cast<unsigned int>(RTLDependInfoFields::Flags));
            static_cast<unsigned int>(Dep.DepKind)),
  Instruction *ThenTI = IfTerminator, *ElseTI = nullptr;

  if (Dependencies.size()) {

  BodyGenCB(TaskAllocaIP, TaskBodyIP);
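
// Sections lower onto a canonical workshare loop whose body switches on the
// induction variable; each section callback populates one case block.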
    if (IP.getBlock()->end() != IP.getPoint())

    auto *CaseBB = IP.getBlock()->getSinglePredecessor();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);

    unsigned CaseNumber = 0;
    for (auto SectionCB : SectionCBs) {
          M.getContext(), "omp_section_loop.body.case", CurFn, Continue);

      Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");

  applyStaticWorkshareLoop(Loc.DL, LoopInfo, AllocaIP, !IsNowait);

  assert(FiniInfo.DK == OMPD_sections &&
         "Unexpected finalization stack state!");

  AfterIP = {FiniBB, FiniBB->begin()};
    if (IP.getBlock()->end() != IP.getPoint())

  Directive OMPD = Directive::OMPD_sections;
  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,

      M.getDataLayout().getDefaultGlobalsAddressSpace(),
      ".omp.reduction.func", &M);
    assert(RI.Variable && "expected non-null variable");
    assert(RI.PrivateVariable && "expected non-null private variable");
    assert(RI.ReductionGen && "expected non-null reduction generator callback");
    assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
           "expected variables and their private equivalents to have the same "
           "type");
    assert(RI.Variable->getType()->isPointerTy() &&
           "expected variables to be pointers");
  unsigned NumReductions = ReductionInfos.size();

  for (auto En : enumerate(ReductionInfos)) {
    unsigned Index = En.index();
                         "private.red.var." + Twine(Index) + ".casted");

  Value *RedArrayPtr =

  bool CanGenerateAtomic =
          ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE

  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
  Value *Lock = getOMPCriticalRegionLock(".reduction");
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_reduce);
      {Ident, ThreadId, NumVariables, RedArraySize, RedArrayPtr, ReductionFunc,
       Lock},

  for (auto En : enumerate(ReductionInfos)) {
                           "red.value." + Twine(En.index()));
    Value *PrivateRedValue =
                           "red.private.value." + Twine(En.index()));

      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_end_reduce);

  if (CanGenerateAtomic) {

  for (auto En : enumerate(ReductionInfos)) {
        RedArrayTy, LHSArrayPtr, 0, En.index());
        RedArrayTy, RHSArrayPtr, 0, En.index());
  Directive OMPD = Directive::OMPD_master;
  Value *Args[] = {Ident, ThreadId};

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,

  Directive OMPD = Directive::OMPD_masked;
  Value *ArgsEnd[] = {Ident, ThreadId};

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,

                                   "omp_" + Name + ".next", true);
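
// createLoopSkeleton builds the canonical control flow preheader -> header ->
// cond -> body -> latch -> exit -> after, with the induction variable as a
// header PHI compared against the trip count in cond.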
  CL->Header = Header;

                                         NextBB, NextBB, Name);

    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,

  auto *IndVarTy = cast<IntegerType>(Start->getType());
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");
  Value *CountIfLooping;
  if (InclusiveStop) {

                                     "omp_" + Name + ".tripcount");

        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
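
// applyStaticWorkshareLoop: __kmpc_for_static_init computes this thread's
// [lower, upper] range, which then replaces the canonical loop's bounds.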
                                                          InsertPointTy AllocaIP,
                                                          bool NeedsBarrier) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
         "Require dedicated allocate IP");

  Type *IVTy = IV->getType();

      I32Type, static_cast<int>(OMPScheduleType::UnorderedStatic));

                     {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
                      PUpperBound, PStride, One, Zero});

  CLI->setTripCount(TripCount);

                  omp::Directive::OMPD_for, false,
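
// applyStaticChunkedWorkshareLoop: an outer dispatch loop walks this thread's
// chunks; the original canonical loop becomes the per-chunk inner loop with a
// clamped trip count.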
                                                               bool NeedsBarrier,
                                                               Value *ChunkSize) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(ChunkSize && "Chunk size is required");

  Type *IVTy = IV->getType();
         "Max supported tripcount bitwidth is 64 bits");
                                        : Type::getInt64Ty(Ctx);

  Value *PLowerBound =
  Value *PUpperBound =

  Value *CastedChunkSize =
  Value *CastedTripCount =

      I32Type, static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
                     SchedulingType, PLastIter,
                     PLowerBound, PUpperBound,

  Value *FirstChunkStart =
  Value *FirstChunkStop =
  Value *NextChunkStride =

  Value *DispatchCounter;
      FirstChunkStart, CastedTripCount, NextChunkStride,

  Value *IsLastChunk =
  Value *CountUntilOrigTripCount =
      IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount");
  Value *BackcastedChunkTC =
  CLI->setTripCount(BackcastedChunkTC);

  Value *BackcastedDispatchCounter =
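
// applyWorkshareLoop computes the effective runtime schedule type from the
// clause and its modifiers, then dispatches to the static, static-chunked,
// or dynamic lowering accordingly.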
    bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind,
    llvm::Value *ChunkSize, bool HasSimdModifier, bool HasMonotonicModifier,
    bool HasNonmonotonicModifier, bool HasOrderedClause) {
      SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);

  bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
                   OMPScheduleType::ModifierOrdered;
  switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
  case OMPScheduleType::BaseStatic:
    assert(!ChunkSize && "No chunk size with static-chunked schedule");
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                       NeedsBarrier, ChunkSize);
    return applyStaticWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier);

  case OMPScheduleType::BaseStaticChunked:
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                       NeedsBarrier, ChunkSize);
    return applyStaticChunkedWorkshareLoop(DL, CLI, AllocaIP, NeedsBarrier,

  case OMPScheduleType::BaseRuntime:
  case OMPScheduleType::BaseAuto:
  case OMPScheduleType::BaseGreedy:
  case OMPScheduleType::BaseBalanced:
  case OMPScheduleType::BaseSteal:
  case OMPScheduleType::BaseGuidedSimd:
  case OMPScheduleType::BaseRuntimeSimd:
           "schedule type does not support user-defined chunk sizes");
  case OMPScheduleType::BaseDynamicChunked:
  case OMPScheduleType::BaseGuidedChunked:
  case OMPScheduleType::BaseGuidedIterativeChunked:
  case OMPScheduleType::BaseGuidedAnalyticalChunked:
  case OMPScheduleType::BaseStaticBalancedChunked:
    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                     NeedsBarrier, ChunkSize);
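
// Pick the __kmpc_dispatch_{init,next,fini} variant matching the induction
// variable's bit width and signedness.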
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);

        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);

        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);

  assert(CLI->isValid() && "Requires a valid canonical loop");
         "Require dedicated allocate IP");
         "Require valid schedule type");

  bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
                 OMPScheduleType::ModifierOrdered;

  Type *IVTy = IV->getType();

                     {SrcLoc, ThreadNum, SchedulingType, One, UpperBound, One,
                      Chunk});

                        PLowerBound, PUpperBound, PStride});

  auto *PI = cast<PHINode>(Phi);
  PI->setIncomingBlock(0, OuterCond);
  PI->setIncomingValue(0, LowerBound);

  auto *Br = cast<BranchInst>(Term);
  Br->setSuccessor(0, OuterCond);

  auto *CI = cast<CmpInst>(Comp);
  CI->setOperand(1, UpperBound);

  auto *BI = cast<BranchInst>(Branch);
  assert(BI->getSuccessor(1) == Exit);
  BI->setSuccessor(1, OuterCond);

                  omp::Directive::OMPD_for, false,
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (BBsToErase.count(UseInst->getParent()))

  bool Changed = false;
      if (HasRemainingUses(BB)) {
        BBsToErase.erase(BB);

  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();

    return Loops.front();
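
// collapseLoops folds a perfect loop nest into a single loop whose trip count
// is the product of the original trip counts; the original induction
// variables are recomputed from the collapsed one by division and remainder.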
    Loop->collectControlBlocks(OldControlBBs);

  if (ComputeIP.isSet())

  Value *CollapsedTripCount = nullptr;
           "All loops to collapse must be valid canonical loops");
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;

      OrigPreheader->getNextNode(), OrigAfter, "collapsed");

  Value *Leftover = Result->getIndVar();
  NewIndVars.resize(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();
    NewIndVars[i] = NewIndVar;
  NewIndVars[0] = Leftover;

  BasicBlock *ContinueBlock = Result->getBody();
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
    ContinueBlock = nullptr;
    ContinuePred = NextSrc;

  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());

  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());

  ContinueWith(Result->getLatch(), nullptr);

  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
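
// tileLoops splits each loop into a "floor" loop over tiles and a "tile"
// loop inside a tile, producing 2 * NumLoops loops; partial tiles at the
// upper bound get a reduced trip count.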
std::vector<CanonicalLoopInfo *>
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

    Loop->collectControlBlocks(OldControlBBs);

    assert(L->isValid() && "All input loops must be valid canonical loops");
    OrigTripCounts.push_back(L->getTripCount());

  for (int i = 0; i < NumLoops - 1; ++i) {

  for (int i = 0; i < NumLoops; ++i) {
    Value *OrigTripCount = OrigTripCounts[i];

    Value *FloorTripOverflow =
                                 "omp_floor" + Twine(i) + ".tripcount", true);

  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);

  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
            DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);

        Enter = EmbeddedLoop->getBody();
        Continue = EmbeddedLoop->getLatch();
        OutroInsertBefore = EmbeddedLoop->getLatch();
        return EmbeddedLoop;

                            const Twine &NameBase) {
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);

  EmbeddNewLoops(FloorCount, "floor");

  for (int i = 0; i < NumLoops; ++i) {
    Value *FloorIsEpilogue =
    Value *TileTripCount =

  EmbeddNewLoops(TileCounts, "tile");

  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    BodyEnter = nullptr;
    BodyEntered = ExitBB;

  for (int i = 0; i < NumLoops; ++i) {
    Value *OrigIndVar = OrigIndVars[i];
  if (Properties.empty())

  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");

  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");

      if (I.mayReadOrWriteMemory()) {
        I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
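
// createIfVersion duplicates the loop so an if clause can choose between the
// vectorized and the scalar copy at run time.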
                            const Twine &NamePrefix) {
  SplitBefore = dyn_cast<Instruction>(IfCond);

    VMap[Block] = NewBB;

  if (TargetTriple.isX86()) {
    if (Features.lookup("avx512f"))
    else if (Features.lookup("avx"))

  if (TargetTriple.isPPC())

  if (TargetTriple.isWasm())
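
// applySimd tags the loop's memory accesses with an access group, marks it
// parallel via llvm.loop.parallel_accesses, and encodes simdlen/safelen as
// the vectorization-width hint.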
                                Value *IfCond, OrderKind Order,

  if (AlignedVars.size()) {
    for (auto &AlignedItem : AlignedVars) {
      Value *AlignedPtr = AlignedItem.first;
      Value *Alignment = AlignedItem.second;
                                      AlignedPtr, Alignment);

    createIfVersion(CanonicalLoop, IfCond, VMap, "simd");

         "Cannot find value which corresponds to original loop latch");
  assert(isa<BasicBlock>(MappedLatch) &&
         "Cannot cast mapped latch block value to BasicBlock");
  BasicBlock *NewLatchBlock = dyn_cast<BasicBlock>(MappedLatch);

  if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
        Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup}));

      Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst}));

  if (Simdlen || Safelen) {
    ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen;
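
// Build a TargetMachine from the function's target-cpu/target-features so
// the heuristic below can query a real TargetTransformInfo when estimating
// the unroll factor.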
static std::unique_ptr<TargetMachine>
  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
  const std::string &Triple = M->getTargetTriple();

      std::nullopt, OptLevel));

      [&](const Function &F) { return TM->getTargetTransformInfo(F); });

  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");

      nullptr, ORE, OptLevel,

             << " Threshold=" << UP.Threshold << "\n"
             << " PartialOptSizeThreshold="

    if (auto *Load = dyn_cast<LoadInst>(&I)) {
      Ptr = Load->getPointerOperand();
    } else if (auto *Store = dyn_cast<StoreInst>(&I)) {
      Ptr = Store->getPointerOperand();

    Ptr = Ptr->stripPointerCasts();

    if (auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
      if (Alloca->getParent() == &F->getEntryBlock())

  unsigned NumInlineCandidates;
  bool NotDuplicatable;

  LLVM_DEBUG(dbgs() << "Estimated loop size is " << LoopSizeIC << "\n");

  if (NotDuplicatable || Convergent || !LoopSizeIC.isValid()) {

  unsigned LoopSize = *LoopSizeIC.getValue();

  int MaxTripCount = 0;
  bool MaxOrZero = false;
  unsigned TripMultiple = 0;

  bool UseUpperBound = false;
      MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
  unsigned Factor = UP.Count;
  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");

  assert(Factor >= 0 && "Unroll factor must not be negative");

      Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));

  *UnrolledCLI = Loop;

         "unrolling only makes sense with a factor of 2 or larger");

  Type *IndVarTy = Loop->getIndVarType();

  std::vector<CanonicalLoopInfo *> LoopNest =

      Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});

  (*UnrolledCLI)->assertOK();
  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};

  Directive OMPD = Directive::OMPD_single;
  Value *Args[] = {Ident, ThreadId};

  EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                omp::Directive::OMPD_unknown, false,

  Directive OMPD = Directive::OMPD_critical;
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,

                                   const Twine &Name, bool IsDependSource) {
                    [](Value *SV) { return SV->getType()->isIntegerTy(64); }) &&
         "OpenMP runtime requires depend vec with i64 type");

  for (unsigned I = 0; I < NumLoops; ++I) {

  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};

  Directive OMPD = Directive::OMPD_ordered;
  Value *Args[] = {Ident, ThreadId};

  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,

    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize, bool IsCancellable) {

  if (!isa_and_nonnull<BranchInst>(SplitPos))

  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

         "Unexpected control flow graph state!!");
  emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
         "Unexpected Control Flow State!");
         "Unexpected Insertion point location!");

  auto InsertBB = merged ? ExitPredBB : ExitBB;
  if (!isa_and_nonnull<BranchInst>(SplitPos))

  if (!Conditional || !EntryCall)

  UI->eraseFromParent();

    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
         "Unexpected finalization stack state!");
  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");

  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
                                  "copyin.not.master.end");
                                  Value *DependenceAddress,
                                  bool HaveNowaitClause) {
  if (Device == nullptr)
  if (NumDependences == nullptr) {
      Ident, ThreadId, InteropVar, InteropTypeVal,
      Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};

    Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
  if (Device == nullptr)
  if (NumDependences == nullptr) {
      Ident, ThreadId, InteropVar, Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};

                                   Value *NumDependences,
                                   Value *DependenceAddress,
                                   bool HaveNowaitClause) {
  if (Device == nullptr)
  if (NumDependences == nullptr) {
      Ident, ThreadId, InteropVar, Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};

  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};

      omp::RuntimeFunction::OMPRTL___kmpc_target_init);
      Fn, {Ident, IsSPMDVal, UseGenericStateMachine});

  UI->eraseFromParent();

      omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
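
// Record the compile-time teams/threads bounds as function attributes so
// device runtimes and offload tooling can recover them from the kernel.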
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
    Function *OutlinedFn, int32_t NumTeams, int32_t NumThreads) {
    OutlinedFn->addFnAttr("omp_target_num_teams", std::to_string(NumTeams));
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(NumThreads));

  assert(OutlinedFn && "The outlined function must exist if embedded");

Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
         "Named kernel already exists?");

    int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn,
          ? GenerateFunctionCallback(EntryFnName)

  if (!IsOffloadEntry)

  std::string EntryFnIDName =
          ? std::string(EntryFnName)

      InfoManager, EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams,

    int32_t NumTeams, int32_t NumThreads) {
  setOutlinedTargetRegionFunctionAttributes(OutlinedFn, NumTeams, NumThreads);
  auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
  auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
      EntryInfo, EntryAddr, OutlinedFnID,
  return OutlinedFnID;
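
// Target data begin/end: the region is versioned on the if clause, the
// offload arrays are materialized, and the corresponding mapper runtime
// entry is invoked.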
                                       bool IsBegin, int64_t DeviceID,
                                       Value *IfCond,
    ThenTI->getParent()->setName("omp_if.then");

                             DeviceID, MapTypeFlags.size());

    UI->getParent()->setName("omp_if.end");
    UI->eraseFromParent();

  return OS.str().str();

    assert(cast<PointerType>(Elem.second->getType())
               ->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");

  return cast<GlobalVariable>(&*Elem.second);

Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");

  return SizePtrToInt;
                                                      std::string VarName) {
  return MaptypesArrayGlobal;

                                         unsigned NumOperands,
      ArrI8PtrTy, nullptr, ".offload_baseptrs");
      ArrI64Ty, nullptr, ".offload_sizes");

                                          int64_t DeviceID,
                                          unsigned NumOperands) {
  Value *ArgsBaseGEP =
      {Builder.getInt32(0), Builder.getInt32(0)});
      {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgSizesGEP =
      {Builder.getInt32(0), Builder.getInt32(0)});

       ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
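
// emitOffloadingArraysArgument wires the base-pointer, pointer, size, and
// map-type arrays into the argument bundle the tgt runtime calls expect; end
// calls substitute the end-specific map-types array when one exists.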
  assert((!ForEndCall || Info.separateBeginEndCalls()) &&
         "expected region end call to runtime only when end call is separate");

  auto VoidPtrPtrTy = VoidPtrTy->getPointerTo(0);

  if (!Info.NumberOfPtrs) {

      Info.RTArgs.BasePointersArray,
      ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
                                                 : Info.RTArgs.MapTypesArray,

  if (!Info.HasMapper)

bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
         "Unexpected Atomic Ordering.");
  Type *XTy = X.Var->getType();
  Type *XElemTy = X.ElemTy;
         "OMP atomic read expected a scalar type");

  Value *XRead = nullptr;
    XRead = cast<Value>(XLD);
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);

  Type *XTy = X.Var->getType();
  Type *XElemTy = X.ElemTy;