43 #define DEBUG_TYPE "openmp-ir-builder"
50 cl::desc(
"Use optimistic attributes describing "
51 "'as-if' properties of runtime calls."),
55 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
56 cl::desc(
"Factor for the unroll threshold to account for code "
57 "simplifications still taking place"),
68 if (!IP1.isSet() || !IP2.isSet())
70 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
136 bool HasSimdModifier) {
138 switch (ClauseKind) {
139 case OMP_SCHEDULE_Default:
140 case OMP_SCHEDULE_Static:
143 case OMP_SCHEDULE_Dynamic:
145 case OMP_SCHEDULE_Guided:
148 case OMP_SCHEDULE_Auto:
150 case OMP_SCHEDULE_Runtime:
160 bool HasOrderedClause) {
163 "Must not have ordering nor monotonicity flags already set");
168 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
171 if (OrderingScheduleType ==
178 return OrderingScheduleType;
184 bool HasSimdModifier,
bool HasMonotonic,
185 bool HasNonmonotonic,
bool HasOrderedClause) {
188 "Must not have monotonicity flags already set");
189 assert((!HasMonotonic || !HasNonmonotonic) &&
190 "Monotonic and Nonmonotonic are contradicting each other");
194 }
else if (HasNonmonotonic) {
220 bool HasSimdModifier,
bool HasMonotonicModifier,
221 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
227 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
228 HasNonmonotonicModifier, HasOrderedClause);
242 auto *Br = cast<BranchInst>(
Term);
243 assert(!Br->isConditional() &&
244 "BB's terminator must be an unconditional branch (or degenerate)");
247 Br->setSuccessor(0,
Target);
252 NewBr->setDebugLoc(
DL);
257 assert(New->getFirstInsertionPt() == New->begin() &&
258 "Target BB must not have PHI nodes");
262 New->getInstList().splice(New->begin(), Old->
getInstList(),
IP.getPoint(),
291 New->replaceSuccessorsPhiUsesWith(Old, New);
300 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
314 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
334 auto FnAttrs =
Attrs.getFnAttrs();
335 auto RetAttrs =
Attrs.getRetAttrs();
337 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
340 #define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
341 #include "llvm/Frontend/OpenMP/OMPKinds.def"
345 #define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
347 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
348 RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet); \
349 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
351 ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]); \
352 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
354 #include "llvm/Frontend/OpenMP/OMPKinds.def"
368 #define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
370 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
372 Fn = M.getFunction(Str); \
374 #include "llvm/Frontend/OpenMP/OMPKinds.def"
380 #define OMP_RTL(Enum, Str, ...) \
382 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
384 #include "llvm/Frontend/OpenMP/OMPKinds.def"
388 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
398 LLVMContext::MD_callback,
400 2, {-1, -1},
true)}));
406 addAttributes(FnID, *Fn);
413 assert(Fn &&
"Failed to create OpenMP runtime function");
422 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
423 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
437 DeferredOutlines.push_back(OI);
441 ParallelRegionBlockSet.
clear();
443 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
459 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
461 "Expected OpenMP outlining to be possible!");
463 for (
auto *V : OI.ExcludeArgsFromAggregate)
469 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
471 "OpenMP outlined functions should not return a value!");
476 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
483 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
490 "Expected instructions to add in the outlined region entry");
492 End = ArtificialEntry.
rend();
497 if (
I.isTerminator())
500 I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
503 OI.EntryBB->moveBefore(&ArtificialEntry);
510 if (OI.PostOutlineCB)
511 OI.PostOutlineCB(*OutlinedFn);
515 OutlineInfos =
std::move(DeferredOutlines);
519 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
536 unsigned Reserve2Flags) {
538 LocFlags |= OMP_IDENT_FLAG_KMPC;
541 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
554 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
555 if (
GV.getInitializer() == Initializer)
560 M, OpenMPIRBuilder::Ident,
563 M.getDataLayout().getDefaultGlobalsAddressSpace());
575 SrcLocStrSize = LocStr.
size();
576 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
584 if (
GV.isConstant() &&
GV.hasInitializer() &&
585 GV.getInitializer() == Initializer)
588 SrcLocStr =
Builder.CreateGlobalStringPtr(LocStr,
"",
596 unsigned Line,
unsigned Column,
599 Buffer.push_back(
';');
601 Buffer.push_back(
';');
602 Buffer.
append(FunctionName);
603 Buffer.push_back(
';');
605 Buffer.push_back(
';');
607 Buffer.push_back(
';');
608 Buffer.push_back(
';');
609 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
614 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
615 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
623 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
625 if (
DIFile *DIF = DIL->getFile())
631 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
632 DIL->getColumn(), SrcLocStrSize);
637 return getOrCreateSrcLocStr(Loc.
DL, SrcLocStrSize,
638 Loc.
IP.getBlock()->getParent());
643 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
644 "omp_global_thread_num");
649 bool ForceSimpleCall,
bool CheckCancelFlag) {
650 if (!updateToLocation(Loc))
652 return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
657 bool ForceSimpleCall,
bool CheckCancelFlag) {
664 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
667 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
670 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
673 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
676 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
681 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
683 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
684 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
689 bool UseCancelBarrier =
690 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
693 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
694 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
695 : OMPRTL___kmpc_barrier),
698 if (UseCancelBarrier && CheckCancelFlag)
699 emitCancelationCheckImpl(Result, OMPD_parallel);
707 omp::Directive CanceledDirective) {
708 if (!updateToLocation(Loc))
712 auto *UI =
Builder.CreateUnreachable();
717 Builder.SetInsertPoint(ThenTI);
719 Value *CancelKind =
nullptr;
720 switch (CanceledDirective) {
721 #define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
722 case DirectiveEnum: \
723 CancelKind = Builder.getInt32(Value); \
725 #include "llvm/Frontend/OpenMP/OMPKinds.def"
731 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
732 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
733 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
735 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel),
Args);
737 if (CanceledDirective == OMPD_parallel) {
741 omp::Directive::OMPD_unknown,
false,
747 emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);
750 Builder.SetInsertPoint(UI->getParent());
751 UI->eraseFromParent();
761 Type *SizeTy =
M.getDataLayout().getIntPtrType(
M.getContext());
769 ".omp_offloading.entry_name");
784 M, OpenMPIRBuilder::OffloadEntry,
787 M.getDataLayout().getDefaultGlobalsAddressSpace());
791 Entry->setAlignment(
Align(1));
795 omp::Directive CanceledDirective,
797 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
798 "Unexpected cancellation!");
803 if (
Builder.GetInsertPoint() ==
BB->end()) {
807 BB->getContext(),
BB->getName() +
".cont",
BB->getParent());
810 BB->getTerminator()->eraseFromParent();
814 BB->getContext(),
BB->getName() +
".cncl",
BB->getParent());
818 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
823 Builder.SetInsertPoint(CancellationBlock);
826 auto &FI = FinalizationStack.back();
830 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
837 omp::ProcBindKind ProcBind,
bool IsCancellable) {
840 if (!updateToLocation(Loc))
844 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
845 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
846 Value *ThreadID = getOrCreateThreadID(Ident);
852 Builder.CreateIntCast(NumThreads, Int32,
false)};
854 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads),
Args);
857 if (ProcBind != OMP_PROC_BIND_default) {
863 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind),
Args);
871 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
879 Builder.restoreIP(OuterAllocaIP);
891 ToBeDeleted.push_back(TIDAddr);
892 ToBeDeleted.push_back(ZeroAddr);
915 if (
IP.getBlock()->end() ==
IP.getPoint()) {
921 assert(
IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
922 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
923 "Unexpected insertion point for finalization call!");
927 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
935 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
939 ToBeDeleted.push_back(
Builder.CreateLoad(Int32, TIDAddr,
"tid.addr.use"));
941 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
942 ToBeDeleted.push_back(ZeroAddrUse);
959 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
962 assert(BodyGenCB &&
"Expected body generation callback!");
964 BodyGenCB(InnerAllocaIP, CodeGenIP);
968 FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
969 if (
auto *
F = dyn_cast<llvm::Function>(RTLFn.
getCallee())) {
970 if (!
F->hasMetadata(llvm::LLVMContext::MD_callback)) {
979 llvm::LLVMContext::MD_callback,
989 OutlinedFn.addParamAttr(0, Attribute::NoAlias);
990 OutlinedFn.addParamAttr(1, Attribute::NoAlias);
991 OutlinedFn.addFnAttr(Attribute::NoUnwind);
992 OutlinedFn.addFnAttr(Attribute::NoRecurse);
994 assert(OutlinedFn.arg_size() >= 2 &&
995 "Expected at least tid and bounded tid as arguments");
996 unsigned NumCapturedVars =
997 OutlinedFn.arg_size() - 2;
999 CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
1004 Value *ForkCallArgs[] = {
1005 Ident,
Builder.getInt32(NumCapturedVars),
1006 Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};
1012 Builder.CreateCall(RTLFn, RealArgs);
1015 << *
Builder.GetInsertBlock()->getParent() <<
"\n");
1020 Builder.SetInsertPoint(PrivTID);
1022 Builder.CreateStore(
Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);
1032 Builder.SetInsertPoint(ElseTI);
1035 Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
1037 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
1038 SerializedParallelCallArgs);
1045 Value *EndArgs[] = {Ident, ThreadID};
1047 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
1051 << *
Builder.GetInsertBlock()->getParent() <<
"\n");
1055 I->eraseFromParent();
1061 auto FiniInfo = FinalizationStack.pop_back_val();
1063 assert(FiniInfo.DK == OMPD_parallel &&
1064 "Unexpected finalization stack state!");
1084 PRegOutlinedExitBB->
setName(
"omp.par.outlined.exit");
1085 Blocks.push_back(PRegOutlinedExitBB);
1101 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1104 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1107 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1109 auto PrivHelper = [&](
Value &V) {
1110 if (&V == TIDAddr || &V == ZeroAddr) {
1116 for (
Use &U : V.uses())
1117 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1118 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1128 if (!V.getType()->isPointerTy()) {
1132 Builder.restoreIP(OuterAllocaIP);
1134 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1138 Builder.SetInsertPoint(InsertBB,
1143 Builder.restoreIP(InnerAllocaIP);
1144 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1147 Value *ReplacementValue =
nullptr;
1148 CallInst *CI = dyn_cast<CallInst>(&V);
1150 ReplacementValue = PrivTID;
1153 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue));
1154 assert(ReplacementValue &&
1155 "Expected copy/create callback to set replacement value!");
1156 if (ReplacementValue == &V)
1161 UPtr->set(ReplacementValue);
1178 for (
Value *Input : Inputs) {
1183 for (
Value *Output : Outputs)
1187 "OpenMP outlining should not produce live-out values!");
1189 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1191 for (
auto *
BB : Blocks)
1192 dbgs() <<
" PBR: " <<
BB->getName() <<
"\n";
1198 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1199 UI->eraseFromParent();
1207 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1208 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1210 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush),
Args);
1214 if (!updateToLocation(Loc))
1223 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1224 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1225 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1228 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
1233 if (!updateToLocation(Loc))
1235 emitTaskwaitImpl(Loc);
1241 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1242 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1244 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1246 Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
1251 if (!updateToLocation(Loc))
1253 emitTaskyieldImpl(Loc);
1262 if (!updateToLocation(Loc))
1266 if (
IP.getBlock()->end() !=
IP.getPoint())
1277 auto *CaseBB =
IP.getBlock()->getSinglePredecessor();
1278 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1279 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1285 FinalizationStack.push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
1310 unsigned CaseNumber = 0;
1311 for (
auto SectionCB : SectionCBs) {
1313 M.getContext(),
"omp_section_loop.body.case", CurFn, Continue);
1315 Builder.SetInsertPoint(CaseBB);
1331 Loc, LoopBodyGenCB, LB, UB,
ST,
true,
false, AllocaIP,
"section_loop");
1333 applyStaticWorkshareLoop(Loc.
DL,
LoopInfo, AllocaIP, !IsNowait);
1336 auto FiniInfo = FinalizationStack.pop_back_val();
1337 assert(FiniInfo.DK == OMPD_sections &&
1338 "Unexpected finalization stack state!");
1344 AfterIP = {FiniBB, FiniBB->
begin()};
1354 if (!updateToLocation(Loc))
1358 if (
IP.getBlock()->end() !=
IP.getPoint())
1369 auto *CaseBB = Loc.
IP.getBlock();
1370 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
1371 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
1377 Directive OMPD = Directive::OMPD_sections;
1380 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
1393 M.getDataLayout().getDefaultGlobalsAddressSpace(),
1394 ".omp.reduction.func", &
M);
1402 assert(RI.Variable &&
"expected non-null variable");
1403 assert(RI.PrivateVariable &&
"expected non-null private variable");
1404 assert(RI.ReductionGen &&
"expected non-null reduction generator callback");
1405 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
1406 "expected variables and their private equivalents to have the same "
1408 assert(RI.Variable->getType()->isPointerTy() &&
1409 "expected variables to be pointers");
1412 if (!updateToLocation(Loc))
1422 unsigned NumReductions = ReductionInfos.size();
1425 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
1427 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
1429 for (
auto En :
enumerate(ReductionInfos)) {
1430 unsigned Index = En.index();
1432 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
1433 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
1436 "private.red.var." +
Twine(Index) +
".casted");
1437 Builder.CreateStore(Casted, RedArrayElemPtr);
1444 Value *RedArrayPtr =
1445 Builder.CreateBitCast(RedArray,
Builder.getInt8PtrTy(),
"red.array.ptr");
1447 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1448 bool CanGenerateAtomic =
1452 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
1454 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
1456 Value *ThreadId = getOrCreateThreadID(Ident);
1459 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
1462 Value *
Lock = getOMPCriticalRegionLock(
".reduction");
1463 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
1464 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
1465 : RuntimeFunction::OMPRTL___kmpc_reduce);
1467 Builder.CreateCall(ReduceFunc,
1468 {Ident, ThreadId, NumVariables, RedArraySize,
1469 RedArrayPtr, ReductionFunc,
Lock},
1480 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
1481 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
1482 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
1487 Builder.SetInsertPoint(NonAtomicRedBlock);
1488 for (
auto En :
enumerate(ReductionInfos)) {
1492 "red.value." +
Twine(En.index()));
1493 Value *PrivateRedValue =
1495 "red.private.value." +
Twine(En.index()));
1499 if (!
Builder.GetInsertBlock())
1503 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
1504 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
1505 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
1506 Builder.CreateCall(EndReduceFunc, {Ident, ThreadId,
Lock});
1507 Builder.CreateBr(ContinuationBlock);
1512 Builder.SetInsertPoint(AtomicRedBlock);
1513 if (CanGenerateAtomic) {
1517 if (!
Builder.GetInsertBlock())
1520 Builder.CreateBr(ContinuationBlock);
1530 Builder.SetInsertPoint(ReductionFuncBlock);
1535 for (
auto En :
enumerate(ReductionInfos)) {
1537 Value *LHSI8PtrPtr =
Builder.CreateConstInBoundsGEP2_64(
1538 RedArrayTy, LHSArrayPtr, 0, En.index());
1542 Value *RHSI8PtrPtr =
Builder.CreateConstInBoundsGEP2_64(
1543 RedArrayTy, RHSArrayPtr, 0, En.index());
1550 if (!
Builder.GetInsertBlock())
1552 Builder.CreateStore(Reduced, LHSPtr);
1556 Builder.SetInsertPoint(ContinuationBlock);
1565 if (!updateToLocation(Loc))
1568 Directive OMPD = Directive::OMPD_master;
1570 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1571 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1572 Value *ThreadId = getOrCreateThreadID(Ident);
1575 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
1578 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
1581 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1589 if (!updateToLocation(Loc))
1592 Directive OMPD = Directive::OMPD_masked;
1594 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
1595 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1596 Value *ThreadId = getOrCreateThreadID(Ident);
1598 Value *ArgsEnd[] = {Ident, ThreadId};
1600 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
1603 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
1606 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
1636 Builder.SetInsertPoint(Preheader);
1639 Builder.SetInsertPoint(Header);
1646 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" +
Name +
".cmp");
1647 Builder.CreateCondBr(Cmp, Body, Exit);
1652 Builder.SetInsertPoint(Latch);
1654 "omp_" +
Name +
".next",
true);
1662 LoopInfos.emplace_front();
1665 CL->Header = Header;
1684 NextBB, NextBB,
Name);
1688 if (updateToLocation(Loc)) {
1708 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
1718 auto *IndVarTy = cast<IntegerType>(Start->getType());
1719 assert(IndVarTy == Stop->getType() &&
"Stop type mismatch");
1720 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
1724 updateToLocation(ComputeLoc);
1742 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
1745 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
1749 Span =
Builder.CreateSub(Stop, Start,
"",
true);
1754 Value *CountIfLooping;
1755 if (InclusiveStop) {
1756 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
1763 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
1765 Value *TripCount =
Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
1766 "omp_" +
Name +
".tripcount");
1772 BodyGenCB(
Builder.saveIP(), IndVar);
1775 return createCanonicalLoop(LoopLoc, BodyGen, TripCount,
Name);
1787 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
1790 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
1796 InsertPointTy AllocaIP,
1797 bool NeedsBarrier) {
1798 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
1800 "Require dedicated allocate IP");
1807 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
1808 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1812 Type *IVTy =
IV->getType();
1815 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1820 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
1821 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
1822 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
1823 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
1832 Builder.CreateStore(Zero, PLowerBound);
1834 Builder.CreateStore(UpperBound, PUpperBound);
1835 Builder.CreateStore(One, PStride);
1837 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1844 Builder.CreateCall(StaticInit,
1845 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
1846 PUpperBound, PStride, One, Zero});
1847 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
1848 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
1849 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
1850 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
1851 CLI->setTripCount(TripCount);
1861 return Builder.CreateAdd(OldIV, LowerBound);
1867 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
1871 createBarrier(LocationDescription(
Builder.saveIP(),
DL),
1872 omp::Directive::OMPD_for,
false,
1883 bool NeedsBarrier,
Value *ChunkSize) {
1884 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
1885 assert(ChunkSize &&
"Chunk size is required");
1890 Type *IVTy =
IV->getType();
1892 "Max supported tripcount bitwidth is 64 bits");
1894 :
Type::getInt64Ty(Ctx);
1903 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
1908 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
1909 Value *PLowerBound =
1910 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
1911 Value *PUpperBound =
1912 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
1913 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
1920 Value *CastedChunkSize =
1921 Builder.CreateZExtOrTrunc(ChunkSize, InternalIVTy,
"chunksize");
1922 Value *CastedTripCount =
1923 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
1927 Builder.CreateStore(Zero, PLowerBound);
1928 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
1929 Builder.CreateStore(OrigUpperBound, PUpperBound);
1930 Builder.CreateStore(One, PStride);
1935 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
1936 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1937 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
1938 Builder.CreateCall(StaticInit,
1940 SchedulingType, PLastIter,
1941 PLowerBound, PUpperBound,
1946 Value *FirstChunkStart =
1947 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
1948 Value *FirstChunkStop =
1949 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
1950 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
1952 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
1953 Value *NextChunkStride =
1954 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
1958 Value *DispatchCounter;
1961 [&](InsertPointTy BodyIP,
Value *Counter) { DispatchCounter = Counter; },
1962 FirstChunkStart, CastedTripCount, NextChunkStride,
1985 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
1986 Value *IsLastChunk =
1987 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
1988 Value *CountUntilOrigTripCount =
1989 Builder.CreateSub(CastedTripCount, DispatchCounter);
1991 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
1992 Value *BackcastedChunkTC =
1993 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
1994 CLI->setTripCount(BackcastedChunkTC);
1999 Value *BackcastedDispatchCounter =
2000 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
2003 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
2008 Builder.CreateCall(StaticFini, {SrcLoc, ThreadNum});
2012 createBarrier(LocationDescription(
Builder.saveIP(),
DL), OMPD_for,
2026 bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind,
2027 llvm::Value *ChunkSize,
bool HasSimdModifier,
bool HasMonotonicModifier,
2028 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
2030 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
2031 HasNonmonotonicModifier, HasOrderedClause);
2037 assert(!ChunkSize &&
"No chunk size with static-chunked schedule");
2039 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
2040 NeedsBarrier, ChunkSize);
2042 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier);
2046 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
2047 NeedsBarrier, ChunkSize);
2049 return applyStaticChunkedWorkshareLoop(
DL, CLI, AllocaIP, NeedsBarrier,
2060 "schedule type does not support user-defined chunk sizes");
2067 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
2068 NeedsBarrier, ChunkSize);
2084 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
2087 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
2100 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
2103 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
2115 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
2118 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
2125 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
2127 "Require dedicated allocate IP");
2129 "Require valid schedule type");
2138 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
2139 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2143 Type *IVTy =
IV->getType();
2150 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
2151 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
2152 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
2153 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
2162 Builder.CreateStore(One, PLowerBound);
2164 Builder.CreateStore(UpperBound, PUpperBound);
2165 Builder.CreateStore(One, PStride);
2179 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
2185 Builder.CreateCall(DynamicInit,
2186 {SrcLoc, ThreadNum, SchedulingType, One,
2187 UpperBound, One, Chunk});
2196 Builder.CreateCall(DynamicNext, {SrcLoc, ThreadNum, PLastIter,
2197 PLowerBound, PUpperBound, PStride});
2201 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
2202 Builder.CreateCondBr(MoreWork, Header, Exit);
2207 auto *PI = cast<PHINode>(Phi);
2208 PI->setIncomingBlock(0, OuterCond);
2209 PI->setIncomingValue(0, LowerBound);
2213 auto *Br = cast<BranchInst>(
Term);
2214 Br->setSuccessor(0, OuterCond);
2220 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
2222 auto *CI = cast<CmpInst>(Comp);
2223 CI->setOperand(1, UpperBound);
2226 auto *BI = cast<BranchInst>(
Branch);
2227 assert(BI->getSuccessor(1) == Exit);
2228 BI->setSuccessor(1, OuterCond);
2234 Builder.CreateCall(DynamicFini, {SrcLoc, ThreadNum});
2240 createBarrier(LocationDescription(
Builder.saveIP(),
DL),
2241 omp::Directive::OMPD_for,
false,
2261 auto HasRemainingUses = [&BBsToErase](
BasicBlock *
BB) {
2262 for (
Use &U :
BB->uses()) {
2263 auto *UseInst = dyn_cast<Instruction>(U.getUser());
2266 if (BBsToErase.count(UseInst->getParent()))
2274 bool Changed =
false;
2276 if (HasRemainingUses(
BB)) {
2277 BBsToErase.erase(
BB);
2292 assert(
Loops.size() >= 1 &&
"At least one loop required");
2293 size_t NumLoops =
Loops.size();
2297 return Loops.front();
2309 Loop->collectControlBlocks(OldControlBBs);
2313 if (ComputeIP.isSet())
2320 Value *CollapsedTripCount =
nullptr;
2323 "All loops to collapse must be valid canonical loops");
2324 Value *OrigTripCount = L->getTripCount();
2325 if (!CollapsedTripCount) {
2326 CollapsedTripCount = OrigTripCount;
2331 CollapsedTripCount =
Builder.CreateMul(CollapsedTripCount, OrigTripCount,
2337 createLoopSkeleton(
DL, CollapsedTripCount,
F,
2338 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
2344 Builder.restoreIP(Result->getBodyIP());
2346 Value *Leftover = Result->getIndVar();
2348 NewIndVars.
resize(NumLoops);
2349 for (
int i = NumLoops - 1;
i >= 1; --
i) {
2352 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
2353 NewIndVars[
i] = NewIndVar;
2355 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
2358 NewIndVars[0] = Leftover;
2367 BasicBlock *ContinueBlock = Result->getBody();
2369 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
2376 ContinueBlock =
nullptr;
2377 ContinuePred = NextSrc;
2384 for (
size_t i = 0;
i < NumLoops - 1; ++
i)
2385 ContinueWith(
Loops[
i]->getBody(),
Loops[
i + 1]->getHeader());
2391 for (
size_t i = NumLoops - 1;
i > 0; --
i)
2392 ContinueWith(
Loops[
i]->getAfter(),
Loops[
i - 1]->getLatch());
2395 ContinueWith(Result->getLatch(),
nullptr);
2402 for (
size_t i = 0;
i < NumLoops; ++
i)
2403 Loops[
i]->getIndVar()->replaceAllUsesWith(NewIndVars[
i]);
2417 std::vector<CanonicalLoopInfo *>
2421 "Must pass as many tile sizes as there are loops");
2422 int NumLoops =
Loops.size();
2423 assert(NumLoops >= 1 &&
"At least one loop to tile required");
2435 Loop->collectControlBlocks(OldControlBBs);
2443 assert(L->isValid() &&
"All input loops must be valid canonical loops");
2444 OrigTripCounts.push_back(L->getTripCount());
2445 OrigIndVars.push_back(L->getIndVar());
2455 for (
int i = 0;
i < NumLoops - 1; ++
i) {
2468 for (
int i = 0;
i < NumLoops; ++
i) {
2470 Value *OrigTripCount = OrigTripCounts[
i];
2483 Value *FloorTripOverflow =
2486 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
2488 Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
2489 "omp_floor" +
Twine(
i) +
".tripcount",
true);
2492 FloorCount.push_back(FloorTripCount);
2493 FloorRems.push_back(FloorTripRem);
2497 std::vector<CanonicalLoopInfo *> Result;
2498 Result.reserve(NumLoops * 2);
2511 auto EmbeddNewLoop =
2512 [
this,
DL,
F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
2515 DL, TripCount,
F, InnerEnter, OutroInsertBefore,
Name);
2520 Enter = EmbeddedLoop->
getBody();
2521 Continue = EmbeddedLoop->
getLatch();
2522 OutroInsertBefore = EmbeddedLoop->
getLatch();
2523 return EmbeddedLoop;
2527 const Twine &NameBase) {
2530 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
2531 Result.push_back(EmbeddedLoop);
2535 EmbeddNewLoops(FloorCount,
"floor");
2539 Builder.SetInsertPoint(Enter->getTerminator());
2541 for (
int i = 0;
i < NumLoops; ++
i) {
2545 Value *FloorIsEpilogue =
2547 Value *TileTripCount =
2550 TileCounts.push_back(TileTripCount);
2554 EmbeddNewLoops(TileCounts,
"tile");
2559 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
2568 BodyEnter =
nullptr;
2569 BodyEntered = ExitBB;
2581 Builder.restoreIP(Result.back()->getBodyIP());
2582 for (
int i = 0;
i < NumLoops; ++
i) {
2585 Value *OrigIndVar = OrigIndVars[
i];
2586 Value *Size = TileSizes[
i];
2612 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
2615 if (Properties.
empty())
2620 NewLoopProperties.push_back(
nullptr);
2624 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
2640 if (
I.mayReadOrWriteMemory()) {
2644 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
2686 if (Block == CanonicalLoop->
getCond() ||
2728 static std::unique_ptr<TargetMachine>
2732 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
2733 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
2734 const std::string &
Triple =
M->getTargetTriple();
2768 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
2783 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
2788 nullptr, ORE, OptLevel,
2809 <<
" Threshold=" << UP.
Threshold <<
"\n"
2812 <<
" PartialOptSizeThreshold="
2831 if (
auto *
Load = dyn_cast<LoadInst>(&
I)) {
2832 Ptr =
Load->getPointerOperand();
2833 }
else if (
auto *
Store = dyn_cast<StoreInst>(&
I)) {
2834 Ptr =
Store->getPointerOperand();
2840 if (
auto *Alloca = dyn_cast<AllocaInst>(Ptr)) {
2841 if (Alloca->getParent() == &
F->getEntryBlock())
2847 unsigned NumInlineCandidates;
2848 bool NotDuplicatable;
2853 LLVM_DEBUG(
dbgs() <<
"Estimated loop size is " << LoopSize <<
"\n");
2864 int MaxTripCount = 0;
2865 bool MaxOrZero =
false;
2866 unsigned TripMultiple = 0;
2868 bool UseUpperBound =
false;
2870 MaxTripCount, MaxOrZero, TripMultiple, LoopSize, UP, PP,
2872 unsigned Factor = UP.
Count;
2873 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
2884 assert(Factor >= 0 &&
"Unroll factor must not be negative");
2893 LoopMetadata.push_back(
2900 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
2913 *UnrolledCLI =
Loop;
2918 "unrolling only makes sense with a factor of 2 or larger");
2920 Type *IndVarTy =
Loop->getIndVarType();
2927 std::vector<CanonicalLoopInfo *>
LoopNest =
2928 tileLoops(
DL, {
Loop}, {FactorVal});
2942 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
2945 (*UnrolledCLI)->assertOK();
2953 if (!updateToLocation(Loc))
2957 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2958 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2959 Value *ThreadId = getOrCreateThreadID(Ident);
2963 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
2965 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
2975 if (!updateToLocation(Loc))
2983 Directive OMPD = Directive::OMPD_single;
2985 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
2986 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2987 Value *ThreadId = getOrCreateThreadID(Ident);
2990 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
2993 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
3003 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3008 omp::Directive::OMPD_unknown,
false,
3017 if (!updateToLocation(Loc))
3020 Directive OMPD = Directive::OMPD_critical;
3022 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3023 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3024 Value *ThreadId = getOrCreateThreadID(Ident);
3025 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
3026 Value *
Args[] = {Ident, ThreadId, LockVar};
3032 EnterArgs.push_back(HintInst);
3033 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
3035 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
3040 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
3043 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3051 const Twine &
Name,
bool IsDependSource) {
3052 for (
size_t I = 0;
I < StoreValues.
size();
I++)
3054 "OpenMP runtime requires depend vec with i64 type");
3056 if (!updateToLocation(Loc))
3067 for (
unsigned I = 0;
I < NumLoops; ++
I) {
3078 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3079 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3080 Value *ThreadId = getOrCreateThreadID(Ident);
3081 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
3085 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
3087 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
3096 if (!updateToLocation(Loc))
3099 Directive OMPD = Directive::OMPD_ordered;
3105 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3106 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3107 Value *ThreadId = getOrCreateThreadID(Ident);
3110 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
3114 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
3118 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3124 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
3125 bool HasFinalize,
bool IsCancellable) {
3128 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
3134 if (!isa_and_nonnull<BranchInst>(SplitPos))
3141 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
3144 BodyGenCB( InsertPointTy(),
3151 "Unexpected control flow graph state!!");
3152 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
3154 "Unexpected Control Flow State!");
3160 "Unexpected Insertion point location!");
3163 auto InsertBB = merged ? ExitPredBB : ExitBB;
3164 if (!isa_and_nonnull<BranchInst>(SplitPos))
3166 Builder.SetInsertPoint(InsertBB);
3174 if (!Conditional || !EntryCall)
3190 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
3194 UI->eraseFromParent();
3195 Builder.SetInsertPoint(ThenBB->getTerminator());
3202 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
3209 assert(!FinalizationStack.empty() &&
3210 "Unexpected finalization stack state!");
3212 FinalizationInfo Fi = FinalizationStack.pop_back_val();
3213 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
3221 Builder.SetInsertPoint(FiniBBTI);
3263 if (isa_and_nonnull<BranchInst>(OMP_Entry->
getTerminator())) {
3265 "copyin.not.master.end");
3272 Builder.SetInsertPoint(OMP_Entry);
3273 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
3274 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
3276 Builder.CreateCondBr(
cmp, CopyBegin, CopyEnd);
3278 Builder.SetInsertPoint(CopyBegin);
3292 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3293 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3294 Value *ThreadId = getOrCreateThreadID(Ident);
3297 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
3309 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3310 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3311 Value *ThreadId = getOrCreateThreadID(Ident);
3313 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
3320 Value *DependenceAddress,
bool HaveNowaitClause) {
3325 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3326 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3327 Value *ThreadId = getOrCreateThreadID(Ident);
3328 if (Device ==
nullptr)
3331 if (NumDependences ==
nullptr) {
3338 Ident, ThreadId, InteropVar, InteropTypeVal,
3339 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
3341 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
3348 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
3353 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3354 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3355 Value *ThreadId = getOrCreateThreadID(Ident);
3356 if (Device ==
nullptr)
3358 if (NumDependences ==
nullptr) {
3365 Ident, ThreadId, InteropVar, Device,
3366 NumDependences, DependenceAddress, HaveNowaitClauseVal};
3368 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
3375 Value *NumDependences,
3376 Value *DependenceAddress,
3377 bool HaveNowaitClause) {
3381 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3382 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3383 Value *ThreadId = getOrCreateThreadID(Ident);
3384 if (Device ==
nullptr)
3386 if (NumDependences ==
nullptr) {
3393 Ident, ThreadId, InteropVar, Device,
3394 NumDependences, DependenceAddress, HaveNowaitClauseVal};
3396 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
3408 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3409 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3410 Value *ThreadId = getOrCreateThreadID(Ident);
3412 getOrCreateOMPInternalVariable(Int8PtrPtr,
Name);
3413 llvm::Value *
Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};
3416 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
3423 bool RequiresFullRuntime) {
3424 if (!updateToLocation(Loc))
3428 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3429 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3438 Function *Fn = getOrCreateRuntimeFunctionPtr(
3439 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
3442 Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});
3454 auto *UI =
Builder.CreateUnreachable();
3460 Builder.SetInsertPoint(WorkerExitBB);
3464 Builder.SetInsertPoint(CheckBBTI);
3465 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
3467 CheckBBTI->eraseFromParent();
3468 UI->eraseFromParent();
3477 bool RequiresFullRuntime) {
3478 if (!updateToLocation(Loc))
3482 Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
3483 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3490 Function *Fn = getOrCreateRuntimeFunctionPtr(
3491 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
3493 Builder.CreateCall(Fn, {Ident, IsSPMDVal, RequiresFullRuntimeVal});
3506 return OS.
str().str();
3509 Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
3520 auto &Elem = *InternalVars.try_emplace(RuntimeName,
nullptr).first;
3522 assert(cast<PointerType>(Elem.second->getType())
3523 ->isOpaqueOrPointeeTypeMatches(Ty) &&
3524 "OMP internal variable has different type than requested");
3540 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
3541 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
3542 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
3543 return getOrCreateOMPInternalVariable(KmpCriticalNameTy,
Name);
3556 return MaptypesArrayGlobal;
3561 unsigned NumOperands,
3563 if (!updateToLocation(Loc))
3582 int64_t DeviceID,
unsigned NumOperands) {
3583 if (!updateToLocation(Loc))
3588 Value *ArgsBaseGEP =
3590 {Builder.getInt32(0), Builder.getInt32(0)});
3593 {Builder.getInt32(0), Builder.getInt32(0)});
3594 Value *ArgSizesGEP =
3596 {Builder.getInt32(0), Builder.getInt32(0)});
3598 Builder.CreateCall(MapperFunc,
3599 {SrcLocInfo,
Builder.getInt64(DeviceID),
3600 Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
3601 ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
3604 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
3608 "Unexpected Atomic Ordering.");
3669 if (!updateToLocation(Loc))
3672 Type *XTy =
X.Var->getType();
3674 Type *XElemTy =
X.ElemTy;
3677 "OMP atomic read expected a scalar type");
3679 Value *XRead =
nullptr;
3683 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
3685 XRead = cast<Value>(XLD);
3688 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3692 X.Var, IntCastTy->
getPointerTo(Addrspace),
"atomic.src.int.cast");
3694 Builder.CreateLoad(IntCastTy, XBCast,
X.IsVolatile,
"omp.atomic.load");
3697 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
3699 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
3702 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
3711 if (!updateToLocation(Loc))
3714 Type *XTy =
X.Var->getType();
3716 Type *XElemTy =
X.ElemTy;
3719 "OMP atomic write expected a scalar type");
3726 unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
3730 X.Var, IntCastTy->
getPointerTo(Addrspace),
"atomic.dst.int.cast");
3732 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
3737 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
3746 if (!updateToLocation(Loc))
3750 Type *XTy =
X.Var->getType();
3752 "OMP Atomic expects a pointer to target memory");
3753 Type *XElemTy =
X.ElemTy;
3756 "OMP atomic update expected a scalar type");
3759 "OpenMP atomic does not support LT or GT operations");
3762 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
3763 X.IsVolatile, IsXBinopExpr);
3764 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
3768 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
3772 return Builder.CreateAdd(Src1, Src2);
3774 return Builder.CreateSub(Src1, Src2);
3776 return Builder.CreateAnd(Src1, Src2);
3780 return Builder.CreateOr(Src1, Src2);
3782 return Builder.CreateXor(Src1, Src2);
3796 std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
3799 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr) {
3802 bool emitRMWOp =
false;
3810 emitRMWOp = XElemTy;
3813 emitRMWOp = (IsXBinopExpr && XElemTy);
3820 std::pair<Value *, Value *> Res;
3827 Res.second = Res.first;
3829 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
3831 unsigned Addrspace = cast<PointerType>(
X->getType())->getAddressSpace();
3837 Builder.CreateLoad(IntCastTy, XBCast,
X->getName() +
".atomic.load");
3846 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
3850 X->getName() +
".atomic.cont");
3854 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
3855 Builder.SetInsertPoint(ContBB);
3861 Value *NewAtomicIntAddr =
3864 :
Builder.CreateBitCast(NewAtomicAddr,
3866 Value *OldExprVal = PHI;
3869 OldExprVal =
Builder.CreateBitCast(PHI, XElemTy,
3870 X->getName() +
".atomic.fltCast");
3872 OldExprVal =
Builder.CreateIntToPtr(PHI, XElemTy,
3873 X->getName() +
".atomic.ptrCast");
3878 Builder.CreateStore(Upd, NewAtomicAddr);
3879 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicIntAddr);
3888 Result->setVolatile(VolatileX);
3889 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
3890 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
3892 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
3894 Res.first = OldExprVal;
3901 Builder.SetInsertPoint(ExitBB);
3903 Builder.SetInsertPoint(ExitTI);
3914 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr) {
3915 if (!updateToLocation(Loc))
3919 Type *XTy =
X.Var->getType();
3921 "OMP Atomic expects a pointer to target memory");
3922 Type *XElemTy =
X.ElemTy;
3925 "OMP atomic capture expected a scalar type");
3927 "OpenMP atomic does not support LT or GT operations");
3933 std::pair<Value *, Value *> Result =
3934 emitAtomicUpdate(AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
3935 X.IsVolatile, IsXBinopExpr);
3937 Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
3940 checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
3947 if (!updateToLocation(Loc))
3950 assert(
X.Var->getType()->isPointerTy() &&
3951 "OMP atomic expects a pointer to target memory");
3952 assert((
X.ElemTy->isIntegerTy() ||
X.ElemTy->isPointerTy()) &&
3953 "OMP atomic compare expected a integer scalar type");
3961 "Op should be either max or min at this point");
4007 return MapNamesArrayGlobal;
4012 void OpenMPIRBuilder::initializeTypes(
Module &M) {
4015 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
4016 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
4017 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
4018 VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
4019 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
4020 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
4021 VarName##Ptr = PointerType::getUnqual(VarName);
4022 #define OMP_STRUCT_TYPE(VarName, StructName, ...) \
4023 T = StructType::getTypeByName(Ctx, StructName); \
4025 T = StructType::create(Ctx, {__VA_ARGS__}, StructName); \
4027 VarName##Ptr = PointerType::getUnqual(T);
4028 #include "llvm/Frontend/OpenMP/OMPKinds.def"
4035 BlockSet.
insert(EntryBB);
4038 Worklist.push_back(EntryBB);
4039 while (!Worklist.empty()) {
4041 BlockVector.push_back(
BB);
4043 if (BlockSet.
insert(SuccBB).second)
4044 Worklist.push_back(SuccBB);
4048 void CanonicalLoopInfo::collectControlBlocks(
4055 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
4067 void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
4071 assert(isa<CmpInst>(CmpI) &&
"First inst must compare IV with TripCount");
4079 void CanonicalLoopInfo::mapIndVar(
4089 for (
Use &U : OldIV->
uses()) {
4090 auto *
User = dyn_cast<Instruction>(U.getUser());
4093 if (
User->getParent() == getCond())
4095 if (
User->getParent() == getLatch())
4097 ReplacableUses.push_back(&U);
4101 Value *NewIV = Updater(OldIV);
4104 for (
Use *U : ReplacableUses)
4125 "Preheader must terminate with unconditional branch");
4127 "Preheader must jump to header");
4131 "Header must terminate with unconditional branch");
4133 "Header must jump to exiting block");
4136 assert(
Cond->getSinglePredecessor() == Header &&
4137 "Exiting block only reachable from header");
4139 assert(isa<BranchInst>(
Cond->getTerminator()) &&
4140 "Exiting block must terminate with conditional branch");
4142 "Exiting block must have two successors");
4143 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(0) == Body &&
4144 "Exiting block's first successor jump to the body");
4145 assert(cast<BranchInst>(
Cond->getTerminator())->getSuccessor(1) == Exit &&
4146 "Exiting block's second successor must exit the loop");
4150 "Body only reachable from exiting block");
4155 "Latch must terminate with unconditional branch");
4164 "Exit block must terminate with unconditional branch");
4166 "Exit block must jump to after block");
4170 "After block only reachable from exit block");
4174 assert(IndVar &&
"Canonical induction variable not found?");
4176 "Induction variable must be an integer");
4178 "Induction variable must be a PHI in the loop header");
4179 assert(cast<PHINode>(IndVar)->getIncomingBlock(0) == Preheader);
4181 cast<ConstantInt>(cast<PHINode>(IndVar)->getIncomingValue(0))->
isZero());
4182 assert(cast<PHINode>(IndVar)->getIncomingBlock(1) == Latch);
4184 auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
4187 assert(cast<BinaryOperator>(NextIndVar)->getOperand(0) == IndVar);
4188 assert(cast<ConstantInt>(cast<BinaryOperator>(NextIndVar)->getOperand(1))
4192 assert(TripCount &&
"Loop trip count not found?");
4194 "Trip count and induction variable must have the same type");
4196 auto *CmpI = cast<CmpInst>(&
Cond->front());
4198 "Exit condition must be a signed less-than comparison");
4200 "Exit condition must compare the induction variable");
4202 "Exit condition must compare with the trip count");